FORMAT_RGBA: use precomputed alpha mask

Performance optimization: do not compute the target alpha mask for
every pixel, because it is constant and can be precomputed once.
This commit is contained in:
Martin Pulec
2022-11-07 09:18:08 +01:00
parent b8d39ee5d2
commit fdca666d94
2 changed files with 17 additions and 9 deletions

View File

@@ -118,8 +118,10 @@ static_assert(sizeof(comp_type_t) * 8 >= COMP_BASE + 18, "comp_type_t not wide e
#define FULL_HEAD(depth) ((255<<((depth)-8))-1)
#define CLAMP_FULL(val, depth) CLAMP((val), FULL_FOOT(depth), FULL_HEAD(depth))
/// @todo the alpha mask can be precomputed and passed as a parameter
#define FORMAT_RGBA(r, g, b, depth) (~(0xFFU << (rgb_shift[R]) | 0xFFU << (rgb_shift[G]) | 0xFFU << (rgb_shift[B])) | \
/**
 * Combines three color components and an alpha mask into one packed value.
 *
 * Each of r, g and b is clamped with CLAMP_FULL() for the given depth, shifted
 * left by rgb_shift[R], rgb_shift[G] and rgb_shift[B] respectively, and the
 * three results are OR-ed together with alpha_mask.
 *
 * @note rgb_shift is not a macro parameter; an array with that name (indexable
 *       by R, G and B) must be in scope wherever the macro is expanded.
 *
 * @param r          red component, clamped before shifting
 * @param g          green component, clamped before shifting
 * @param b          blue component, clamped before shifting
 * @param alpha_mask alpha mask already positioned at target bit offset
 * @param depth      bit depth passed to CLAMP_FULL(); it selects the clamp
 *                   range FULL_FOOT(depth)..FULL_HEAD(depth)
 */
#define FORMAT_RGBA(r, g, b, alpha_mask, depth) ((alpha_mask) | \
(CLAMP_FULL((r), (depth)) << rgb_shift[R] | CLAMP_FULL((g), (depth)) << rgb_shift[G] | CLAMP_FULL((b), (depth)) << rgb_shift[B]))
/// @}

View File

@@ -971,7 +971,8 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric
{
assert((uintptr_t) dst_buffer % 4 == 0);
UNUSED(rgb_shift);
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
for(int y = 0; y < height; ++y) {
unsigned char *src_y = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * y;
unsigned char *src_cbcr = (unsigned char *) in_frame->data[1] + in_frame->linesize[1] * (y / 2);
@@ -985,7 +986,7 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE;
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE;
if (rgba) {
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, 8);
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, alpha_mask, 8);
dst += 4;
} else {
*dst++ = CLAMP_FULL(r, 8);
@@ -995,7 +996,7 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric
y = (*src_y++ - 16) * Y_SCALE;
if (rgba) {
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, 8);
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, alpha_mask, 8);
dst += 4;
} else {
*dst++ = CLAMP_FULL(r, 8);
@@ -1029,6 +1030,8 @@ static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, A
static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, AVFrame * __restrict in_frame,
int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba)
{
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
for(int y = 0; y < height / 2; ++y) {
unsigned char *src_y1 = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * y * 2;
unsigned char *src_y2 = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * (y * 2 + 1);
@@ -1054,7 +1057,7 @@ static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, A
g >>= COMP_BASE;\
b >>= COMP_BASE;\
if (rgba) {\
*((uint32_t *)(void *) DST) = FORMAT_RGBA(r, g, b, 8);\
*((uint32_t *)(void *) DST) = FORMAT_RGBA(r, g, b, alpha_mask, 8);\
DST += 4;\
} else {\
*DST++ = CLAMP_FULL(r, 8);\
@@ -1135,7 +1138,8 @@ static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __rest
{
assert((uintptr_t) dst_buffer % 4 == 0);
UNUSED(rgb_shift);
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
for(int y = 0; y < height; ++y) {
unsigned char *src_y = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * y;
unsigned char *src_cb = (unsigned char *) in_frame->data[1] + in_frame->linesize[1] * y;
@@ -1150,7 +1154,7 @@ static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __rest
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE;
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE;
if (rgba) {
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, 8);
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, alpha_mask, 8);
dst += 4;
} else {
*dst++ = CLAMP(r, 1, 254);
@@ -1540,6 +1544,8 @@ static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __
static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __restrict in_frame,
int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba)
{
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
for (int y = 0; y < height; y++) {
uint16_t *src_y = (uint16_t *)(void *)(in_frame->data[0] + in_frame->linesize[0] * y);
uint16_t *src_cb = (uint16_t *)(void *)(in_frame->data[1] + in_frame->linesize[1] * y);
@@ -1554,7 +1560,7 @@ static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE;
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE;
if (rgba) {
*(uint32_t *)(void *) dst = FORMAT_RGBA(r, g, b, 8);
*(uint32_t *)(void *) dst = FORMAT_RGBA(r, g, b, alpha_mask, 8);
dst += 4;
} else {
*dst++ = CLAMP_FULL(r, 8);