From fdca666d94be102af174fc86e23be5c9411fe541 Mon Sep 17 00:00:00 2001 From: Martin Pulec Date: Mon, 7 Nov 2022 09:18:08 +0100 Subject: [PATCH] FORMAT_RGBA: use precomputed alpha mask performance optimization - do not compute the target alpha mask for every pixel because it is constant and may be precomputed --- src/color.h | 6 ++++-- src/libavcodec/from_lavc_vid_conv.c | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/color.h b/src/color.h index 8e07da803..44f8ee608 100644 --- a/src/color.h +++ b/src/color.h @@ -118,8 +118,10 @@ static_assert(sizeof(comp_type_t) * 8 >= COMP_BASE + 18, "comp_type_t not wide e #define FULL_HEAD(depth) ((255<<((depth)-8))-1) #define CLAMP_FULL(val, depth) CLAMP((val), FULL_FOOT(depth), FULL_HEAD(depth)) -/// @todo the alpha mask can be precomputed and passed as a parameter -#define FORMAT_RGBA(r, g, b, depth) (~(0xFFU << (rgb_shift[R]) | 0xFFU << (rgb_shift[G]) | 0xFFU << (rgb_shift[B])) | \ +/** + * @param alpha_mask alpha mask already positioned at target bit offset + */ +#define FORMAT_RGBA(r, g, b, alpha_mask, depth) ((alpha_mask) | \ (CLAMP_FULL((r), (depth)) << rgb_shift[R] | CLAMP_FULL((g), (depth)) << rgb_shift[G] | CLAMP_FULL((b), (depth)) << rgb_shift[B])) /// @} diff --git a/src/libavcodec/from_lavc_vid_conv.c b/src/libavcodec/from_lavc_vid_conv.c index 16a4cb17b..f54e5e590 100644 --- a/src/libavcodec/from_lavc_vid_conv.c +++ b/src/libavcodec/from_lavc_vid_conv.c @@ -971,7 +971,8 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric { assert((uintptr_t) dst_buffer % 4 == 0); - UNUSED(rgb_shift); + uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]); + for(int y = 0; y < height; ++y) { unsigned char *src_y = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * y; unsigned char *src_cbcr = (unsigned char *) in_frame->data[1] + in_frame->linesize[1] * (y / 2); @@ -985,7 +986,7 @@ static 
inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE; comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE; if (rgba) { - *((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, 8); + *((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, alpha_mask, 8); dst += 4; } else { *dst++ = CLAMP_FULL(r, 8); @@ -995,7 +996,7 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric y = (*src_y++ - 16) * Y_SCALE; if (rgba) { - *((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, 8); + *((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, alpha_mask, 8); dst += 4; } else { *dst++ = CLAMP_FULL(r, 8); @@ -1029,6 +1030,8 @@ static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, A static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, AVFrame * __restrict in_frame, int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba) { + uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]); + for(int y = 0; y < height / 2; ++y) { unsigned char *src_y1 = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * y * 2; unsigned char *src_y2 = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * (y * 2 + 1); @@ -1054,7 +1057,7 @@ static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, A g >>= COMP_BASE;\ b >>= COMP_BASE;\ if (rgba) {\ - *((uint32_t *)(void *) DST) = FORMAT_RGBA(r, g, b, 8);\ + *((uint32_t *)(void *) DST) = FORMAT_RGBA(r, g, b, alpha_mask, 8);\ DST += 4;\ } else {\ *DST++ = CLAMP_FULL(r, 8);\ @@ -1135,7 +1138,8 @@ static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __rest { assert((uintptr_t) dst_buffer % 4 == 0); - UNUSED(rgb_shift); + uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]); + for(int y = 0; y < height; ++y) { unsigned 
char *src_y = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * y; unsigned char *src_cb = (unsigned char *) in_frame->data[1] + in_frame->linesize[1] * y; @@ -1150,7 +1154,7 @@ static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __rest comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE; comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE; if (rgba) { - *((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, 8); + *((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, alpha_mask, 8); dst += 4; } else { *dst++ = CLAMP(r, 1, 254); @@ -1540,6 +1544,8 @@ static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __ static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __restrict in_frame, int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba) { + uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]); + for (int y = 0; y < height; y++) { uint16_t *src_y = (uint16_t *)(void *)(in_frame->data[0] + in_frame->linesize[0] * y); uint16_t *src_cb = (uint16_t *)(void *)(in_frame->data[1] + in_frame->linesize[1] * y); @@ -1554,7 +1560,7 @@ static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __ comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE; comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE; if (rgba) { - *(uint32_t *)(void *) dst = FORMAT_RGBA(r, g, b, 8); + *(uint32_t *)(void *) dst = FORMAT_RGBA(r, g, b, alpha_mask, 8); dst += 4; } else { *dst++ = CLAMP_FULL(r, 8);