mirror of
https://github.com/outbackdingo/UltraGrid.git
synced 2026-03-22 00:40:25 +00:00
FORMAT_RGBA: use precomputed alpha mask
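Performance optimization: do not compute the target alpha mask for every pixel. The mask depends only on rgb_shift, so it is constant within a conversion call and can be precomputed once before the pixel loop and passed to FORMAT_RGBA.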
This commit is contained in:
@@ -118,8 +118,10 @@ static_assert(sizeof(comp_type_t) * 8 >= COMP_BASE + 18, "comp_type_t not wide e
|
||||
#define FULL_HEAD(depth) ((255<<((depth)-8))-1)
|
||||
#define CLAMP_FULL(val, depth) CLAMP((val), FULL_FOOT(depth), FULL_HEAD(depth))
|
||||
|
||||
/// @todo the alpha mask can be precomputed and passed as a parameter
|
||||
#define FORMAT_RGBA(r, g, b, depth) (~(0xFFU << (rgb_shift[R]) | 0xFFU << (rgb_shift[G]) | 0xFFU << (rgb_shift[B])) | \
|
||||
/**
|
||||
* @param alpha_mask alpha mask already positioned at target bit offset
|
||||
*/
|
||||
#define FORMAT_RGBA(r, g, b, alpha_mask, depth) ((alpha_mask) | \
|
||||
(CLAMP_FULL((r), (depth)) << rgb_shift[R] | CLAMP_FULL((g), (depth)) << rgb_shift[G] | CLAMP_FULL((b), (depth)) << rgb_shift[B]))
|
||||
/// @}
|
||||
|
||||
|
||||
@@ -971,7 +971,8 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric
|
||||
{
|
||||
assert((uintptr_t) dst_buffer % 4 == 0);
|
||||
|
||||
UNUSED(rgb_shift);
|
||||
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
|
||||
|
||||
for(int y = 0; y < height; ++y) {
|
||||
unsigned char *src_y = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * y;
|
||||
unsigned char *src_cbcr = (unsigned char *) in_frame->data[1] + in_frame->linesize[1] * (y / 2);
|
||||
@@ -985,7 +986,7 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric
|
||||
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE;
|
||||
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE;
|
||||
if (rgba) {
|
||||
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, 8);
|
||||
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, alpha_mask, 8);
|
||||
dst += 4;
|
||||
} else {
|
||||
*dst++ = CLAMP_FULL(r, 8);
|
||||
@@ -995,7 +996,7 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric
|
||||
|
||||
y = (*src_y++ - 16) * Y_SCALE;
|
||||
if (rgba) {
|
||||
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, 8);
|
||||
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, alpha_mask, 8);
|
||||
dst += 4;
|
||||
} else {
|
||||
*dst++ = CLAMP_FULL(r, 8);
|
||||
@@ -1029,6 +1030,8 @@ static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, A
|
||||
static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, AVFrame * __restrict in_frame,
|
||||
int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba)
|
||||
{
|
||||
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
|
||||
|
||||
for(int y = 0; y < height / 2; ++y) {
|
||||
unsigned char *src_y1 = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * y * 2;
|
||||
unsigned char *src_y2 = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * (y * 2 + 1);
|
||||
@@ -1054,7 +1057,7 @@ static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, A
|
||||
g >>= COMP_BASE;\
|
||||
b >>= COMP_BASE;\
|
||||
if (rgba) {\
|
||||
*((uint32_t *)(void *) DST) = FORMAT_RGBA(r, g, b, 8);\
|
||||
*((uint32_t *)(void *) DST) = FORMAT_RGBA(r, g, b, alpha_mask, 8);\
|
||||
DST += 4;\
|
||||
} else {\
|
||||
*DST++ = CLAMP_FULL(r, 8);\
|
||||
@@ -1135,7 +1138,8 @@ static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __rest
|
||||
{
|
||||
assert((uintptr_t) dst_buffer % 4 == 0);
|
||||
|
||||
UNUSED(rgb_shift);
|
||||
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
|
||||
|
||||
for(int y = 0; y < height; ++y) {
|
||||
unsigned char *src_y = (unsigned char *) in_frame->data[0] + in_frame->linesize[0] * y;
|
||||
unsigned char *src_cb = (unsigned char *) in_frame->data[1] + in_frame->linesize[1] * y;
|
||||
@@ -1150,7 +1154,7 @@ static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __rest
|
||||
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE;
|
||||
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE;
|
||||
if (rgba) {
|
||||
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, 8);
|
||||
*((uint32_t *)(void *) dst) = FORMAT_RGBA(r, g, b, alpha_mask, 8);
|
||||
dst += 4;
|
||||
} else {
|
||||
*dst++ = CLAMP(r, 1, 254);
|
||||
@@ -1540,6 +1544,8 @@ static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __
|
||||
static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __restrict in_frame,
|
||||
int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba)
|
||||
{
|
||||
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
|
||||
|
||||
for (int y = 0; y < height; y++) {
|
||||
uint16_t *src_y = (uint16_t *)(void *)(in_frame->data[0] + in_frame->linesize[0] * y);
|
||||
uint16_t *src_cb = (uint16_t *)(void *)(in_frame->data[1] + in_frame->linesize[1] * y);
|
||||
@@ -1554,7 +1560,7 @@ static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __
|
||||
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE;
|
||||
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE;
|
||||
if (rgba) {
|
||||
*(uint32_t *)(void *) dst = FORMAT_RGBA(r, g, b, 8);
|
||||
*(uint32_t *)(void *) dst = FORMAT_RGBA(r, g, b, alpha_mask, 8);
|
||||
dst += 4;
|
||||
} else {
|
||||
*dst++ = CLAMP_FULL(r, 8);
|
||||
|
||||
Reference in New Issue
Block a user