diff --git a/src/color.h b/src/color.h index ccf5a7a28..c6ff2774d 100644 --- a/src/color.h +++ b/src/color.h @@ -68,11 +68,13 @@ static_assert(sizeof(comp_type_t) * 8 >= COMP_BASE + 18, "comp_type_t not wide e #define KG(kr,kb) (1.-kr-kb) #ifdef YCBCR_FULL -#define Y_LIMIT 1.0 -#define CBCR_LIMIT 1.0 +#define Y_LIMIT(out_depth) 1.0 +#define CBCR_LIMIT(out_depth) 1.0 #else -#define Y_LIMIT (219.0/255.0) -#define CBCR_LIMIT (224.0/255.0) +#define Y_LIMIT(out_depth) \ + (219. * (1 << ((out_depth) - 8)) / ((1 << (out_depth)) - 1)) +#define CBCR_LIMIT(out_depth) \ + (224. * (1 << ((out_depth) - 8)) / ((1 << (out_depth)) - 1)) #endif // !defined YCBCR_FULL #define KR_709 .212639 @@ -85,43 +87,76 @@ static_assert(sizeof(comp_type_t) * 8 >= COMP_BASE + 18, "comp_type_t not wide e #define KG_709 KG(KR_709,KB_709) #define D (2.*(KR_709+KG_709)) #define E (2.*(1.-KR_709)) -#define Y_R ((comp_type_t) ((KR_709*Y_LIMIT) * (1<> (COMP_BASE-10+depth); - comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> (COMP_BASE-10+depth); - comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> (COMP_BASE-10+depth); + comp_type_t r = YCBCR_TO_R_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-10+depth); + comp_type_t g = YCBCR_TO_G_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-10+depth); + comp_type_t b = YCBCR_TO_B_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-10+depth); // r g b is now on 10 bit scale r = CLAMP_FULL(r, 10); @@ -339,12 +339,12 @@ static inline void yuv444pXXle_to_r12l(int depth, char * __restrict dst_buffer, comp_type_t g[8]; comp_type_t b[8]; OPTIMIZED_FOR (int j = 0; j < 8; ++j) { - comp_type_t y = (Y_SCALE * (*src_y++ - (1<<(depth-4)))); + comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4)))); comp_type_t cr = *src_cr++ - (1<<(depth-1)); comp_type_t cb = *src_cb++ - (1<<(depth-1)); - comp_type_t rr = YCBCR_TO_R_709_SCALED(y, cb, cr) >> (COMP_BASE-12+depth); - comp_type_t gg = YCBCR_TO_G_709_SCALED(y, cb, cr) >> (COMP_BASE-12+depth); - comp_type_t bb = YCBCR_TO_B_709_SCALED(y, cb, 
cr) >> (COMP_BASE-12+depth); + comp_type_t rr = YCBCR_TO_R_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-12+depth); + comp_type_t gg = YCBCR_TO_G_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-12+depth); + comp_type_t bb = YCBCR_TO_B_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-12+depth); r[j] = CLAMP_FULL(rr, 12); g[j] = CLAMP_FULL(gg, 12); b[j] = CLAMP_FULL(bb, 12); @@ -430,13 +430,13 @@ static inline void yuv444pXXle_to_rg48(int depth, char * __restrict dst_buffer, uint16_t *dst = (uint16_t *)(void *) (dst_buffer + y * pitch); OPTIMIZED_FOR (int x = 0; x < width; ++x) { - comp_type_t y = (Y_SCALE * (*src_y++ - (1<<(depth-4)))); + comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4)))); comp_type_t cr = *src_cr++ - (1<<(depth-1)); comp_type_t cb = *src_cb++ - (1<<(depth-1)); - comp_type_t r = YCBCR_TO_R_709_SCALED(y, cb, cr) >> (COMP_BASE-16+depth); - comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> (COMP_BASE-16+depth); - comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> (COMP_BASE-16+depth); + comp_type_t r = YCBCR_TO_R_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-16+depth); + comp_type_t g = YCBCR_TO_G_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-16+depth); + comp_type_t b = YCBCR_TO_B_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-16+depth); // r g b is now on 16 bit scale *dst++ = CLAMP_FULL(r, 16); @@ -1026,6 +1026,9 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restrict in_frame, int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba) { + enum { + S_DEPTH = 8, + }; assert((uintptr_t) dst_buffer % 4 == 0); uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]); @@ -1038,10 +1041,10 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric OPTIMIZED_FOR (int x = 0; x < width / 2; ++x) { comp_type_t cb = *src_cbcr++ - 128; comp_type_t cr = *src_cbcr++ 
- 128; - comp_type_t y = (*src_y++ - 16) * Y_SCALE; - comp_type_t r = YCBCR_TO_R_709_SCALED(y, cb, cr) >> COMP_BASE; - comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE; - comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE; + comp_type_t y = (*src_y++ - 16) * Y_SCALE(S_DEPTH); + comp_type_t r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE; + comp_type_t g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE; + comp_type_t b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE; if (rgba) { *((uint32_t *)(void *) dst) = MK_RGBA(r, g, b, alpha_mask, 8); dst += 4; @@ -1051,7 +1054,7 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric *dst++ = CLAMP_FULL(b, 8); } - y = (*src_y++ - 16) * Y_SCALE; + y = (*src_y++ - 16) * Y_SCALE(S_DEPTH); if (rgba) { *((uint32_t *)(void *) dst) = MK_RGBA(r, g, b, alpha_mask, 8); dst += 4; @@ -1087,6 +1090,9 @@ static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, A static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, AVFrame * __restrict in_frame, int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba) { + enum { + S_DEPTH = 8, + }; uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]); for(int y = 0; y < height / 2; ++y) { @@ -1126,32 +1132,32 @@ static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, A OPTIMIZED_FOR (int x = 0; x < width / 2; ++x) { comp_type_t cb = *src_cb1++ - 128; comp_type_t cr = *src_cr1++ - 128; - comp_type_t y = (*src_y1++ - 16) * Y_SCALE; - comp_type_t r = YCBCR_TO_R_709_SCALED(y, cb, cr); - comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr); - comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr); + comp_type_t y = (*src_y1++ - 16) * Y_SCALE(S_DEPTH); + comp_type_t r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr); + comp_type_t g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr); + comp_type_t b = 
YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr); WRITE_RES_YUV8P_TO_RGB(dst1) - y = (*src_y1++ - 16) * Y_SCALE; - r = YCBCR_TO_R_709_SCALED(y, cb, cr); - g = YCBCR_TO_G_709_SCALED(y, cb, cr); - b = YCBCR_TO_B_709_SCALED(y, cb, cr); + y = (*src_y1++ - 16) * Y_SCALE(S_DEPTH); + r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr); + g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr); + b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr); WRITE_RES_YUV8P_TO_RGB(dst1) if (subsampling == 422) { cb = *src_cb2++ - 128; cr = *src_cr2++ - 128; } - y = (*src_y2++ - 16) * Y_SCALE; - r = YCBCR_TO_R_709_SCALED(y, cb, cr); - g = YCBCR_TO_G_709_SCALED(y, cb, cr); - b = YCBCR_TO_B_709_SCALED(y, cb, cr); + y = (*src_y2++ - 16) * Y_SCALE(S_DEPTH); + r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr); + g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr); + b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr); WRITE_RES_YUV8P_TO_RGB(dst2) - y = (*src_y2++ - 16) * Y_SCALE; - r = YCBCR_TO_R_709_SCALED(y, cb, cr); - g = YCBCR_TO_G_709_SCALED(y, cb, cr); - b = YCBCR_TO_B_709_SCALED(y, cb, cr); + y = (*src_y2++ - 16) * Y_SCALE(S_DEPTH); + r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr); + g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr); + b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr); WRITE_RES_YUV8P_TO_RGB(dst2) } } @@ -1193,6 +1199,9 @@ static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __rest static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __restrict in_frame, int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba) { + enum { + S_DEPTH = 8, + }; assert((uintptr_t) dst_buffer % 4 == 0); uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]); @@ -1206,10 +1215,10 @@ static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __rest OPTIMIZED_FOR (int x = 0; x < width; ++x) { int cb = *src_cb++ - 128; int cr = *src_cr++ - 128; - int y = *src_y++ * Y_SCALE; - comp_type_t r = 
YCBCR_TO_R_709_SCALED(y, cb, cr) >> COMP_BASE; - comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE; - comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE; + int y = *src_y++ * Y_SCALE(S_DEPTH); + comp_type_t r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE; + comp_type_t g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE; + comp_type_t b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE; if (rgba) { *((uint32_t *)(void *) dst) = MK_RGBA(r, g, b, alpha_mask, 8); dst += 4; @@ -1539,6 +1548,9 @@ static inline void yuvp10le_to_rgb(int subsampling, char * __restrict dst_buffer static inline void yuvp10le_to_rgb(int subsampling, char * __restrict dst_buffer, AVFrame * __restrict frame, int width, int height, int pitch, const int * __restrict rgb_shift, int out_bit_depth) { + enum { + S_DEPTH = 10, + }; assert((uintptr_t) dst_buffer % 4 == 0); assert((uintptr_t) frame->linesize[0] % 2 == 0); assert((uintptr_t) frame->linesize[1] % 2 == 0); @@ -1571,9 +1583,9 @@ static inline void yuvp10le_to_rgb(int subsampling, char * __restrict dst_buffer OPTIMIZED_FOR (int x = 0; x < width / 2; ++x) { comp_type_t cr = *src_cr1++ - (1<<9); comp_type_t cb = *src_cb1++ - (1<<9); - comp_type_t rr = YCBCR_TO_R_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp)); - comp_type_t gg = YCBCR_TO_G_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp)); - comp_type_t bb = YCBCR_TO_B_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp)); + comp_type_t rr = YCBCR_TO_R_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp)); + comp_type_t gg = YCBCR_TO_G_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp)); + comp_type_t bb = YCBCR_TO_B_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp)); # define WRITE_RES_YUV10P_TO_RGB(Y, DST) {\ comp_type_t r = Y + rr;\ @@ -1596,24 +1608,24 @@ static inline void yuvp10le_to_rgb(int subsampling, char * __restrict dst_buffer }\ } - comp_type_t y1 = (Y_SCALE * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); + 
comp_type_t y1 = (Y_SCALE(S_DEPTH) * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); WRITE_RES_YUV10P_TO_RGB(y1, dst1) - comp_type_t y11 = (Y_SCALE * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); + comp_type_t y11 = (Y_SCALE(S_DEPTH) * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); WRITE_RES_YUV10P_TO_RGB(y11, dst1) if (subsampling == 422) { cr = *src_cr2++ - (1<<9); cb = *src_cb2++ - (1<<9); - rr = YCBCR_TO_R_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp)); - gg = YCBCR_TO_G_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp)); - bb = YCBCR_TO_B_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp)); + rr = YCBCR_TO_R_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp)); + gg = YCBCR_TO_G_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp)); + bb = YCBCR_TO_B_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp)); } - comp_type_t y2 = (Y_SCALE * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); + comp_type_t y2 = (Y_SCALE(S_DEPTH) * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); WRITE_RES_YUV10P_TO_RGB(y2, dst2) - comp_type_t y22 = (Y_SCALE * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); + comp_type_t y22 = (Y_SCALE(S_DEPTH) * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); WRITE_RES_YUV10P_TO_RGB(y22, dst2) } } @@ -1656,13 +1668,16 @@ static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __ comp_type_t cb = *src_cb++ - (1 << (S_DEPTH - 1)); comp_type_t cr = *src_cr++ - (1 << (S_DEPTH - 1)); comp_type_t y = - (*src_y++ - (1 << (S_DEPTH - 4))) * Y_SCALE; + (*src_y++ - (1 << (S_DEPTH - 4))) * Y_SCALE(S_DEPTH); comp_type_t r = - YCBCR_TO_R_709_SCALED(y, cb, cr) >> (COMP_BASE + 2); + YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr) >> + (COMP_BASE + 2); comp_type_t g = - YCBCR_TO_G_709_SCALED(y, cb, cr) >> (COMP_BASE + 2); + YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr) >> + (COMP_BASE + 2); comp_type_t b = - YCBCR_TO_B_709_SCALED(y, cb, cr) >> (COMP_BASE + 2); + YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr) >> + (COMP_BASE 
+ 2); if (rgba) { *(uint32_t *)(void *) dst = MK_RGBA(r, g, b, alpha_mask, 8); dst += 4; diff --git a/src/libavcodec/to_lavc_vid_conv.c b/src/libavcodec/to_lavc_vid_conv.c index 1b5102990..b56a70325 100644 --- a/src/libavcodec/to_lavc_vid_conv.c +++ b/src/libavcodec/to_lavc_vid_conv.c @@ -651,6 +651,10 @@ static inline void r10k_to_yuv42Xp10le(AVFrame * __restrict out_frame, const uns assert((uintptr_t) out_frame->linesize[1] % 2 == 0); assert((uintptr_t) out_frame->linesize[2] % 2 == 0); + enum { + D_DEPTH = 10, + }; + const int src_linesize = vc_get_linesize(width, R10k); for(int y = 0; y < height; y++) { uint16_t *dst_y = (uint16_t *)(void *) (out_frame->data[0] + out_frame->linesize[0] * y); @@ -663,9 +667,18 @@ static inline void r10k_to_yuv42Xp10le(AVFrame * __restrict out_frame, const uns comp_type_t g = (src[1] & 0x3f ) << 4 | src[2] >> 4; comp_type_t b = (src[2] & 0x0f) << 6 | src[3] >> 2; - comp_type_t res_y = (RGB_TO_Y_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-4)); - comp_type_t res_cb = (RGB_TO_CB_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-1)); - comp_type_t res_cr = (RGB_TO_CR_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-1)); + comp_type_t res_y = + (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> + (COMP_BASE)) + + (1 << (D_DEPTH - 4)); + comp_type_t res_cb = + (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> + (COMP_BASE)) + + (1 << (D_DEPTH - 1)); + comp_type_t res_cr = + (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> + (COMP_BASE)) + + (1 << (D_DEPTH - 1)); dst_y[x * 2] = CLAMP_LIMITED_Y(res_y, 10); src += 4; @@ -674,9 +687,15 @@ static inline void r10k_to_yuv42Xp10le(AVFrame * __restrict out_frame, const uns g = (src[1] & 0x3f ) << 4 | src[2] >> 4; b = (src[2] & 0x0f) << 6 | src[3] >> 2; - res_y = (RGB_TO_Y_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-4)); - res_cb += (RGB_TO_CB_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-1)); - res_cr += (RGB_TO_CR_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-1)); + res_y = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> + 
(COMP_BASE)) + + (1 << (D_DEPTH - 4)); + res_cb += (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> + (COMP_BASE)) + + (1 << (D_DEPTH - 1)); + res_cr += (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> + (COMP_BASE)) + + (1 << (D_DEPTH - 1)); res_cb /= 2; res_cr /= 2; @@ -721,7 +740,7 @@ static void r10k_to_yuv422p10le(AVFrame * __restrict out_frame, const unsigned c static inline void r10k_to_yuv444pXXle(int depth, AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) __attribute__((always_inline)); #endif -static inline void r10k_to_yuv444pXXle(int depth, AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) +static inline void r10k_to_yuv444pXXle(int out_depth, AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) { assert((uintptr_t) out_frame->linesize[0] % 2 == 0); assert((uintptr_t) out_frame->linesize[1] % 2 == 0); @@ -738,13 +757,22 @@ static inline void r10k_to_yuv444pXXle(int depth, AVFrame * __restrict out_frame comp_type_t g = (src[1] & 0x3F ) << 4 | src[2] >> 4; comp_type_t b = (src[2] & 0x0F) << 6 | src[3] >> 2; - comp_type_t res_y = (RGB_TO_Y_709_SCALED(r, g, b) >> (COMP_BASE+10-depth)) + (1<<(depth-4)); - comp_type_t res_cb = (RGB_TO_CB_709_SCALED(r, g, b) >> (COMP_BASE+10-depth)) + (1<<(depth-1)); - comp_type_t res_cr = (RGB_TO_CR_709_SCALED(r, g, b) >> (COMP_BASE+10-depth)) + (1<<(depth-1)); + comp_type_t res_y = + (RGB_TO_Y_709_SCALED(out_depth, r, g, b) >> + (COMP_BASE + 10 - out_depth)) + + (1 << (out_depth - 4)); + comp_type_t res_cb = + (RGB_TO_CB_709_SCALED(out_depth, r, g, b) >> + (COMP_BASE + 10 - out_depth)) + + (1 << (out_depth - 1)); + comp_type_t res_cr = + (RGB_TO_CR_709_SCALED(out_depth, r, g, b) >> + (COMP_BASE + 10 - out_depth)) + + (1 << (out_depth - 1)); - *dst_y++ = CLAMP(res_y, 1<<(depth-4), 235 * (1<<(depth-8))); - *dst_cb++ = CLAMP(res_cb, 1<<(depth-4), 240 * (1<<(depth-8))); - *dst_cr++ = CLAMP(res_cr, 
1<<(depth-4), 240 * (1<<(depth-8))); + *dst_y++ = CLAMP_LIMITED_Y(res_y, out_depth); + *dst_cb++ = CLAMP_LIMITED_CBCR(res_cb, out_depth); + *dst_cr++ = CLAMP_LIMITED_CBCR(res_cr, out_depth); src += 4; } } @@ -777,12 +805,18 @@ static inline void r12l_to_yuv444pXXle(int depth, AVFrame * __restrict out_frame assert((uintptr_t) out_frame->linesize[2] % 2 == 0); #define WRITE_RES \ - res_y = (RGB_TO_Y_709_SCALED(r, g, b) >> (COMP_BASE+12-depth)) + (1<<(depth-4));\ - res_cb = (RGB_TO_CB_709_SCALED(r, g, b) >> (COMP_BASE+12-depth)) + (1<<(depth-1));\ - res_cr = (RGB_TO_CR_709_SCALED(r, g, b) >> (COMP_BASE+12-depth)) + (1<<(depth-1));\ - *dst_y++ = CLAMP(res_y, 1<<(depth-4), 235 * (1<<(depth-8)));\ - *dst_cb++ = CLAMP(res_cb, 1<<(depth-4), 240 * (1<<(depth-8)));\ - *dst_cr++ = CLAMP(res_cr, 1<<(depth-4), 240 * (1<<(depth-8))); + res_y = (RGB_TO_Y_709_SCALED(depth, r, g, b) >> \ + (COMP_BASE + 12 - depth)) + \ + (1 << (depth - 4)); \ + res_cb = (RGB_TO_CB_709_SCALED(depth, r, g, b) >> \ + (COMP_BASE + 12 - depth)) + \ + (1 << (depth - 1)); \ + res_cr = (RGB_TO_CR_709_SCALED(depth, r, g, b) >> \ + (COMP_BASE + 12 - depth)) + \ + (1 << (depth - 1)); \ + *dst_y++ = CLAMP_LIMITED_Y(res_y, depth);\ + *dst_cb++ = CLAMP_LIMITED_CBCR(res_cb, depth);\ + *dst_cr++ = CLAMP_LIMITED_CBCR(res_cr, depth); const int src_linesize = vc_get_linesize(width, R12L); for (int y = 0; y < height; ++y) { @@ -905,13 +939,22 @@ static inline void rg48_to_yuv444pXXle(int depth, AVFrame * __restrict out_frame comp_type_t g = *src++; comp_type_t b = *src++; - comp_type_t res_y = (RGB_TO_Y_709_SCALED(r, g, b) >> (COMP_BASE+16-depth)) + (1<<(depth-4)); - comp_type_t res_cb = (RGB_TO_CB_709_SCALED(r, g, b) >> (COMP_BASE+16-depth)) + (1<<(depth-1)); - comp_type_t res_cr = (RGB_TO_CR_709_SCALED(r, g, b) >> (COMP_BASE+16-depth)) + (1<<(depth-1)); + comp_type_t res_y = + (RGB_TO_Y_709_SCALED(depth, r, g, b) >> + (COMP_BASE + 16 - depth)) + + (1 << (depth - 4)); + comp_type_t res_cb = + 
(RGB_TO_CB_709_SCALED(depth, r, g, b) >> + (COMP_BASE + 16 - depth)) + + (1 << (depth - 1)); + comp_type_t res_cr = + (RGB_TO_CR_709_SCALED(depth, r, g, b) >> + (COMP_BASE + 16 - depth)) + + (1 << (depth - 1)); - *dst_y++ = CLAMP(res_y, 1<<(depth-4), 235 * (1<<(depth-8))); - *dst_cb++ = CLAMP(res_cb, 1<<(depth-4), 240 * (1<<(depth-8))); - *dst_cr++ = CLAMP(res_cr, 1<<(depth-4), 240 * (1<<(depth-8))); + *dst_y++ = CLAMP_LIMITED_Y(res_y, depth); + *dst_cb++ = CLAMP_LIMITED_CBCR(res_cb, depth); + *dst_cr++ = CLAMP_LIMITED_CBCR(res_cr, depth); } } } @@ -956,13 +999,13 @@ rgb_to_yuv444p(AVFrame *__restrict out_frame, const comp_type_t b = *src++; const comp_type_t res_y = - (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + + (RGB_TO_Y_709_SCALED(DEPTH, r, g, b) >> COMP_BASE) + (1 << (DEPTH - 4)); const comp_type_t res_cb = - (RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE) + + (RGB_TO_CB_709_SCALED(DEPTH, r, g, b) >> COMP_BASE) + (1 << (DEPTH - 1)); const comp_type_t res_cr = - (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE) + + (RGB_TO_CR_709_SCALED(DEPTH, r, g, b) >> COMP_BASE) + (1 << (DEPTH - 1)); *dst_y++ = CLAMP_LIMITED_Y(res_y, DEPTH); diff --git a/src/pixfmt_conv.c b/src/pixfmt_conv.c index 63fd5a1b5..521530216 100644 --- a/src/pixfmt_conv.c +++ b/src/pixfmt_conv.c @@ -298,6 +298,9 @@ vc_copyliner10ktoRG48(unsigned char * __restrict dst, const unsigned char * __re static void vc_copyliner10ktoY416(unsigned char * __restrict dst, const unsigned char * __restrict src, int dstlen, int rshift, int gshift, int bshift) { UNUSED(rshift), UNUSED(gshift), UNUSED(bshift); + enum { + D_DEPTH = 16, + }; assert((uintptr_t) dst % 2 == 0); uint16_t *d = (void *) dst; OPTIMIZED_FOR (int x = 0; x < dstlen; x += 8) { @@ -309,12 +312,18 @@ static void vc_copyliner10ktoY416(unsigned char * __restrict dst, const unsigned r = byte1 << 8U | (byte2 & 0xC0U); g = (byte2 & 0x3FU) << 10U | (byte3 & 0xF0U) << 2U; b = (byte3 & 0xFU) << 12U | (byte4 & 0xFCU) << 4U; - comp_type_t u = 
(RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15); - *d++ = CLAMP_LIMITED_CBCR(u, 16); - comp_type_t y = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + (1<<12); - *d++ = CLAMP_LIMITED_Y(y, 16); - comp_type_t v = (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15); - *d++ = CLAMP_LIMITED_CBCR(v, 16); + comp_type_t u = + (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + + (1 << (D_DEPTH - 1)); + *d++ = CLAMP_LIMITED_CBCR(u, D_DEPTH); + comp_type_t y = + (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + + (1 << (D_DEPTH - 4)); + *d++ = CLAMP_LIMITED_Y(y, D_DEPTH); + comp_type_t v = + (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + + (1 << (D_DEPTH - 1)); /* Cr is offset to mid-range, same as Cb (was 1<<15) */ + *d++ = CLAMP_LIMITED_CBCR(v, D_DEPTH); *d++ = 0xFFFFU; } } @@ -1428,17 +1437,25 @@ static void vc_copylineR12LtoRG48(unsigned char * __restrict dst, const unsigned static void vc_copylineR12LtoY416(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift, int gshift, int bshift) { + enum { + D_DEPTH = 16, + }; UNUSED(rshift), UNUSED(gshift), UNUSED(bshift); assert((uintptr_t) dst % sizeof(uint16_t) == 0); uint16_t *d = (void *) dst; + #define WRITE_RES \ - u = (RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15); \ - *d++ = CLAMP_LIMITED_CBCR(u, 16); \ - y = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + (1<<12); \ - *d++ = CLAMP_LIMITED_Y(y, 16); \ - v = (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15); \ - *d++ = CLAMP_LIMITED_CBCR(v, 16); \ - *d++ = 0xFFFFU; + u = (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + \ + (1 << (D_DEPTH - 1)); \ + *d++ = CLAMP_LIMITED_CBCR(u, D_DEPTH); \ + y = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + \ + (1 << (D_DEPTH - 4)); \ + *d++ = CLAMP_LIMITED_Y(y, D_DEPTH); \ + v = (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + \ + (1 << (D_DEPTH - 1)); \ + *d++ = CLAMP_LIMITED_CBCR(v, D_DEPTH); \ + *d++ = 0xFFFFU; + OPTIMIZED_FOR (int x = 0; x < dst_len; x += 64) { comp_type_t r, g, b; 
comp_type_t y, u, v; @@ -1498,21 +1515,21 @@ vc_copylineR12LtoUYVY(unsigned char *__restrict dst, uint8_t *d = (void *) dst; #define WRITE_RES \ { \ - comp_type_t u = ((RGB_TO_CB_709_SCALED(r1, g1, b1) + \ - RGB_TO_CB_709_SCALED(r2, g2, b2)) >> \ + comp_type_t u = ((RGB_TO_CB_709_SCALED(D_DPTH, r1, g1, b1) + \ + RGB_TO_CB_709_SCALED(D_DPTH, r2, g2, b2)) >> \ (COMP_BASE + D_DPTH + 1)) + \ COFF; \ *d++ = CLAMP_LIMITED_CBCR(u, D_DPTH); \ - comp_type_t y = (RGB_TO_Y_709_SCALED(r1, g1, b1) >> \ + comp_type_t y = (RGB_TO_Y_709_SCALED(D_DPTH, r1, g1, b1) >> \ (COMP_BASE + D_DPTH)) + \ YOFF; \ *d++ = CLAMP_LIMITED_Y(y, D_DPTH); \ - comp_type_t v = ((RGB_TO_CR_709_SCALED(r1, g1, b1) + \ - RGB_TO_CR_709_SCALED(r2, g2, b2)) >> \ + comp_type_t v = ((RGB_TO_CR_709_SCALED(D_DPTH, r1, g1, b1) + \ + RGB_TO_CR_709_SCALED(D_DPTH, r2, g2, b2)) >> \ (COMP_BASE + D_DPTH + 1)) + \ COFF; \ *d++ = CLAMP_LIMITED_CBCR(v, D_DPTH); \ - y = (RGB_TO_Y_709_SCALED(r2, g2, b2) >> \ + y = (RGB_TO_Y_709_SCALED(D_DPTH, r2, g2, b2) >> \ (COMP_BASE + D_DPTH)) + \ YOFF; \ *d++ = CLAMP_LIMITED_Y(y, D_DPTH); \ @@ -1768,17 +1785,21 @@ static void vc_copylineRG48toR12L(unsigned char * __restrict dst, const unsigned static void vc_copylineY416toR12L(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift, int gshift, int bshift) { + enum { + S_DEPTH = 16, + D_DEPTH = 12, + }; #define GET_NEXT \ - u = *in++ - (1<<15); \ - y = Y_SCALE * (*in++ - (1<<12)); \ - v = *in++ - (1<<15); \ + u = *in++ - (1 << (S_DEPTH - 1)); \ + y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); \ + v = *in++ - (1 << (S_DEPTH - 1)); \ in++; \ - r = (YCBCR_TO_R_709_SCALED(y, u, v) >> (COMP_BASE + 4U)); \ - g = (YCBCR_TO_G_709_SCALED(y, u, v) >> (COMP_BASE + 4U)); \ - b = (YCBCR_TO_B_709_SCALED(y, u, v) >> (COMP_BASE + 4U)); \ - r = CLAMP_FULL(r, 12); \ - g = CLAMP_FULL(g, 12); \ - b = CLAMP_FULL(b, 12); + r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 4U); \ + g = 
YCBCR_TO_G_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 4U); \ + b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 4U); \ + r = CLAMP_FULL(r, D_DEPTH); \ + g = CLAMP_FULL(g, D_DEPTH); \ + b = CLAMP_FULL(b, D_DEPTH); UNUSED(rshift), UNUSED(gshift), UNUSED(bshift); assert((uintptr_t) src % 2 == 0); @@ -1852,19 +1873,22 @@ static void vc_copylineY416toR12L(unsigned char * __restrict dst, const unsigned static void vc_copylineY416toR10k(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift, int gshift, int bshift) { + enum { + S_DEPTH = 16, + }; UNUSED(rshift), UNUSED(gshift), UNUSED(bshift); assert((uintptr_t) src % 2 == 0); const uint16_t *in = (const void *) src; OPTIMIZED_FOR (int x = 0; x < dst_len; x += 4) { comp_type_t y, u, v, r, g, b; - u = *in++ - (1<<15); - y = Y_SCALE * (*in++ - (1<<12)); - v = *in++ - (1<<15); + u = *in++ - (1 << (S_DEPTH - 1)); + y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); + v = *in++ - (1 << (S_DEPTH - 1)); in++; - r = (YCBCR_TO_R_709_SCALED(y, u, v) >> (COMP_BASE + 6U)); - g = (YCBCR_TO_G_709_SCALED(y, u, v) >> (COMP_BASE + 6U)); - b = (YCBCR_TO_B_709_SCALED(y, u, v) >> (COMP_BASE + 6U)); + r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 6U); + g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 6U); + b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 6U); r = CLAMP_FULL(r, 10); g = CLAMP_FULL(g, 10); b = CLAMP_FULL(b, 10); @@ -1879,19 +1903,22 @@ static void vc_copylineY416toR10k(unsigned char * __restrict dst, const unsigned static void vc_copylineY416toRGB(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift, int gshift, int bshift) { + enum { + S_DEPTH = 16, + }; UNUSED(rshift), UNUSED(gshift), UNUSED(bshift); assert((uintptr_t) src % 2 == 0); const uint16_t *in = (const void *) src; OPTIMIZED_FOR (int x = 0; x < dst_len; x += 3) { comp_type_t y, u, v, r, g, b; - u = *in++ - (1<<15); - y = Y_SCALE * 
(*in++ - (1<<12)); - v = *in++ - (1<<15); + u = *in++ - (1 << (S_DEPTH - 1)); + y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); + v = *in++ - (1 << (S_DEPTH - 1)); in++; - r = (YCBCR_TO_R_709_SCALED(y, u, v) >> (COMP_BASE + 8U)); - g = (YCBCR_TO_G_709_SCALED(y, u, v) >> (COMP_BASE + 8U)); - b = (YCBCR_TO_B_709_SCALED(y, u, v) >> (COMP_BASE + 8U)); + r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U); + g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U); + b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U); r = CLAMP_FULL(r, 8); g = CLAMP_FULL(g, 8); b = CLAMP_FULL(b, 8); @@ -1905,6 +1932,9 @@ static void vc_copylineY416toRGB(unsigned char * __restrict dst, const unsigned static void vc_copylineY416toRGBA(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift, int gshift, int bshift) { + enum { + S_DEPTH = 16, + }; assert((uintptr_t) src % 2 == 0); assert((uintptr_t) dst % 4 == 0); const uint16_t *in = (const void *) src; @@ -1913,13 +1943,13 @@ static void vc_copylineY416toRGBA(unsigned char * __restrict dst, const unsigned OPTIMIZED_FOR (int x = 0; x < dst_len; x += 4) { comp_type_t y, u, v, r, g, b; - u = *in++ - (1<<15); - y = Y_SCALE * (*in++ - (1<<12)); - v = *in++ - (1<<15); + u = *in++ - (1 << (S_DEPTH - 1)); + y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); + v = *in++ - (1 << (S_DEPTH - 1)); in++; - r = (YCBCR_TO_R_709_SCALED(y, u, v) >> (COMP_BASE + 8U)); - g = (YCBCR_TO_G_709_SCALED(y, u, v) >> (COMP_BASE + 8U)); - b = (YCBCR_TO_B_709_SCALED(y, u, v) >> (COMP_BASE + 8U)); + r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U); + g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U); + b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U); r = CLAMP_FULL(r, 8); g = CLAMP_FULL(g, 8); b = CLAMP_FULL(b, 8); @@ -2252,20 +2282,23 @@ static void vc_copylineRG48toUYVY(unsigned char * __restrict dst, const unsigned */ static void 
vc_copylineRG48toV210(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift, int gshift, int bshift) { + enum { + D_DEPTH = 10, + }; #define COMP_OFF (COMP_BASE+(16-10)) #define FETCH_BLOCK \ r = *in++; \ g = *in++; \ b = *in++; \ - y1 = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_OFF) + (1<<6); \ - u = RGB_TO_CB_709_SCALED(r, g, b) >> COMP_OFF; \ - v = RGB_TO_CR_709_SCALED(r, g, b) >> COMP_OFF; \ + y1 = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF) + (1<<6); \ + u = RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF; \ + v = RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF; \ r = *in++; \ g = *in++; \ b = *in++; \ - y2 = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_OFF) + (1<<6); \ - u += RGB_TO_CB_709_SCALED(r, g, b) >> COMP_OFF; \ - v += RGB_TO_CR_709_SCALED(r, g, b) >> COMP_OFF; \ + y2 = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF) + (1<<6); \ + u += RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF; \ + v += RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF; \ y1 = CLAMP_LIMITED_Y(y1, 10); \ y2 = CLAMP_LIMITED_Y(y2, 10); \ u = u / 2 + (1<<9); \ @@ -2303,6 +2336,9 @@ static void vc_copylineRG48toV210(unsigned char * __restrict dst, const unsigned static void vc_copylineRG48toY216(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift, int gshift, int bshift) { + enum { + D_DEPTH = 16, + }; UNUSED(rshift); UNUSED(gshift); UNUSED(bshift); @@ -2316,24 +2352,33 @@ static void vc_copylineRG48toY216(unsigned char * __restrict dst, const unsigned r = *in++; g = *in++; b = *in++; - y = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + (1<<12); + y = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + + (1 << (D_DEPTH - 4)); *d++ = CLAMP_LIMITED_Y(y, 16); - u = (RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE); - v = (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE); + u = (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE); + v = (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE); r = *in++; g 
= *in++; b = *in++; - u = (u + (RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE) / 2) + (1<<15); + u = ((u + (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE)) / + 2) + /* average Cb of both pixels, then add the mid-range offset */ + (1 << (D_DEPTH - 1)); *d++ = CLAMP_LIMITED_CBCR(u, 16); - y = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + (1<<12); + y = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + + (1 << (D_DEPTH - 4)); *d++ = CLAMP_LIMITED_Y(y, 16); - v = (v + (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE) / 2) + (1<<15); + v = ((v + (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE)) / + 2) + /* average Cr of both pixels, then add the mid-range offset */ + (1 << (D_DEPTH - 1)); *d++ = CLAMP_LIMITED_CBCR(v, 16); } } static void vc_copylineRG48toY416(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift, int gshift, int bshift) { + enum { + D_DEPTH = 16, + }; UNUSED(rshift); UNUSED(gshift); UNUSED(bshift); @@ -2346,18 +2391,27 @@ static void vc_copylineRG48toY416(unsigned char * __restrict dst, const unsigned r = *in++; g = *in++; b = *in++; - comp_type_t u = (RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15); - *d++ = CLAMP_LIMITED_CBCR(u, 16); - comp_type_t y = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + (1<<12); - *d++ = CLAMP_LIMITED_Y(y, 16); - comp_type_t v = (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15); - *d++ = CLAMP_LIMITED_CBCR(v, 16); + comp_type_t u = + (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + + (1 << (D_DEPTH - 1)); + *d++ = CLAMP_LIMITED_CBCR(u, D_DEPTH); + comp_type_t y = + (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + + (1 << (D_DEPTH - 4)); + *d++ = CLAMP_LIMITED_Y(y, D_DEPTH); + comp_type_t v = + (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + + (1 << (D_DEPTH - 1)); + *d++ = CLAMP_LIMITED_CBCR(v, D_DEPTH); *d++ = 0xFFFFU; } } static void vc_copylineY416toRG48(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift, int gshift, int bshift) { + enum { + S_DEPTH = 16, + }; UNUSED(rshift); UNUSED(gshift); UNUSED(bshift); @@ -2366,13 +2420,17 @@ static void 
vc_copylineY416toRG48(unsigned char * __restrict dst, const unsigned const uint16_t *in = (const void *) src; uint16_t *d = (void *) dst; OPTIMIZED_FOR (int x = 0; x < dst_len; x += 6) { - comp_type_t u = *in++ - (1<<15); - comp_type_t y = Y_SCALE * (*in++ - (1<<12)); - comp_type_t v = *in++ - (1<<15); + comp_type_t u = *in++ - (1 << (S_DEPTH - 1)); + comp_type_t y = + Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); + comp_type_t v = *in++ - (1 << (S_DEPTH - 1)); in++; - comp_type_t r = (YCBCR_TO_R_709_SCALED(y, u, v) >> COMP_BASE); - comp_type_t g = (YCBCR_TO_G_709_SCALED(y, u, v) >> COMP_BASE); - comp_type_t b = (YCBCR_TO_B_709_SCALED(y, u, v) >> COMP_BASE); + comp_type_t r = + YCBCR_TO_R_709_SCALED(S_DEPTH, y, u, v) >> COMP_BASE; + comp_type_t g = + YCBCR_TO_G_709_SCALED(S_DEPTH, y, u, v) >> COMP_BASE; + comp_type_t b = + YCBCR_TO_B_709_SCALED(S_DEPTH, y, u, v) >> COMP_BASE; *d++ = CLAMP_FULL(r, 16); *d++ = CLAMP_FULL(g, 16); *d++ = CLAMP_FULL(b, 16); @@ -2699,12 +2757,12 @@ static void vc_copylineV210toRGB(unsigned char * __restrict dst, const unsigned }; UNUSED(rshift), UNUSED(gshift), UNUSED(bshift); #define WRITE_YUV_AS_RGB(y, u, v) \ - (y) = Y_SCALE * ((y) - Y_SHIFT); \ - val = (YCBCR_TO_R_709_SCALED((y), (u), (v)) >> (COMP_BASE)); \ + (y) = Y_SCALE(IDEPTH) * ((y) - Y_SHIFT); \ + val = (YCBCR_TO_R_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE)); \ *(dst++) = CLAMP_FULL(val, ODEPTH); \ - val = (YCBCR_TO_G_709_SCALED((y), (u), (v)) >> (COMP_BASE)); \ + val = (YCBCR_TO_G_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE)); \ *(dst++) = CLAMP_FULL(val, ODEPTH); \ - val = (YCBCR_TO_B_709_SCALED((y), (u), (v)) >> (COMP_BASE)); \ + val = (YCBCR_TO_B_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE)); \ *(dst++) = CLAMP_FULL(val, ODEPTH); // read 8 bits from v210 directly @@ -2760,12 +2818,12 @@ vc_copylineV210toRG48(unsigned char *__restrict d, }; UNUSED(rshift), UNUSED(gshift), UNUSED(bshift); #define WRITE_YUV_AS_RGB(y, u, v) \ - (y) = Y_SCALE * ((y) - 
Y_SHIFT); \ - val = (YCBCR_TO_R_709_SCALED((y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \ + (y) = Y_SCALE(IDEPTH) * ((y) - Y_SHIFT); \ + val = (YCBCR_TO_R_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \ *(dst++) = CLAMP_FULL(val, ODEPTH); \ - val = (YCBCR_TO_G_709_SCALED((y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \ + val = (YCBCR_TO_G_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \ *(dst++) = CLAMP_FULL(val, ODEPTH); \ - val = (YCBCR_TO_B_709_SCALED((y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \ + val = (YCBCR_TO_B_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \ *(dst++) = CLAMP_FULL(val, ODEPTH); // read 8 bits from v210 directly diff --git a/src/video_display/gl.cpp b/src/video_display/gl.cpp index 3772f45a2..084a6f529 100644 --- a/src/video_display/gl.cpp +++ b/src/video_display/gl.cpp @@ -1529,7 +1529,7 @@ static GLuint gl_substitute_compile_link(const char *vprogram, const char *fprog double kr = cs_coeffs[2 * index]; double kb = cs_coeffs[2 * index + 1]; const char *placeholders[] = { "Y_SCALED_PLACEHOLDER", "R_CR_PLACEHOLDER", "G_CB_PLACEHOLDER", "G_CR_PLACEHOLDER", "B_CB_PLACEHOLDER" }; - double values[] = { Y_LIMIT_INV, R_CR(kr,kb), G_CB(kr,kb), G_CR(kr,kb), B_CB(kr,kb)}; + double values[] = { Y_LIMIT_INV(8), R_CR(8,kr,kb), G_CB(8,kr,kb), G_CR(8,kr,kb), B_CB(8,kr,kb)}; for (size_t i = 0; i < sizeof placeholders / sizeof placeholders[0]; ++i) { char *tok = fp; diff --git a/src/video_display/opengl_conversions.cpp b/src/video_display/opengl_conversions.cpp index c2752e564..9d6b81bb6 100644 --- a/src/video_display/opengl_conversions.cpp +++ b/src/video_display/opengl_conversions.cpp @@ -112,15 +112,15 @@ static void load_yuv_coefficients(GlProgram& program){ glUseProgram(program.get()); GLuint loc = glGetUniformLocation(program.get(), "luma_scale"); - glUniform1f(loc, Y_LIMIT_INV); + glUniform1f(loc, Y_LIMIT_INV(8)); loc = glGetUniformLocation(program.get(), "r_cr"); - glUniform1f(loc, R_CR(kr, kb)); + 
glUniform1f(loc, R_CR(8, kr, kb)); loc = glGetUniformLocation(program.get(), "g_cr"); - glUniform1f(loc, G_CR(kr, kb)); + glUniform1f(loc, G_CR(8, kr, kb)); loc = glGetUniformLocation(program.get(), "g_cb"); - glUniform1f(loc, G_CB(kr, kb)); + glUniform1f(loc, G_CB(8, kr, kb)); loc = glGetUniformLocation(program.get(), "b_cb"); - glUniform1f(loc, B_CB(kr, kb)); + glUniform1f(loc, B_CB(8, kr, kb)); } class Rendering_convertor : public Frame_convertor{ diff --git a/test/misc_test.cpp b/test/misc_test.cpp index 46ef50d71..73b21af7d 100644 --- a/test/misc_test.cpp +++ b/test/misc_test.cpp @@ -1,12 +1,10 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#include "config_unix.h" -#include "config_win32.h" -#endif - +#include // for strcmp +#include // for abs #include #include +#include // for allocator, basic_string, operator+, string +#include "color.h" #include "types.h" #include "utils/string.h" #include "unit_common.h" @@ -14,12 +12,63 @@ #include "video_frame.h" extern "C" { - int misc_test_replace_all(); - int misc_test_video_desc_io_op_symmetry(); +int misc_test_color_coeff_range(); +int misc_test_replace_all(); +int misc_test_video_desc_io_op_symmetry(); } using namespace std; +/** + * check that scaled coefficient for minimal values match approximately minimal + * value of nominal range (== there is not significant shift) + */ +int +misc_test_color_coeff_range() +{ + const int depths[] = { 8, 10, 12, 16 }; + + for (unsigned i = 0; i < sizeof depths / sizeof depths[0]; ++i) { + const int d = depths[i]; + const int d_max = (1 << d) - 1; + const int max_diff = 1 << (d - 8); + + // Y + ASSERT_LE_MESSAGE( + "min Y diverges from nominal range min", max_diff, + abs((RGB_TO_Y_709_SCALED(d, 0, 0, 0) >> COMP_BASE) + + LIMIT_LO(d) - + LIMIT_LO(d))); + ASSERT_LE_MESSAGE( + "max Y diverges from nominal range max", max_diff, + abs((RGB_TO_Y_709_SCALED(d, d_max, d_max, d_max) >> + COMP_BASE) + + LIMIT_LO(d) - LIMIT_HI_Y(d))); + // Cb + ASSERT_LE_MESSAGE( + "min Cb diverges from 
nominal range min", max_diff, + abs((RGB_TO_CB_709_SCALED(d, d_max, d_max, 0) >> + COMP_BASE) + + (1 << (d - 1)) - LIMIT_LO(d))); + ASSERT_LE_MESSAGE( + "max Cb diverges from nominal range max", max_diff, + abs((RGB_TO_CB_709_SCALED(d, 0, 0, d_max) >> COMP_BASE) + + (1 << (d - 1)) - LIMIT_HI_CBCR(d))); + // Cr + ASSERT_LE_MESSAGE( + "min Cr diverges from nominal range min", max_diff, + abs((RGB_TO_CR_709_SCALED(d, 0, d_max, d_max) >> + COMP_BASE) + + (1 << (d - 1)) - LIMIT_LO(d))); + ASSERT_LE_MESSAGE( + "max Cr diverges from nominal range max", max_diff, + abs((RGB_TO_CR_709_SCALED(d, d_max, 0, 0) >> COMP_BASE) + + (1 << (d - 1)) - LIMIT_HI_CBCR(d))); + } + + return 0; +} + #ifdef __clang__ #pragma clang diagnostic ignored "-Wstring-concatenation" #endif diff --git a/test/run_tests.c b/test/run_tests.c index 51ca7a2f8..1298b39e2 100644 --- a/test/run_tests.c +++ b/test/run_tests.c @@ -86,6 +86,7 @@ DECLARE_TEST(get_framerate_test_3000); DECLARE_TEST(get_framerate_test_free); DECLARE_TEST(gpujpeg_test_simple); DECLARE_TEST(libavcodec_test_get_decoder_from_uv_to_uv); +DECLARE_TEST(misc_test_color_coeff_range); DECLARE_TEST(misc_test_replace_all); DECLARE_TEST(misc_test_video_desc_io_op_symmetry); @@ -119,6 +120,7 @@ struct { DEFINE_TEST(get_framerate_test_free), DEFINE_TEST(gpujpeg_test_simple), DEFINE_TEST(libavcodec_test_get_decoder_from_uv_to_uv), + DEFINE_TEST(misc_test_color_coeff_range), DEFINE_TEST(misc_test_replace_all), DEFINE_TEST(misc_test_video_desc_io_op_symmetry), }; diff --git a/test/unit_common.h b/test/unit_common.h index 71a16528d..4a25ac7a1 100644 --- a/test/unit_common.h +++ b/test/unit_common.h @@ -67,4 +67,22 @@ } #endif +#define ASSERT_GE_MESSAGE(msg, expected, actual) \ + if ((actual) < (expected)) { \ + fprintf(stderr, \ + "Assertion failed - expected >=%" PRIdMAX \ + ", got %" PRIdMAX ": %s\n", \ + (intmax_t) (expected), (intmax_t) (actual), (msg)); \ + return -1; \ + } + +#define ASSERT_LE_MESSAGE(msg, expected, actual) \ + if ((actual) > 
(expected)) { \ + fprintf(stderr, \ + "Assertion failed - expected <=%" PRIdMAX \ + ", got %" PRIdMAX ": %s\n", \ + (intmax_t) (expected), (intmax_t) (actual), (msg)); \ + return -1; \ + } + #endif // defined TEST_UNIT_COMMON_H_7A471D89_C7E4_470A_A330_74F4BD85BBAC