diff --git a/src/color.c b/src/color.c index a52bb652b..1836ee96e 100644 --- a/src/color.c +++ b/src/color.c @@ -86,6 +86,7 @@ CB_R(depth, kr, kb), CB_G(depth, kr, kb), CB_B(depth, kr, kb),\ CR_R(depth, kr, kb), CR_G(depth, kr, kb), CR_B(depth, kr, kb),\ \ + Y_SCALE(depth), \ SCALED(R_CR(depth, kr, kb)), \ SCALED(G_CB(depth, kr, kb)), \ SCALED(G_CR(depth, kr, kb)), \ diff --git a/src/color.h b/src/color.h index eee483809..53eb1812a 100644 --- a/src/color.h +++ b/src/color.h @@ -170,6 +170,7 @@ struct color_coeffs { // the shorts below doesn't seem to be necessary - it seems like the // compiler doesn't vectorise those conversions (in contrary to the // above coeffs) + short y_scale; short r_cr, g_cb, g_cr; int b_cb; // is 34712 for 709 so doesn't fit to 16-bit short }; diff --git a/src/libavcodec/from_lavc_vid_conv.c b/src/libavcodec/from_lavc_vid_conv.c index fa0c72c11..e28316de2 100644 --- a/src/libavcodec/from_lavc_vid_conv.c +++ b/src/libavcodec/from_lavc_vid_conv.c @@ -325,7 +325,7 @@ yuv444pXXle_to_r10k(struct av_conv_data d, int depth) (unsigned char *) d.dst_buffer + y * d.pitch; OPTIMIZED_FOR (int x = 0; x < width; ++x) { - comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4)))); + comp_type_t y = (cfs.y_scale * (*src_y++ - (1<<(depth-4)))); comp_type_t cr = *src_cr++ - (1<<(depth-1)); comp_type_t cb = *src_cb++ - (1<<(depth-1)); @@ -392,7 +392,7 @@ yuv444pXXle_to_r12l(struct av_conv_data d, int depth) comp_type_t g[8]; comp_type_t b[8]; OPTIMIZED_FOR (int j = 0; j < 8; ++j) { - comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4)))); + comp_type_t y = (cfs.y_scale * (*src_y++ - (1<<(depth-4)))); comp_type_t cr = *src_cr++ - (1<<(depth-1)); comp_type_t cb = *src_cb++ - (1<<(depth-1)); comp_type_t rr = YCBCR_TO_R(cfs, y, cb, cr) >> (COMP_BASE-12+depth); @@ -487,7 +487,7 @@ yuv444pXXle_to_rg48(struct av_conv_data d, int depth) (uint16_t *) (void *) (d.dst_buffer + y * d.pitch); OPTIMIZED_FOR (int x = 0; x < width; ++x) { - comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4)))); + comp_type_t y = (cfs.y_scale * (*src_y++ - (1<<(depth-4)))); comp_type_t cr = *src_cr++ - (1<<(depth-1)); comp_type_t cb = *src_cb++ - (1<<(depth-1)); @@ -1159,7 +1159,7 @@ nv12_to_rgb(struct av_conv_data d, bool rgba) OPTIMIZED_FOR (int x = 0; x < width / 2; ++x) { comp_type_t cb = *src_cbcr++ - 128; comp_type_t cr = *src_cbcr++ - 128; - comp_type_t y = (*src_y++ - 16) * Y_SCALE(S_DEPTH); + comp_type_t y = (*src_y++ - 16) * cfs.y_scale; comp_type_t r = YCBCR_TO_R(cfs, y, cb, cr) >> COMP_BASE; comp_type_t g = YCBCR_TO_G(cfs, y, cb, cr) >> COMP_BASE; comp_type_t b = YCBCR_TO_B(cfs, y, cb, cr) >> COMP_BASE; @@ -1172,7 +1172,7 @@ nv12_to_rgb(struct av_conv_data d, bool rgba) *dst++ = CLAMP_FULL(b, 8); } - y = (*src_y++ - 16) * Y_SCALE(S_DEPTH); + y = (*src_y++ - 16) * cfs.y_scale; if (rgba) { *((uint32_t *)(void *) dst) = MK_RGBA(r, g, b, alpha_mask, 8); dst += 4; @@ -1254,13 +1254,13 @@ static inline void yuv8p_to_rgb(struct av_conv_data d, int subsampling, bool rgb OPTIMIZED_FOR (int x = 0; x < width / 2; ++x) { comp_type_t cb = *src_cb1++ - 128; comp_type_t cr = *src_cr1++ - 128; - comp_type_t y = (*src_y1++ - 16) * Y_SCALE(S_DEPTH); + comp_type_t y = (*src_y1++ - 16) * cfs.y_scale; comp_type_t r = YCBCR_TO_R(cfs, y, cb, cr); comp_type_t g = YCBCR_TO_G(cfs, y, cb, cr); comp_type_t b = YCBCR_TO_B(cfs, y, cb, cr); WRITE_RES_YUV8P_TO_RGB(dst1) - y = (*src_y1++ - 16) * Y_SCALE(S_DEPTH); + y = (*src_y1++ - 16) * cfs.y_scale; r = YCBCR_TO_R(cfs, y, cb, cr); g = YCBCR_TO_G(cfs, y, cb, cr); b = YCBCR_TO_B(cfs, y, cb, cr); @@ -1270,13 +1270,13 @@ static inline void yuv8p_to_rgb(struct av_conv_data d, int subsampling, bool rgb cb = *src_cb2++ - 128; cr = *src_cr2++ - 128; } - y = (*src_y2++ - 16) * Y_SCALE(S_DEPTH); + y = (*src_y2++ - 16) * cfs.y_scale; r = YCBCR_TO_R(cfs, y, cb, cr); g = YCBCR_TO_G(cfs, y, cb, cr); b = YCBCR_TO_B(cfs, y, cb, cr); WRITE_RES_YUV8P_TO_RGB(dst2) - y = (*src_y2++ - 16) * Y_SCALE(S_DEPTH); + y = (*src_y2++ - 16) * cfs.y_scale; r = YCBCR_TO_R(cfs, y, cb, cr); g = YCBCR_TO_G(cfs, y, cb, cr); b = YCBCR_TO_B(cfs, y, cb, cr); @@ -1343,7 +1343,7 @@ yuv444p_to_rgb(struct av_conv_data d, bool rgba) OPTIMIZED_FOR (int x = 0; x < width; ++x) { int cb = *src_cb++ - 128; int cr = *src_cr++ - 128; - int y = *src_y++ * Y_SCALE(S_DEPTH); + int y = *src_y++ * cfs.y_scale; comp_type_t r = YCBCR_TO_R(cfs, y, cb, cr) >> COMP_BASE; comp_type_t g = YCBCR_TO_G(cfs, y, cb, cr) >> COMP_BASE; comp_type_t b = YCBCR_TO_B(cfs, y, cb, cr) >> COMP_BASE; @@ -1776,10 +1776,10 @@ yuvp10le_to_rgb(struct av_conv_data d, int subsampling, int out_bit_depth) }\ } - comp_type_t y1 = (Y_SCALE(S_DEPTH) * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); + comp_type_t y1 = (cfs.y_scale * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); WRITE_RES_YUV10P_TO_RGB(y1, dst1) - comp_type_t y11 = (Y_SCALE(S_DEPTH) * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); + comp_type_t y11 = (cfs.y_scale * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); WRITE_RES_YUV10P_TO_RGB(y11, dst1) if (subsampling == 422) { @@ -1790,10 +1790,10 @@ yuvp10le_to_rgb(struct av_conv_data d, int subsampling, int out_bit_depth) bb = YCBCR_TO_B(cfs, 0, cb, cr) >> (COMP_BASE + (10 - bpp)); } - comp_type_t y2 = (Y_SCALE(S_DEPTH) * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); + comp_type_t y2 = (cfs.y_scale * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); WRITE_RES_YUV10P_TO_RGB(y2, dst2) - comp_type_t y22 = (Y_SCALE(S_DEPTH) * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); + comp_type_t y22 = (cfs.y_scale * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp)); WRITE_RES_YUV10P_TO_RGB(y22, dst2) } } @@ -1845,7 +1845,7 @@ yuv444p10le_to_rgb(struct av_conv_data d, bool rgba) comp_type_t cb = *src_cb++ - (1 << (S_DEPTH - 1)); comp_type_t cr = *src_cr++ - (1 << (S_DEPTH - 1)); comp_type_t y = - (*src_y++ - (1 << (S_DEPTH - 4))) * Y_SCALE(S_DEPTH); + (*src_y++ - (1 << (S_DEPTH - 4))) * cfs.y_scale; comp_type_t r = YCBCR_TO_R(cfs, y, cb, cr) >> (COMP_BASE + 2); comp_type_t g = diff --git a/src/pixfmt_conv.c b/src/pixfmt_conv.c index 64325ccca..b20f3aaf2 100644 --- a/src/pixfmt_conv.c +++ b/src/pixfmt_conv.c @@ -1049,8 +1049,8 @@ vc_copylineToUYVY(unsigned char *__restrict dst, enum { DEPTH = DEPTH8 };\ const struct color_coeffs cfs = *get_color_coeffs(CS_DFL, DEPTH);\ OPTIMIZED_FOR (int x = 0; x <= (dst_len) - 6 * (1 + (rgb16)); x += 6 * (1 + (rgb16))) {\ - register int y1 = Y_SCALE(8) * ((src)[y1_off] - 16);\ - register int y2 = Y_SCALE(8) * ((src)[y2_off] - 16);\ + register int y1 = cfs.y_scale * ((src)[y1_off] - 16);\ + register int y2 = cfs.y_scale * ((src)[y2_off] - 16);\ register int u = (src)[u_off] - 128;\ register int v = (src)[v_off] - 128;\ int val;\ @@ -1818,7 +1818,7 @@ static void vc_copylineY416toR12L(unsigned char * __restrict dst, const unsigned const struct color_coeffs cfs = *get_color_coeffs(CS_DFL, S_DEPTH); #define GET_NEXT \ u = *in++ - (1 << (S_DEPTH - 1)); \ - y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); \ + y = cfs.y_scale * (*in++ - (1 << (S_DEPTH - 4))); \ v = *in++ - (1 << (S_DEPTH - 1)); \ in++; \ r = YCBCR_TO_R(cfs, y, u, v) >> (COMP_BASE + 4U); \ @@ -1911,7 +1911,7 @@ static void vc_copylineY416toR10k(unsigned char * __restrict dst, const unsigned comp_type_t y, u, v, r, g, b; u = *in++ - (1 << (S_DEPTH - 1)); - y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); + y = cfs.y_scale * (*in++ - (1 << (S_DEPTH - 4))); v = *in++ - (1 << (S_DEPTH - 1)); in++; r = YCBCR_TO_R(cfs, y, u, v) >> (COMP_BASE + 6U); @@ -1942,7 +1942,7 @@ static void vc_copylineY416toRGB(unsigned char * __restrict dst, const unsigned comp_type_t y, u, v, r, g, b; u = *in++ - (1 << (S_DEPTH - 1)); - y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); + y = cfs.y_scale * (*in++ - (1 << (S_DEPTH - 4))); v = *in++ - (1 << (S_DEPTH - 1)); in++; r = YCBCR_TO_R(cfs, y, u, v) >> (COMP_BASE + 8U); @@ -1974,7 +1974,7 @@ static void vc_copylineY416toRGBA(unsigned char * __restrict dst, const unsigned comp_type_t y, u, v, r, g, b; u = *in++ - (1 << (S_DEPTH - 1)); - y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); + y = cfs.y_scale * (*in++ - (1 << (S_DEPTH - 4))); v = *in++ - (1 << (S_DEPTH - 1)); in++; r = YCBCR_TO_R(cfs, y, u, v) >> (COMP_BASE + 8U); @@ -2456,7 +2456,7 @@ static void vc_copylineY416toRG48(unsigned char * __restrict dst, const unsigned OPTIMIZED_FOR (int x = 0; x < dst_len; x += 6) { comp_type_t u = *in++ - (1 << (S_DEPTH - 1)); comp_type_t y = - Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); + cfs.y_scale * (*in++ - (1 << (S_DEPTH - 4))); comp_type_t v = *in++ - (1 << (S_DEPTH - 1)); in++; comp_type_t r = @@ -2792,7 +2792,7 @@ static void vc_copylineV210toRGB(unsigned char * __restrict dst, const unsigned UNUSED(rshift), UNUSED(gshift), UNUSED(bshift); const struct color_coeffs cfs = *get_color_coeffs(CS_DFL, IDEPTH); #define WRITE_YUV_AS_RGB(y, u, v) \ - (y) = Y_SCALE(IDEPTH) * ((y) - Y_SHIFT); \ + (y) = cfs.y_scale * ((y) - Y_SHIFT); \ val = (YCBCR_TO_R(cfs, (y), (u), (v)) >> (COMP_BASE)); \ *(dst++) = CLAMP_FULL(val, ODEPTH); \ val = (YCBCR_TO_G(cfs, (y), (u), (v)) >> (COMP_BASE)); \ @@ -2854,7 +2854,7 @@ vc_copylineV210toRG48(unsigned char *__restrict d, UNUSED(rshift), UNUSED(gshift), UNUSED(bshift); const struct color_coeffs cfs = *get_color_coeffs(CS_DFL, IDEPTH); #define WRITE_YUV_AS_RGB(y, u, v) \ - (y) = Y_SCALE(IDEPTH) * ((y) - Y_SHIFT); \ + (y) = cfs.y_scale * ((y) - Y_SHIFT); \ val = (YCBCR_TO_R(cfs, (y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \ *(dst++) = CLAMP_FULL(val, ODEPTH); \ val = (YCBCR_TO_G(cfs, (y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \