diff --git a/src/color.c b/src/color.c
index a52bb652b..1836ee96e 100644
--- a/src/color.c
+++ b/src/color.c
@@ -86,6 +86,7 @@
                 CB_R(depth, kr, kb), CB_G(depth, kr, kb), CB_B(depth, kr, kb),\
                 CR_R(depth, kr, kb), CR_G(depth, kr, kb), CR_B(depth, kr, kb),\
 \
+                Y_SCALE(depth), \
                 SCALED(R_CR(depth, kr, kb)), \
                 SCALED(G_CB(depth, kr, kb)), \
                 SCALED(G_CR(depth, kr, kb)), \
diff --git a/src/color.h b/src/color.h
index eee483809..53eb1812a 100644
--- a/src/color.h
+++ b/src/color.h
@@ -170,6 +170,7 @@ struct color_coeffs {
         // the shorts below doesn't seem to be necessary - it seems like the
         // compiler doesn't vectorise those conversions (in contrary to the
         // above coeffs)
+        short y_scale; // initialised from Y_SCALE(depth) in color.c; multiplies the offset-subtracted luma sample
         short r_cr, g_cb, g_cr;
         int   b_cb; // is 34712 for 709  so doesn't fit to 16-bit short
 };
diff --git a/src/libavcodec/from_lavc_vid_conv.c b/src/libavcodec/from_lavc_vid_conv.c
index fa0c72c11..e28316de2 100644
--- a/src/libavcodec/from_lavc_vid_conv.c
+++ b/src/libavcodec/from_lavc_vid_conv.c
@@ -325,7 +325,7 @@ yuv444pXXle_to_r10k(struct av_conv_data d, int depth)
                     (unsigned char *) d.dst_buffer + y * d.pitch;
 
                 OPTIMIZED_FOR (int x = 0; x < width; ++x) {
-                        comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4))));
+                        comp_type_t y = (cfs.y_scale * (*src_y++ - (1<<(depth-4))));
                         comp_type_t cr = *src_cr++ - (1<<(depth-1));
                         comp_type_t cb = *src_cb++ - (1<<(depth-1));
 
@@ -392,7 +392,7 @@ yuv444pXXle_to_r12l(struct av_conv_data d, int depth)
                         comp_type_t g[8];
                         comp_type_t b[8];
                         OPTIMIZED_FOR (int j = 0; j < 8; ++j) {
-                                comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4))));
+                                comp_type_t y = (cfs.y_scale * (*src_y++ - (1<<(depth-4))));
                                 comp_type_t cr = *src_cr++ - (1<<(depth-1));
                                 comp_type_t cb = *src_cb++ - (1<<(depth-1));
                                 comp_type_t rr = YCBCR_TO_R(cfs, y, cb, cr) >> (COMP_BASE-12+depth);
@@ -487,7 +487,7 @@ yuv444pXXle_to_rg48(struct av_conv_data d, int depth)
                     (uint16_t *) (void *) (d.dst_buffer + y * d.pitch);
 
                 OPTIMIZED_FOR (int x = 0; x < width; ++x) {
-                        comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4))));
+                        comp_type_t y = (cfs.y_scale * (*src_y++ - (1<<(depth-4))));
                         comp_type_t cr = *src_cr++ - (1<<(depth-1));
                         comp_type_t cb = *src_cb++ - (1<<(depth-1));
 
@@ -1159,7 +1159,7 @@ nv12_to_rgb(struct av_conv_data d, bool rgba)
                 OPTIMIZED_FOR (int x = 0; x < width / 2; ++x) {
                         comp_type_t cb = *src_cbcr++ - 128;
                         comp_type_t cr = *src_cbcr++ - 128;
-                        comp_type_t y = (*src_y++ - 16) * Y_SCALE(S_DEPTH);
+                        comp_type_t y = (*src_y++ - 16) * cfs.y_scale;
                         comp_type_t r = YCBCR_TO_R(cfs, y, cb, cr) >> COMP_BASE;
                         comp_type_t g = YCBCR_TO_G(cfs, y, cb, cr) >> COMP_BASE;
                         comp_type_t b = YCBCR_TO_B(cfs, y, cb, cr) >> COMP_BASE;
@@ -1172,7 +1172,7 @@ nv12_to_rgb(struct av_conv_data d, bool rgba)
                                 *dst++ = CLAMP_FULL(b, 8);
                         }
 
-                        y = (*src_y++ - 16) * Y_SCALE(S_DEPTH);
+                        y = (*src_y++ - 16) * cfs.y_scale;
                         if (rgba) {
                                 *((uint32_t *)(void *) dst) = MK_RGBA(r, g, b, alpha_mask, 8);
                                 dst += 4;
@@ -1254,13 +1254,13 @@ static inline void yuv8p_to_rgb(struct av_conv_data d, int subsampling, bool rgb
                 OPTIMIZED_FOR (int x = 0; x < width / 2; ++x) {
                         comp_type_t cb = *src_cb1++ - 128;
                         comp_type_t cr = *src_cr1++ - 128;
-                        comp_type_t y = (*src_y1++ - 16) * Y_SCALE(S_DEPTH);
+                        comp_type_t y = (*src_y1++ - 16) * cfs.y_scale;
                         comp_type_t r = YCBCR_TO_R(cfs, y, cb, cr);
                         comp_type_t g = YCBCR_TO_G(cfs, y, cb, cr);
                         comp_type_t b = YCBCR_TO_B(cfs, y, cb, cr);
                         WRITE_RES_YUV8P_TO_RGB(dst1)
 
-                        y = (*src_y1++ - 16) * Y_SCALE(S_DEPTH);
+                        y = (*src_y1++ - 16) * cfs.y_scale;
                         r = YCBCR_TO_R(cfs, y, cb, cr);
                         g = YCBCR_TO_G(cfs, y, cb, cr);
                         b = YCBCR_TO_B(cfs, y, cb, cr);
@@ -1270,13 +1270,13 @@ static inline void yuv8p_to_rgb(struct av_conv_data d, int subsampling, bool rgb
                                 cb = *src_cb2++ - 128;
                                 cr = *src_cr2++ - 128;
                         }
-                        y = (*src_y2++ - 16) * Y_SCALE(S_DEPTH);
+                        y = (*src_y2++ - 16) * cfs.y_scale;
                         r = YCBCR_TO_R(cfs, y, cb, cr);
                         g = YCBCR_TO_G(cfs, y, cb, cr);
                         b = YCBCR_TO_B(cfs, y, cb, cr);
                         WRITE_RES_YUV8P_TO_RGB(dst2)
 
-                        y = (*src_y2++ - 16) * Y_SCALE(S_DEPTH);
+                        y = (*src_y2++ - 16) * cfs.y_scale;
                         r = YCBCR_TO_R(cfs, y, cb, cr);
                         g = YCBCR_TO_G(cfs, y, cb, cr);
                         b = YCBCR_TO_B(cfs, y, cb, cr);
@@ -1343,7 +1343,7 @@ yuv444p_to_rgb(struct av_conv_data d, bool rgba)
                 OPTIMIZED_FOR (int x = 0; x < width; ++x) {
                         int cb = *src_cb++ - 128;
                         int cr = *src_cr++ - 128;
-                        int y = *src_y++ * Y_SCALE(S_DEPTH);
+                        int y = *src_y++ * cfs.y_scale;
                         comp_type_t r = YCBCR_TO_R(cfs, y, cb, cr) >> COMP_BASE;
                         comp_type_t g = YCBCR_TO_G(cfs, y, cb, cr) >> COMP_BASE;
                         comp_type_t b = YCBCR_TO_B(cfs, y, cb, cr) >> COMP_BASE;
@@ -1776,10 +1776,10 @@ yuvp10le_to_rgb(struct av_conv_data d, int subsampling, int out_bit_depth)
                                 }\
                         }
 
-                        comp_type_t y1 = (Y_SCALE(S_DEPTH) * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
+                        comp_type_t y1 = (cfs.y_scale * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
                         WRITE_RES_YUV10P_TO_RGB(y1, dst1)
 
-                        comp_type_t y11 = (Y_SCALE(S_DEPTH) * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
+                        comp_type_t y11 = (cfs.y_scale * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
                         WRITE_RES_YUV10P_TO_RGB(y11, dst1)
 
                         if (subsampling == 422) {
@@ -1790,10 +1790,10 @@ yuvp10le_to_rgb(struct av_conv_data d, int subsampling, int out_bit_depth)
                                 bb = YCBCR_TO_B(cfs, 0, cb, cr) >> (COMP_BASE + (10 - bpp));
                         }
 
-                        comp_type_t y2 = (Y_SCALE(S_DEPTH) * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
+                        comp_type_t y2 = (cfs.y_scale * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
                         WRITE_RES_YUV10P_TO_RGB(y2, dst2)
 
-                        comp_type_t y22 = (Y_SCALE(S_DEPTH) * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
+                        comp_type_t y22 = (cfs.y_scale * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
                         WRITE_RES_YUV10P_TO_RGB(y22, dst2)
                 }
         }
@@ -1845,7 +1845,7 @@ yuv444p10le_to_rgb(struct av_conv_data d, bool rgba)
                         comp_type_t cb = *src_cb++ - (1 << (S_DEPTH - 1));
                         comp_type_t cr = *src_cr++ - (1 << (S_DEPTH - 1));
                         comp_type_t y =
-                            (*src_y++ - (1 << (S_DEPTH - 4))) * Y_SCALE(S_DEPTH);
+                            (*src_y++ - (1 << (S_DEPTH - 4))) * cfs.y_scale;
                         comp_type_t r =
                             YCBCR_TO_R(cfs, y, cb, cr) >> (COMP_BASE + 2);
                         comp_type_t g =
diff --git a/src/pixfmt_conv.c b/src/pixfmt_conv.c
index 64325ccca..b20f3aaf2 100644
--- a/src/pixfmt_conv.c
+++ b/src/pixfmt_conv.c
@@ -1049,8 +1049,8 @@ vc_copylineToUYVY(unsigned char *__restrict dst,
         enum { DEPTH = DEPTH8 };\
         const struct color_coeffs cfs = *get_color_coeffs(CS_DFL, DEPTH);\
         OPTIMIZED_FOR (int x = 0; x <= (dst_len) - 6 * (1 + (rgb16)); x += 6 * (1 + (rgb16))) {\
-                register int y1 = Y_SCALE(8) * ((src)[y1_off] - 16);\
-                register int y2 = Y_SCALE(8) * ((src)[y2_off] - 16);\
+                register int y1 = cfs.y_scale * ((src)[y1_off] - 16);\
+                register int y2 = cfs.y_scale * ((src)[y2_off] - 16);\
                 register int u = (src)[u_off] - 128;\
                 register int v = (src)[v_off] - 128;\
                 int val;\
@@ -1818,7 +1818,7 @@ static void vc_copylineY416toR12L(unsigned char * __restrict dst, const unsigned
         const struct color_coeffs cfs = *get_color_coeffs(CS_DFL, S_DEPTH);
 #define GET_NEXT \
         u = *in++ - (1 << (S_DEPTH - 1)); \
-        y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); \
+        y = cfs.y_scale * (*in++ - (1 << (S_DEPTH - 4))); \
         v = *in++ - (1 << (S_DEPTH - 1)); \
         in++; \
         r = YCBCR_TO_R(cfs, y, u, v) >> (COMP_BASE + 4U); \
@@ -1911,7 +1911,7 @@ static void vc_copylineY416toR10k(unsigned char * __restrict dst, const unsigned
                 comp_type_t y, u, v, r, g, b;
 
                 u = *in++ - (1 << (S_DEPTH - 1));
-                y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4)));
+                y = cfs.y_scale * (*in++ - (1 << (S_DEPTH - 4)));
                 v = *in++ - (1 << (S_DEPTH - 1));
                 in++;
                 r = YCBCR_TO_R(cfs, y, u, v) >> (COMP_BASE + 6U);
@@ -1942,7 +1942,7 @@ static void vc_copylineY416toRGB(unsigned char * __restrict dst, const unsigned
                 comp_type_t y, u, v, r, g, b;
 
                 u = *in++ - (1 << (S_DEPTH - 1));
-                y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4)));
+                y = cfs.y_scale * (*in++ - (1 << (S_DEPTH - 4)));
                 v = *in++ - (1 << (S_DEPTH - 1));
                 in++;
                 r = YCBCR_TO_R(cfs, y, u, v) >> (COMP_BASE + 8U);
@@ -1974,7 +1974,7 @@ static void vc_copylineY416toRGBA(unsigned char * __restrict dst, const unsigned
                 comp_type_t y, u, v, r, g, b;
 
                 u = *in++ - (1 << (S_DEPTH - 1));
-                y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4)));
+                y = cfs.y_scale * (*in++ - (1 << (S_DEPTH - 4)));
                 v = *in++ - (1 << (S_DEPTH - 1));
                 in++;
                 r = YCBCR_TO_R(cfs, y, u, v) >> (COMP_BASE + 8U);
@@ -2456,7 +2456,7 @@ static void vc_copylineY416toRG48(unsigned char * __restrict dst, const unsigned
         OPTIMIZED_FOR (int x = 0; x < dst_len; x += 6) {
                 comp_type_t u = *in++ - (1 << (S_DEPTH - 1));
                 comp_type_t y =
-                    Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4)));
+                    cfs.y_scale * (*in++ - (1 << (S_DEPTH - 4)));
                 comp_type_t v = *in++ - (1 << (S_DEPTH - 1));
                 in++;
                 comp_type_t r =
@@ -2792,7 +2792,7 @@ static void vc_copylineV210toRGB(unsigned char * __restrict dst, const unsigned
         UNUSED(rshift), UNUSED(gshift), UNUSED(bshift);
         const struct color_coeffs cfs = *get_color_coeffs(CS_DFL, IDEPTH);
 #define WRITE_YUV_AS_RGB(y, u, v) \
-        (y) = Y_SCALE(IDEPTH) * ((y) - Y_SHIFT); \
+        (y) = cfs.y_scale * ((y) - Y_SHIFT); \
         val = (YCBCR_TO_R(cfs, (y), (u), (v)) >> (COMP_BASE)); \
         *(dst++) = CLAMP_FULL(val, ODEPTH); \
         val = (YCBCR_TO_G(cfs, (y), (u), (v)) >> (COMP_BASE)); \
@@ -2854,7 +2854,7 @@ vc_copylineV210toRG48(unsigned char *__restrict d,
         UNUSED(rshift), UNUSED(gshift), UNUSED(bshift);
         const struct color_coeffs cfs = *get_color_coeffs(CS_DFL, IDEPTH);
 #define WRITE_YUV_AS_RGB(y, u, v) \
-        (y) = Y_SCALE(IDEPTH) * ((y) - Y_SHIFT); \
+        (y) = cfs.y_scale * ((y) - Y_SHIFT); \
         val = (YCBCR_TO_R(cfs, (y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \
         *(dst++) = CLAMP_FULL(val, ODEPTH); \
         val = (YCBCR_TO_G(cfs, (y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \