color coeff computing updates

Mainly, the bit depth is now taken into account in Y_LIMIT and CBCR_LIMIT -
the previously used denominator 255.0 was correct only for 8 bits.

Add (subtract) an epsilon of 0.5 when converting the coefficients to integer
so that the value is rounded correctly.
This commit is contained in:
Martin Pulec
2024-09-17 15:25:06 +02:00
parent cad561ead6
commit 4fe65769f0
9 changed files with 425 additions and 205 deletions

View File

@@ -68,11 +68,13 @@ static_assert(sizeof(comp_type_t) * 8 >= COMP_BASE + 18, "comp_type_t not wide e
#define KG(kr,kb) (1.-kr-kb)
#ifdef YCBCR_FULL
#define Y_LIMIT 1.0
#define CBCR_LIMIT 1.0
#define Y_LIMIT(out_depth) 1.0
#define CBCR_LIMIT(out_depth) 1.0
#else
#define Y_LIMIT (219.0/255.0)
#define CBCR_LIMIT (224.0/255.0)
/*
 * Limited-range scale factors for the given bit depth: the 8-bit excursion
 * (219 for Y, 224 for Cb/Cr) is shifted up to out_depth bits and divided by
 * the maximal code value of that depth, (1 << out_depth) - 1. For
 * out_depth == 8 this evaluates to 219/255 and 224/255 respectively.
 * out_depth must be at least 8 because the shift count is (out_depth) - 8.
 */
#define Y_LIMIT(out_depth) \
(219. * (1 << ((out_depth) - 8)) / ((1 << (out_depth)) - 1))
#define CBCR_LIMIT(out_depth) \
(224. * (1 << ((out_depth) - 8)) / ((1 << (out_depth)) - 1))
#endif // !defined YCBCR_FULL
#define KR_709 .212639
@@ -85,43 +87,76 @@ static_assert(sizeof(comp_type_t) * 8 >= COMP_BASE + 18, "comp_type_t not wide e
#define KG_709 KG(KR_709,KB_709)
#define D (2.*(KR_709+KG_709))
#define E (2.*(1.-KR_709))
#define Y_R ((comp_type_t) ((KR_709*Y_LIMIT) * (1<<COMP_BASE)))
#define Y_G ((comp_type_t) ((KG_709*Y_LIMIT) * (1<<COMP_BASE)))
#define Y_B ((comp_type_t) ((KB_709*Y_LIMIT) * (1<<COMP_BASE)))
#define CB_R ((comp_type_t) ((-KR_709/D*CBCR_LIMIT) * (1<<COMP_BASE)))
#define CB_G ((comp_type_t) ((-KG_709/D*CBCR_LIMIT) * (1<<COMP_BASE)))
#define CB_B ((comp_type_t) (((1-KB_709)/D*CBCR_LIMIT) * (1<<COMP_BASE)))
#define CR_R ((comp_type_t) (((1-KR_709)/E*CBCR_LIMIT) * (1<<COMP_BASE)))
#define CR_G ((comp_type_t) ((-KG_709/E*CBCR_LIMIT) * (1<<COMP_BASE)))
#define CR_B ((comp_type_t) ((-KB_709/E*CBCR_LIMIT) * (1<<COMP_BASE)))
#define RGB_TO_Y_709_SCALED(r, g, b) ((r) * Y_R + (g) * Y_G + (b) * Y_B)
#define RGB_TO_CB_709_SCALED(r, g, b) ((r) * CB_R + (g) * CB_G + (b) * CB_B)
#define RGB_TO_CR_709_SCALED(r, g, b) ((r) * CR_R + (g) * CR_G + (b) * CR_B)
#define C_EPS 0.5
#define Y_R(out_depth) \
((comp_type_t) (((KR_709 * Y_LIMIT(out_depth)) * (1 << COMP_BASE)) + \
C_EPS))
#define Y_G(out_depth) \
((comp_type_t) (((KG_709 * Y_LIMIT(out_depth)) * (1 << COMP_BASE)) + \
C_EPS))
#define Y_B(out_depth) \
((comp_type_t) (((KB_709 * Y_LIMIT(out_depth)) * (1 << COMP_BASE)) + \
C_EPS))
#define CB_R(out_depth) \
((comp_type_t) (((-KR_709 / D * CBCR_LIMIT(out_depth)) * \
(1 << COMP_BASE)) - C_EPS))
#define CB_G(out_depth) \
((comp_type_t) (((-KG_709 / D * CBCR_LIMIT(out_depth)) * \
(1 << COMP_BASE)) - C_EPS))
#define CB_B(out_depth) \
((comp_type_t) ((((1 - KB_709) / D * CBCR_LIMIT(out_depth)) * \
(1 << COMP_BASE)) + C_EPS))
/*
 * Fixed-point (COMP_BASE fractional bits) R'G'B' -> Cr coefficients for the
 * given output bit depth.
 *
 * The cast to comp_type_t truncates toward zero, so rounding to the nearest
 * integer requires adding C_EPS to a positive coefficient and subtracting it
 * from a negative one. CR_R is positive ((1 - KR_709) / E), CR_G and CR_B
 * are negative - the same sign convention as used for the CB_* row.
 */
#define CR_R(out_depth) \
        ((comp_type_t) ((((1 - KR_709) / E * CBCR_LIMIT(out_depth)) * \
                         (1 << COMP_BASE)) + C_EPS))
#define CR_G(out_depth) \
        ((comp_type_t) (((-KG_709 / E * CBCR_LIMIT(out_depth)) * \
                         (1 << COMP_BASE)) - C_EPS))
#define CR_B(out_depth) \
        ((comp_type_t) (((-KB_709 / E * CBCR_LIMIT(out_depth)) * \
                         (1 << COMP_BASE)) - C_EPS))
#define RGB_TO_Y_709_SCALED(out_depth, r, g, b) \
((r) * Y_R(out_depth) + (g) * Y_G(out_depth) + (b) * Y_B(out_depth))
#define RGB_TO_CB_709_SCALED(out_depth, r, g, b) \
((r) * CB_R(out_depth) + (g) * CB_G(out_depth) + (b) * CB_B(out_depth))
#define RGB_TO_CR_709_SCALED(out_depth, r, g, b) \
((r) * CR_R(out_depth) + (g) * CR_G(out_depth) + (b) * CR_B(out_depth))
#ifdef YCBCR_FULL
#define LIMIT_LO(depth) 0
#define LIMIT_HI_Y(depth) ((1<<(depth))-1)
#define LIMIT_HI_CBCR(depth) ((1<<(depth))-1)
#else
#define LIMIT_LO(depth) (1<<((depth)-4))
#define LIMIT_HI_Y(depth) (235 * (1<<((depth)-8)))
#define LIMIT_HI_CBCR(depth) (240 * (1<<((depth)-8)))
#ifdef YCBCR_FULL
#define CLAMP_LIMITED_Y(val, depth) (val)
#define CLAMP_LIMITED_CBCR(val, depth) (val)
#else
#endif
#define CLAMP_LIMITED_Y(val, depth) CLAMP((val), LIMIT_LO(depth), LIMIT_HI_Y(depth))
/*
 * Clamp a Cb/Cr sample to the valid code range for the given bit depth.
 * The lower bound comes from LIMIT_LO() so that it follows the YCBCR_FULL
 * setting the same way CLAMP_LIMITED_Y does; in limited range LIMIT_LO(depth)
 * is 1 << (depth - 4), i.e. the value that used to be spelled out here
 * (without parentheses around the macro argument).
 */
#define CLAMP_LIMITED_CBCR(val, depth) \
        CLAMP((val), LIMIT_LO(depth), LIMIT_HI_CBCR(depth))
#endif
#define R_CB(kr,kb) 0.0
#define R_CR(kr,kb) ((2.*(1.-kr))/CBCR_LIMIT)
#define G_CB(kr,kb) ((-kb*(2.*(kr+KG(kr,kb)))/KG(kr,kb))/CBCR_LIMIT)
#define G_CR(kr,kb) ((-kr*(2.*(1.-kr))/KG(kr,kb))/CBCR_LIMIT)
#define B_CB(kr,kb) ((2.*(kr+KG(kr,kb)))/CBCR_LIMIT)
#define B_CR(kr,kb) 0.0
#define R_CR(in_depth, kr, kb) ((2. * (1. - (kr))) / CBCR_LIMIT(in_depth))
#define G_CB(in_depth, kr, kb) \
((-(kb) * (2. * ((kr) + KG(kr, kb))) / KG(kr, kb)) / \
CBCR_LIMIT(in_depth))
#define G_CR(in_depth, kr, kb) \
((-(kr) * (2. * (1. - (kr))) / KG(kr, kb)) / CBCR_LIMIT(in_depth))
#define B_CB(in_depth, kr, kb) \
((2. * ((kr) + KG(kr, kb))) / CBCR_LIMIT(in_depth))
#define SCALED(x) ((comp_type_t) ((x) * (1<<COMP_BASE)))
#define Y_LIMIT_INV (1./Y_LIMIT)
#define Y_SCALE SCALED(Y_LIMIT_INV) // precomputed value, Y multiplier is same for all channels
#define YCBCR_TO_R_709_SCALED(y, cb, cr) ((y) /* * r_y */ /* + (cb) * SCALED(r_cb(KR_709,KB_709)) */ + (cr) * SCALED(R_CR(KR_709,KB_709)))
#define YCBCR_TO_G_709_SCALED(y, cb, cr) ((y) /* * g_y */ + (cb) * SCALED(G_CB(KR_709,KB_709)) + (cr) * SCALED(G_CR(KR_709,KB_709)))
#define YCBCR_TO_B_709_SCALED(y, cb, cr) ((y) /* * b_y */ + (cb) * SCALED(B_CB(KR_709,KB_709)) /* + (cr) * SCALED(b_cr(KR_709,KB_709))) */)
#define FULL_FOOT(depth) (1<<((depth)-8))
#define Y_LIMIT_INV(in_depth) (1./Y_LIMIT(in_depth))
#define Y_SCALE(in_depth) \
SCALED(Y_LIMIT_INV(in_depth)) // precomputed value, Y multiplier is same
// for all channels
#define YCBCR_TO_R_709_SCALED(in_depth, y, cb, cr) \
((y) /* * r_y */ + (cr) * SCALED(R_CR(in_depth, KR_709, KB_709)))
#define YCBCR_TO_G_709_SCALED(in_depth, y, cb, cr) \
((y) /* * g_y */ + (cb) * SCALED(G_CB(in_depth, KR_709, KB_709)) + \
(cr) * SCALED(G_CR(in_depth, KR_709, KB_709)))
#define YCBCR_TO_B_709_SCALED(in_depth, y, cb, cr) \
((y) /* * b_y */ + (cb) * SCALED(B_CB(in_depth, KR_709, KB_709)))
#define FULL_FOOT(depth) (1 << ((depth) - 8))
#define FULL_HEAD(depth) ((255<<((depth)-8))-1)
#define CLAMP_FULL(val, depth) CLAMP((val), FULL_FOOT(depth), FULL_HEAD(depth))

View File

@@ -276,13 +276,13 @@ static inline void yuv444pXXle_to_r10k(int depth, char * __restrict dst_buffer,
unsigned char *dst = (unsigned char *) dst_buffer + y * pitch;
OPTIMIZED_FOR (int x = 0; x < width; ++x) {
comp_type_t y = (Y_SCALE * (*src_y++ - (1<<(depth-4))));
comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4))));
comp_type_t cr = *src_cr++ - (1<<(depth-1));
comp_type_t cb = *src_cb++ - (1<<(depth-1));
comp_type_t r = YCBCR_TO_R_709_SCALED(y, cb, cr) >> (COMP_BASE-10+depth);
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> (COMP_BASE-10+depth);
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> (COMP_BASE-10+depth);
comp_type_t r = YCBCR_TO_R_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-10+depth);
comp_type_t g = YCBCR_TO_G_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-10+depth);
comp_type_t b = YCBCR_TO_B_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-10+depth);
// r g b is now on 10 bit scale
r = CLAMP_FULL(r, 10);
@@ -339,12 +339,12 @@ static inline void yuv444pXXle_to_r12l(int depth, char * __restrict dst_buffer,
comp_type_t g[8];
comp_type_t b[8];
OPTIMIZED_FOR (int j = 0; j < 8; ++j) {
comp_type_t y = (Y_SCALE * (*src_y++ - (1<<(depth-4))));
comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4))));
comp_type_t cr = *src_cr++ - (1<<(depth-1));
comp_type_t cb = *src_cb++ - (1<<(depth-1));
comp_type_t rr = YCBCR_TO_R_709_SCALED(y, cb, cr) >> (COMP_BASE-12+depth);
comp_type_t gg = YCBCR_TO_G_709_SCALED(y, cb, cr) >> (COMP_BASE-12+depth);
comp_type_t bb = YCBCR_TO_B_709_SCALED(y, cb, cr) >> (COMP_BASE-12+depth);
comp_type_t rr = YCBCR_TO_R_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-12+depth);
comp_type_t gg = YCBCR_TO_G_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-12+depth);
comp_type_t bb = YCBCR_TO_B_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-12+depth);
r[j] = CLAMP_FULL(rr, 12);
g[j] = CLAMP_FULL(gg, 12);
b[j] = CLAMP_FULL(bb, 12);
@@ -430,13 +430,13 @@ static inline void yuv444pXXle_to_rg48(int depth, char * __restrict dst_buffer,
uint16_t *dst = (uint16_t *)(void *) (dst_buffer + y * pitch);
OPTIMIZED_FOR (int x = 0; x < width; ++x) {
comp_type_t y = (Y_SCALE * (*src_y++ - (1<<(depth-4))));
comp_type_t y = (Y_SCALE(depth) * (*src_y++ - (1<<(depth-4))));
comp_type_t cr = *src_cr++ - (1<<(depth-1));
comp_type_t cb = *src_cb++ - (1<<(depth-1));
comp_type_t r = YCBCR_TO_R_709_SCALED(y, cb, cr) >> (COMP_BASE-16+depth);
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> (COMP_BASE-16+depth);
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> (COMP_BASE-16+depth);
comp_type_t r = YCBCR_TO_R_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-16+depth);
comp_type_t g = YCBCR_TO_G_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-16+depth);
comp_type_t b = YCBCR_TO_B_709_SCALED(depth, y, cb, cr) >> (COMP_BASE-16+depth);
// r g b is now on 16 bit scale
*dst++ = CLAMP_FULL(r, 16);
@@ -1026,6 +1026,9 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric
static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restrict in_frame,
int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba)
{
enum {
S_DEPTH = 8,
};
assert((uintptr_t) dst_buffer % 4 == 0);
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
@@ -1038,10 +1041,10 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric
OPTIMIZED_FOR (int x = 0; x < width / 2; ++x) {
comp_type_t cb = *src_cbcr++ - 128;
comp_type_t cr = *src_cbcr++ - 128;
comp_type_t y = (*src_y++ - 16) * Y_SCALE;
comp_type_t r = YCBCR_TO_R_709_SCALED(y, cb, cr) >> COMP_BASE;
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE;
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE;
comp_type_t y = (*src_y++ - 16) * Y_SCALE(S_DEPTH);
comp_type_t r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE;
comp_type_t g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE;
comp_type_t b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE;
if (rgba) {
*((uint32_t *)(void *) dst) = MK_RGBA(r, g, b, alpha_mask, 8);
dst += 4;
@@ -1051,7 +1054,7 @@ static inline void nv12_to_rgb(char * __restrict dst_buffer, AVFrame * __restric
*dst++ = CLAMP_FULL(b, 8);
}
y = (*src_y++ - 16) * Y_SCALE;
y = (*src_y++ - 16) * Y_SCALE(S_DEPTH);
if (rgba) {
*((uint32_t *)(void *) dst) = MK_RGBA(r, g, b, alpha_mask, 8);
dst += 4;
@@ -1087,6 +1090,9 @@ static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, A
static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, AVFrame * __restrict in_frame,
int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba)
{
enum {
S_DEPTH = 8,
};
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
for(int y = 0; y < height / 2; ++y) {
@@ -1126,32 +1132,32 @@ static inline void yuv8p_to_rgb(int subsampling, char * __restrict dst_buffer, A
OPTIMIZED_FOR (int x = 0; x < width / 2; ++x) {
comp_type_t cb = *src_cb1++ - 128;
comp_type_t cr = *src_cr1++ - 128;
comp_type_t y = (*src_y1++ - 16) * Y_SCALE;
comp_type_t r = YCBCR_TO_R_709_SCALED(y, cb, cr);
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr);
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr);
comp_type_t y = (*src_y1++ - 16) * Y_SCALE(S_DEPTH);
comp_type_t r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr);
comp_type_t g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr);
comp_type_t b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr);
WRITE_RES_YUV8P_TO_RGB(dst1)
y = (*src_y1++ - 16) * Y_SCALE;
r = YCBCR_TO_R_709_SCALED(y, cb, cr);
g = YCBCR_TO_G_709_SCALED(y, cb, cr);
b = YCBCR_TO_B_709_SCALED(y, cb, cr);
y = (*src_y1++ - 16) * Y_SCALE(S_DEPTH);
r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr);
g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr);
b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr);
WRITE_RES_YUV8P_TO_RGB(dst1)
if (subsampling == 422) {
cb = *src_cb2++ - 128;
cr = *src_cr2++ - 128;
}
y = (*src_y2++ - 16) * Y_SCALE;
r = YCBCR_TO_R_709_SCALED(y, cb, cr);
g = YCBCR_TO_G_709_SCALED(y, cb, cr);
b = YCBCR_TO_B_709_SCALED(y, cb, cr);
y = (*src_y2++ - 16) * Y_SCALE(S_DEPTH);
r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr);
g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr);
b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr);
WRITE_RES_YUV8P_TO_RGB(dst2)
y = (*src_y2++ - 16) * Y_SCALE;
r = YCBCR_TO_R_709_SCALED(y, cb, cr);
g = YCBCR_TO_G_709_SCALED(y, cb, cr);
b = YCBCR_TO_B_709_SCALED(y, cb, cr);
y = (*src_y2++ - 16) * Y_SCALE(S_DEPTH);
r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr);
g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr);
b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr);
WRITE_RES_YUV8P_TO_RGB(dst2)
}
}
@@ -1193,6 +1199,9 @@ static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __rest
static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __restrict in_frame,
int width, int height, int pitch, const int * __restrict rgb_shift, bool rgba)
{
enum {
S_DEPTH = 8,
};
assert((uintptr_t) dst_buffer % 4 == 0);
uint32_t alpha_mask = 0xFFFFFFFFU ^ (0xFFU << rgb_shift[R]) ^ (0xFFU << rgb_shift[G]) ^ (0xFFU << rgb_shift[B]);
@@ -1206,10 +1215,10 @@ static inline void yuv444p_to_rgb(char * __restrict dst_buffer, AVFrame * __rest
OPTIMIZED_FOR (int x = 0; x < width; ++x) {
int cb = *src_cb++ - 128;
int cr = *src_cr++ - 128;
int y = *src_y++ * Y_SCALE;
comp_type_t r = YCBCR_TO_R_709_SCALED(y, cb, cr) >> COMP_BASE;
comp_type_t g = YCBCR_TO_G_709_SCALED(y, cb, cr) >> COMP_BASE;
comp_type_t b = YCBCR_TO_B_709_SCALED(y, cb, cr) >> COMP_BASE;
// remove the luma black-level offset (16 for 8 bits) before scaling, as
// the other YCbCr->RGB conversions do
int y = (*src_y++ - (1 << (S_DEPTH - 4))) * Y_SCALE(S_DEPTH);
comp_type_t r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE;
comp_type_t g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE;
comp_type_t b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr) >> COMP_BASE;
if (rgba) {
*((uint32_t *)(void *) dst) = MK_RGBA(r, g, b, alpha_mask, 8);
dst += 4;
@@ -1539,6 +1548,9 @@ static inline void yuvp10le_to_rgb(int subsampling, char * __restrict dst_buffer
static inline void yuvp10le_to_rgb(int subsampling, char * __restrict dst_buffer, AVFrame * __restrict frame,
int width, int height, int pitch, const int * __restrict rgb_shift, int out_bit_depth)
{
enum {
S_DEPTH = 10,
};
assert((uintptr_t) dst_buffer % 4 == 0);
assert((uintptr_t) frame->linesize[0] % 2 == 0);
assert((uintptr_t) frame->linesize[1] % 2 == 0);
@@ -1571,9 +1583,9 @@ static inline void yuvp10le_to_rgb(int subsampling, char * __restrict dst_buffer
OPTIMIZED_FOR (int x = 0; x < width / 2; ++x) {
comp_type_t cr = *src_cr1++ - (1<<9);
comp_type_t cb = *src_cb1++ - (1<<9);
comp_type_t rr = YCBCR_TO_R_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp));
comp_type_t gg = YCBCR_TO_G_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp));
comp_type_t bb = YCBCR_TO_B_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp));
comp_type_t rr = YCBCR_TO_R_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp));
comp_type_t gg = YCBCR_TO_G_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp));
comp_type_t bb = YCBCR_TO_B_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp));
# define WRITE_RES_YUV10P_TO_RGB(Y, DST) {\
comp_type_t r = Y + rr;\
@@ -1596,24 +1608,24 @@ static inline void yuvp10le_to_rgb(int subsampling, char * __restrict dst_buffer
}\
}
comp_type_t y1 = (Y_SCALE * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
comp_type_t y1 = (Y_SCALE(S_DEPTH) * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
WRITE_RES_YUV10P_TO_RGB(y1, dst1)
comp_type_t y11 = (Y_SCALE * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
comp_type_t y11 = (Y_SCALE(S_DEPTH) * (*src_y1++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
WRITE_RES_YUV10P_TO_RGB(y11, dst1)
if (subsampling == 422) {
cr = *src_cr2++ - (1<<9);
cb = *src_cb2++ - (1<<9);
rr = YCBCR_TO_R_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp));
gg = YCBCR_TO_G_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp));
bb = YCBCR_TO_B_709_SCALED(0, cb, cr) >> (COMP_BASE + (10 - bpp));
rr = YCBCR_TO_R_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp));
gg = YCBCR_TO_G_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp));
bb = YCBCR_TO_B_709_SCALED(S_DEPTH, 0, cb, cr) >> (COMP_BASE + (10 - bpp));
}
comp_type_t y2 = (Y_SCALE * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
comp_type_t y2 = (Y_SCALE(S_DEPTH) * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
WRITE_RES_YUV10P_TO_RGB(y2, dst2)
comp_type_t y22 = (Y_SCALE * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
comp_type_t y22 = (Y_SCALE(S_DEPTH) * (*src_y2++ - (1<<6))) >> (COMP_BASE + (10 - bpp));
WRITE_RES_YUV10P_TO_RGB(y22, dst2)
}
}
@@ -1656,13 +1668,16 @@ static inline void yuv444p10le_to_rgb(char * __restrict dst_buffer, AVFrame * __
comp_type_t cb = *src_cb++ - (1 << (S_DEPTH - 1));
comp_type_t cr = *src_cr++ - (1 << (S_DEPTH - 1));
comp_type_t y =
(*src_y++ - (1 << (S_DEPTH - 4))) * Y_SCALE;
(*src_y++ - (1 << (S_DEPTH - 4))) * Y_SCALE(S_DEPTH);
comp_type_t r =
YCBCR_TO_R_709_SCALED(y, cb, cr) >> (COMP_BASE + 2);
YCBCR_TO_R_709_SCALED(S_DEPTH, y, cb, cr) >>
(COMP_BASE + 2);
comp_type_t g =
YCBCR_TO_G_709_SCALED(y, cb, cr) >> (COMP_BASE + 2);
YCBCR_TO_G_709_SCALED(S_DEPTH, y, cb, cr) >>
(COMP_BASE + 2);
comp_type_t b =
YCBCR_TO_B_709_SCALED(y, cb, cr) >> (COMP_BASE + 2);
YCBCR_TO_B_709_SCALED(S_DEPTH, y, cb, cr) >>
(COMP_BASE + 2);
if (rgba) {
*(uint32_t *)(void *) dst = MK_RGBA(r, g, b, alpha_mask, 8);
dst += 4;

View File

@@ -651,6 +651,10 @@ static inline void r10k_to_yuv42Xp10le(AVFrame * __restrict out_frame, const uns
assert((uintptr_t) out_frame->linesize[1] % 2 == 0);
assert((uintptr_t) out_frame->linesize[2] % 2 == 0);
enum {
D_DEPTH = 10,
};
const int src_linesize = vc_get_linesize(width, R10k);
for(int y = 0; y < height; y++) {
uint16_t *dst_y = (uint16_t *)(void *) (out_frame->data[0] + out_frame->linesize[0] * y);
@@ -663,9 +667,18 @@ static inline void r10k_to_yuv42Xp10le(AVFrame * __restrict out_frame, const uns
comp_type_t g = (src[1] & 0x3f ) << 4 | src[2] >> 4;
comp_type_t b = (src[2] & 0x0f) << 6 | src[3] >> 2;
comp_type_t res_y = (RGB_TO_Y_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-4));
comp_type_t res_cb = (RGB_TO_CB_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-1));
comp_type_t res_cr = (RGB_TO_CR_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-1));
comp_type_t res_y =
(RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >>
(COMP_BASE)) +
(1 << (D_DEPTH - 4));
comp_type_t res_cb =
(RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >>
(COMP_BASE)) +
(1 << (D_DEPTH - 1));
comp_type_t res_cr =
(RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >>
(COMP_BASE)) +
(1 << (D_DEPTH - 1));
dst_y[x * 2] = CLAMP_LIMITED_Y(res_y, 10);
src += 4;
@@ -674,9 +687,15 @@ static inline void r10k_to_yuv42Xp10le(AVFrame * __restrict out_frame, const uns
g = (src[1] & 0x3f ) << 4 | src[2] >> 4;
b = (src[2] & 0x0f) << 6 | src[3] >> 2;
res_y = (RGB_TO_Y_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-4));
res_cb += (RGB_TO_CB_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-1));
res_cr += (RGB_TO_CR_709_SCALED(r, g, b) >> (COMP_BASE)) + (1<<(10-1));
res_y = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >>
(COMP_BASE)) +
(1 << (D_DEPTH - 4));
res_cb += (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >>
(COMP_BASE)) +
(1 << (D_DEPTH - 1));
res_cr += (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >>
(COMP_BASE)) +
(1 << (D_DEPTH - 1));
res_cb /= 2;
res_cr /= 2;
@@ -721,7 +740,7 @@ static void r10k_to_yuv422p10le(AVFrame * __restrict out_frame, const unsigned c
static inline void r10k_to_yuv444pXXle(int depth, AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height)
__attribute__((always_inline));
#endif
static inline void r10k_to_yuv444pXXle(int depth, AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height)
static inline void r10k_to_yuv444pXXle(int out_depth, AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height)
{
assert((uintptr_t) out_frame->linesize[0] % 2 == 0);
assert((uintptr_t) out_frame->linesize[1] % 2 == 0);
@@ -738,13 +757,22 @@ static inline void r10k_to_yuv444pXXle(int depth, AVFrame * __restrict out_frame
comp_type_t g = (src[1] & 0x3F ) << 4 | src[2] >> 4;
comp_type_t b = (src[2] & 0x0F) << 6 | src[3] >> 2;
comp_type_t res_y = (RGB_TO_Y_709_SCALED(r, g, b) >> (COMP_BASE+10-depth)) + (1<<(depth-4));
comp_type_t res_cb = (RGB_TO_CB_709_SCALED(r, g, b) >> (COMP_BASE+10-depth)) + (1<<(depth-1));
comp_type_t res_cr = (RGB_TO_CR_709_SCALED(r, g, b) >> (COMP_BASE+10-depth)) + (1<<(depth-1));
comp_type_t res_y =
(RGB_TO_Y_709_SCALED(out_depth, r, g, b) >>
(COMP_BASE + 10 - out_depth)) +
(1 << (out_depth - 4));
comp_type_t res_cb =
(RGB_TO_CB_709_SCALED(out_depth, r, g, b) >>
(COMP_BASE + 10 - out_depth)) +
(1 << (out_depth - 1));
comp_type_t res_cr =
(RGB_TO_CR_709_SCALED(out_depth, r, g, b) >>
(COMP_BASE + 10 - out_depth)) +
(1 << (out_depth - 1));
*dst_y++ = CLAMP(res_y, 1<<(depth-4), 235 * (1<<(depth-8)));
*dst_cb++ = CLAMP(res_cb, 1<<(depth-4), 240 * (1<<(depth-8)));
*dst_cr++ = CLAMP(res_cr, 1<<(depth-4), 240 * (1<<(depth-8)));
*dst_y++ = CLAMP_LIMITED_Y(res_y, out_depth);
*dst_cb++ = CLAMP_LIMITED_CBCR(res_cb, out_depth);
*dst_cr++ = CLAMP_LIMITED_CBCR(res_cr, out_depth);
src += 4;
}
}
@@ -777,12 +805,18 @@ static inline void r12l_to_yuv444pXXle(int depth, AVFrame * __restrict out_frame
assert((uintptr_t) out_frame->linesize[2] % 2 == 0);
#define WRITE_RES \
res_y = (RGB_TO_Y_709_SCALED(r, g, b) >> (COMP_BASE+12-depth)) + (1<<(depth-4));\
res_cb = (RGB_TO_CB_709_SCALED(r, g, b) >> (COMP_BASE+12-depth)) + (1<<(depth-1));\
res_cr = (RGB_TO_CR_709_SCALED(r, g, b) >> (COMP_BASE+12-depth)) + (1<<(depth-1));\
*dst_y++ = CLAMP(res_y, 1<<(depth-4), 235 * (1<<(depth-8)));\
*dst_cb++ = CLAMP(res_cb, 1<<(depth-4), 240 * (1<<(depth-8)));\
*dst_cr++ = CLAMP(res_cr, 1<<(depth-4), 240 * (1<<(depth-8)));
res_y = (RGB_TO_Y_709_SCALED(depth, r, g, b) >> \
(COMP_BASE + 12 - depth)) + \
(1 << (depth - 4)); \
res_cb = (RGB_TO_CB_709_SCALED(depth, r, g, b) >> \
(COMP_BASE + 12 - depth)) + \
(1 << (depth - 1)); \
res_cr = (RGB_TO_CR_709_SCALED(depth, r, g, b) >> \
(COMP_BASE + 12 - depth)) + \
(1 << (depth - 1)); \
*dst_y++ = CLAMP_LIMITED_Y(res_y, depth);\
*dst_cb++ = CLAMP_LIMITED_CBCR(res_cb, depth);\
*dst_cr++ = CLAMP_LIMITED_CBCR(res_cr, depth);
const int src_linesize = vc_get_linesize(width, R12L);
for (int y = 0; y < height; ++y) {
@@ -905,13 +939,22 @@ static inline void rg48_to_yuv444pXXle(int depth, AVFrame * __restrict out_frame
comp_type_t g = *src++;
comp_type_t b = *src++;
comp_type_t res_y = (RGB_TO_Y_709_SCALED(r, g, b) >> (COMP_BASE+16-depth)) + (1<<(depth-4));
comp_type_t res_cb = (RGB_TO_CB_709_SCALED(r, g, b) >> (COMP_BASE+16-depth)) + (1<<(depth-1));
comp_type_t res_cr = (RGB_TO_CR_709_SCALED(r, g, b) >> (COMP_BASE+16-depth)) + (1<<(depth-1));
comp_type_t res_y =
(RGB_TO_Y_709_SCALED(depth, r, g, b) >>
(COMP_BASE + 16 - depth)) +
(1 << (depth - 4));
comp_type_t res_cb =
(RGB_TO_CB_709_SCALED(depth, r, g, b) >>
(COMP_BASE + 16 - depth)) +
(1 << (depth - 1));
comp_type_t res_cr =
(RGB_TO_CR_709_SCALED(depth, r, g, b) >>
(COMP_BASE + 16 - depth)) +
(1 << (depth - 1));
*dst_y++ = CLAMP(res_y, 1<<(depth-4), 235 * (1<<(depth-8)));
*dst_cb++ = CLAMP(res_cb, 1<<(depth-4), 240 * (1<<(depth-8)));
*dst_cr++ = CLAMP(res_cr, 1<<(depth-4), 240 * (1<<(depth-8)));
*dst_y++ = CLAMP_LIMITED_Y(res_y, depth);
*dst_cb++ = CLAMP_LIMITED_CBCR(res_cb, depth);
*dst_cr++ = CLAMP_LIMITED_CBCR(res_cr, depth);
}
}
}
@@ -956,13 +999,13 @@ rgb_to_yuv444p(AVFrame *__restrict out_frame,
const comp_type_t b = *src++;
const comp_type_t res_y =
(RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) +
(RGB_TO_Y_709_SCALED(DEPTH, r, g, b) >> COMP_BASE) +
(1 << (DEPTH - 4));
const comp_type_t res_cb =
(RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE) +
(RGB_TO_CB_709_SCALED(DEPTH, r, g, b) >> COMP_BASE) +
(1 << (DEPTH - 1));
const comp_type_t res_cr =
(RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE) +
(RGB_TO_CR_709_SCALED(DEPTH, r, g, b) >> COMP_BASE) +
(1 << (DEPTH - 1));
*dst_y++ = CLAMP_LIMITED_Y(res_y, DEPTH);

View File

@@ -298,6 +298,9 @@ vc_copyliner10ktoRG48(unsigned char * __restrict dst, const unsigned char * __re
static void vc_copyliner10ktoY416(unsigned char * __restrict dst, const unsigned char * __restrict src, int dstlen, int rshift,
int gshift, int bshift) {
UNUSED(rshift), UNUSED(gshift), UNUSED(bshift);
enum {
D_DEPTH = 16,
};
assert((uintptr_t) dst % 2 == 0);
uint16_t *d = (void *) dst;
OPTIMIZED_FOR (int x = 0; x < dstlen; x += 8) {
@@ -309,12 +312,18 @@ static void vc_copyliner10ktoY416(unsigned char * __restrict dst, const unsigned
r = byte1 << 8U | (byte2 & 0xC0U);
g = (byte2 & 0x3FU) << 10U | (byte3 & 0xF0U) << 2U;
b = (byte3 & 0xFU) << 12U | (byte4 & 0xFCU) << 4U;
comp_type_t u = (RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15);
*d++ = CLAMP_LIMITED_CBCR(u, 16);
comp_type_t y = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + (1<<12);
*d++ = CLAMP_LIMITED_Y(y, 16);
comp_type_t v = (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15);
*d++ = CLAMP_LIMITED_CBCR(v, 16);
comp_type_t u =
(RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) +
(1 << (D_DEPTH - 1));
*d++ = CLAMP_LIMITED_CBCR(u, D_DEPTH);
comp_type_t y =
(RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) +
(1 << (D_DEPTH - 4));
*d++ = CLAMP_LIMITED_Y(y, D_DEPTH);
// chroma is centred at mid-scale (1 << (D_DEPTH - 1)), not at the luma
// black level (1 << (D_DEPTH - 4))
comp_type_t v =
(RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) +
(1 << (D_DEPTH - 1));
*d++ = CLAMP_LIMITED_CBCR(v, D_DEPTH);
*d++ = 0xFFFFU;
}
}
@@ -1428,17 +1437,25 @@ static void vc_copylineR12LtoRG48(unsigned char * __restrict dst, const unsigned
static void vc_copylineR12LtoY416(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
int gshift, int bshift)
{
enum {
D_DEPTH = 16,
};
UNUSED(rshift), UNUSED(gshift), UNUSED(bshift);
assert((uintptr_t) dst % sizeof(uint16_t) == 0);
uint16_t *d = (void *) dst;
#define WRITE_RES \
u = (RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15); \
*d++ = CLAMP_LIMITED_CBCR(u, 16); \
y = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + (1<<12); \
*d++ = CLAMP_LIMITED_Y(y, 16); \
v = (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15); \
*d++ = CLAMP_LIMITED_CBCR(v, 16); \
*d++ = 0xFFFFU;
u = (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + \
(1 << (D_DEPTH - 1)); \
*d++ = CLAMP_LIMITED_CBCR(u, D_DEPTH); \
y = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + \
(1 << (D_DEPTH - 4)); \
*d++ = CLAMP_LIMITED_Y(y, D_DEPTH); \
v = (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) + \
(1 << (D_DEPTH - 1)); \
*d++ = CLAMP_LIMITED_CBCR(v, D_DEPTH); \
*d++ = 0xFFFFU;
OPTIMIZED_FOR (int x = 0; x < dst_len; x += 64) {
comp_type_t r, g, b;
comp_type_t y, u, v;
@@ -1498,21 +1515,21 @@ vc_copylineR12LtoUYVY(unsigned char *__restrict dst,
uint8_t *d = (void *) dst;
#define WRITE_RES \
{ \
comp_type_t u = ((RGB_TO_CB_709_SCALED(r1, g1, b1) + \
RGB_TO_CB_709_SCALED(r2, g2, b2)) >> \
comp_type_t u = ((RGB_TO_CB_709_SCALED(D_DPTH, r1, g1, b1) + \
RGB_TO_CB_709_SCALED(D_DPTH, r2, g2, b2)) >> \
(COMP_BASE + D_DPTH + 1)) + \
COFF; \
*d++ = CLAMP_LIMITED_CBCR(u, D_DPTH); \
comp_type_t y = (RGB_TO_Y_709_SCALED(r1, g1, b1) >> \
comp_type_t y = (RGB_TO_Y_709_SCALED(D_DPTH, r1, g1, b1) >> \
(COMP_BASE + D_DPTH)) + \
YOFF; \
*d++ = CLAMP_LIMITED_Y(y, D_DPTH); \
comp_type_t v = ((RGB_TO_CR_709_SCALED(r1, g1, b1) + \
RGB_TO_CR_709_SCALED(r2, g2, b2)) >> \
comp_type_t v = ((RGB_TO_CR_709_SCALED(D_DPTH, r1, g1, b1) + \
RGB_TO_CR_709_SCALED(D_DPTH, r2, g2, b2)) >> \
(COMP_BASE + D_DPTH + 1)) + \
COFF; \
*d++ = CLAMP_LIMITED_CBCR(v, D_DPTH); \
y = (RGB_TO_Y_709_SCALED(r2, g2, b2) >> \
y = (RGB_TO_Y_709_SCALED(D_DPTH, r2, g2, b2) >> \
(COMP_BASE + D_DPTH)) + \
YOFF; \
*d++ = CLAMP_LIMITED_Y(y, D_DPTH); \
@@ -1768,17 +1785,21 @@ static void vc_copylineRG48toR12L(unsigned char * __restrict dst, const unsigned
static void vc_copylineY416toR12L(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
int gshift, int bshift)
{
enum {
S_DEPTH = 16,
D_DEPTH = 12,
};
#define GET_NEXT \
u = *in++ - (1<<15); \
y = Y_SCALE * (*in++ - (1<<12)); \
v = *in++ - (1<<15); \
u = *in++ - (1 << (S_DEPTH - 1)); \
y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4))); \
v = *in++ - (1 << (S_DEPTH - 1)); \
in++; \
r = (YCBCR_TO_R_709_SCALED(y, u, v) >> (COMP_BASE + 4U)); \
g = (YCBCR_TO_G_709_SCALED(y, u, v) >> (COMP_BASE + 4U)); \
b = (YCBCR_TO_B_709_SCALED(y, u, v) >> (COMP_BASE + 4U)); \
r = CLAMP_FULL(r, 12); \
g = CLAMP_FULL(g, 12); \
b = CLAMP_FULL(b, 12);
r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 4U); \
g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 4U); \
b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 4U); \
r = CLAMP_FULL(r, D_DEPTH); \
g = CLAMP_FULL(g, D_DEPTH); \
b = CLAMP_FULL(b, D_DEPTH);
UNUSED(rshift), UNUSED(gshift), UNUSED(bshift);
assert((uintptr_t) src % 2 == 0);
@@ -1852,19 +1873,22 @@ static void vc_copylineY416toR12L(unsigned char * __restrict dst, const unsigned
static void vc_copylineY416toR10k(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
int gshift, int bshift)
{
enum {
S_DEPTH = 16,
};
UNUSED(rshift), UNUSED(gshift), UNUSED(bshift);
assert((uintptr_t) src % 2 == 0);
const uint16_t *in = (const void *) src;
OPTIMIZED_FOR (int x = 0; x < dst_len; x += 4) {
comp_type_t y, u, v, r, g, b;
u = *in++ - (1<<15);
y = Y_SCALE * (*in++ - (1<<12));
v = *in++ - (1<<15);
u = *in++ - (1 << (S_DEPTH - 1));
y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4)));
v = *in++ - (1 << (S_DEPTH - 1));
in++;
r = (YCBCR_TO_R_709_SCALED(y, u, v) >> (COMP_BASE + 6U));
g = (YCBCR_TO_G_709_SCALED(y, u, v) >> (COMP_BASE + 6U));
b = (YCBCR_TO_B_709_SCALED(y, u, v) >> (COMP_BASE + 6U));
r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 6U);
g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 6U);
b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 6U);
r = CLAMP_FULL(r, 10);
g = CLAMP_FULL(g, 10);
b = CLAMP_FULL(b, 10);
@@ -1879,19 +1903,22 @@ static void vc_copylineY416toR10k(unsigned char * __restrict dst, const unsigned
static void vc_copylineY416toRGB(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
int gshift, int bshift)
{
enum {
S_DEPTH = 16,
};
UNUSED(rshift), UNUSED(gshift), UNUSED(bshift);
assert((uintptr_t) src % 2 == 0);
const uint16_t *in = (const void *) src;
OPTIMIZED_FOR (int x = 0; x < dst_len; x += 3) {
comp_type_t y, u, v, r, g, b;
u = *in++ - (1<<15);
y = Y_SCALE * (*in++ - (1<<12));
v = *in++ - (1<<15);
u = *in++ - (1 << (S_DEPTH - 1));
y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4)));
v = *in++ - (1 << (S_DEPTH - 1));
in++;
r = (YCBCR_TO_R_709_SCALED(y, u, v) >> (COMP_BASE + 8U));
g = (YCBCR_TO_G_709_SCALED(y, u, v) >> (COMP_BASE + 8U));
b = (YCBCR_TO_B_709_SCALED(y, u, v) >> (COMP_BASE + 8U));
r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U);
g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U);
b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U);
r = CLAMP_FULL(r, 8);
g = CLAMP_FULL(g, 8);
b = CLAMP_FULL(b, 8);
@@ -1905,6 +1932,9 @@ static void vc_copylineY416toRGB(unsigned char * __restrict dst, const unsigned
static void vc_copylineY416toRGBA(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
int gshift, int bshift)
{
enum {
S_DEPTH = 16,
};
assert((uintptr_t) src % 2 == 0);
assert((uintptr_t) dst % 4 == 0);
const uint16_t *in = (const void *) src;
@@ -1913,13 +1943,13 @@ static void vc_copylineY416toRGBA(unsigned char * __restrict dst, const unsigned
OPTIMIZED_FOR (int x = 0; x < dst_len; x += 4) {
comp_type_t y, u, v, r, g, b;
u = *in++ - (1<<15);
y = Y_SCALE * (*in++ - (1<<12));
v = *in++ - (1<<15);
u = *in++ - (1 << (S_DEPTH - 1));
y = Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4)));
v = *in++ - (1 << (S_DEPTH - 1));
in++;
r = (YCBCR_TO_R_709_SCALED(y, u, v) >> (COMP_BASE + 8U));
g = (YCBCR_TO_G_709_SCALED(y, u, v) >> (COMP_BASE + 8U));
b = (YCBCR_TO_B_709_SCALED(y, u, v) >> (COMP_BASE + 8U));
r = YCBCR_TO_R_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U);
g = YCBCR_TO_G_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U);
b = YCBCR_TO_B_709_SCALED(S_DEPTH, y, u, v) >> (COMP_BASE + 8U);
r = CLAMP_FULL(r, 8);
g = CLAMP_FULL(g, 8);
b = CLAMP_FULL(b, 8);
@@ -2252,20 +2282,23 @@ static void vc_copylineRG48toUYVY(unsigned char * __restrict dst, const unsigned
*/
static void vc_copylineRG48toV210(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
int gshift, int bshift) {
enum {
D_DEPTH = 10,
};
#define COMP_OFF (COMP_BASE+(16-10))
#define FETCH_BLOCK \
r = *in++; \
g = *in++; \
b = *in++; \
y1 = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_OFF) + (1<<6); \
u = RGB_TO_CB_709_SCALED(r, g, b) >> COMP_OFF; \
v = RGB_TO_CR_709_SCALED(r, g, b) >> COMP_OFF; \
y1 = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF) + (1<<6); \
u = RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF; \
v = RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF; \
r = *in++; \
g = *in++; \
b = *in++; \
y2 = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_OFF) + (1<<6); \
u += RGB_TO_CB_709_SCALED(r, g, b) >> COMP_OFF; \
v += RGB_TO_CR_709_SCALED(r, g, b) >> COMP_OFF; \
y2 = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF) + (1<<6); \
u += RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF; \
v += RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_OFF; \
y1 = CLAMP_LIMITED_Y(y1, 10); \
y2 = CLAMP_LIMITED_Y(y2, 10); \
u = u / 2 + (1<<9); \
@@ -2303,6 +2336,9 @@ static void vc_copylineRG48toV210(unsigned char * __restrict dst, const unsigned
static void vc_copylineRG48toY216(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
int gshift, int bshift) {
enum {
D_DEPTH = 16,
};
UNUSED(rshift);
UNUSED(gshift);
UNUSED(bshift);
@@ -2316,24 +2352,33 @@ static void vc_copylineRG48toY216(unsigned char * __restrict dst, const unsigned
r = *in++;
g = *in++;
b = *in++;
y = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + (1<<12);
y = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) +
(1 << (D_DEPTH - 4));
*d++ = CLAMP_LIMITED_Y(y, 16);
u = (RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE);
v = (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE);
u = (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE);
v = (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE);
r = *in++;
g = *in++;
b = *in++;
u = (u + (RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE) / 2) + (1<<15);
u = (u + (RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) /
2) +
(1 << 15);
*d++ = CLAMP_LIMITED_CBCR(u, 16);
y = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + (1<<12);
y = (RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) +
(1 << 12);
*d++ = CLAMP_LIMITED_Y(y, 16);
v = (v + (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE) / 2) + (1<<15);
v = (v + (RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) /
2) +
(1 << 15);
*d++ = CLAMP_LIMITED_CBCR(v, 16);
}
}
static void vc_copylineRG48toY416(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
int gshift, int bshift) {
enum {
D_DEPTH = 16,
};
UNUSED(rshift);
UNUSED(gshift);
UNUSED(bshift);
@@ -2346,18 +2391,27 @@ static void vc_copylineRG48toY416(unsigned char * __restrict dst, const unsigned
r = *in++;
g = *in++;
b = *in++;
comp_type_t u = (RGB_TO_CB_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15);
*d++ = CLAMP_LIMITED_CBCR(u, 16);
comp_type_t y = (RGB_TO_Y_709_SCALED(r, g, b) >> COMP_BASE) + (1<<12);
*d++ = CLAMP_LIMITED_Y(y, 16);
comp_type_t v = (RGB_TO_CR_709_SCALED(r, g, b) >> COMP_BASE) + (1<<15);
*d++ = CLAMP_LIMITED_CBCR(v, 16);
comp_type_t u =
(RGB_TO_CB_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) +
(1 << (D_DEPTH - 1));
*d++ = CLAMP_LIMITED_CBCR(u, D_DEPTH);
comp_type_t y =
(RGB_TO_Y_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) +
(1 << (D_DEPTH - 4));
*d++ = CLAMP_LIMITED_Y(y, D_DEPTH);
comp_type_t v =
(RGB_TO_CR_709_SCALED(D_DEPTH, r, g, b) >> COMP_BASE) +
(1 << (D_DEPTH - 1));
*d++ = CLAMP_LIMITED_CBCR(v, D_DEPTH);
*d++ = 0xFFFFU;
}
}
static void vc_copylineY416toRG48(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
int gshift, int bshift) {
enum {
S_DEPTH = 16,
};
UNUSED(rshift);
UNUSED(gshift);
UNUSED(bshift);
@@ -2366,13 +2420,17 @@ static void vc_copylineY416toRG48(unsigned char * __restrict dst, const unsigned
const uint16_t *in = (const void *) src;
uint16_t *d = (void *) dst;
OPTIMIZED_FOR (int x = 0; x < dst_len; x += 6) {
comp_type_t u = *in++ - (1<<15);
comp_type_t y = Y_SCALE * (*in++ - (1<<12));
comp_type_t v = *in++ - (1<<15);
comp_type_t u = *in++ - (1 << (S_DEPTH - 1));
comp_type_t y =
Y_SCALE(S_DEPTH) * (*in++ - (1 << (S_DEPTH - 4)));
comp_type_t v = *in++ - (1 << (S_DEPTH - 1));
in++;
comp_type_t r = (YCBCR_TO_R_709_SCALED(y, u, v) >> COMP_BASE);
comp_type_t g = (YCBCR_TO_G_709_SCALED(y, u, v) >> COMP_BASE);
comp_type_t b = (YCBCR_TO_B_709_SCALED(y, u, v) >> COMP_BASE);
comp_type_t r =
YCBCR_TO_R_709_SCALED(S_DEPTH, y, u, v) >> COMP_BASE;
comp_type_t g =
YCBCR_TO_G_709_SCALED(S_DEPTH, y, u, v) >> COMP_BASE;
comp_type_t b =
YCBCR_TO_B_709_SCALED(S_DEPTH, y, u, v) >> COMP_BASE;
*d++ = CLAMP_FULL(r, 16);
*d++ = CLAMP_FULL(g, 16);
*d++ = CLAMP_FULL(b, 16);
@@ -2699,12 +2757,12 @@ static void vc_copylineV210toRGB(unsigned char * __restrict dst, const unsigned
};
UNUSED(rshift), UNUSED(gshift), UNUSED(bshift);
#define WRITE_YUV_AS_RGB(y, u, v) \
(y) = Y_SCALE * ((y) - Y_SHIFT); \
val = (YCBCR_TO_R_709_SCALED((y), (u), (v)) >> (COMP_BASE)); \
(y) = Y_SCALE(IDEPTH) * ((y) - Y_SHIFT); \
val = (YCBCR_TO_R_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE)); \
*(dst++) = CLAMP_FULL(val, ODEPTH); \
val = (YCBCR_TO_G_709_SCALED((y), (u), (v)) >> (COMP_BASE)); \
val = (YCBCR_TO_G_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE)); \
*(dst++) = CLAMP_FULL(val, ODEPTH); \
val = (YCBCR_TO_B_709_SCALED((y), (u), (v)) >> (COMP_BASE)); \
val = (YCBCR_TO_B_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE)); \
*(dst++) = CLAMP_FULL(val, ODEPTH);
// read 8 bits from v210 directly
@@ -2760,12 +2818,12 @@ vc_copylineV210toRG48(unsigned char *__restrict d,
};
UNUSED(rshift), UNUSED(gshift), UNUSED(bshift);
#define WRITE_YUV_AS_RGB(y, u, v) \
(y) = Y_SCALE * ((y) - Y_SHIFT); \
val = (YCBCR_TO_R_709_SCALED((y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \
(y) = Y_SCALE(IDEPTH) * ((y) - Y_SHIFT); \
val = (YCBCR_TO_R_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \
*(dst++) = CLAMP_FULL(val, ODEPTH); \
val = (YCBCR_TO_G_709_SCALED((y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \
val = (YCBCR_TO_G_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \
*(dst++) = CLAMP_FULL(val, ODEPTH); \
val = (YCBCR_TO_B_709_SCALED((y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \
val = (YCBCR_TO_B_709_SCALED(IDEPTH, (y), (u), (v)) >> (COMP_BASE - DIFF_BPP)); \
*(dst++) = CLAMP_FULL(val, ODEPTH);
// read 8 bits from v210 directly

View File

@@ -1529,7 +1529,7 @@ static GLuint gl_substitute_compile_link(const char *vprogram, const char *fprog
double kr = cs_coeffs[2 * index];
double kb = cs_coeffs[2 * index + 1];
const char *placeholders[] = { "Y_SCALED_PLACEHOLDER", "R_CR_PLACEHOLDER", "G_CB_PLACEHOLDER", "G_CR_PLACEHOLDER", "B_CB_PLACEHOLDER" };
double values[] = { Y_LIMIT_INV, R_CR(kr,kb), G_CB(kr,kb), G_CR(kr,kb), B_CB(kr,kb)};
double values[] = { Y_LIMIT_INV(8), R_CR(8,kr,kb), G_CB(8,kr,kb), G_CR(8,kr,kb), B_CB(8,kr,kb)};
for (size_t i = 0; i < sizeof placeholders / sizeof placeholders[0]; ++i) {
char *tok = fp;

View File

@@ -112,15 +112,15 @@ static void load_yuv_coefficients(GlProgram& program){
glUseProgram(program.get());
GLuint loc = glGetUniformLocation(program.get(), "luma_scale");
glUniform1f(loc, Y_LIMIT_INV);
glUniform1f(loc, Y_LIMIT_INV(8));
loc = glGetUniformLocation(program.get(), "r_cr");
glUniform1f(loc, R_CR(kr, kb));
glUniform1f(loc, R_CR(8, kr, kb));
loc = glGetUniformLocation(program.get(), "g_cr");
glUniform1f(loc, G_CR(kr, kb));
glUniform1f(loc, G_CR(8, kr, kb));
loc = glGetUniformLocation(program.get(), "g_cb");
glUniform1f(loc, G_CB(kr, kb));
glUniform1f(loc, G_CB(8, kr, kb));
loc = glGetUniformLocation(program.get(), "b_cb");
glUniform1f(loc, B_CB(kr, kb));
glUniform1f(loc, B_CB(8, kr, kb));
}
class Rendering_convertor : public Frame_convertor{

View File

@@ -1,12 +1,10 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#include "config_unix.h"
#include "config_win32.h"
#endif
#include <cstring> // for strcmp
#include <cmath> // for abs
#include <list>
#include <sstream>
#include <string> // for allocator, basic_string, operator+, string
#include "color.h"
#include "types.h"
#include "utils/string.h"
#include "unit_common.h"
@@ -14,12 +12,63 @@
#include "video_frame.h"
extern "C" {
int misc_test_replace_all();
int misc_test_video_desc_io_op_symmetry();
int misc_test_color_coeff_range();
int misc_test_replace_all();
int misc_test_video_desc_io_op_symmetry();
}
using namespace std;
/**
 * Checks that the scaled BT.709 RGB->YCbCr coefficients map the extreme RGB
 * inputs approximately onto the bounds of the nominal range for each tested
 * bit depth (== there is no significant shift).
 *
 * For every depth and every component, the converted value is compared with
 * the respective LIMIT_LO/LIMIT_HI_* bound; an absolute difference of up to
 * 1 << (depth - 8) is tolerated.
 *
 * @retval  0 all checks passed
 * @retval -1 some check failed (returned from within ASSERT_LE_MESSAGE)
 */
int
misc_test_color_coeff_range()
{
        const int depths[] = { 8, 10, 12, 16 };

        for (unsigned i = 0; i < sizeof depths / sizeof depths[0]; ++i) {
                const int d        = depths[i];
                const int d_max    = (1 << d) - 1; // maximal RGB code value
                const int max_diff = 1 << (d - 8); // tolerated deviation

                // Y
                // The whole difference must be inside abs() - otherwise
                // a negative deviation yields a negative number that never
                // exceeds max_diff and the check silently passes.
                ASSERT_LE_MESSAGE(
                    "min Y diverges from nominal range min", max_diff,
                    abs((RGB_TO_Y_709_SCALED(d, 0, 0, 0) >> COMP_BASE) +
                        LIMIT_LO(d) - LIMIT_LO(d)));
                ASSERT_LE_MESSAGE(
                    "max Y diverges from nominal range max", max_diff,
                    abs((RGB_TO_Y_709_SCALED(d, d_max, d_max, d_max) >>
                         COMP_BASE) +
                        LIMIT_LO(d) - LIMIT_HI_Y(d)));

                // Cb - the result is signed, (1 << (d - 1)) is added to
                // center it before comparing with the bounds
                ASSERT_LE_MESSAGE(
                    "min Cb diverges from nominal range min", max_diff,
                    abs((RGB_TO_CB_709_SCALED(d, d_max, d_max, 0) >>
                         COMP_BASE) +
                        (1 << (d - 1)) - LIMIT_LO(d)));
                ASSERT_LE_MESSAGE(
                    "max Cb diverges from nominal range max", max_diff,
                    abs((RGB_TO_CB_709_SCALED(d, 0, 0, d_max) >> COMP_BASE) +
                        (1 << (d - 1)) - LIMIT_HI_CBCR(d)));

                // Cr - centered the same way as Cb
                ASSERT_LE_MESSAGE(
                    "min Cr diverges from nominal range min", max_diff,
                    abs((RGB_TO_CR_709_SCALED(d, 0, d_max, d_max) >>
                         COMP_BASE) +
                        (1 << (d - 1)) - LIMIT_LO(d)));
                ASSERT_LE_MESSAGE(
                    "max Cr diverges from nominal range max", max_diff,
                    abs((RGB_TO_CR_709_SCALED(d, d_max, 0, 0) >> COMP_BASE) +
                        (1 << (d - 1)) - LIMIT_HI_CBCR(d)));
        }

        return 0;
}
#ifdef __clang__
#pragma clang diagnostic ignored "-Wstring-concatenation"
#endif

View File

@@ -86,6 +86,7 @@ DECLARE_TEST(get_framerate_test_3000);
DECLARE_TEST(get_framerate_test_free);
DECLARE_TEST(gpujpeg_test_simple);
DECLARE_TEST(libavcodec_test_get_decoder_from_uv_to_uv);
DECLARE_TEST(misc_test_color_coeff_range);
DECLARE_TEST(misc_test_replace_all);
DECLARE_TEST(misc_test_video_desc_io_op_symmetry);
@@ -119,6 +120,7 @@ struct {
DEFINE_TEST(get_framerate_test_free),
DEFINE_TEST(gpujpeg_test_simple),
DEFINE_TEST(libavcodec_test_get_decoder_from_uv_to_uv),
DEFINE_TEST(misc_test_color_coeff_range),
DEFINE_TEST(misc_test_replace_all),
DEFINE_TEST(misc_test_video_desc_io_op_symmetry),
};

View File

@@ -67,4 +67,22 @@
}
#endif
/*
 * Asserts that actual >= expected. If the condition does not hold, a
 * diagnostic containing both values (converted to intmax_t) and msg is
 * printed to stderr and the calling function returns -1.
 *
 * The arguments are evaluated more than once, so they should be free of
 * side effects.
 */
#define ASSERT_GE_MESSAGE(msg, expected, actual) \
        if ((expected) > (actual)) { \
                fprintf(stderr, \
                        "Assertion failed - expected >=%" PRIdMAX ", got %" \
                        PRIdMAX ": %s\n", \
                        (intmax_t) (expected), (intmax_t) (actual), (msg)); \
                return -1; \
        }
/*
 * Asserts that actual <= expected. If the condition does not hold, a
 * diagnostic containing both values (converted to intmax_t) and msg is
 * printed to stderr and the calling function returns -1.
 *
 * The arguments are evaluated more than once, so they should be free of
 * side effects.
 */
#define ASSERT_LE_MESSAGE(msg, expected, actual) \
        if ((actual) > (expected)) { \
                fprintf(stderr, \
                        "Assertion failed - expected <=%" PRIdMAX \
                        ", got %" PRIdMAX ": %s\n", \
                        (intmax_t) (expected), (intmax_t) (actual), (msg)); \
                return -1; \
        }
#endif // defined TEST_UNIT_COMMON_H_7A471D89_C7E4_470A_A330_74F4BD85BBAC