From fa8f3c64beea2006d9bf9e59ce5fcfe8756862ae Mon Sep 17 00:00:00 2001 From: Martin Pulec Date: Mon, 2 Jan 2023 15:08:27 +0100 Subject: [PATCH] fixed vc_copylineV210toY216 conversion --- src/types.h | 2 +- src/video_codec.c | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/types.h b/src/types.h index c76cc50ca..513298d59 100644 --- a/src/types.h +++ b/src/types.h @@ -95,7 +95,7 @@ typedef enum { RG48, ///< 16-bit RGB little-endian (RlRhGl...) AV1, ///< AOMedia Video 1 I420, ///< planar YCbCr 4:2:0 - Y216, ///< YCbCr 422 16-bit - Y0 Cb Y1 Cr + Y216, ///< YCbCr 422 16-bit little-endian - Y0 Cb Y1 Cr Y416, ///< interleaved little-endian YCbCr 4444 16-bit - UYVA PRORES, ///< abstract Apple ProRes, must not be used in transmit PRORES_4444, ///< Apple ProRes 4444 diff --git a/src/video_codec.c b/src/video_codec.c index 6e8e488d6..3882d32e3 100644 --- a/src/video_codec.c +++ b/src/video_codec.c @@ -2763,13 +2763,37 @@ static void vc_copylineV210toY216(unsigned char * __restrict dst, const unsigned UNUSED(bshift); assert((uintptr_t) dst % 2 == 0); assert((uintptr_t) src % 4 == 0); - OPTIMIZED_FOR (int x = 0; x < dst_len / 6; ++x) { - const uint32_t *s = (const void *) (src + x * 4); - uint16_t *d = (void *) (dst + x * 6); + OPTIMIZED_FOR (int x = 0; x < dst_len / 24; ++x) { + const uint32_t *s = (const void *) (src + x * 16); + uint16_t *d = (void *) (dst + x * 24); uint32_t tmp = *s++; - *d++ = (tmp & 0x3FFU) << 6U; - *d++ = ((tmp >> 10U) & 0x3FFU) << 6U; - *d++ = ((tmp >> 20U) & 0x3FFU) << 6U; + unsigned u = (tmp & 0x3FFU) << 6U; + unsigned y0 = ((tmp >> 10U) & 0x3FFU) << 6U; + unsigned v = ((tmp >> 20U) & 0x3FFU) << 6U; + tmp = *s++; + unsigned y1 = (tmp & 0x3FFU) << 6U; + *d++ = y0; + *d++ = u; + *d++ = y1; + *d++ = v; + u = ((tmp >> 10U) & 0x3FFU) << 6U; + y0 = ((tmp >> 20U) & 0x3FFU) << 6U; + tmp = *s++; + v = (tmp & 0x3FFU) << 6U; + y1 = ((tmp >> 10U) & 0x3FFU) << 6U; + *d++ = y0; + *d++ = u; + *d++ = y1; + *d++ = v; + u = ((tmp >> 20U) & 0x3FFU) << 6U; + tmp = *s++; + y0 = (tmp & 0x3FFU) << 6U; + v = ((tmp >> 10U) & 0x3FFU) << 6U; + y1 = ((tmp >> 20U) & 0x3FFU) << 6U; + *d++ = y0; + *d++ = u; + *d++ = y1; + *d++ = v; } }