From fa764db2d6960ed8589058c0926eaa661ce018b8 Mon Sep 17 00:00:00 2001 From: Martin Pulec Date: Fri, 14 Feb 2025 12:36:58 +0100 Subject: [PATCH] share some more convs between sdl3, to_lavc, testc 1. SDL3: add UYVY to NV12 conversion (not supported before) 2. to_lavc - use rgba_to_bgra - the vc_copyline version was not keeping alpha (if ever used - this may cause regressions, unfortunately, if alpha is assumed to be 100%, because the orig version always set it) 3. uyvy_to_i420 is now used by testcard and to_lavc uyvy_to_i420: do not write out of bounds If width % 2 == 1, one extra luma written on the first line of a pair would overwrite the first luma of the second line. --- src/libavcodec/to_lavc_vid_conv.c | 156 +++------------------------ src/pixfmt_conv.c | 160 +++++++++++++++++++++++++++- src/pixfmt_conv.h | 18 ++-- src/video_capture/testcard_common.c | 51 +++------ src/video_display/sdl3.c | 130 +++++++++++----------- 5 files changed, 258 insertions(+), 257 deletions(-) diff --git a/src/libavcodec/to_lavc_vid_conv.c b/src/libavcodec/to_lavc_vid_conv.c index 9dbd78025..8b4773329 100644 --- a/src/libavcodec/to_lavc_vid_conv.c +++ b/src/libavcodec/to_lavc_vid_conv.c @@ -79,51 +79,8 @@ static void uyvy_to_yuv420p(AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) { - int y; - for (y = 0; y < height - 1; y += 2) { - /* every even row */ - const unsigned char *src = in_data + y * (((width + 1) & ~1) * 2); - /* every odd row */ - const unsigned char *src2 = in_data + (y + 1) * (((width + 1) & ~1) * 2); - unsigned char *dst_y = out_frame->data[0] + out_frame->linesize[0] * y; - unsigned char *dst_y2 = out_frame->data[0] + out_frame->linesize[0] * (y + 1); - unsigned char *dst_cb = out_frame->data[1] + out_frame->linesize[1] * (y / 2); - unsigned char *dst_cr = out_frame->data[2] + out_frame->linesize[2] * (y / 2); - - int x; - OPTIMIZED_FOR (x = 0; x < width - 1; x += 2) { - *dst_cb++ = (*src++ + *src2++) / 2; - *dst_y++ = *src++; - *dst_y2++ = *src2++; - *dst_cr++ = (*src++ 
+ *src2++) / 2; - *dst_y++ = *src++; - *dst_y2++ = *src2++; - } - if (x < width) { - *dst_cb++ = (*src++ + *src2++) / 2; - *dst_y++ = *src++; - *dst_y2++ = *src2++; - *dst_cr++ = (*src++ + *src2++) / 2; - } - } - if (y < height) { - const unsigned char *src = in_data + y * (((width + 1) & ~1) * 2); - unsigned char *dst_y = out_frame->data[0] + out_frame->linesize[0] * y; - unsigned char *dst_cb = out_frame->data[1] + out_frame->linesize[1] * (y / 2); - unsigned char *dst_cr = out_frame->data[2] + out_frame->linesize[2] * (y / 2); - int x; - OPTIMIZED_FOR (x = 0; x < width - 1; x += 2) { - *dst_cb++ = *src++; - *dst_y++ = *src++; - *dst_cr++ = *src++; - *dst_y++ = *src++; - } - if (x < width) { - *dst_cb++ = *src++; - *dst_y++ = *src++; - *dst_cr++ = *src++; - } - } + uyvy_to_i420(out_frame->data, out_frame->linesize, in_data, width, + height); } static void uyvy_to_yuv422p(AVFrame * __restrict out_frame, const unsigned char * __restrict src, int width, int height) @@ -181,86 +138,10 @@ static void uyvy_to_yuv444p(AVFrame * __restrict out_frame, const unsigned char } } -static void uyvy_to_nv12(AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) +static void to_lavc_uyvy_to_nv12(AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) { - for(int y = 0; y < height; y += 2) { - /* every even row */ - const unsigned char *src = in_data + y * (width * 2); - /* every odd row */ - const unsigned char *src2 = in_data + (y + 1) * (width * 2); - unsigned char *dst_y = out_frame->data[0] + out_frame->linesize[0] * y; - unsigned char *dst_y2 = out_frame->data[0] + out_frame->linesize[0] * (y + 1); - unsigned char *dst_cbcr = out_frame->data[1] + out_frame->linesize[1] * y / 2; - - int x = 0; -#ifdef __SSE3__ - __m128i yuv; - __m128i yuv2; - __m128i y1; - __m128i y2; - __m128i y3; - __m128i y4; - __m128i uv; - __m128i uv2; - __m128i uv3; - __m128i uv4; - __m128i ymask = 
_mm_set1_epi32(0xFF00FF00); - __m128i dsty; - __m128i dsty2; - __m128i dstuv; - - for (; x < (width - 15); x += 16){ - yuv = _mm_lddqu_si128((__m128i const*)(const void *) src); - yuv2 = _mm_lddqu_si128((__m128i const*)(const void *) src2); - src += 16; - src2 += 16; - - y1 = _mm_and_si128(ymask, yuv); - y1 = _mm_bsrli_si128(y1, 1); - y2 = _mm_and_si128(ymask, yuv2); - y2 = _mm_bsrli_si128(y2, 1); - - uv = _mm_andnot_si128(ymask, yuv); - uv2 = _mm_andnot_si128(ymask, yuv2); - - uv = _mm_avg_epu8(uv, uv2); - - yuv = _mm_lddqu_si128((__m128i const*)(const void *) src); - yuv2 = _mm_lddqu_si128((__m128i const*)(const void *) src2); - src += 16; - src2 += 16; - - y3 = _mm_and_si128(ymask, yuv); - y3 = _mm_bsrli_si128(y3, 1); - y4 = _mm_and_si128(ymask, yuv2); - y4 = _mm_bsrli_si128(y4, 1); - - uv3 = _mm_andnot_si128(ymask, yuv); - uv4 = _mm_andnot_si128(ymask, yuv2); - - uv3 = _mm_avg_epu8(uv3, uv4); - - dsty = _mm_packus_epi16(y1, y3); - dsty2 = _mm_packus_epi16(y2, y4); - dstuv = _mm_packus_epi16(uv, uv3); - _mm_storeu_si128((__m128i *)(void *) dst_y, dsty); - _mm_storeu_si128((__m128i *)(void *) dst_y2, dsty2); - _mm_storeu_si128((__m128i *)(void *) dst_cbcr, dstuv); - dst_y += 16; - dst_y2 += 16; - dst_cbcr += 16; - } -#endif - - OPTIMIZED_FOR (; x < width - 1; x += 2) { - *dst_cbcr++ = (*src++ + *src2++) / 2; - *dst_y++ = *src++; - *dst_y2++ = *src2++; - *dst_cbcr++ = (*src++ + *src2++) / 2; - *dst_y++ = *src++; - *dst_y2++ = *src2++; - } - } + uyvy_to_nv12(out_frame->data, out_frame->linesize, in_data, width, + height); } static void v210_to_yuv420p10le(AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) @@ -547,9 +428,8 @@ to_lavc_v210_to_p010le(AVFrame *__restrict out_frame, const unsigned char *__restrict in_data, int width, int height) { - char *out_data[2] = { (char *) out_frame->data[0], (char *) out_frame->data[1]}; - v210_to_p010le(out_data, out_frame->linesize, (const char *) in_data, - width, height); + 
v210_to_p010le(out_frame->data, out_frame->linesize, in_data, width, + height); } static void @@ -557,9 +437,8 @@ to_lavc_y216_to_p010le(AVFrame *__restrict out_frame, const unsigned char *__restrict in_data, int width, int height) { - char *out_data[2] = { (char *) out_frame->data[0], (char *) out_frame->data[1]}; - y216_to_p010le(out_data, out_frame->linesize, (const char *) in_data, - width, height); + y216_to_p010le(out_frame->data, out_frame->linesize, in_data, width, + height); } #if P210_PRESENT @@ -1092,14 +971,13 @@ static void rgba_to_gbrp(AVFrame * __restrict out_frame, const unsigned char * _ rgb_rgba_to_gbrp(out_frame, in_data, width, height, 4); } -static void rgba_to_bgra(AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) +static void +to_lavc_rgba_to_bgra(AVFrame *__restrict out_frame, + const unsigned char *__restrict in_data, int width, + int height) { - int linesize = vc_get_linesize(width, RGBA); - for (ptrdiff_t y = 0; y < height; ++y) { - const unsigned char *src = in_data + y * linesize; - unsigned char *dst = out_frame->data[0] + out_frame->linesize[0] * y; - vc_copylineRGBA(dst, src, linesize, 16, 8, 0); - } + rgba_to_bgra(out_frame->data, out_frame->linesize, in_data, width, + height); } #if defined __GNUC__ @@ -1311,7 +1189,7 @@ static const struct uv_to_av_conversion *get_uv_to_av_conversions() { #endif { UYVY, AV_PIX_FMT_YUV420P, uyvy_to_yuv420p }, { UYVY, AV_PIX_FMT_YUVJ420P, uyvy_to_yuv420p }, - { UYVY, AV_PIX_FMT_NV12, uyvy_to_nv12 }, + { UYVY, AV_PIX_FMT_NV12, to_lavc_uyvy_to_nv12 }, { UYVY, AV_PIX_FMT_YUV444P, uyvy_to_yuv444p }, { UYVY, AV_PIX_FMT_YUVJ444P, uyvy_to_yuv444p }, { Y216, AV_PIX_FMT_YUV422P10LE, y216_to_yuv422p10le }, @@ -1321,7 +1199,7 @@ static const struct uv_to_av_conversion *get_uv_to_av_conversions() { { RGB, AV_PIX_FMT_GBRP, rgb_to_gbrp }, { RGB, AV_PIX_FMT_YUV444P, rgb_to_yuv444p }, { RGBA, AV_PIX_FMT_GBRP, rgba_to_gbrp }, - { RGBA, AV_PIX_FMT_BGRA, rgba_to_bgra }, + 
{ RGBA, AV_PIX_FMT_BGRA, to_lavc_rgba_to_bgra }, { R10k, AV_PIX_FMT_BGR0, r10k_to_bgr0 }, { R10k, AV_PIX_FMT_GBRP10LE, r10k_to_gbrp10le }, { R10k, AV_PIX_FMT_GBRP16LE, r10k_to_gbrp16le }, diff --git a/src/pixfmt_conv.c b/src/pixfmt_conv.c index ff2e98d22..8c296b8a5 100644 --- a/src/pixfmt_conv.c +++ b/src/pixfmt_conv.c @@ -64,6 +64,9 @@ #include "utils/macros.h" // to_fourcc, OPTIMEZED_FOR, CLAMP #include "video_codec.h" +#ifdef __SSE3__ +#include "pmmintrin.h" +#endif #ifdef __SSSE3__ #include "tmmintrin.h" #endif @@ -3074,9 +3077,9 @@ decoder_t get_best_decoder_from(codec_t in, const codec_t *out_candidates, codec * neither input nor output need to be padded */ void -v210_to_p010le(char *__restrict *__restrict out_data, +v210_to_p010le(unsigned char *__restrict *__restrict out_data, const int *__restrict out_linesize, - const char *__restrict in_data, int width, int height) + const unsigned char *__restrict in_data, int width, int height) { assert((uintptr_t) in_data % 4 == 0); assert(out_linesize[0] % 2 == 0); @@ -3168,9 +3171,9 @@ v210_to_p010le(char *__restrict *__restrict out_data, } void -y216_to_p010le(char *__restrict *__restrict out_data, +y216_to_p010le(unsigned char *__restrict *__restrict out_data, const int *__restrict out_linesize, - const char *__restrict in_data, int width, int height) + const unsigned char *__restrict in_data, int width, int height) { const size_t src_linesize = vc_get_linesize(width, Y216); for (int i = 0; i < height / 2; ++i) { @@ -3195,4 +3198,153 @@ y216_to_p010le(char *__restrict *__restrict out_data, } } +void +uyvy_to_nv12(unsigned char *__restrict *__restrict out_data, + const int *__restrict out_linesize, + const unsigned char *__restrict in_data, int width, int height) +{ + for (size_t y = 0; y < (size_t) height; y += 2) { + /* every even row */ + const unsigned char *src = in_data + (y * ((size_t) width * 2)); + /* every odd row */ + const unsigned char *src2 = src + ((size_t) width * 2); + unsigned char *dst_y = 
out_data[0] + (out_linesize[0] * y); + unsigned char *dst_y2 = dst_y + out_linesize[0]; + unsigned char *dst_cbcr = + out_data[1] + (out_linesize[1] * (y / 2)); + + int x = 0; +#ifdef __SSE3__ + __m128i yuv; + __m128i yuv2; + __m128i y1; + __m128i y2; + __m128i y3; + __m128i y4; + __m128i uv; + __m128i uv2; + __m128i uv3; + __m128i uv4; + __m128i ymask = _mm_set1_epi32(0xFF00FF00); + __m128i dsty; + __m128i dsty2; + __m128i dstuv; + + for (; x < (width - 15); x += 16){ + yuv = _mm_lddqu_si128((__m128i const*)(const void *) src); + yuv2 = _mm_lddqu_si128((__m128i const*)(const void *) src2); + src += 16; + src2 += 16; + + y1 = _mm_and_si128(ymask, yuv); + y1 = _mm_bsrli_si128(y1, 1); + y2 = _mm_and_si128(ymask, yuv2); + y2 = _mm_bsrli_si128(y2, 1); + + uv = _mm_andnot_si128(ymask, yuv); + uv2 = _mm_andnot_si128(ymask, yuv2); + + uv = _mm_avg_epu8(uv, uv2); + + yuv = _mm_lddqu_si128((__m128i const*)(const void *) src); + yuv2 = _mm_lddqu_si128((__m128i const*)(const void *) src2); + src += 16; + src2 += 16; + + y3 = _mm_and_si128(ymask, yuv); + y3 = _mm_bsrli_si128(y3, 1); + y4 = _mm_and_si128(ymask, yuv2); + y4 = _mm_bsrli_si128(y4, 1); + + uv3 = _mm_andnot_si128(ymask, yuv); + uv4 = _mm_andnot_si128(ymask, yuv2); + + uv3 = _mm_avg_epu8(uv3, uv4); + + dsty = _mm_packus_epi16(y1, y3); + dsty2 = _mm_packus_epi16(y2, y4); + dstuv = _mm_packus_epi16(uv, uv3); + _mm_storeu_si128((__m128i *)(void *) dst_y, dsty); + _mm_storeu_si128((__m128i *)(void *) dst_y2, dsty2); + _mm_storeu_si128((__m128i *)(void *) dst_cbcr, dstuv); + dst_y += 16; + dst_y2 += 16; + dst_cbcr += 16; + } +#endif + + OPTIMIZED_FOR (; x < width - 1; x += 2) { + *dst_cbcr++ = (*src++ + *src2++) / 2; + *dst_y++ = *src++; + *dst_y2++ = *src2++; + *dst_cbcr++ = (*src++ + *src2++) / 2; + *dst_y++ = *src++; + *dst_y2++ = *src2++; + } + } +} + +void +rgba_to_bgra(unsigned char *__restrict *__restrict out_data, + const int *__restrict out_linesize, + const unsigned char *__restrict in_data, int width, int 
height) +{ + const size_t src_linesize = vc_get_linesize(width, RGBA); + for (size_t i = 0; i < (size_t) height; ++i) { + const uint8_t *in = in_data + (i * src_linesize); + uint8_t *out = out_data[0] + (i * out_linesize[0]); + for (int x = 0; x < width; ++x) { + *out++ = in[2]; // B + *out++ = in[1]; // G + *out++ = in[0]; // R + *out++ = in[3]; // A + in += 4; + } + } +} + +/** + * converts UYVY to planar YUV 4:2:0 + * + * @sa uyvy_to_i422 + */ +void +uyvy_to_i420(unsigned char *__restrict *__restrict out_data, + const int *__restrict out_linesize, const unsigned char *__restrict in_data, + int width, int height) +{ + size_t src_linesize = vc_get_linesize(width, UYVY); + for (size_t i = 0; i < (size_t) (height + 1) / 2; ++i) { + const unsigned char *in1 = in_data + (2 * i * src_linesize); + const unsigned char *in2 = in1 + src_linesize; + unsigned char *y1 = + out_data[0] + ((2ULL * i) * out_linesize[0]); + unsigned char *y2 = y1 + out_linesize[0]; + unsigned char *u = out_data[1] + (i * out_linesize[1]); + unsigned char *v = out_data[2] + (i * out_linesize[2]); + + // odd height: the last pair has no 2nd row, reuse the 1st + if (2 * i + 1 == (size_t) height) { + y2 = y1; + in2 = in1; + } + + int j = 0; + for (; j < width / 2; ++j) { + *u++ = (*in1++ + *in2++ + 1) / 2; + *y1++ = *in1++; + *y2++ = *in2++; + *v++ = (*in1++ + *in2++ + 1) / 2; + *y1++ = *in1++; + *y2++ = *in2++; + } + if (width % 2 == 1) { // do not overwrite EOL + *u++ = (*in1++ + *in2++ + 1) / 2; + *y1++ = *in1++; + *y2++ = *in2++; + *v++ = (*in1++ + *in2++ + 1) / 2; + } + } +} + /* vim: set expandtab sw=8: */ diff --git a/src/pixfmt_conv.h b/src/pixfmt_conv.h index 2ab1d7a2b..50d24a113 100644 --- a/src/pixfmt_conv.h +++ b/src/pixfmt_conv.h @@ -106,12 +106,18 @@ decoder_func_t vc_copylineUYVYtoGrayscale; /// dummy conversion - ptr to it returned if no conversion needed decoder_func_t vc_memcpy; -void v210_to_p010le(char *__restrict *__restrict out_data, - const int *__restrict out_linesize, - const char *__restrict in_data, int width, 
int height); -void y216_to_p010le(char *__restrict *__restrict out_data, - const int *__restrict out_linesize, - const char *__restrict in_data, int width, int height); + +typedef void +decode_buffer_func_t(unsigned char *__restrict *__restrict out_data, + const int *__restrict out_linesize, + const unsigned char *__restrict in_data, int width, + int height); +decode_buffer_func_t v210_to_p010le; +decode_buffer_func_t y216_to_p010le; +decode_buffer_func_t uyvy_to_nv12; +decode_buffer_func_t rgba_to_bgra; +// other packed->planar convs are historically in video_codec.[ch] +decode_buffer_func_t uyvy_to_i420; #ifdef __cplusplus } diff --git a/src/video_capture/testcard_common.c b/src/video_capture/testcard_common.c index aa97766dd..a5812501a 100644 --- a/src/video_capture/testcard_common.c +++ b/src/video_capture/testcard_common.c @@ -71,46 +71,21 @@ void testcard_fillRect(struct testcard_pixmap *s, struct testcard_rect *r, uint3 /** * @param[in] in buffer in UYVY - * @retval buffer in I420 (must be deallocated by the caller) - * @note - * Caller must deallocate returned buffer + * @param[out] out buffer in I420 */ -static void toI420(unsigned char *out, const unsigned char *input, int width, int height) +static void +toI420(unsigned char *out, const unsigned char *input, int width, int height) { - const unsigned char *in = (const unsigned char *) input; - int w_ch = (width + 1) / 2; - int h_ch = (height + 1) / 2; - unsigned char *y = out; - unsigned char *u0 = out + width * height; - unsigned char *v0 = out + width * height + w_ch * h_ch; - unsigned char *u1 = u0, *v1 = v0; - - for (int i = 0; i < height; i += 1) { - for (int j = 0; j < ((width + 1) & ~1); j += 2) { - // U - if (i % 2 == 0) { - *u0++ = *in++; - } else { // average with every 2nd row - *u1 = (*u1 + *in++) / 2; - u1++; - } - // Y - *y++ = *in++; - // V - if (i % 2 == 0) { - *v0++ = *in++; - } else { // average with every 2nd row - *v1 = (*v1 + *in++) / 2; - v1++; - } - // Y - if (j + 1 == width) { - in++; - } 
else { - *y++ = *in++; - } - } - } + const size_t y_h = height; + const size_t chr_h = (y_h + 1) / 2; + int out_linesize[3] = { width, + (width + 1) / 2, + (width + 1) / 2 }; + unsigned char *out_data[3] = { out, + out + (y_h * out_linesize[0]), + out + (y_h * out_linesize[0]) + + (chr_h * out_linesize[1]) }; + uyvy_to_i420(out_data, out_linesize, input, width, height); } void testcard_convert_buffer(codec_t in_c, codec_t out_c, unsigned char *out, unsigned const char *in, int width, int height) diff --git a/src/video_display/sdl3.c b/src/video_display/sdl3.c index 8ce6d7bcc..1cccd2801 100644 --- a/src/video_display/sdl3.c +++ b/src/video_display/sdl3.c @@ -95,22 +95,24 @@ struct video_frame_sdl3_data { }; static void convert_UYVY_IYUV(const struct video_frame *uv_frame, - char *tex_data, size_t y_pitch); + unsigned char *tex_data, size_t y_pitch); +static void convert_UYVY_NV12(const struct video_frame *uv_frame, + unsigned char *tex_data, size_t y_pitch); static void convert_R10k_ARGB2101010(const struct video_frame *uv_frame, - char *tex_data, size_t y_pitch); + unsigned char *tex_data, size_t y_pitch); static void convert_R10k_ABGR2101010(const struct video_frame *uv_frame, - char *tex_data, size_t y_pitch); + unsigned char *tex_data, size_t y_pitch); static void convert_RGBA_BGRA(const struct video_frame *uv_frame, - char *tex_data, size_t y_pitch); -static void convert_Y216_P010(const struct video_frame *uv_frame, char *tex_data, - size_t y_pitch); -static void convert_v210_P010(const struct video_frame *uv_frame, char *tex_data, - size_t y_pitch); + unsigned char *tex_data, size_t y_pitch); +static void convert_Y216_P010(const struct video_frame *uv_frame, + unsigned char *tex_data, size_t y_pitch); +static void convert_v210_P010(const struct video_frame *uv_frame, + unsigned char *tex_data, size_t y_pitch); struct fmt_data { codec_t ug_codec; enum SDL_PixelFormat sdl_tex_fmt; - void (*convert)(const struct video_frame *uv_frame, char *tex_data, - size_t 
tex_pitch); + void (*convert)(const struct video_frame *uv_frame, + unsigned char *tex_data, size_t tex_pitch); }; // order matters relative to fixed ug codec - first usable SDL fmt is used static const struct fmt_data pf_mapping_template[] = { @@ -121,6 +123,7 @@ static const struct fmt_data pf_mapping_template[] = { { RGBA, SDL_PIXELFORMAT_BGRX32, convert_RGBA_BGRA }, // gles2,ogl,gpu,sw,vk,d3d12 { UYVY, SDL_PIXELFORMAT_UYVY, NULL }, // mac ogl { UYVY, SDL_PIXELFORMAT_IYUV, convert_UYVY_IYUV }, // fallback + { UYVY, SDL_PIXELFORMAT_NV12, convert_UYVY_NV12 }, // ditto { YUYV, SDL_PIXELFORMAT_YUY2, NULL }, { RGB, SDL_PIXELFORMAT_RGB24, NULL }, { BGR, SDL_PIXELFORMAT_BGR24, NULL }, @@ -230,7 +233,7 @@ display_frame(struct state_sdl3 *s, struct video_frame *frame) int pitch = 0; if (s->cs_data->convert != NULL) { - char *tex_data = NULL; + unsigned char *tex_data = NULL; SDL_CHECK(SDL_LockTexture(frame_data->texture, NULL, (void **) &tex_data, &pitch)); s->cs_data->convert(frame, tex_data, pitch); @@ -1093,8 +1096,8 @@ display_sdl3_getf(void *state) } static void -convert_R10k_ARGB2101010(const struct video_frame *uv_frame, char *tex_data, - size_t pitch) +convert_R10k_ARGB2101010(const struct video_frame *uv_frame, + unsigned char *tex_data, size_t pitch) { assert(pitch == (size_t) uv_frame->tiles[0].width * 4); assert((uintptr_t) uv_frame->tiles[0].data % 4 == 0); @@ -1121,8 +1124,8 @@ convert_R10k_ARGB2101010(const struct video_frame *uv_frame, char *tex_data, } static void -convert_R10k_ABGR2101010(const struct video_frame *uv_frame, char *tex_data, - size_t pitch) +convert_R10k_ABGR2101010(const struct video_frame *uv_frame, + unsigned char *tex_data, size_t pitch) { const size_t src_linesize = vc_get_linesize(uv_frame->tiles[0].width, R10k); for (unsigned i = 0; i < uv_frame->tiles[0].height; ++i) { @@ -1142,58 +1145,45 @@ convert_R10k_ABGR2101010(const struct video_frame *uv_frame, char *tex_data, } static void -convert_RGBA_BGRA(const struct video_frame 
*uv_frame, char *tex_data, +convert_RGBA_BGRA(const struct video_frame *uv_frame, unsigned char *tex_data, size_t pitch) { - const size_t src_linesize = vc_get_linesize(uv_frame->tiles[0].width, RGBA); - for (unsigned i = 0; i < uv_frame->tiles[0].height; ++i) { - const uint8_t *in = - (uint8_t *) uv_frame->tiles[0].data + (i * src_linesize); - uint8_t *out = (uint8_t *) tex_data + (i * pitch); - for (unsigned i = 0; i < uv_frame->tiles[0].width ; ++i) { - *out++ = in[2]; // B - *out++ = in[1]; // G - *out++ = in[0]; // R - *out++ = in[3]; // A - in += 4; - } - } + unsigned char *out_data[2] = { tex_data, 0 }; + int out_linesize[2] = { (int) pitch, 0 }; + rgba_to_bgra( + out_data, out_linesize, (unsigned char *) uv_frame->tiles[0].data, + (int) uv_frame->tiles[0].width, (int) uv_frame->tiles[0].height); } static void -convert_UYVY_IYUV(const struct video_frame *uv_frame, char *tex_data, +convert_UYVY_IYUV(const struct video_frame *uv_frame, unsigned char *tex_data, size_t y_pitch) { - size_t cr_pitch = (y_pitch + 1) / 2; - char *ubase = tex_data + (y_pitch * uv_frame->tiles[0].height); - char *vbase = - ubase + (cr_pitch * ((uv_frame->tiles[0].height + 1) / 2)); - const char *in = uv_frame->tiles[0].data; - for (unsigned i = 0; i < (uv_frame->tiles[0].height + 1) / 2; ++i) { - char *y1 = tex_data + ((2ULL * i) * y_pitch); - char *y2 = y1 + y_pitch; - char *u = ubase + (i * cr_pitch); - char *v = vbase + (i * cr_pitch); - for (unsigned j = 0; j < (uv_frame->tiles[0].width + 1) / 2; - ++j) { - *u++ = *in++; - *y1++ = *in++; - *v++ = *in++; - *y1++ = *in++; - } - // last line when height % 2 == 1 - if (i * 2 + 1 == uv_frame->tiles[0].height) { - break; - } - // take just lumas from second - for (unsigned j = 0; j < (uv_frame->tiles[0].width + 1) / 2; - ++j) { - in++; // drop U - *y2++ = *in++; - in++; // drop V - *y2++ = *in++; - } - } + const size_t y_h = uv_frame->tiles[0].height; + const size_t chr_h = (y_h + 1) / 2; + int out_linesize[3] = { (int) y_pitch, + (int) 
(y_pitch + 1) / 2, + (int) (y_pitch + 1) / 2 }; + unsigned char *out_data[3] = { tex_data, + tex_data + (y_h * out_linesize[0]), + tex_data + (y_h * out_linesize[0]) + + (chr_h * out_linesize[1]) }; + uyvy_to_i420( + out_data, out_linesize, (unsigned char *) uv_frame->tiles[0].data, + (int) uv_frame->tiles[0].width, (int) uv_frame->tiles[0].height); +} + +static void +convert_UYVY_NV12(const struct video_frame *uv_frame, unsigned char *tex_data, + size_t y_pitch) +{ + unsigned char *out_data[2] = { + tex_data, tex_data + (y_pitch * uv_frame->tiles[0].height) + }; + int out_linesize[2] = { (int) y_pitch, (int) ((y_pitch + 1) / 2) * 2 }; + uyvy_to_nv12( + out_data, out_linesize, (unsigned char *) uv_frame->tiles[0].data, + (int) uv_frame->tiles[0].width, (int) uv_frame->tiles[0].height); } /** @@ -1201,30 +1191,30 @@ convert_UYVY_IYUV(const struct video_frame *uv_frame, char *tex_data, * currently seem to work only on Metal */ static void -convert_Y216_P010(const struct video_frame *uv_frame, char *tex_data, +convert_Y216_P010(const struct video_frame *uv_frame, unsigned char *tex_data, size_t y_pitch) { - char *out_data[2] = { + unsigned char *out_data[2] = { tex_data, tex_data + (y_pitch * uv_frame->tiles[0].height) }; int out_linesize[2] = { (int) y_pitch, (int) ((y_pitch + 1) / 2) * 2 }; - y216_to_p010le(out_data, out_linesize, uv_frame->tiles[0].data, - (int) uv_frame->tiles[0].width, - (int) uv_frame->tiles[0].height); + y216_to_p010le( + out_data, out_linesize, (unsigned char *) uv_frame->tiles[0].data, + (int) uv_frame->tiles[0].width, (int) uv_frame->tiles[0].height); } /// @copydoc convert_Y216_P010 static void -convert_v210_P010(const struct video_frame *uv_frame, char *tex_data, +convert_v210_P010(const struct video_frame *uv_frame, unsigned char *tex_data, size_t y_pitch) { - char *out_data[2] = { + unsigned char *out_data[2] = { tex_data, tex_data + (y_pitch * uv_frame->tiles[0].height) }; int out_linesize[2] = { (int) y_pitch, (int) ((y_pitch + 1) / 2) 
* 2 }; - v210_to_p010le(out_data, out_linesize, uv_frame->tiles[0].data, - (int) uv_frame->tiles[0].width, - (int) uv_frame->tiles[0].height); + v210_to_p010le( + out_data, out_linesize, (unsigned char *) uv_frame->tiles[0].data, + (int) uv_frame->tiles[0].width, (int) uv_frame->tiles[0].height); } static bool