From 4136c263190cdc9dee35fce21bb5205b017f7460 Mon Sep 17 00:00:00 2001 From: Martin Pulec Date: Thu, 19 Feb 2026 12:11:21 +0100 Subject: [PATCH] mv r12l_to_gbrp1{2,6}le from lavc to pixfmt_conv so that they can also be used in other places + use compat/endian.h (POSIX) macros in BYTE_SWAP --- .github/scripts/macOS/prepare.sh | 6 ++ src/libavcodec/to_lavc_vid_conv.c | 109 ++++----------------------- src/pixfmt_conv.c | 119 +++++++++++++++++++++++++++++- src/pixfmt_conv.h | 6 +- 4 files changed, 141 insertions(+), 99 deletions(-) diff --git a/.github/scripts/macOS/prepare.sh b/.github/scripts/macOS/prepare.sh index ac202e640..3711f46cc 100755 --- a/.github/scripts/macOS/prepare.sh +++ b/.github/scripts/macOS/prepare.sh @@ -31,6 +31,12 @@ echo "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig" >> "$GITHUB_ENV" echo "/usr/local/opt/qt/bin" >> "$GITHUB_PATH" echo "DYLIBBUNDLER_FLAGS=$DYLIBBUNDLER_FLAGS" >> "$GITHUB_ENV" +# TODO TOREMOVE allow gnu::always_inline - remove when no macOS <= 14 runner +macos_major=$(sw_vers -productVersion | cut -d. -f1) +if [ "$macos_major" -le 14 ]; then + echo "CFLAGS=-std=c2x" >> "$GITHUB_ENV" +fi + set -- \ asciidoctor \ autoconf \ diff --git a/src/libavcodec/to_lavc_vid_conv.c b/src/libavcodec/to_lavc_vid_conv.c index 3b911ea62..1b7e55db3 100644 --- a/src/libavcodec/to_lavc_vid_conv.c +++ b/src/libavcodec/to_lavc_vid_conv.c @@ -4,7 +4,7 @@ * @author Martin Piatka <445597@mail.muni.cz> */ /* - * Copyright (c) 2013-2025 CESNET + * Copyright (c) 2013-2026 CESNET, zájmové sdružení právnických osob * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -1283,101 +1283,22 @@ static void r10k_to_gbrp16le(AVFrame * __restrict out_frame, const unsigned char r10k_to_gbrpXXle(out_frame, in_data, width, height, 16U); } -/// @note out_depth needs to be at least 12 -#if defined __GNUC__ -static inline void r12l_to_gbrpXXle(AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height, unsigned int out_depth) - __attribute__((always_inline)); -#endif -static inline void r12l_to_gbrpXXle(AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height, unsigned int out_depth) +static void +av_r12l_to_gbrp12le(AVFrame *__restrict out_frame, + const unsigned char *__restrict in_data, int width, + int height) { - assert(out_depth >= 12); - assert((uintptr_t) out_frame->linesize[0] % 2 == 0); - assert((uintptr_t) out_frame->linesize[1] % 2 == 0); - assert((uintptr_t) out_frame->linesize[2] % 2 == 0); - -#undef S -#define S(x) ((x) << (out_depth - 12U)) - - int src_linesize = vc_get_linesize(width, R12L); - for (int y = 0; y < height; ++y) { - const unsigned char *src = in_data + y * src_linesize; - uint16_t *dst_g = (uint16_t *)(void *) (out_frame->data[0] + out_frame->linesize[0] * y); - uint16_t *dst_b = (uint16_t *)(void *) (out_frame->data[1] + out_frame->linesize[1] * y); - uint16_t *dst_r = (uint16_t *)(void *) (out_frame->data[2] + out_frame->linesize[2] * y); - - OPTIMIZED_FOR (int x = 0; x < width; x += 8) { - uint16_t tmp = src[BYTE_SWAP(0)]; - tmp |= (src[BYTE_SWAP(1)] & 0xFU) << 8U; - *dst_r++ = S(tmp); // r0 - *dst_g++ = S(src[BYTE_SWAP(2)] << 4U | src[BYTE_SWAP(1)] >> 4U); // g0 - tmp = src[BYTE_SWAP(3)]; - src += 4; - tmp |= (src[BYTE_SWAP(0)] & 0xFU) << 8U; - *dst_b++ = S(tmp); // b0 - *dst_r++ = S(src[BYTE_SWAP(1)] << 4U | src[BYTE_SWAP(0)] >> 4U); // r1 - tmp = src[BYTE_SWAP(2)]; - tmp |= (src[BYTE_SWAP(3)] & 0xFU) << 8U; - *dst_g++ = S(tmp); // g1 - tmp = src[BYTE_SWAP(3)] 
>> 4U; - src += 4; - *dst_b++ = S(src[BYTE_SWAP(0)] << 4U | tmp); // b1 - tmp = src[BYTE_SWAP(1)]; - tmp |= (src[BYTE_SWAP(2)] & 0xFU) << 8U; - *dst_r++ = S(tmp); // r2 - *dst_g++ = S(src[BYTE_SWAP(3)] << 4U | src[BYTE_SWAP(2)] >> 4U); // g2 - src += 4; - tmp = src[BYTE_SWAP(0)]; - tmp |= (src[BYTE_SWAP(1)] & 0xFU) << 8U; - *dst_b++ = S(tmp); // b2 - *dst_r++ = S(src[BYTE_SWAP(2)] << 4U | src[BYTE_SWAP(1)] >> 4U); // r3 - tmp = src[BYTE_SWAP(3)]; - src += 4; - tmp |= (src[BYTE_SWAP(0)] & 0xFU) << 8U; - *dst_g++ = S(tmp); // g3 - *dst_b++ = S(src[BYTE_SWAP(1)] << 4U | src[BYTE_SWAP(0)] >> 4U); // b3 - tmp = src[BYTE_SWAP(2)]; - tmp |= (src[BYTE_SWAP(3)] & 0xFU) << 8U; - *dst_r++ = S(tmp); // r4 - tmp = src[BYTE_SWAP(3)] >> 4U; - src += 4; - *dst_g++ = S(src[BYTE_SWAP(0)] << 4U | tmp); // g4 - tmp = src[BYTE_SWAP(1)]; - tmp |= (src[BYTE_SWAP(2)] & 0xFU) << 8U; - *dst_b++ = S(tmp); // b4 - *dst_r++ = S(src[BYTE_SWAP(3)] << 4U | src[BYTE_SWAP(2)] >> 4U); // r5 - src += 4; - tmp = src[BYTE_SWAP(0)]; - tmp |= (src[BYTE_SWAP(1)] & 0xFU) << 8U; - *dst_g++ = S(tmp); // g5 - *dst_b++ = S(src[BYTE_SWAP(2)] << 4U | src[BYTE_SWAP(1)] >> 4U); // b5 - tmp = src[BYTE_SWAP(3)]; - src += 4; - tmp |= (src[BYTE_SWAP(0)] & 0xFU) << 8U; - *dst_r++ = S(tmp); // r6 - *dst_g++ = S(src[BYTE_SWAP(1)] << 4U | src[BYTE_SWAP(0)] >> 4U); // g6 - tmp = src[BYTE_SWAP(2)]; - tmp |= (src[BYTE_SWAP(3)] & 0xFU) << 8U; - *dst_b++ = S(tmp); // b6 - tmp = src[BYTE_SWAP(3)] >> 4U; - src += 4; - *dst_r++ = S(src[BYTE_SWAP(0)] << 4U | tmp); // r7 - tmp = src[BYTE_SWAP(1)]; - tmp |= (src[BYTE_SWAP(2)] & 0xFU) << 8U; - *dst_g++ = S(tmp); // g7 - *dst_b++ = S(src[BYTE_SWAP(3)] << 4U | src[BYTE_SWAP(2)] >> 4U); // b7 - src += 4; - } - } + r12l_to_gbrp12le(out_frame->data, out_frame->linesize, in_data, width, + height); } -static void r12l_to_gbrp16le(AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) +static void +av_r12l_to_gbrp16le(AVFrame *__restrict out_frame, + 
const unsigned char *__restrict in_data, int width, + int height) { - r12l_to_gbrpXXle(out_frame, in_data, width, height, 16U); -} - -static void r12l_to_gbrp12le(AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) -{ - r12l_to_gbrpXXle(out_frame, in_data, width, height, 12U); + r12l_to_gbrp16le(out_frame->data, out_frame->linesize, in_data, width, + height); } static void rg48_to_gbrp12le(AVFrame * __restrict out_frame, const unsigned char * __restrict in_data, int width, int height) @@ -1479,8 +1400,8 @@ static const struct uv_to_av_conversion *get_uv_to_av_conversions() { #endif { R10k, AV_PIX_FMT_YUV422P10LE, r10k_to_yuv422p10le }, { R10k, AV_PIX_FMT_YUV420P10LE, r10k_to_yuv420p10le }, - { R12L, AV_PIX_FMT_GBRP12LE, r12l_to_gbrp12le }, - { R12L, AV_PIX_FMT_GBRP16LE, r12l_to_gbrp16le }, + { R12L, AV_PIX_FMT_GBRP12LE, av_r12l_to_gbrp12le }, + { R12L, AV_PIX_FMT_GBRP16LE, av_r12l_to_gbrp16le }, { RG48, AV_PIX_FMT_GBRP12LE, rg48_to_gbrp12le }, { VIDEO_CODEC_NONE, AV_PIX_FMT_NONE, 0 } }; diff --git a/src/pixfmt_conv.c b/src/pixfmt_conv.c index 340bf6b03..5f82d765d 100644 --- a/src/pixfmt_conv.c +++ b/src/pixfmt_conv.c @@ -12,7 +12,7 @@ * * To measure performance of conversions, use `tools/convert benchmark`. 
*/ -/* Copyright (c) 2005-2025 CESNET +/* Copyright (c) 2005-2026 CESNET, zájmové sdružení právnických osob * * Redistribution and use in source and binary forms, with or without * modification, is permitted provided that the following conditions @@ -49,6 +49,7 @@ * */ +#include "types.h" #define __STDC_WANT_LIB_EXT1__ 1 #include "pixfmt_conv.h" @@ -59,6 +60,7 @@ #include // for memcpy #include "color_space.h" +#include "compat/endian.h" // BYTE_ORDER, BIG_ENDIAN #include "compat/qsort_s.h" #include "debug.h" #include "utils/macros.h" // to_fourcc, OPTIMEZED_FOR, CLAMP @@ -71,13 +73,19 @@ #include "tmmintrin.h" #endif -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define MOD_NAME "[pixfmt_conv] " + +#if BYTE_ORDER == BIG_ENDIAN #define BYTE_SWAP(x) (3 - x) #else #define BYTE_SWAP(x) x #endif -#define MOD_NAME "[pixfmt_conv] " +#if defined __GNUC__ && (__GNUC__ >= 10 || __clang_major__ >= 9) +#define ALWAYS_INLINE [[gnu::always_inline]] +#else +#define ALWAYS_INLINE +#endif /** * @brief Converts v210 to UYVY @@ -3473,4 +3481,109 @@ uyvy_to_i420(unsigned char *__restrict *__restrict out_data, } } +/// @note out_depth needs to be at least 12 +ALWAYS_INLINE static inline void +r12l_to_gbrpXXle(unsigned char *__restrict *__restrict out_data, + const int *__restrict out_linesize, + const unsigned char *__restrict in_data, int width, int height, + unsigned int out_depth) +{ + assert(out_depth >= 12); + assert((uintptr_t) out_linesize[0] % 2 == 0); + assert((uintptr_t) out_linesize[1] % 2 == 0); + assert((uintptr_t) out_linesize[2] % 2 == 0); + +#define S(x) ((x) << (out_depth - 12U)) + + int src_linesize = vc_get_linesize(width, R12L); + for (int y = 0; y < height; ++y) { + const unsigned char *src = in_data + y * src_linesize; + uint16_t *dst_g = (uint16_t *)(void *) (out_data[0] + out_linesize[0] * y); + uint16_t *dst_b = (uint16_t *)(void *) (out_data[1] + out_linesize[1] * y); + uint16_t *dst_r = (uint16_t *)(void *) (out_data[2] + out_linesize[2] * y); + + 
OPTIMIZED_FOR (int x = 0; x < width; x += 8) { + uint16_t tmp = src[BYTE_SWAP(0)]; + tmp |= (src[BYTE_SWAP(1)] & 0xFU) << 8U; + *dst_r++ = S(tmp); // r0 + *dst_g++ = S(src[BYTE_SWAP(2)] << 4U | src[BYTE_SWAP(1)] >> 4U); // g0 + tmp = src[BYTE_SWAP(3)]; + src += 4; + tmp |= (src[BYTE_SWAP(0)] & 0xFU) << 8U; + *dst_b++ = S(tmp); // b0 + *dst_r++ = S(src[BYTE_SWAP(1)] << 4U | src[BYTE_SWAP(0)] >> 4U); // r1 + tmp = src[BYTE_SWAP(2)]; + tmp |= (src[BYTE_SWAP(3)] & 0xFU) << 8U; + *dst_g++ = S(tmp); // g1 + tmp = src[BYTE_SWAP(3)] >> 4U; + src += 4; + *dst_b++ = S(src[BYTE_SWAP(0)] << 4U | tmp); // b1 + tmp = src[BYTE_SWAP(1)]; + tmp |= (src[BYTE_SWAP(2)] & 0xFU) << 8U; + *dst_r++ = S(tmp); // r2 + *dst_g++ = S(src[BYTE_SWAP(3)] << 4U | src[BYTE_SWAP(2)] >> 4U); // g2 + src += 4; + tmp = src[BYTE_SWAP(0)]; + tmp |= (src[BYTE_SWAP(1)] & 0xFU) << 8U; + *dst_b++ = S(tmp); // b2 + *dst_r++ = S(src[BYTE_SWAP(2)] << 4U | src[BYTE_SWAP(1)] >> 4U); // r3 + tmp = src[BYTE_SWAP(3)]; + src += 4; + tmp |= (src[BYTE_SWAP(0)] & 0xFU) << 8U; + *dst_g++ = S(tmp); // g3 + *dst_b++ = S(src[BYTE_SWAP(1)] << 4U | src[BYTE_SWAP(0)] >> 4U); // b3 + tmp = src[BYTE_SWAP(2)]; + tmp |= (src[BYTE_SWAP(3)] & 0xFU) << 8U; + *dst_r++ = S(tmp); // r4 + tmp = src[BYTE_SWAP(3)] >> 4U; + src += 4; + *dst_g++ = S(src[BYTE_SWAP(0)] << 4U | tmp); // g4 + tmp = src[BYTE_SWAP(1)]; + tmp |= (src[BYTE_SWAP(2)] & 0xFU) << 8U; + *dst_b++ = S(tmp); // b4 + *dst_r++ = S(src[BYTE_SWAP(3)] << 4U | src[BYTE_SWAP(2)] >> 4U); // r5 + src += 4; + tmp = src[BYTE_SWAP(0)]; + tmp |= (src[BYTE_SWAP(1)] & 0xFU) << 8U; + *dst_g++ = S(tmp); // g5 + *dst_b++ = S(src[BYTE_SWAP(2)] << 4U | src[BYTE_SWAP(1)] >> 4U); // b5 + tmp = src[BYTE_SWAP(3)]; + src += 4; + tmp |= (src[BYTE_SWAP(0)] & 0xFU) << 8U; + *dst_r++ = S(tmp); // r6 + *dst_g++ = S(src[BYTE_SWAP(1)] << 4U | src[BYTE_SWAP(0)] >> 4U); // g6 + tmp = src[BYTE_SWAP(2)]; + tmp |= (src[BYTE_SWAP(3)] & 0xFU) << 8U; + *dst_b++ = S(tmp); // b6 + tmp = src[BYTE_SWAP(3)] >> 4U; + 
src += 4; + *dst_r++ = S(src[BYTE_SWAP(0)] << 4U | tmp); // r7 + tmp = src[BYTE_SWAP(1)]; + tmp |= (src[BYTE_SWAP(2)] & 0xFU) << 8U; + *dst_g++ = S(tmp); // g7 + *dst_b++ = S(src[BYTE_SWAP(3)] << 4U | src[BYTE_SWAP(2)] >> 4U); // b7 + src += 4; + } + } +#undef S +} + +void +r12l_to_gbrp12le(unsigned char *__restrict *__restrict out_data, + const int *__restrict out_linesize, + const unsigned char *__restrict in_data, int width, int height) +{ + r12l_to_gbrpXXle(out_data, out_linesize, in_data, width, height, + DEPTH12); +} + +void +r12l_to_gbrp16le(unsigned char *__restrict *__restrict out_data, + const int *__restrict out_linesize, + const unsigned char *__restrict in_data, int width, int height) +{ + r12l_to_gbrpXXle(out_data, out_linesize, in_data, width, height, + DEPTH16); +} + /* vim: set expandtab sw=8: */ diff --git a/src/pixfmt_conv.h b/src/pixfmt_conv.h index ba79a9062..0e0f7dc06 100644 --- a/src/pixfmt_conv.h +++ b/src/pixfmt_conv.h @@ -14,7 +14,7 @@ * @sa from_lavc_vid_conv.h to_lavc_vid_conv.h * @sa utils/parallel_conv.h */ -/* Copyright (c) 2005-2023 CESNET z.s.p.o. +/* Copyright (c) 2005-2026 CESNET, zájmové sdružení právnických osob * * Redistribution and use in source and binary forms, with or without * modification, is permitted provided that the following conditions @@ -106,7 +106,7 @@ decoder_func_t vc_copylineUYVYtoGrayscale; /// dummy conversion - ptr to it returned if no conversion needed decoder_func_t vc_memcpy; - +/// functions to decode whole buffer of packed data to planar or packed typedef void decode_buffer_func_t(unsigned char *__restrict *__restrict out_data, const int *__restrict out_linesize, @@ -118,6 +118,8 @@ decode_buffer_func_t uyvy_to_nv12; decode_buffer_func_t rgba_to_bgra; // other packed->planar convs are histaorically in video_codec.[ch] decode_buffer_func_t uyvy_to_i420; +decode_buffer_func_t r12l_to_gbrp16le; +decode_buffer_func_t r12l_to_gbrp12le; #ifdef __cplusplus }