diff --git a/src/types.h b/src/types.h
index 187af8819..f285a3cf7 100644
--- a/src/types.h
+++ b/src/types.h
@@ -95,6 +95,7 @@ typedef enum {
         AV1,      ///< AOMedia Video 1
         I420,     ///< planar YCbCr 4:2:0
         Y216,     ///< YCbCr 422 16-bit - Y0 Cb Y1 Cr
+        Y416,     ///< interleaved little-endian YCbCr 4444 16-bit - UYVA
         PRORES,           ///< abstract Apple ProRes, must not be used in transmit
         PRORES_4444,      ///< Apple ProRes 4444
         PRORES_4444_XQ,   ///< Apple ProRes 4444 (XQ)
diff --git a/src/video_capture/testcard.cpp b/src/video_capture/testcard.cpp
index cc28650e4..595e0efef 100644
--- a/src/video_capture/testcard.cpp
+++ b/src/video_capture/testcard.cpp
@@ -302,7 +302,7 @@ static int configure_tiling(struct testcard_state *s, const char *fmt)
 
 static const codec_t codecs_8b[] = {I420, RGBA, RGB, UYVY, YUYV, VIDEO_CODEC_NONE};
 static const codec_t codecs_10b[] = {R10k, v210, VIDEO_CODEC_NONE};
-static const codec_t codecs_ge12b[] = {Y216, RG48, R12L, VIDEO_CODEC_NONE};
+static const codec_t codecs_ge12b[] = {Y216, Y416, RG48, R12L, VIDEO_CODEC_NONE}; // candidates for bit depths >= 12 (per the "ge12b" name); VIDEO_CODEC_NONE terminates the list
 
 static bool parse_fps(const char *fps, struct video_desc *desc) {
         char *endptr = nullptr;
diff --git a/src/video_capture/testcard_common.c b/src/video_capture/testcard_common.c
index 9942157e3..9e6f67ce5 100644
--- a/src/video_capture/testcard_common.c
+++ b/src/video_capture/testcard_common.c
@@ -113,7 +113,7 @@ static void toI420(unsigned char *out, const unsigned char *input, int width, in
 void testcard_convert_buffer(codec_t in_c, codec_t out_c, unsigned char *out, unsigned const char *in, int width, int height)
 {
         unsigned char *tmp_buffer = NULL;
-        if (out_c == I420 || out_c == YUYV || out_c == Y216) {
+        if (out_c == I420 || out_c == YUYV || out_c == Y216 || out_c == Y416) {
                 decoder_t decoder = get_decoder_from_to(in_c, UYVY, true);
                 tmp_buffer =  malloc(2L * ((width + 1U) ^ 1U) * height);
                 long in_linesize = vc_get_linesize(width, in_c);
diff --git a/src/video_codec.c b/src/video_codec.c
index 33876e4c6..a2dc8b2ff 100644
--- a/src/video_codec.c
+++ b/src/video_codec.c
@@ -195,6 +195,8 @@ static const struct codec_info_t codec_info[] = {
                 to_fourcc('I','4','2','0'), 2, 3.0/2.0, 8, 1, FALSE, FALSE, FALSE, FALSE, 4200, "yuv"},
         [Y216] =  {"Y216", "Packed 16-bit YUV 4:2:2 little-endian",
                 to_fourcc('Y','2','1','6'), 2, 4.0, 16, 8, FALSE, FALSE, FALSE, FALSE, 4220, "y216"},
+        [Y416] =  {"Y416", "Packed 16-bit YUV 4:4:4:4 little-endian",
+                to_fourcc('Y','4','1','6'), 1, 8.0, 16, 8, FALSE, FALSE, FALSE, FALSE, 4444, "y416"},
         [PRORES] =  {"PRORES", "Apple ProRes",
                 0, 0, 1.0, 8, 1, FALSE, TRUE, TRUE, FALSE, 0, "pror"},
         [PRORES_4444] =  {"PRORES_4444", "Apple ProRes 4444",
@@ -2495,6 +2497,44 @@ static void vc_copylineUYVYtoY216(unsigned char * __restrict dst, const unsigned
         }
 }
 
+/**
+ * Converts a line of UYVY (8-bit 4:2:2) to Y416 (16-bit little-endian 4:4:4:4, words in U Y V A order).
+ *
+ * Every 8-bit sample becomes the high byte of its 16-bit word (the low byte is zero). Both pixels of a
+ * UYVY group reuse that group's U and V. Alpha is written as 0xFFFF (opaque) so that consumers
+ * honouring the alpha word do not treat the picture as transparent.
+ *
+ * Pixels are emitted one at a time, so nothing is stored past dst_len even when dst_len is not a
+ * multiple of 16 (a loop storing 16 bytes per pass while dst_len >= 12 would overrun dst).
+ *
+ * @param dst     output line, at least dst_len bytes
+ * @param src     UYVY input line, one 4-byte group per two output pixels
+ * @param dst_len output length in bytes; only whole 8-byte pixels are written
+ * @param rshift  unused
+ * @param gshift  unused
+ * @param bshift  unused
+ */
+static void vc_copylineUYVYtoY416(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
+                int gshift, int bshift)
+{
+        UNUSED(rshift);
+        UNUSED(gshift);
+        UNUSED(bshift);
+        for (int px = 0; dst_len >= 8; ++px, dst_len -= 8) {
+                // pixels 2k and 2k + 1 share UYVY group k; only the luma byte differs
+                const unsigned char *grp = src + (px / 2) * 4;
+                const int odd = px % 2;
+                *dst++ = 0;
+                *dst++ = grp[0];           // U
+                *dst++ = 0;
+                *dst++ = grp[1 + 2 * odd]; // Y0 (even pixel) or Y1 (odd pixel)
+                *dst++ = 0;
+                *dst++ = grp[2];           // V
+                *dst++ = 0xFF;
+                *dst++ = 0xFF;             // A, opaque
+        }
+}
+
 static void vc_copylineY216toUYVY(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
                 int gshift, int bshift)
 {
@@ -2511,6 +2551,22 @@ static void vc_copylineY216toUYVY(unsigned char * __restrict dst, const unsigned
         }
 }
 
+/** Y416 -> UYVY: keeps the high byte of each 16-bit sample, averages chroma over each pixel pair, drops alpha. */
+static void vc_copylineY416toUYVY(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
+                int gshift, int bshift)
+{
+        UNUSED(rshift);
+        UNUSED(gshift);
+        UNUSED(bshift);
+        // one pass: 2 source pixels (16 B of U Y V A words) -> 1 UYVY group (4 B)
+        for (; dst_len >= 4; dst_len -= 4, src += 16, dst += 4) {
+                dst[0] = (src[1] + src[9]) / 2;  // U, mean of both pixels
+                dst[1] = src[3];                 // Y0
+                dst[2] = (src[5] + src[13]) / 2; // V, mean of both pixels
+                dst[3] = src[11];                // Y1
+        }
+}
+
 static void vc_copylineY216toV210(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
                 int gshift, int bshift)
 {
@@ -2542,6 +2598,37 @@ static void vc_copylineY216toV210(unsigned char * __restrict dst, const unsigned
         }
 }
 
+/// Y416 -> v210: every 6 source pixels (48 B) are packed into 4 32-bit words (16 B).
+/// Alpha is skipped, samples are cut to 10 bits and chroma is averaged per pixel pair.
+static void vc_copylineY416toV210(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
+                int gshift, int bshift)
+{
+        UNUSED(rshift);
+        UNUSED(gshift);
+        UNUSED(bshift);
+        assert((uintptr_t) src % 2 == 0);
+        assert((uintptr_t) dst % 4 == 0);
+        OPTIMIZED_FOR (int x = 0; x < dst_len / 16; ++x) {
+                const uint16_t *in = (const uint16_t *)(const void *) (src + x * 48); // in[4 * p + {0, 1, 2}] = U, Y, V of pixel p
+                uint32_t *out = (uint32_t *)(void *) (dst + x * 16);
+                // luma of each pixel, cut down to its 10 most significant bits
+                const uint32_t y0 = in[1] >> 6U, y1 = in[5] >> 6U, y2 = in[9] >> 6U;
+                const uint32_t y3 = in[13] >> 6U, y4 = in[17] >> 6U, y5 = in[21] >> 6U;
+                // chroma: mean over the pixel pair first, then the same cut to 10 bits
+                const uint32_t cb01 = ((in[0] + in[4]) / 2) >> 6U;
+                const uint32_t cr01 = ((in[2] + in[6]) / 2) >> 6U;
+                const uint32_t cb23 = ((in[8] + in[12]) / 2) >> 6U;
+                const uint32_t cr23 = ((in[10] + in[14]) / 2) >> 6U;
+                const uint32_t cb45 = ((in[16] + in[20]) / 2) >> 6U;
+                const uint32_t cr45 = ((in[18] + in[22]) / 2) >> 6U;
+                // three 10-bit samples per word, lowest bits first
+                out[0] = cb01 | y0 << 10U | cr01 << 20U;
+                out[1] = y1 | cb23 << 10U | y2 << 20U;
+                out[2] = cr23 | y3 << 10U | cb45 << 20U;
+                out[3] = y4 | cr45 << 10U | y5 << 20U;
+        }
+}
+
 struct decoder_item {
         decoder_t decoder;
         codec_t in;
@@ -2583,8 +2670,11 @@ static const struct decoder_item decoders[] = {
         { vc_copylineRGBAtoR10k,  RGBA,  R10k, false },
         { vc_copylineUYVYtoV210,  UYVY,  v210, false },
         { vc_copylineUYVYtoY216,  UYVY,  Y216, false },
+        { vc_copylineUYVYtoY416,  UYVY,  Y416, false },
         { vc_copylineY216toUYVY,  Y216,  UYVY, false },
         { vc_copylineY216toV210,  Y216,  v210, false },
+        { vc_copylineY416toUYVY,  Y416,  UYVY, false },
+        { vc_copylineY416toV210,  Y416,  v210, false },
 };
 
 /**
diff --git a/src/video_codec.h b/src/video_codec.h
index 30ec38f33..b5b52b432 100644
--- a/src/video_codec.h
+++ b/src/video_codec.h
@@ -57,7 +57,7 @@ extern "C" {
 #define DEFAULT_G_SHIFT  8
 #define DEFAULT_B_SHIFT 16
 #define DEFAULT_RGB_SHIFT_INIT { DEFAULT_R_SHIFT, DEFAULT_G_SHIFT, DEFAULT_B_SHIFT }
-#define MAX_BPS 6 /* for RG48 */  ///< maximal (average) number of pixels per know pixel formats (up-round if needed)
+#define MAX_BPS 8 /* for Y416 */  ///< maximal (average) number of bytes per pixel among known pixel formats (rounded up if needed)
 #define MAX_PADDING 36 /* R12L */ ///< maximal padding that may be needed to align to pixfmt block size
 
 /**