mirror of
https://github.com/outbackdingo/UltraGrid.git
synced 2026-03-21 08:40:19 +00:00
to_planar: add decode_to_planar_parallel
for parallel processing decode_buffer_func_t funcs
This commit is contained in:
@@ -48,8 +48,11 @@
|
||||
#include <pmmintrin.h> // for _mm_lddqu_si128
|
||||
#endif
|
||||
|
||||
#include "compat/c23.h"
|
||||
#include "types.h" // for depth, v210, R12L, RGBA, UYVY, Y216
|
||||
#include "utils/macros.h" // for OPTIMIZED_FOR, ALWAYS_INLINE
|
||||
#include "utils/misc.h" // for get_cpu_core_count
|
||||
#include "utils/worker.h" // for task_run_parallel
|
||||
#include "video_codec.h" // for vc_get_linesize
|
||||
|
||||
/**
|
||||
@@ -459,3 +462,44 @@ r12l_to_rgbp12le(struct to_planar_data d)
|
||||
{
|
||||
r12l_to_gbrpXXle(d, DEPTH12, 0, 1, 2);
|
||||
}
|
||||
|
||||
struct convert_task_data {
|
||||
decode_buffer_func_t *convert;
|
||||
struct to_planar_data d;
|
||||
};
|
||||
|
||||
static void *
|
||||
convert_task(void *arg)
|
||||
{
|
||||
struct convert_task_data *d = arg;
|
||||
d->convert(d->d);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void
|
||||
decode_to_planar_parallel(decode_buffer_func_t *dec, struct to_planar_data d,
|
||||
int src_linesize, int num_threads)
|
||||
{
|
||||
const unsigned cpu_count = num_threads == TO_PLANAR_THREADS_AUTO
|
||||
? get_cpu_core_count()
|
||||
: num_threads;
|
||||
|
||||
struct convert_task_data data[cpu_count];
|
||||
for (size_t i = 0; i < cpu_count; ++i) {
|
||||
unsigned row_height = (d.height / cpu_count) & ~1; // needs to be even
|
||||
data[i].convert = dec;
|
||||
data[i].d = d;
|
||||
data[i].d.in_data = d.in_data + (i * row_height * src_linesize);
|
||||
|
||||
for (unsigned plane = 0; plane < countof(d.out_data); ++plane) {
|
||||
data[i].d.out_data[plane] =
|
||||
d.out_data[plane] +
|
||||
((i * row_height * d.out_linesize[plane]));
|
||||
}
|
||||
if (i == cpu_count - 1) {
|
||||
row_height = d.height - (row_height * (cpu_count - 1));
|
||||
}
|
||||
data[i].d.height = row_height;
|
||||
}
|
||||
task_run_parallel(convert_task, (int) cpu_count, data, sizeof data[0], NULL);
|
||||
}
|
||||
|
||||
@@ -45,7 +45,10 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define TO_PLANAR_MAX_COMP 4
|
||||
enum {
|
||||
TO_PLANAR_THREADS_AUTO = 0,
|
||||
TO_PLANAR_MAX_COMP = 4,
|
||||
};
|
||||
|
||||
struct to_planar_data {
|
||||
int width;
|
||||
@@ -69,6 +72,19 @@ decode_buffer_func_t r12l_to_gbrp12le;
|
||||
decode_buffer_func_t r12l_to_gbrp16le;
|
||||
decode_buffer_func_t r12l_to_rgbp12le;
|
||||
|
||||
/**
|
||||
* run the @ref decode_buffer_func_t from packed format in parallel
|
||||
* @param dec fn to run
|
||||
* @param src_linesize source linesize (vc_get_linesize(width, in_pixfmt))
|
||||
* @param num_threads number of threads or DECODE_TO_THREADS_AUTO to use the
|
||||
* number of logical cores)
|
||||
* @note no support for horizontal subsampling for now
|
||||
* @sa decode_to_planar_parallel
|
||||
*/
|
||||
void decode_to_planar_parallel(decode_buffer_func_t *dec,
|
||||
struct to_planar_data d, int src_linesize,
|
||||
int num_threads);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user