mirror of
https://github.com/outbackdingo/UltraGrid.git
synced 2026-04-07 18:05:51 +00:00
vdec/cmpto_j2k: use kernel for ->R12L conversion
refers to GH-406
This commit is contained in:
21
configure.ac
21
configure.ac
@@ -3088,13 +3088,22 @@ if test $cmpto_j2k_req != no; then
|
|||||||
AC_CHECK_HEADER(cmpto_j2k_dec.h, FOUND_CMPTO_J2K_DEC_H=yes, FOUND_CMPTO_J2K_DEC_H=no)
|
AC_CHECK_HEADER(cmpto_j2k_dec.h, FOUND_CMPTO_J2K_DEC_H=yes, FOUND_CMPTO_J2K_DEC_H=no)
|
||||||
AC_CHECK_LIB(cmpto_j2k_enc, cmpto_j2k_enc_ctx_cfg_create, FOUND_CMPTO_J2K_ENC_L=yes, FOUND_CMPTO_J2K_ENC_L=no)
|
AC_CHECK_LIB(cmpto_j2k_enc, cmpto_j2k_enc_ctx_cfg_create, FOUND_CMPTO_J2K_ENC_L=yes, FOUND_CMPTO_J2K_ENC_L=no)
|
||||||
AC_CHECK_LIB(cmpto_j2k_dec, cmpto_j2k_dec_ctx_cfg_create, FOUND_CMPTO_J2K_DEC_L=yes, FOUND_CMPTO_J2K_DEC_L=no)
|
AC_CHECK_LIB(cmpto_j2k_dec, cmpto_j2k_dec_ctx_cfg_create, FOUND_CMPTO_J2K_DEC_L=yes, FOUND_CMPTO_J2K_DEC_L=no)
|
||||||
|
fi
|
||||||
if test "$FOUND_CMPTO_J2K_ENC_H" = yes && test "$FOUND_CMPTO_J2K_DEC_H" = yes && test "$FOUND_CMPTO_J2K_ENC_L" = yes && test "$FOUND_CMPTO_J2K_DEC_L" = yes
|
if test "$cmpto_j2k_req" != no &&
|
||||||
then
|
test "$FOUND_CMPTO_J2K_ENC_H" = yes &&
|
||||||
add_module vcompress_cmpto_j2k src/video_compress/cmpto_j2k.o -lcmpto_j2k_enc
|
test "$FOUND_CMPTO_J2K_DEC_H" = yes &&
|
||||||
add_module vdecompress_cmpto_j2k src/video_decompress/cmpto_j2k.o -lcmpto_j2k_dec
|
test "$FOUND_CMPTO_J2K_ENC_L" = yes &&
|
||||||
cmpto_j2k=yes
|
test "$FOUND_CMPTO_J2K_DEC_L" = yes
|
||||||
|
then
|
||||||
|
dec_objs=src/video_decompress/cmpto_j2k.o
|
||||||
|
if test "$FOUND_CUDA" = yes; then
|
||||||
|
dec_objs="$dec_objs src/cuda_wrapper/kernels.o"
|
||||||
|
else
|
||||||
|
UG_MSG_WARN([CUDA is recommended for optimal cmpto_j2k performance but not found])
|
||||||
fi
|
fi
|
||||||
|
add_module vcompress_cmpto_j2k src/video_compress/cmpto_j2k.o -lcmpto_j2k_enc
|
||||||
|
add_module vdecompress_cmpto_j2k "$dec_objs" -lcmpto_j2k_dec
|
||||||
|
cmpto_j2k=yes
|
||||||
fi
|
fi
|
||||||
|
|
||||||
ENSURE_FEATURE_PRESENT([$cmpto_j2k_req], [$cmpto_j2k], [Comprimato J2K not found!])
|
ENSURE_FEATURE_PRESENT([$cmpto_j2k_req], [$cmpto_j2k], [Comprimato J2K not found!])
|
||||||
|
|||||||
204
src/cuda_wrapper/kernels.cu
Normal file
204
src/cuda_wrapper/kernels.cu
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
/**
|
||||||
|
* @file cuda_wrapper/kernels.cu
|
||||||
|
* @author Martin Pulec <pulec@cesnet.cz>
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 CESNET
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, is permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of CESNET nor the names of its contributors may be
|
||||||
|
* used to endorse or promote products derived from this software without
|
||||||
|
* specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||||
|
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||||
|
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||||
|
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||||
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "kernels.hpp"
|
||||||
|
|
||||||
|
#include <cuda_runtime_api.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/// modified vc_copylineRG48toR12L
|
||||||
|
__global__ void
|
||||||
|
kernel_rg48_to_r12l(uint8_t *in, uint8_t *out, unsigned size_x, unsigned size_y)
|
||||||
|
{
|
||||||
|
unsigned position_x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
|
unsigned position_y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||||
|
if (position_x > (size_x + 7) / 8) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// drop last block if not aligned (prevent OOB read from input)
|
||||||
|
if (position_y == size_y - 1 && position_x > size_x / 8) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
uint8_t *src = in + 2 * (position_y * 3 * size_x + position_x * 3 * 8);
|
||||||
|
uint8_t *dst =
|
||||||
|
out + (position_y * ((size_x + 7) / 8) + position_x) * 36;
|
||||||
|
|
||||||
|
// 0
|
||||||
|
dst[0] = src[0] >> 4;
|
||||||
|
dst[0] |= src[1] << 4;
|
||||||
|
dst[1] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[1] |= src[0] & 0xF0;
|
||||||
|
dst[2] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[3] = src[0] >> 4;
|
||||||
|
dst[3] |= src[1] << 4;
|
||||||
|
dst[4 + 0] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
// 1
|
||||||
|
dst[4 + 0] |= src[0] & 0xF0;
|
||||||
|
dst[4 + 1] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[4 + 2] = src[0] >> 4;
|
||||||
|
dst[4 + 2] |= src[1] << 4;
|
||||||
|
dst[4 + 3] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[4 + 3] |= src[0] & 0xF0;
|
||||||
|
dst[8 + 0] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
// 2
|
||||||
|
dst[8 + 1] = src[0] >> 4;
|
||||||
|
dst[8 + 1] |= src[1] << 4;
|
||||||
|
dst[8 + 2] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[8 + 2] |= src[0] & 0xF0;
|
||||||
|
dst[8 + 3] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[12 + 0] = src[0] >> 4;
|
||||||
|
dst[12 + 0] |= src[1] << 4;
|
||||||
|
dst[12 + 1] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
// 3
|
||||||
|
dst[12 + 1] |= src[0] & 0xF0;
|
||||||
|
dst[12 + 2] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[12 + 3] = src[0] >> 4;
|
||||||
|
dst[12 + 3] |= src[1] << 4;
|
||||||
|
dst[16 + 0] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[16 + 0] |= src[0] & 0xF0;
|
||||||
|
dst[16 + 1] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
// 4
|
||||||
|
dst[16 + 2] = src[0] >> 4;
|
||||||
|
dst[16 + 2] |= src[1] << 4;
|
||||||
|
dst[16 + 3] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[16 + 3] |= src[0] & 0xF0;
|
||||||
|
dst[20 + 0] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[20 + 1] = src[0] >> 4;
|
||||||
|
dst[20 + 1] |= src[1] << 4;
|
||||||
|
dst[20 + 2] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
// 5
|
||||||
|
dst[20 + 2] |= src[0] & 0xF0;
|
||||||
|
dst[20 + 3] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[24 + 0] = src[0] >> 4;
|
||||||
|
dst[24 + 0] |= src[1] << 4;
|
||||||
|
dst[24 + 1] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[24 + 1] |= src[0] & 0xF0;
|
||||||
|
dst[24 + 2] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
// 6
|
||||||
|
dst[24 + 3] = src[0] >> 4;
|
||||||
|
dst[24 + 3] |= src[1] << 4;
|
||||||
|
dst[28 + 0] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[28 + 0] |= src[0] & 0xF0;
|
||||||
|
dst[28 + 1] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[28 + 2] = src[0] >> 4;
|
||||||
|
dst[28 + 2] |= src[1] << 4;
|
||||||
|
dst[28 + 3] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
// 7
|
||||||
|
dst[28 + 3] |= src[0] & 0xF0;
|
||||||
|
dst[32 + 0] = src[1];
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[32 + 1] = src[0] >> 4;
|
||||||
|
dst[32 + 1] |= src[1] << 4;
|
||||||
|
dst[32 + 2] = src[1] >> 4;
|
||||||
|
src += 2;
|
||||||
|
|
||||||
|
dst[32 + 2] |= src[0] & 0xF0;
|
||||||
|
dst[32 + 3] = src[1];
|
||||||
|
src += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @sa cmpto_j2k_dec_postprocessor_run_callback_cuda
|
||||||
|
*/
|
||||||
|
int postprocess_rg48_to_r12l(
|
||||||
|
void * /* postprocessor */,
|
||||||
|
void * /* img_custom_data*/,
|
||||||
|
size_t /* img_custom_data_size */,
|
||||||
|
int size_x,
|
||||||
|
int size_y,
|
||||||
|
struct cmpto_j2k_dec_comp_format * /* comp_formats */,
|
||||||
|
int /* comp_count */,
|
||||||
|
void *input_samples,
|
||||||
|
size_t /* input_samples_size */,
|
||||||
|
void * /* temp_buffer */,
|
||||||
|
size_t /* temp_buffer_size */,
|
||||||
|
void * output_buffer,
|
||||||
|
size_t /* output_buffer_size */,
|
||||||
|
void * stream
|
||||||
|
) {
|
||||||
|
dim3 threads_per_block(256);
|
||||||
|
dim3 blocks((((size_x + 7) / 8) + 255) / 256, size_y);
|
||||||
|
|
||||||
|
kernel_rg48_to_r12l<<<blocks, threads_per_block, 0,
|
||||||
|
(cudaStream_t) stream>>>(
|
||||||
|
(uint8_t *) input_samples, (uint8_t *) output_buffer, size_x,
|
||||||
|
size_y);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
62
src/cuda_wrapper/kernels.hpp
Normal file
62
src/cuda_wrapper/kernels.hpp
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
/**
|
||||||
|
* @file cuda_wrapper/kernels.hpp
|
||||||
|
* @author Martin Pulec <pulec@cesnet.cz>
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 CESNET
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, is permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of CESNET nor the names of its contributors may be
|
||||||
|
* used to endorse or promote products derived from this software without
|
||||||
|
* specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||||
|
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||||
|
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||||
|
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||||
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CUDA_WRAPPER_KERNELS_HPP_1A3F7B57_EE91_4363_8D50_9CDDC60CB74F
|
||||||
|
#define CUDA_WRAPPER_KERNELS_HPP_1A3F7B57_EE91_4363_8D50_9CDDC60CB74F
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
|
struct cmpto_j2k_dec_comp_format;
|
||||||
|
|
||||||
|
int postprocess_rg48_to_r12l(
|
||||||
|
void * postprocessor,
|
||||||
|
void * img_custom_data,
|
||||||
|
size_t img_custom_data_size,
|
||||||
|
int size_x,
|
||||||
|
int size_y,
|
||||||
|
struct cmpto_j2k_dec_comp_format * comp_formats,
|
||||||
|
int comp_count,
|
||||||
|
void * input_samples,
|
||||||
|
size_t input_samples_size,
|
||||||
|
void * temp_buffer,
|
||||||
|
size_t temp_buffer_size,
|
||||||
|
void * output_buffer,
|
||||||
|
size_t output_buffer_size,
|
||||||
|
void * stream
|
||||||
|
);
|
||||||
|
|
||||||
|
#endif // defined CUDA_WRAPPER_KERNELS_HPP_1A3F7B57_EE91_4363_8D50_9CDDC60CB74F
|
||||||
@@ -48,12 +48,6 @@
|
|||||||
* (which is asynchronous, thus non-blocking)
|
* (which is asynchronous, thus non-blocking)
|
||||||
* - then queue (filled by thread in first point) is checked - if it is
|
* - then queue (filled by thread in first point) is checked - if it is
|
||||||
* non-empty, frame is copied to framebufffer. If not false is returned.
|
* non-empty, frame is copied to framebufffer. If not false is returned.
|
||||||
*
|
|
||||||
* @todo
|
|
||||||
* Reconfiguration isn't entirely correct - on reconfigure, all frames
|
|
||||||
* should be dropped and not copied to framebuffer. However this is usually
|
|
||||||
* not an issue because dynamic video change is rare (except switching to
|
|
||||||
* another stream, which, however, creates a new decoder).
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <algorithm> // for min
|
#include <algorithm> // for min
|
||||||
@@ -68,6 +62,7 @@
|
|||||||
#include <queue> // for queue
|
#include <queue> // for queue
|
||||||
#include <utility> // for pair
|
#include <utility> // for pair
|
||||||
|
|
||||||
|
#include "cuda_wrapper/kernels.hpp"
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
#include "host.h"
|
#include "host.h"
|
||||||
#include "lib_common.h"
|
#include "lib_common.h"
|
||||||
@@ -92,11 +87,17 @@ using std::min;
|
|||||||
using std::mutex;
|
using std::mutex;
|
||||||
using std::pair;
|
using std::pair;
|
||||||
using std::queue;
|
using std::queue;
|
||||||
|
using std::stoi;
|
||||||
using std::unique_lock;
|
using std::unique_lock;
|
||||||
|
|
||||||
|
static void
|
||||||
|
j2k_decompress_cleanup_common(struct state_decompress_j2k *s);
|
||||||
|
|
||||||
struct state_decompress_j2k {
|
struct state_decompress_j2k {
|
||||||
state_decompress_j2k(unsigned int mqs, unsigned int mif)
|
state_decompress_j2k(unsigned int mqs, unsigned int mif)
|
||||||
: max_queue_size(mqs), max_in_frames(mif) {}
|
: max_queue_size(mqs), max_in_frames(mif) {}
|
||||||
|
long long int req_mem_limit = DEFAULT_MEM_LIMIT;
|
||||||
|
unsigned int req_tile_limit = DEFAULT_TILE_LIMIT;
|
||||||
cmpto_j2k_dec_ctx *decoder{};
|
cmpto_j2k_dec_ctx *decoder{};
|
||||||
cmpto_j2k_dec_cfg *settings{};
|
cmpto_j2k_dec_cfg *settings{};
|
||||||
|
|
||||||
@@ -220,21 +221,8 @@ ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-encoder-queue=<len>\n"
|
|||||||
" max number of frames held by encoder\n");
|
" max number of frames held by encoder\n");
|
||||||
static void * j2k_decompress_init(void)
|
static void * j2k_decompress_init(void)
|
||||||
{
|
{
|
||||||
struct state_decompress_j2k *s = NULL;
|
|
||||||
long long int mem_limit = DEFAULT_MEM_LIMIT;
|
|
||||||
unsigned int tile_limit = DEFAULT_TILE_LIMIT;
|
|
||||||
unsigned int queue_len = DEFAULT_MAX_QUEUE_SIZE;
|
unsigned int queue_len = DEFAULT_MAX_QUEUE_SIZE;
|
||||||
unsigned int encoder_in_frames = DEFAULT_MAX_IN_FRAMES;
|
unsigned int encoder_in_frames = DEFAULT_MAX_IN_FRAMES;
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (get_commandline_param("j2k-dec-mem-limit")) {
|
|
||||||
mem_limit = unit_evaluate(
|
|
||||||
get_commandline_param("j2k-dec-mem-limit"), nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (get_commandline_param("j2k-dec-tile-limit")) {
|
|
||||||
tile_limit = atoi(get_commandline_param("j2k-dec-tile-limit"));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (get_commandline_param("j2k-dec-queue-len")) {
|
if (get_commandline_param("j2k-dec-queue-len")) {
|
||||||
queue_len = atoi(get_commandline_param("j2k-dec-queue-len"));
|
queue_len = atoi(get_commandline_param("j2k-dec-queue-len"));
|
||||||
@@ -244,57 +232,57 @@ static void * j2k_decompress_init(void)
|
|||||||
encoder_in_frames = atoi(get_commandline_param("j2k-dec-encoder-queue"));
|
encoder_in_frames = atoi(get_commandline_param("j2k-dec-encoder-queue"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto *s = new state_decompress_j2k(queue_len, encoder_in_frames);
|
||||||
|
if (get_commandline_param("j2k-dec-mem-limit") != nullptr) {
|
||||||
|
s->req_mem_limit = unit_evaluate(
|
||||||
|
get_commandline_param("j2k-dec-mem-limit"), nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (get_commandline_param("j2k-dec-tile-limit") != nullptr) {
|
||||||
|
s->req_tile_limit = stoi(get_commandline_param("j2k-dec-tile-limit"));
|
||||||
|
}
|
||||||
|
|
||||||
const auto *version = cmpto_j2k_dec_get_version();
|
const auto *version = cmpto_j2k_dec_get_version();
|
||||||
LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n";
|
LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n";
|
||||||
|
|
||||||
s = new state_decompress_j2k(queue_len, encoder_in_frames);
|
|
||||||
|
|
||||||
struct cmpto_j2k_dec_ctx_cfg *ctx_cfg;
|
|
||||||
CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", goto error);
|
|
||||||
for (unsigned int i = 0; i < cuda_devices_count; ++i) {
|
|
||||||
CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(ctx_cfg, cuda_devices[i], mem_limit, tile_limit),
|
|
||||||
"Error setting CUDA device", goto error);
|
|
||||||
}
|
|
||||||
|
|
||||||
CHECK_OK(cmpto_j2k_dec_ctx_create(ctx_cfg, &s->decoder), "Error initializing context",
|
|
||||||
goto error);
|
|
||||||
|
|
||||||
CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(ctx_cfg), "Destroy cfg", NOOP);
|
|
||||||
|
|
||||||
CHECK_OK(cmpto_j2k_dec_cfg_create(s->decoder, &s->settings), "Error creating configuration",
|
|
||||||
goto error);
|
|
||||||
|
|
||||||
ret = pthread_create(&s->thread_id, NULL, decompress_j2k_worker, (void *) s);
|
|
||||||
assert(ret == 0 && "Unable to create thread");
|
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
|
|
||||||
error:
|
|
||||||
if (!s) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
if (s->settings) {
|
|
||||||
cmpto_j2k_dec_cfg_destroy(s->settings);
|
|
||||||
}
|
|
||||||
if (s->decoder) {
|
|
||||||
cmpto_j2k_dec_ctx_destroy(s->decoder);
|
|
||||||
}
|
|
||||||
delete s;
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
r12l_postprocessor_get_sz(
|
||||||
|
void */*postprocessor*/, void */*img_custom_data*/, size_t /*img_custom_data_size*/,
|
||||||
|
int size_x, int size_y, struct cmpto_j2k_dec_comp_format */*comp_formats*/,
|
||||||
|
int comp_count, size_t *temp_buffer_size, size_t *output_buffer_size)
|
||||||
|
{
|
||||||
|
assert(comp_count == 3);
|
||||||
|
*temp_buffer_size = 0; // no temp buffer required
|
||||||
|
*output_buffer_size = vc_get_datalen(size_x, size_y, R12L);
|
||||||
|
}
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
const cmpto_j2k_dec_postprocessor_run_callback_cuda r12l_postprocess_cuda =
|
||||||
|
postprocess_rg48_to_r12l;
|
||||||
|
#else
|
||||||
|
const cmpto_j2k_dec_postprocessor_run_callback_cuda r12l_postprocess_cuda =
|
||||||
|
nullptr;
|
||||||
|
#endif
|
||||||
|
|
||||||
static struct {
|
static struct {
|
||||||
codec_t ug_codec;
|
codec_t ug_codec;
|
||||||
enum cmpto_sample_format_type cmpto_sf;
|
enum cmpto_sample_format_type cmpto_sf;
|
||||||
|
// CPU postprocess
|
||||||
void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height);
|
void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height);
|
||||||
|
// GPU postprocess
|
||||||
|
cmpto_j2k_dec_postprocessor_size_callback_cuda size_callback;
|
||||||
|
cmpto_j2k_dec_postprocessor_run_callback_cuda run_callback;
|
||||||
} codecs[] = {
|
} codecs[] = {
|
||||||
{UYVY, CMPTO_422_U8_P1020, nullptr},
|
{ UYVY, CMPTO_422_U8_P1020, nullptr, nullptr, nullptr },
|
||||||
{v210, CMPTO_422_U10_V210, nullptr},
|
{ v210, CMPTO_422_U10_V210, nullptr, nullptr, nullptr },
|
||||||
{RGB, CMPTO_444_U8_P012, nullptr},
|
{ RGB, CMPTO_444_U8_P012, nullptr, nullptr, nullptr },
|
||||||
{BGR, CMPTO_444_U8_P210, nullptr},
|
{ BGR, CMPTO_444_U8_P210, nullptr, nullptr, nullptr },
|
||||||
{RGBA, CMPTO_444_U8_P012Z, nullptr},
|
{ RGBA, CMPTO_444_U8_P012Z, nullptr, nullptr, nullptr },
|
||||||
{R10k, CMPTO_444_U10U10U10_MSB32BE_P210, nullptr},
|
{ R10k, CMPTO_444_U10U10U10_MSB32BE_P210, nullptr, nullptr, nullptr },
|
||||||
{R12L, CMPTO_444_U12_MSB16LE_P012, rg48_to_r12l},
|
{ R12L, CMPTO_444_U12_MSB16LE_P012, rg48_to_r12l,
|
||||||
|
r12l_postprocessor_get_sz, r12l_postprocess_cuda },
|
||||||
};
|
};
|
||||||
|
|
||||||
static int j2k_decompress_reconfigure(void *state, struct video_desc desc,
|
static int j2k_decompress_reconfigure(void *state, struct video_desc desc,
|
||||||
@@ -308,17 +296,40 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
j2k_decompress_cleanup_common(s);
|
||||||
|
|
||||||
if (out_codec == R12L) {
|
if (out_codec == R12L) {
|
||||||
LOG(LOG_LEVEL_NOTICE) << MOD_NAME << "Decoding to 12-bit RGB.\n";
|
LOG(LOG_LEVEL_NOTICE) << MOD_NAME << "Decoding to 12-bit RGB.\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
enum cmpto_sample_format_type cmpto_sf = (cmpto_sample_format_type) 0;
|
enum cmpto_sample_format_type cmpto_sf = (cmpto_sample_format_type) 0;
|
||||||
|
|
||||||
|
struct cmpto_j2k_dec_ctx_cfg *ctx_cfg = nullptr;
|
||||||
|
CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false);
|
||||||
|
for (unsigned int i = 0; i < cuda_devices_count; ++i) {
|
||||||
|
CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(
|
||||||
|
ctx_cfg, cuda_devices[i], s->req_mem_limit,
|
||||||
|
s->req_tile_limit),
|
||||||
|
"Error setting CUDA device", return false);
|
||||||
|
}
|
||||||
|
|
||||||
for(const auto &codec : codecs){
|
for(const auto &codec : codecs){
|
||||||
if(codec.ug_codec == out_codec){
|
if(codec.ug_codec != out_codec){
|
||||||
cmpto_sf = codec.cmpto_sf;
|
continue;
|
||||||
|
}
|
||||||
|
cmpto_sf = codec.cmpto_sf;
|
||||||
|
if (codec.run_callback != nullptr) {
|
||||||
|
CHECK_OK(cmpto_j2k_dec_ctx_cfg_set_postprocessor_cuda(
|
||||||
|
ctx_cfg, nullptr, nullptr,
|
||||||
|
codec.size_callback, codec.run_callback),
|
||||||
|
"add postprocessor", return false);
|
||||||
|
} else {
|
||||||
s->convert = codec.convert;
|
s->convert = codec.convert;
|
||||||
break;
|
if (s->convert != nullptr) {
|
||||||
|
MSG(WARNING,
|
||||||
|
"Compiled without CUDA, pixfmt conv will "
|
||||||
|
"be processed on CPU...\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -328,6 +339,14 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc,
|
|||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CHECK_OK(cmpto_j2k_dec_ctx_create(ctx_cfg, &s->decoder),
|
||||||
|
"Error initializing context", return false);
|
||||||
|
|
||||||
|
CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(ctx_cfg), "Destroy cfg", NOOP);
|
||||||
|
|
||||||
|
CHECK_OK(cmpto_j2k_dec_cfg_create(s->decoder, &s->settings),
|
||||||
|
"Error creating configuration", return false);
|
||||||
|
|
||||||
if (out_codec != RGBA || (rshift == 0 && gshift == 8 && bshift == 16)) {
|
if (out_codec != RGBA || (rshift == 0 && gshift == 8 && bshift == 16)) {
|
||||||
CHECK_OK(cmpto_j2k_dec_cfg_set_samples_format_type(s->settings, cmpto_sf),
|
CHECK_OK(cmpto_j2k_dec_cfg_set_samples_format_type(s->settings, cmpto_sf),
|
||||||
"Error setting sample format type", return false);
|
"Error setting sample format type", return false);
|
||||||
@@ -361,6 +380,9 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc,
|
|||||||
s->out_codec = out_codec;
|
s->out_codec = out_codec;
|
||||||
s->pitch = pitch;
|
s->pitch = pitch;
|
||||||
|
|
||||||
|
int ret = pthread_create(&s->thread_id, NULL, decompress_j2k_worker, (void *) s);
|
||||||
|
assert(ret == 0 && "Unable to create thread");
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -489,16 +511,21 @@ static int j2k_decompress_get_property(void *state, int property, void *val, siz
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void j2k_decompress_done(void *state)
|
static void
|
||||||
|
j2k_decompress_cleanup_common(struct state_decompress_j2k *s)
|
||||||
{
|
{
|
||||||
struct state_decompress_j2k *s = (struct state_decompress_j2k *) state;
|
|
||||||
|
|
||||||
cmpto_j2k_dec_ctx_stop(s->decoder);
|
cmpto_j2k_dec_ctx_stop(s->decoder);
|
||||||
pthread_join(s->thread_id, NULL);
|
pthread_join(s->thread_id, NULL);
|
||||||
log_msg(LOG_LEVEL_VERBOSE, "[J2K dec.] Decoder stopped.\n");
|
log_msg(LOG_LEVEL_VERBOSE, "[J2K dec.] Decoder stopped.\n");
|
||||||
|
|
||||||
cmpto_j2k_dec_cfg_destroy(s->settings);
|
if (s->settings != nullptr) {
|
||||||
cmpto_j2k_dec_ctx_destroy(s->decoder);
|
cmpto_j2k_dec_cfg_destroy(s->settings);
|
||||||
|
s->settings = nullptr;
|
||||||
|
}
|
||||||
|
if (s->decoder != nullptr) {
|
||||||
|
cmpto_j2k_dec_ctx_destroy(s->decoder);
|
||||||
|
s->decoder = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
while (s->decompressed_frames.size() > 0) {
|
while (s->decompressed_frames.size() > 0) {
|
||||||
auto decoded = s->decompressed_frames.front();
|
auto decoded = s->decompressed_frames.front();
|
||||||
@@ -506,6 +533,13 @@ static void j2k_decompress_done(void *state)
|
|||||||
free(decoded.first);
|
free(decoded.first);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s->convert = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void j2k_decompress_done(void *state)
|
||||||
|
{
|
||||||
|
auto *s = (struct state_decompress_j2k *) state;
|
||||||
|
j2k_decompress_cleanup_common(s);
|
||||||
delete s;
|
delete s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user