vdec/cmpto_j2k: use kernel for ->R12L conversion

refers to GH-406
2026-03-21 22:40:30 +00:00 · 2024-08-27 16:27:29 +02:00
parent 3e988715bf
commit 4f3add780d
4 changed files with 382 additions and 73 deletions
--- a/configure.ac
+++ b/configure.ac
@@ -3088,13 +3088,22 @@ if test $cmpto_j2k_req != no; then
        AC_CHECK_HEADER(cmpto_j2k_dec.h, FOUND_CMPTO_J2K_DEC_H=yes, FOUND_CMPTO_J2K_DEC_H=no)
        AC_CHECK_LIB(cmpto_j2k_enc, cmpto_j2k_enc_ctx_cfg_create, FOUND_CMPTO_J2K_ENC_L=yes, FOUND_CMPTO_J2K_ENC_L=no)
        AC_CHECK_LIB(cmpto_j2k_dec, cmpto_j2k_dec_ctx_cfg_create, FOUND_CMPTO_J2K_DEC_L=yes, FOUND_CMPTO_J2K_DEC_L=no)
-
-        if test "$FOUND_CMPTO_J2K_ENC_H" = yes && test "$FOUND_CMPTO_J2K_DEC_H" = yes && test "$FOUND_CMPTO_J2K_ENC_L" = yes && test "$FOUND_CMPTO_J2K_DEC_L" = yes
-        then
-                add_module vcompress_cmpto_j2k src/video_compress/cmpto_j2k.o -lcmpto_j2k_enc
-                add_module vdecompress_cmpto_j2k src/video_decompress/cmpto_j2k.o -lcmpto_j2k_dec
-                cmpto_j2k=yes
+fi
+if test "$cmpto_j2k_req" != no &&
+        test "$FOUND_CMPTO_J2K_ENC_H" = yes &&
+        test "$FOUND_CMPTO_J2K_DEC_H" = yes &&
+        test "$FOUND_CMPTO_J2K_ENC_L" = yes &&
+        test "$FOUND_CMPTO_J2K_DEC_L" = yes
+then
+        dec_objs=src/video_decompress/cmpto_j2k.o
+        if test "$FOUND_CUDA" = yes; then
+                dec_objs="$dec_objs src/cuda_wrapper/kernels.o"
+        else
+                UG_MSG_WARN([CUDA is recommended for optimal cmpto_j2k performance but not found])
        fi
+        add_module vcompress_cmpto_j2k src/video_compress/cmpto_j2k.o -lcmpto_j2k_enc
+        add_module vdecompress_cmpto_j2k "$dec_objs" -lcmpto_j2k_dec
+        cmpto_j2k=yes
 fi

 ENSURE_FEATURE_PRESENT([$cmpto_j2k_req], [$cmpto_j2k], [Comprimato J2K not found!])
--- a/src/cuda_wrapper/kernels.cu
+++ b/src/cuda_wrapper/kernels.cu
@@ -0,0 +1,204 @@
+/**
+ * @file   cuda_wrapper/kernels.cu
+ * @author Martin Pulec     <pulec@cesnet.cz>
+ */
+/*
+ * Copyright (c) 2024 CESNET
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, is permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of CESNET nor the names of its contributors may be
+ *    used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "kernels.hpp"
+
+#include <cuda_runtime_api.h>
+#include <stdint.h>
+
+/// modified vc_copylineRG48toR12L
+__global__ void
+kernel_rg48_to_r12l(uint8_t *in, uint8_t *out, unsigned size_x, unsigned size_y)
+{
+        unsigned position_x = threadIdx.x + blockIdx.x * blockDim.x;
+        unsigned position_y = threadIdx.y + blockIdx.y * blockDim.y;
+        if (position_x > (size_x + 7) / 8) {
+                return;
+        }
+        // drop last block if not aligned (prevent OOB read from input)
+        if (position_y == size_y - 1 && position_x > size_x / 8) {
+                return;
+        }
+        uint8_t *src = in + 2 * (position_y * 3 * size_x + position_x * 3 * 8);
+        uint8_t *dst =
+            out + (position_y * ((size_x + 7) / 8) + position_x) * 36;
+
+        // 0
+        dst[0] = src[0] >> 4;
+        dst[0] |= src[1] << 4;
+        dst[1] = src[1] >> 4;
+        src += 2;
+
+        dst[1] |= src[0] & 0xF0;
+        dst[2] = src[1];
+        src += 2;
+
+        dst[3] = src[0] >> 4;
+        dst[3] |= src[1] << 4;
+        dst[4 + 0] = src[1] >> 4;
+        src += 2;
+
+        // 1
+        dst[4 + 0] |= src[0] & 0xF0;
+        dst[4 + 1] = src[1];
+        src += 2;
+
+        dst[4 + 2] = src[0] >> 4;
+        dst[4 + 2] |= src[1] << 4;
+        dst[4 + 3] = src[1] >> 4;
+        src += 2;
+
+        dst[4 + 3] |= src[0] & 0xF0;
+        dst[8 + 0] = src[1];
+        src += 2;
+
+        // 2
+        dst[8 + 1] = src[0] >> 4;
+        dst[8 + 1] |= src[1] << 4;
+        dst[8 + 2] = src[1] >> 4;
+        src += 2;
+
+        dst[8 + 2] |= src[0] & 0xF0;
+        dst[8 + 3] = src[1];
+        src += 2;
+
+        dst[12 + 0] = src[0] >> 4;
+        dst[12 + 0] |= src[1] << 4;
+        dst[12 + 1] = src[1] >> 4;
+        src += 2;
+
+        // 3
+        dst[12 + 1] |= src[0] & 0xF0;
+        dst[12 + 2] = src[1];
+        src += 2;
+
+        dst[12 + 3] = src[0] >> 4;
+        dst[12 + 3] |= src[1] << 4;
+        dst[16 + 0] = src[1] >> 4;
+        src += 2;
+
+        dst[16 + 0] |= src[0] & 0xF0;
+        dst[16 + 1] = src[1];
+        src += 2;
+
+        // 4
+        dst[16 + 2] = src[0] >> 4;
+        dst[16 + 2] |= src[1] << 4;
+        dst[16 + 3] = src[1] >> 4;
+        src += 2;
+
+        dst[16 + 3] |= src[0] & 0xF0;
+        dst[20 + 0] = src[1];
+        src += 2;
+
+        dst[20 + 1] = src[0] >> 4;
+        dst[20 + 1] |= src[1] << 4;
+        dst[20 + 2] = src[1] >> 4;
+        src += 2;
+
+        // 5
+        dst[20 + 2] |= src[0] & 0xF0;
+        dst[20 + 3] = src[1];
+        src += 2;
+
+        dst[24 + 0] = src[0] >> 4;
+        dst[24 + 0] |= src[1] << 4;
+        dst[24 + 1] = src[1] >> 4;
+        src += 2;
+
+        dst[24 + 1] |= src[0] & 0xF0;
+        dst[24 + 2] = src[1];
+        src += 2;
+
+        // 6
+        dst[24 + 3] = src[0] >> 4;
+        dst[24 + 3] |= src[1] << 4;
+        dst[28 + 0] = src[1] >> 4;
+        src += 2;
+
+        dst[28 + 0] |= src[0] & 0xF0;
+        dst[28 + 1] = src[1];
+        src += 2;
+
+        dst[28 + 2] = src[0] >> 4;
+        dst[28 + 2] |= src[1] << 4;
+        dst[28 + 3] = src[1] >> 4;
+        src += 2;
+
+        // 7
+        dst[28 + 3] |= src[0] & 0xF0;
+        dst[32 + 0] = src[1];
+        src += 2;
+
+        dst[32 + 1] = src[0] >> 4;
+        dst[32 + 1] |= src[1] << 4;
+        dst[32 + 2] = src[1] >> 4;
+        src += 2;
+
+        dst[32 + 2] |= src[0] & 0xF0;
+        dst[32 + 3] = src[1];
+        src += 2;
+}
+
+/**
+ * @sa cmpto_j2k_dec_postprocessor_run_callback_cuda
+ */
+int postprocess_rg48_to_r12l(
+    void * /* postprocessor */,
+    void * /* img_custom_data*/,
+    size_t /* img_custom_data_size */,
+    int size_x,
+    int size_y,
+    struct cmpto_j2k_dec_comp_format * /* comp_formats */,
+    int /* comp_count */,
+    void *input_samples,
+    size_t /* input_samples_size */,
+    void * /* temp_buffer */,
+    size_t /* temp_buffer_size */,
+    void * output_buffer,
+    size_t /* output_buffer_size */,
+    void * stream
+) {
+        dim3 threads_per_block(256);
+        dim3 blocks((((size_x + 7) / 8) + 255) / 256, size_y);
+
+        kernel_rg48_to_r12l<<<blocks, threads_per_block, 0,
+                              (cudaStream_t) stream>>>(
+            (uint8_t *) input_samples, (uint8_t *) output_buffer, size_x,
+            size_y);
+        return 0;
+}
--- a/src/cuda_wrapper/kernels.hpp
+++ b/src/cuda_wrapper/kernels.hpp
@@ -0,0 +1,62 @@
+/**
+ * @file   cuda_wrapper/kernels.hpp
+ * @author Martin Pulec     <pulec@cesnet.cz>
+ */
+/*
+ * Copyright (c) 2024 CESNET
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, is permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of CESNET nor the names of its contributors may be
+ *    used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CUDA_WRAPPER_KERNELS_HPP_1A3F7B57_EE91_4363_8D50_9CDDC60CB74F
+#define CUDA_WRAPPER_KERNELS_HPP_1A3F7B57_EE91_4363_8D50_9CDDC60CB74F
+
+#include <cstddef>
+
+struct cmpto_j2k_dec_comp_format;
+
+int postprocess_rg48_to_r12l(
+    void * postprocessor,
+    void * img_custom_data,
+    size_t img_custom_data_size,
+    int size_x,
+    int size_y,
+    struct cmpto_j2k_dec_comp_format * comp_formats,
+    int comp_count,
+    void * input_samples,
+    size_t input_samples_size,
+    void * temp_buffer,
+    size_t temp_buffer_size,
+    void * output_buffer,
+    size_t output_buffer_size,
+    void * stream
+);
+
+#endif // defined CUDA_WRAPPER_KERNELS_HPP_1A3F7B57_EE91_4363_8D50_9CDDC60CB74F
--- a/src/video_decompress/cmpto_j2k.cpp
+++ b/src/video_decompress/cmpto_j2k.cpp
@@ -48,12 +48,6 @@
 *   (which is asynchronous, thus non-blocking)
 * - then queue (filled by thread in first point) is checked - if it is
 *   non-empty, frame is copied to framebufffer. If not false is returned.
- *
- * @todo
- * Reconfiguration isn't entirely correct - on reconfigure, all frames
- * should be dropped and not copied to framebuffer. However this is usually
- * not an issue because dynamic video change is rare (except switching to
- * another stream, which, however, creates a new decoder).
 */

 #include <algorithm>           // for min
@@ -68,6 +62,7 @@
 #include <queue>               // for queue
 #include <utility>             // for pair

+#include "cuda_wrapper/kernels.hpp"
 #include "debug.h"
 #include "host.h"
 #include "lib_common.h"
@@ -92,11 +87,17 @@ using std::min;
 using std::mutex;
 using std::pair;
 using std::queue;
+using std::stoi;
 using std::unique_lock;

+static void
+j2k_decompress_cleanup_common(struct state_decompress_j2k *s);
+
 struct state_decompress_j2k {
        state_decompress_j2k(unsigned int mqs, unsigned int mif)
                : max_queue_size(mqs), max_in_frames(mif) {}
+        long long int req_mem_limit = DEFAULT_MEM_LIMIT;
+        unsigned int req_tile_limit = DEFAULT_TILE_LIMIT;
        cmpto_j2k_dec_ctx *decoder{};
        cmpto_j2k_dec_cfg *settings{};

@@ -220,21 +221,8 @@ ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-encoder-queue=<len>\n"
                                "  max number of frames held by encoder\n");
 static void * j2k_decompress_init(void)
 {
-        struct state_decompress_j2k *s = NULL;
-        long long int mem_limit = DEFAULT_MEM_LIMIT;
-        unsigned int tile_limit = DEFAULT_TILE_LIMIT;
        unsigned int queue_len = DEFAULT_MAX_QUEUE_SIZE;
        unsigned int encoder_in_frames = DEFAULT_MAX_IN_FRAMES;
-        int ret;
-
-        if (get_commandline_param("j2k-dec-mem-limit")) {
-                mem_limit = unit_evaluate(
-                    get_commandline_param("j2k-dec-mem-limit"), nullptr);
-        }
-
-        if (get_commandline_param("j2k-dec-tile-limit")) {
-                tile_limit = atoi(get_commandline_param("j2k-dec-tile-limit"));
-        }

        if (get_commandline_param("j2k-dec-queue-len")) {
                queue_len = atoi(get_commandline_param("j2k-dec-queue-len"));
@@ -244,57 +232,57 @@ static void * j2k_decompress_init(void)
                encoder_in_frames = atoi(get_commandline_param("j2k-dec-encoder-queue"));
        }

+        auto *s = new state_decompress_j2k(queue_len, encoder_in_frames);
+        if (get_commandline_param("j2k-dec-mem-limit") != nullptr) {
+                s->req_mem_limit = unit_evaluate(
+                    get_commandline_param("j2k-dec-mem-limit"), nullptr);
+        }
+
+        if (get_commandline_param("j2k-dec-tile-limit") != nullptr) {
+                s->req_tile_limit = stoi(get_commandline_param("j2k-dec-tile-limit"));
+        }
+
        const auto *version = cmpto_j2k_dec_get_version();
        LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n";

-        s = new state_decompress_j2k(queue_len, encoder_in_frames);
-
-        struct cmpto_j2k_dec_ctx_cfg *ctx_cfg;
-        CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", goto error);
-        for (unsigned int i = 0; i < cuda_devices_count; ++i) {
-                CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(ctx_cfg, cuda_devices[i], mem_limit, tile_limit),
-                                "Error setting CUDA device", goto error);
-        }
-
-        CHECK_OK(cmpto_j2k_dec_ctx_create(ctx_cfg, &s->decoder), "Error initializing context",
-                        goto error);
-
-        CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(ctx_cfg), "Destroy cfg", NOOP);
-
-        CHECK_OK(cmpto_j2k_dec_cfg_create(s->decoder, &s->settings), "Error creating configuration",
-                        goto error);
-
-        ret = pthread_create(&s->thread_id, NULL, decompress_j2k_worker, (void *) s);
-        assert(ret == 0 && "Unable to create thread");
-
        return s;
-
-error:
-        if (!s) {
-                return NULL;
-        }
-        if (s->settings) {
-                cmpto_j2k_dec_cfg_destroy(s->settings);
-        }
-        if (s->decoder) {
-                cmpto_j2k_dec_ctx_destroy(s->decoder);
-        }
-        delete s;
-        return NULL;
 }

+static void
+r12l_postprocessor_get_sz(
+    void */*postprocessor*/, void */*img_custom_data*/, size_t /*img_custom_data_size*/,
+    int size_x, int size_y, struct cmpto_j2k_dec_comp_format */*comp_formats*/,
+    int comp_count, size_t *temp_buffer_size, size_t *output_buffer_size)
+{
+        assert(comp_count == 3);
+        *temp_buffer_size = 0; // no temp buffer required
+        *output_buffer_size = vc_get_datalen(size_x, size_y, R12L);
+}
+#ifdef HAVE_CUDA
+const cmpto_j2k_dec_postprocessor_run_callback_cuda r12l_postprocess_cuda =
+    postprocess_rg48_to_r12l;
+#else
+const cmpto_j2k_dec_postprocessor_run_callback_cuda r12l_postprocess_cuda =
+    nullptr;
+#endif
+
 static struct {
        codec_t ug_codec;
        enum cmpto_sample_format_type cmpto_sf;
+        // CPU postprocess
        void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height);
+        // GPU postprocess
+        cmpto_j2k_dec_postprocessor_size_callback_cuda size_callback;
+        cmpto_j2k_dec_postprocessor_run_callback_cuda run_callback;
 } codecs[] = {
-        {UYVY, CMPTO_422_U8_P1020, nullptr},
-        {v210, CMPTO_422_U10_V210, nullptr},
-        {RGB, CMPTO_444_U8_P012, nullptr},
-        {BGR, CMPTO_444_U8_P210, nullptr},
-        {RGBA, CMPTO_444_U8_P012Z, nullptr},
-        {R10k, CMPTO_444_U10U10U10_MSB32BE_P210, nullptr},
-        {R12L, CMPTO_444_U12_MSB16LE_P012, rg48_to_r12l},
+        { UYVY, CMPTO_422_U8_P1020,               nullptr,      nullptr, nullptr               },
+        { v210, CMPTO_422_U10_V210,               nullptr,      nullptr, nullptr               },
+        { RGB,  CMPTO_444_U8_P012,                nullptr,      nullptr, nullptr               },
+        { BGR,  CMPTO_444_U8_P210,                nullptr,      nullptr, nullptr               },
+        { RGBA, CMPTO_444_U8_P012Z,               nullptr,      nullptr, nullptr               },
+        { R10k, CMPTO_444_U10U10U10_MSB32BE_P210, nullptr,      nullptr, nullptr               },
+        { R12L, CMPTO_444_U12_MSB16LE_P012,       rg48_to_r12l,
+         r12l_postprocessor_get_sz,                                      r12l_postprocess_cuda },
 };

 static int j2k_decompress_reconfigure(void *state, struct video_desc desc,
@@ -308,17 +296,40 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc,
                return true;
        }

+        j2k_decompress_cleanup_common(s);
+
        if (out_codec == R12L) {
                LOG(LOG_LEVEL_NOTICE) << MOD_NAME << "Decoding to 12-bit RGB.\n";
        }

        enum cmpto_sample_format_type cmpto_sf = (cmpto_sample_format_type) 0;

+        struct cmpto_j2k_dec_ctx_cfg *ctx_cfg = nullptr;
+        CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false);
+        for (unsigned int i = 0; i < cuda_devices_count; ++i) {
+                CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(
+                             ctx_cfg, cuda_devices[i], s->req_mem_limit,
+                             s->req_tile_limit),
+                         "Error setting CUDA device", return false);
+        }
+
        for(const auto &codec : codecs){
-                if(codec.ug_codec == out_codec){
-                        cmpto_sf = codec.cmpto_sf;
+                if(codec.ug_codec != out_codec){
+                        continue;
+                }
+                cmpto_sf = codec.cmpto_sf;
+                if (codec.run_callback != nullptr) {
+                        CHECK_OK(cmpto_j2k_dec_ctx_cfg_set_postprocessor_cuda(
+                                     ctx_cfg, nullptr, nullptr,
+                                     codec.size_callback, codec.run_callback),
+                                 "add postprocessor", return false);
+                } else {
                        s->convert = codec.convert;
-                        break;
+                        if (s->convert != nullptr) {
+                                MSG(WARNING,
+                                    "Compiled without CUDA, pixfmt conv will "
+                                    "be processed on CPU...\n");
+                        }
                }
        }

@@ -328,6 +339,14 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc,
                abort();
        }

+        CHECK_OK(cmpto_j2k_dec_ctx_create(ctx_cfg, &s->decoder),
+                 "Error initializing context", return false);
+
+        CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(ctx_cfg), "Destroy cfg", NOOP);
+
+        CHECK_OK(cmpto_j2k_dec_cfg_create(s->decoder, &s->settings),
+                 "Error creating configuration", return false);
+
        if (out_codec != RGBA || (rshift == 0 && gshift == 8 && bshift == 16)) {
                CHECK_OK(cmpto_j2k_dec_cfg_set_samples_format_type(s->settings, cmpto_sf),
                                "Error setting sample format type", return false);
@@ -361,6 +380,9 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc,
        s->out_codec = out_codec;
        s->pitch = pitch;

+        int ret = pthread_create(&s->thread_id, NULL, decompress_j2k_worker, (void *) s);
+        assert(ret == 0 && "Unable to create thread");
+
        return true;
 }

@@ -489,16 +511,21 @@ static int j2k_decompress_get_property(void *state, int property, void *val, siz
        return ret;
 }

-static void j2k_decompress_done(void *state)
+static void
+j2k_decompress_cleanup_common(struct state_decompress_j2k *s)
 {
-        struct state_decompress_j2k *s = (struct state_decompress_j2k *) state;
-
        cmpto_j2k_dec_ctx_stop(s->decoder);
        pthread_join(s->thread_id, NULL);
        log_msg(LOG_LEVEL_VERBOSE, "[J2K dec.] Decoder stopped.\n");

-        cmpto_j2k_dec_cfg_destroy(s->settings);
-        cmpto_j2k_dec_ctx_destroy(s->decoder);
+        if (s->settings != nullptr) {
+                cmpto_j2k_dec_cfg_destroy(s->settings);
+                s->settings = nullptr;
+        }
+        if (s->decoder != nullptr) {
+                cmpto_j2k_dec_ctx_destroy(s->decoder);
+                s->decoder = nullptr;
+        }

        while (s->decompressed_frames.size() > 0) {
                auto decoded = s->decompressed_frames.front();
@@ -506,6 +533,13 @@ static void j2k_decompress_done(void *state)
                free(decoded.first);
        }

+        s->convert = nullptr;
+}
+
+static void j2k_decompress_done(void *state)
+{
+        auto *s = (struct state_decompress_j2k *) state;
+        j2k_decompress_cleanup_common(s);
        delete s;
 }