From fcbbae0420ae9a2a69a3d47320b9b6fd0f2a687f Mon Sep 17 00:00:00 2001 From: Martin Pulec Date: Thu, 29 Apr 2021 09:30:36 +0200 Subject: [PATCH] GPUJPEG: accept pitches --- src/rtp/video_decoders.cpp | 22 ++++++++++++++++++++++ src/video_decompress/gpujpeg.c | 20 ++++++++++++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/rtp/video_decoders.cpp b/src/rtp/video_decoders.cpp index a3379c98f..e55290960 100644 --- a/src/rtp/video_decoders.cpp +++ b/src/rtp/video_decoders.cpp @@ -589,6 +589,7 @@ struct decompress_data { decompress_status ret = DECODER_NO_FRAME; unsigned char *out; codec_t internal_codec; // set only if probing (ret == DECODER_GOT_CODEC) + const int *pitches; }; static void *decompress_worker(void *data) { @@ -607,6 +608,23 @@ static void *decompress_worker(void *data) return d; } +static int *get_pitches(struct video_frame *f, int *pitches) { + if (f->render_packet.dx_row_pitch == 0) { + return nullptr; + } + if (f->color_spec == I420 || f->color_spec == CUDA_I420) { + // pitches defined but default + if (f->render_packet.dx_row_pitch == f->tiles[0].width + && f->render_packet.dx_row_pitch_uv == f->tiles[0].width / 2) { + return nullptr; + } + } + pitches[0] = f->render_packet.dx_row_pitch; + pitches[1] = + pitches[2] = f->render_packet.dx_row_pitch_uv; + return pitches; +} + ADD_TO_PARAM("decoder-drop-policy", "* decoder-drop-policy=blocking|nonblock\n" " Force specified blocking policy (default nonblock).\n"); @@ -647,6 +665,9 @@ static void *decompress_thread(void *args) { tmp = unique_ptr(new char[tile_height * (tile_width * MAX_BPS + MAX_PADDING)]); } + int pitch_buf[4] = { }; + int *pitches = get_pitches(msg->nofec_frame, pitch_buf); + if(decoder->decoder_type == EXTERNAL_DECODER) { int tile_count = get_video_mode_tiles_x(decoder->video_mode) * get_video_mode_tiles_y(decoder->video_mode); @@ -654,6 +675,7 @@ static void *decompress_thread(void *args) { vector data(tile_count); for (int pos = 0; pos < tile_count; ++pos) { data[pos].decoder = decoder; + data[pos].pitches = pitches; data[pos].pos = pos; data[pos].compressed = msg->nofec_frame; data[pos].buffer_num = msg->buffer_num[pos]; diff --git a/src/video_decompress/gpujpeg.c b/src/video_decompress/gpujpeg.c index ace932eb7..30e1d4f37 100644 --- a/src/video_decompress/gpujpeg.c +++ b/src/video_decompress/gpujpeg.c @@ -120,7 +120,7 @@ static int configure_with(struct state_decompress_gpujpeg *s, struct video_desc assert("Invalid codec!" && 0); } - if (cudaMalloc(&s->cuda_tmp_buf, desc.width * desc.height * 4) != cudaSuccess) { + if (cudaMalloc((void **) &s->cuda_tmp_buf, desc.width * desc.height * 4) != cudaSuccess) { log_msg(LOG_LEVEL_WARNING, "Cannot allocate CUDA buffer!\n"); } @@ -223,7 +223,6 @@ static decompress_status gpujpeg_decompress(void *state, unsigned char *dst, uns unsigned int src_len, int frame_seq, struct video_frame_callbacks *callbacks, codec_t *internal_codec, const int *pitches) { - assert(pitches == NULL); UNUSED(frame_seq); UNUSED(callbacks); struct state_decompress_gpujpeg *s = (struct state_decompress_gpujpeg *) state; @@ -256,6 +255,23 @@ static decompress_status gpujpeg_decompress(void *state, unsigned char *dst, uns log_msg(LOG_LEVEL_WARNING, MOD_NAME "cudaMemcpy2D failed: %s!\n", cudaGetErrorString(cudaGetLastError())); } } + } else if (pitches != NULL) { + assert(s->out_codec == I420 || s->out_codec == CUDA_I420); + assert(s->cuda_tmp_buf != NULL); + gpujpeg_decoder_output_set_custom_cuda (&decoder_output, s->cuda_tmp_buf); + if (gpujpeg_decoder_decode(s->decoder, (uint8_t*) buffer, src_len, &decoder_output) != 0) { + return DECODER_NO_FRAME; + } + if (cudaMemcpy2D(dst, pitches[0], s->cuda_tmp_buf, s->desc.width, s->desc.width, s->desc.height, cudaMemcpyDefault) != cudaSuccess) { + log_msg(LOG_LEVEL_WARNING, MOD_NAME "cudaMemcpy2D Y failed: %s!\n", cudaGetErrorString(cudaGetLastError())); + } + if (cudaMemcpy2D(dst + pitches[0] * s->desc.height, pitches[1], s->cuda_tmp_buf + s->desc.width * s->desc.height, s->desc.width / 2, s->desc.width / 2, s->desc.height / 2, cudaMemcpyDefault) != cudaSuccess) { + log_msg(LOG_LEVEL_WARNING, MOD_NAME "cudaMemcpy2D Cb failed: %s!\n", cudaGetErrorString(cudaGetLastError())); + } + if (cudaMemcpy2D(dst + pitches[0] * s->desc.height + pitches[1] * s->desc.height / 2, pitches[2], s->cuda_tmp_buf + s->desc.width * s->desc.height + s->desc.width / 2 + s->desc.height / 2, s->desc.width / 2, s->desc.width / 2, s->desc.height / 2, cudaMemcpyDefault) != cudaSuccess) { + log_msg(LOG_LEVEL_WARNING, MOD_NAME "cudaMemcpy2D Cr failed: %s!\n", cudaGetErrorString(cudaGetLastError())); + } + } else if (s->out_codec == CUDA_I420 || s->out_codec == CUDA_RGBA) { gpujpeg_decoder_output_set_custom_cuda (&decoder_output, dst); if (gpujpeg_decoder_decode(s->decoder, (uint8_t*) buffer, src_len, &decoder_output) != 0) {