GPUJPEG: accept pitches

This commit is contained in:
Martin Pulec
2021-04-29 09:30:36 +02:00
parent 842aea8887
commit fcbbae0420
2 changed files with 40 additions and 2 deletions

View File

@@ -589,6 +589,7 @@ struct decompress_data {
decompress_status ret = DECODER_NO_FRAME;
unsigned char *out;
codec_t internal_codec; // set only if probing (ret == DECODER_GOT_CODEC)
const int *pitches;
};
static void *decompress_worker(void *data)
{
@@ -607,6 +608,23 @@ static void *decompress_worker(void *data)
return d;
}
static int *get_pitches(struct video_frame *f, int *pitches) {
if (f->render_packet.dx_row_pitch == 0) {
return nullptr;
}
if (f->color_spec == I420 || f->color_spec == CUDA_I420) {
// pitches defined but default
if (f->render_packet.dx_row_pitch == f->tiles[0].width
&& f->render_packet.dx_row_pitch_uv == f->tiles[0].width / 2) {
return nullptr;
}
}
pitches[0] = f->render_packet.dx_row_pitch;
pitches[1] =
pitches[2] = f->render_packet.dx_row_pitch_uv;
return pitches;
}
ADD_TO_PARAM("decoder-drop-policy",
"* decoder-drop-policy=blocking|nonblock\n"
" Force specified blocking policy (default nonblock).\n");
@@ -647,6 +665,9 @@ static void *decompress_thread(void *args) {
tmp = unique_ptr<char[]>(new char[tile_height * (tile_width * MAX_BPS + MAX_PADDING)]);
}
int pitch_buf[4] = { };
int *pitches = get_pitches(msg->nofec_frame, pitch_buf);
if(decoder->decoder_type == EXTERNAL_DECODER) {
int tile_count = get_video_mode_tiles_x(decoder->video_mode) *
get_video_mode_tiles_y(decoder->video_mode);
@@ -654,6 +675,7 @@ static void *decompress_thread(void *args) {
vector<decompress_data> data(tile_count);
for (int pos = 0; pos < tile_count; ++pos) {
data[pos].decoder = decoder;
data[pos].pitches = pitches;
data[pos].pos = pos;
data[pos].compressed = msg->nofec_frame;
data[pos].buffer_num = msg->buffer_num[pos];

View File

@@ -120,7 +120,7 @@ static int configure_with(struct state_decompress_gpujpeg *s, struct video_desc
assert("Invalid codec!" && 0);
}
if (cudaMalloc(&s->cuda_tmp_buf, desc.width * desc.height * 4) != cudaSuccess) {
if (cudaMalloc((void **) &s->cuda_tmp_buf, desc.width * desc.height * 4) != cudaSuccess) {
log_msg(LOG_LEVEL_WARNING, "Cannot allocate CUDA buffer!\n");
}
@@ -223,7 +223,6 @@ static decompress_status gpujpeg_decompress(void *state, unsigned char *dst, uns
unsigned int src_len, int frame_seq, struct video_frame_callbacks *callbacks, codec_t *internal_codec,
const int *pitches)
{
assert(pitches == NULL);
UNUSED(frame_seq);
UNUSED(callbacks);
struct state_decompress_gpujpeg *s = (struct state_decompress_gpujpeg *) state;
@@ -256,6 +255,23 @@ static decompress_status gpujpeg_decompress(void *state, unsigned char *dst, uns
log_msg(LOG_LEVEL_WARNING, MOD_NAME "cudaMemcpy2D failed: %s!\n", cudaGetErrorString(cudaGetLastError()));
}
}
} else if (pitches != NULL) {
assert(s->out_codec == I420 || s->out_codec == CUDA_I420);
assert(s->cuda_tmp_buf != NULL);
gpujpeg_decoder_output_set_custom_cuda (&decoder_output, s->cuda_tmp_buf);
if (gpujpeg_decoder_decode(s->decoder, (uint8_t*) buffer, src_len, &decoder_output) != 0) {
return DECODER_NO_FRAME;
}
if (cudaMemcpy2D(dst, pitches[0], s->cuda_tmp_buf, s->desc.width, s->desc.width, s->desc.height, cudaMemcpyDefault) != cudaSuccess) {
log_msg(LOG_LEVEL_WARNING, MOD_NAME "cudaMemcpy2D Y failed: %s!\n", cudaGetErrorString(cudaGetLastError()));
}
if (cudaMemcpy2D(dst + pitches[0] * s->desc.height, pitches[1], s->cuda_tmp_buf + s->desc.width * s->desc.height, s->desc.width / 2, s->desc.width / 2, s->desc.height / 2, cudaMemcpyDefault) != cudaSuccess) {
log_msg(LOG_LEVEL_WARNING, MOD_NAME "cudaMemcpy2D Cb failed: %s!\n", cudaGetErrorString(cudaGetLastError()));
}
if (cudaMemcpy2D(dst + pitches[0] * s->desc.height + pitches[1] * s->desc.height / 2, pitches[2], s->cuda_tmp_buf + s->desc.width * s->desc.height + s->desc.width / 2 + s->desc.height / 2, s->desc.width / 2, s->desc.width / 2, s->desc.height / 2, cudaMemcpyDefault) != cudaSuccess) {
log_msg(LOG_LEVEL_WARNING, MOD_NAME "cudaMemcpy2D Cr failed: %s!\n", cudaGetErrorString(cudaGetLastError()));
}
} else if (s->out_codec == CUDA_I420 || s->out_codec == CUDA_RGBA) {
gpujpeg_decoder_output_set_custom_cuda (&decoder_output, dst);
if (gpujpeg_decoder_decode(s->decoder, (uint8_t*) buffer, src_len, &decoder_output) != 0) {