vidcap file: improved A/V synchronization

fixes GH-322
This commit is contained in:
Martin Pulec
2023-06-15 11:22:06 +02:00
parent 7cf8b42235
commit bb3ff5e165
5 changed files with 186 additions and 12 deletions

View File

@@ -166,6 +166,7 @@ COMMON_OBJS = \
src/utils/fs.o \
src/utils/jpeg_reader.o \
src/utils/list.o \
src/utils/math.o \
src/utils/misc.o \
src/utils/nat.o \
src/utils/net.o \

View File

@@ -289,8 +289,11 @@ struct video_frame {
struct fec_desc fec_params;
uint32_t ssrc;
uint32_t seq; ///< sequential number, used internally by JPEG encoder
uint32_t timecode; ///< BCD timecode (hours, minutes, seconds, frame number)
uint32_t seq; ///< seq num, used internally by JPEG enc, file cap
union {
uint32_t timecode; ///< BCD timecode (hrs, min, sec, frm num)
uint32_t duration;
};
uint64_t compress_start; ///< in ms from epoch
uint64_t compress_end; ///< in ms from epoch
unsigned int paused_play:1;

64
src/utils/math.c Normal file
View File

@@ -0,0 +1,64 @@
/**
* @file utils/math.c
* @author Martin Pulec <martin.pulec@cesnet.cz>
*/
/*
* Copyright (c) 2022-2023 CESNET, z. s. p. o.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of CESNET nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "math.h"
/**
 * Computes the greatest common divisor of two integers.
 *
 * Implemented as the iterative, remainder-based Euclidean algorithm, so the
 * number of steps is logarithmic in the operands and no recursion is needed
 * (a subtraction-based variant needs max/min steps, which is prohibitive for
 * operands of very different magnitude).
 *
 * @param a first operand; only its magnitude is used
 * @param b second operand; only its magnitude is used
 * @returns the non-negative greatest common divisor; if one operand is 0 the
 *          magnitude of the other one is returned, gcd(0, 0) is 0
 */
long long gcd(long long a, long long b)
{
        // Work with magnitudes in unsigned arithmetic - negative operands
        // then terminate as well and negating LLONG_MIN cannot overflow.
        unsigned long long x = a < 0 ? 0ULL - (unsigned long long) a
                                     : (unsigned long long) a;
        unsigned long long y = b < 0 ? 0ULL - (unsigned long long) b
                                     : (unsigned long long) b;

        // Everything divides 0, so the loop ends as soon as the remainder
        // reaches zero; this also covers the a == 0 and b == 0 inputs.
        while (y != 0) {
                const unsigned long long rem = x % y;
                x = y;
                y = rem;
        }
        // NOTE: the result exceeds LLONG_MAX only when both magnitudes
        // are 2^63, i.e. for gcd(LLONG_MIN, LLONG_MIN) or gcd(LLONG_MIN, 0)
        return (long long) x;
}
/**
 * Computes the least common multiple of two integers.
 *
 * @param a first operand
 * @param b second operand
 * @returns least common multiple of @p a and @p b; 0 if both operands are 0
 *          (the original expression would divide by zero in that case)
 */
long long lcm(long long a, long long b) {
        const long long divisor = gcd(a, b);
        if (divisor == 0) { // gcd() yields 0 only for a == 0 && b == 0
                return 0;
        }
        // divisor divides a exactly, so dividing first gives the same
        // value as a * b / divisor but keeps the intermediate product
        // from overflowing when the final result still fits
        return a / divisor * b;
}

53
src/utils/math.h Normal file
View File

@@ -0,0 +1,53 @@
/**
* @file utils/math.h
* @author Martin Pulec <martin.pulec@cesnet.cz>
*/
/*
* Copyright (c) 2022-2023 CESNET, z. s. p. o.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of CESNET nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// include guard
#ifndef UTILS_MATH_H_7D3DA851_76A8_4022_B14B_8CCEFEF580B4
#define UTILS_MATH_H_7D3DA851_76A8_4022_B14B_8CCEFEF580B4
// give the functions C linkage when the header is included from C++
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Greatest common divisor of @p a and @p b.
 *
 * If one of the arguments is 0, the other one is returned.
 * NOTE(review): the implementation appears to be written with non-negative
 * arguments in mind - confirm before passing negative values.
 */
long long gcd(long long a, long long b);
/**
 * Least common multiple of @p a and @p b, derived from gcd().
 */
long long lcm(long long a, long long b);
#ifdef __cplusplus
}
#endif
#endif // ! defined UTILS_MATH_H_7D3DA851_76A8_4022_B14B_8CCEFEF580B4

View File

@@ -77,6 +77,7 @@
#include "utils/fs.h"
#include "utils/list.h"
#include "utils/macros.h"
#include "utils/math.h"
#include "utils/ring_buffer.h"
#include "utils/time.h"
#include "utils/thread.h"
@@ -116,6 +117,7 @@ struct vidcap_state_lavf_decoder {
struct simple_linked_list *video_frame_queue;
int max_queue_len;
struct ring_buffer *audio_data;
int64_t audio_start_ts;
pthread_mutex_t audio_frame_lock;
pthread_t thread_id;
@@ -210,6 +212,9 @@ static void vidcap_file_write_audio(struct vidcap_state_lavf_decoder *s, AVFrame
}
pthread_mutex_lock(&s->audio_frame_lock);
if (ring_get_current_size(s->audio_data) == 0) {
s->audio_start_ts = frame->pts;
}
if (av_sample_fmt_is_planar(s->aud_ctx->sample_fmt)) {
int bps = av_get_bytes_per_sample(s->aud_ctx->sample_fmt);
char tmp[plane_count * bps * frame->nb_samples];
@@ -314,6 +319,12 @@ static struct video_frame *process_video_pkt(struct vidcap_state_lavf_decoder *s
frame->linesize, 0, frame->height, dst,
video_dst_linesize);
}
out->seq = frame->pts < 0 ? UINT32_MAX : MIN(frame->pts, UINT32_MAX);
#ifdef FF_API_PKT_DURATION
out->duration = frame->duration;
#else
out->duration = frame->pkt_duration;
#endif
out->callbacks.dispose = vf_free;
return out;
}
@@ -741,7 +752,11 @@ static void vidcap_file_dispose_audio(struct audio_frame *f) {
free(f);
}
static struct audio_frame *get_audio(struct vidcap_state_lavf_decoder *s, double video_fps) {
static struct audio_frame *get_audio(struct vidcap_state_lavf_decoder *s,
const struct video_frame *vid_frm) {
if (vid_frm == NULL) {
return NULL;
}
pthread_mutex_lock(&s->audio_frame_lock);
if (ring_get_current_size(s->audio_data) == 0) {
pthread_mutex_unlock(&s->audio_frame_lock);
@@ -751,17 +766,55 @@ static struct audio_frame *get_audio(struct vidcap_state_lavf_decoder *s, double
struct audio_frame *ret = (struct audio_frame *) malloc(sizeof(struct audio_frame));
audio_frame_write_desc(ret, s->audio_desc);
// capture more data to ensure the buffer won't grow - it is capped with actually read
// data, still. Moreover there number of audio samples per video frame period may not
// be integer. It shouldn't be much, however, not to confuse adaptible audio buffer.
ret->max_size =
(int)(AUDIO_RATIO * s->audio_desc.sample_rate / video_fps) *
s->audio_desc.bps * s->audio_desc.ch_count;
AVRational atb = s->fmt_ctx->streams[s->audio_stream_idx]->time_base;
if (vid_frm->seq == UINT32_MAX) {
log_msg_once(LOG_LEVEL_WARNING, 0x292B168B,
MOD_NAME "Cannot get video PTS or too high!\n");
// capture more data to ensure the buffer won't grow - it is
// still capped by the amount of data actually read. Moreover,
// the number of audio samples per video frame period may not
// be an integer. It shouldn't be much, however, so as not to
// confuse the adaptable audio buffer.
ret->max_size = (int)(AUDIO_RATIO * s->audio_desc.sample_rate /
vid_frm->fps) *
s->audio_desc.bps * s->audio_desc.ch_count;
} else {
AVRational vtb =
s->fmt_ctx->streams[s->video_stream_idx]->time_base;
int64_t apts_end =
(((int64_t)vid_frm->seq + vid_frm->duration) *
(int64_t)vtb.num * atb.den +
((int64_t)vtb.den * atb.num - 1)) /
((int64_t)vtb.den * atb.num);
const int64_t l = lcm(s->audio_desc.sample_rate, atb.den);
const int64_t sample_alignment_tb = atb.num * (l / atb.den);
const int64_t samples_aligned_tb =
(apts_end - s->audio_start_ts + sample_alignment_tb + 1) /
sample_alignment_tb * sample_alignment_tb;
const int64_t samples =
samples_aligned_tb *
((int64_t)s->audio_desc.sample_rate * atb.num) / atb.den;
ret->max_size =
samples * s->audio_desc.bps * s->audio_desc.ch_count;
if (ret->max_size <= 0) { // seek - have new audio but old video
free(ret);
pthread_mutex_unlock(&s->audio_frame_lock);
return NULL;
}
}
ret->data = (char *)malloc(ret->max_size);
ret->data_len =
ring_buffer_read(s->audio_data, ret->data, ret->max_size);
ret->dispose = vidcap_file_dispose_audio;
int64_t samples_written =
ret->data_len / (s->audio_desc.bps * s->audio_desc.ch_count);
s->audio_start_ts += samples_written * atb.den /
((int64_t)s->audio_desc.sample_rate * atb.num);
if (ret->data_len == 0) {
vidcap_file_dispose_audio(ret);
ret = NULL;
} else {
ret->dispose = vidcap_file_dispose_audio;
}
pthread_mutex_unlock(&s->audio_frame_lock);
return ret;
@@ -784,7 +837,7 @@ static struct video_frame *vidcap_file_grab(void *state, struct audio_frame **au
pthread_mutex_unlock(&s->lock);
pthread_cond_signal(&s->frame_consumed);
*audio = s->audio_stream_idx != -1 ? get_audio(s, out->fps) : NULL;
*audio = s->audio_stream_idx != -1 ? get_audio(s, out) : NULL;
struct timeval t;
do {