diff --git a/src/audio/codec/libavcodec.cpp b/src/audio/codec/libavcodec.cpp index 521258d6b..19e39b194 100644 --- a/src/audio/codec/libavcodec.cpp +++ b/src/audio/codec/libavcodec.cpp @@ -435,6 +435,7 @@ static audio_channel *libavcodec_compress(void *state, audio_channel * channel) s->output_channel.data_len += pkt.size; av_packet_unref(&pkt); ret = avcodec_receive_packet(s->codec_ctx, &pkt); + s->output_channel.duration += s->codec_ctx->frame_size / (double) s->output_channel.sample_rate; } if (ret != AVERROR(EAGAIN) && ret != 0) { char errbuf[1024]; @@ -460,10 +461,7 @@ static audio_channel *libavcodec_compress(void *state, audio_channel * channel) } if(got_packet) { s->output_channel.data_len += pkt.size; - ///@ todo - /// well, this is wrong, denominator should be actually AVStream::time_base. Where do - /// we get this?? Anyway, seems like it equals sample rate. - s->output_channel.duration += pkt.duration / (double) s->output_channel.sample_rate; + s->output_channel.duration += s->codec_ctx->frame_size / (double) s->output_channel.sample_rate; } #endif offset += chunk_size; @@ -671,3 +669,4 @@ static const struct audio_compress_info libavcodec_audio_codec = { REGISTER_MODULE(libavcodec, &libavcodec_audio_codec, LIBRARY_CLASS_AUDIO_COMPRESS, AUDIO_COMPRESS_ABI_VERSION); +/* vim: set expandtab sw=8 : */ diff --git a/src/audio/types.h b/src/audio/types.h index 30cfa490d..5fef54f68 100644 --- a/src/audio/types.h +++ b/src/audio/types.h @@ -185,7 +185,7 @@ private: int sample_rate; std::vector channels; /* data should be at least 4B aligned */ audio_codec_t codec; - double duration; /// @note currently unused + double duration; ///< for compressed formats where this cannot be directly determined from samples/sample_rate }; #endif // __cplusplus diff --git a/src/audio/utils.cpp b/src/audio/utils.cpp index a9b192250..9b2c9eb6d 100644 --- a/src/audio/utils.cpp +++ b/src/audio/utils.cpp @@ -207,7 +207,7 @@ void demux_channel(char *out, char *in, int bps, int in_len, 
int in_stream_chann } } -void remux_channel(char *out, char *in, int bps, int in_len, int in_stream_channels, int out_stream_channels, int pos_in_stream, int pos_out_stream) +void remux_channel(char *out, const char *in, int bps, int in_len, int in_stream_channels, int out_stream_channels, int pos_in_stream, int pos_out_stream) { int samples = in_len / (in_stream_channels * bps); int i; diff --git a/src/audio/utils.h b/src/audio/utils.h index ab4b7a638..c5a641633 100644 --- a/src/audio/utils.h +++ b/src/audio/utils.h @@ -89,7 +89,7 @@ void copy_channel(char *out, const char *in, int bps, int in_len /* bytes */, in */ void mux_channel(char *out, const char *in, int bps, int in_len, int out_stream_channels, int chan_pos_stream, double scale); void demux_channel(char *out, char *in, int bps, int in_len, int in_stream_channels, int pos_in_stream); -void remux_channel(char *out, char *in, int bps, int in_len, int in_stream_channels, int out_stream_channels, int pos_in_stream, int pos_out_stream); +void remux_channel(char *out, const char *in, int bps, int in_len, int in_stream_channels, int out_stream_channels, int pos_in_stream, int pos_out_stream); void interleaved2noninterleaved(char *out, const char *in, int bps, int in_len /* bytes */, int channel_count); diff --git a/src/transmit.cpp b/src/transmit.cpp index 2c9480b2e..15c80090b 100644 --- a/src/transmit.cpp +++ b/src/transmit.cpp @@ -62,13 +62,14 @@ #include "config_win32.h" #endif // HAVE_CONFIG_H +#include "audio/audio.h" +#include "audio/codec.h" +#include "audio/utils.h" #include "crypto/random.h" #include "debug.h" #include "host.h" #include "lib_common.h" #include "perf.h" -#include "audio/audio.h" -#include "audio/codec.h" #include "crypto/openssl_encrypt.h" #include "module.h" #include "rtp/fec.h" @@ -102,11 +103,6 @@ #define DEFAULT_CIPHER_MODE MODE_AES128_CFB -// Mulaw audio memory reservation -#define BUFFER_MTU_SIZE 1500 -static char *data_buffer_mulaw; -static int buffer_mulaw_init = 0; - static void 
tx_update(struct tx *tx, struct video_frame *frame, int substream); static void tx_done(struct module *tx); static uint32_t format_interl_fps_hdr_row(enum interlacing_t interlacing, double input_fps); @@ -147,16 +143,9 @@ struct tx { long long int bitrate; struct rtpenc_h264_state *rtpenc_h264_state; + char tmp_packet[RTP_MAX_MTU]; }; -// Mulaw audio memory reservation -static void init_tx_mulaw_buffer() { - if (!buffer_mulaw_init) { - data_buffer_mulaw = (char *) malloc(BUFFER_MTU_SIZE*20); - buffer_mulaw_init = 1; - } -} - static void tx_update(struct tx *tx, struct video_frame *frame, int substream) { if(!frame) { @@ -876,7 +865,7 @@ void audio_tx_send(struct tx* tx, struct rtp *rtp_session, const audio_frame2 * tx->buffer ++; } -/* +/** * audio_tx_send_standard - Send interleaved channels from the audio_frame2, * as the mulaw and A-law standards (dynamic or std PT). */ @@ -908,47 +897,50 @@ void audio_tx_send_standard(struct tx* tx, struct rtp *rtp_session, assert(buffer->get_data_len(0) == buffer->get_data_len(i)); int data_len = buffer->get_data_len(0) * buffer->get_channel_count(); /* Number of samples to send */ - int payload_size = tx->mtu - 40; /* Max size of an RTP payload field */ + int payload_size = tx->mtu - 40 - 8 - 12; /* Max size of an RTP payload field (minus IPv6, UDP and RTP header lengths) */ - init_tx_mulaw_buffer(); - char *curr_sample = data_buffer_mulaw; - int ch, pos = 0, count = 0, pointerToSend = 0; + if (buffer->get_codec() == AC_OPUS) { // OPUS needs to fit one package + if (payload_size < data_len) { + log_msg(LOG_LEVEL_ERROR, "Transmit: OPUS frame larger than packet! 
Discarding...\n"); + return; + } + } else { // we may split the data into more packets, compute chunk size + int frame_size = buffer->get_channel_count() * buffer->get_bps(); + payload_size = payload_size / frame_size * frame_size; // align to frame size + } + int pos = 0; do { - for (ch = 0; ch < buffer->get_channel_count(); ch++) { - memcpy(curr_sample, buffer->get_data(ch) + pos, - buffer->get_bps() * sizeof(char)); - curr_sample += buffer->get_bps() * sizeof(char); - count += buffer->get_bps() * sizeof(char); - } - pos += buffer->get_bps() * sizeof(char); + int pkt_len = std::min(payload_size, data_len - pos); - if ((pos * buffer->get_channel_count()) % payload_size == 0) { - // Update first sample timestamp - ts = get_std_audio_local_mediatime((double)payload_size / (double)buffer->get_channel_count()); - gettimeofday(&curr_time, NULL); - rtp_send_ctrl(rtp_session, ts_prev, 0, curr_time); //send RTCP SR - ts_prev = ts; - // Send the packet - rtp_send_data(rtp_session, ts, pt, 0, 0, /* contributing sources */ - 0, /* contributing sources length */ - data_buffer_mulaw + pointerToSend, payload_size, 0, 0, 0); - pointerToSend += payload_size; - } - } while (count < data_len); + // interleave + if (buffer->get_codec() == AC_OPUS) { + assert(buffer->get_channel_count() == 1); // we cannot interleave OPUS here + memcpy(tx->tmp_packet, buffer->get_data(0), pkt_len); + } else { + for (int ch = 0; ch < buffer->get_channel_count(); ch++) { + remux_channel(tx->tmp_packet, buffer->get_data(ch) + pos / buffer->get_channel_count(), buffer->get_bps(), pkt_len / buffer->get_channel_count(), 1, buffer->get_channel_count(), 0, ch); + } + } - if ((pos * buffer->get_channel_count()) % payload_size != 0) { - // Update first sample timestamp - ts = get_std_audio_local_mediatime((double)((pos * buffer->get_channel_count()) % payload_size) / (double)buffer->get_channel_count()); - gettimeofday(&curr_time, NULL); - rtp_send_ctrl(rtp_session, ts_prev, 0, curr_time); //send RTCP SR - 
ts_prev = ts; - // Send the packet - rtp_send_data(rtp_session, ts, pt, 0, 0, /* contributing sources */ - 0, /* contributing sources length */ - data_buffer_mulaw + pointerToSend, - (pos * buffer->get_channel_count()) % payload_size, 0, 0, 0); - } + // Update first sample timestamp + if (buffer->get_codec() == AC_OPUS) { + /* An OPUS frame is always carried whole in a single packet + * (see RFC 7587). PCMA/PCMU data may be split into several + * packets, so the whole frame duration cannot be used there. */ + ts = get_std_audio_local_mediatime(buffer->get_duration(), 48000); + } else { + ts = get_std_audio_local_mediatime((double) pkt_len / (double) buffer->get_channel_count() / (double) buffer->get_sample_rate(), buffer->get_sample_rate()); + } + gettimeofday(&curr_time, NULL); + rtp_send_ctrl(rtp_session, ts_prev, 0, curr_time); //send RTCP SR + ts_prev = ts; + // Send the packet + rtp_send_data(rtp_session, ts, pt, 0, 0, /* contributing sources */ + 0, /* contributing sources length */ + tx->tmp_packet, pkt_len, 0, 0, 0); + pos += pkt_len; + } while (pos < data_len); } /** diff --git a/src/tv.c b/src/tv.c index 530000166..b9dcc6177 100644 --- a/src/tv.c +++ b/src/tv.c @@ -148,7 +148,11 @@ typedef struct { //shared struct for audio and video streams (sync.) 
std_time_struct standard_time = { true, 0, { 0, 0 }, 25, { 0, 0 }, { 0, 0 } }; -uint32_t get_std_audio_local_mediatime(double samples) +/** + * @param samples duration of the audio being sent, in seconds (added to the running audio media time) + * @param rate RTP timestamp scale (usually sample rate, but for OPUS always 48000) + */ +uint32_t get_std_audio_local_mediatime(double samples, int rate) { if (standard_time.init) { gettimeofday(&standard_time.start_time, NULL); @@ -164,7 +168,7 @@ uint32_t get_std_audio_local_mediatime(double samples) tv_add(&standard_time.atime, samples); } - return (double)standard_time.atime.tv_sec + (((double)standard_time.atime.tv_usec) / 1000000.0); + return ((double)standard_time.atime.tv_sec + (((double)standard_time.atime.tv_usec) / 1000000.0)) * rate; } uint32_t get_std_video_local_mediatime(void) diff --git a/src/tv.h b/src/tv.h index 9740f3761..442184560 100644 --- a/src/tv.h +++ b/src/tv.h @@ -54,7 +54,7 @@ uint32_t tv_diff_usec(struct timeval curr_time, struct timeval prev_time); void tv_add(struct timeval *ts, double offset_secs); void tv_add_usec(struct timeval *ts, double offset); int tv_gt(struct timeval a, struct timeval b); -uint32_t get_std_audio_local_mediatime(double samples); +uint32_t get_std_audio_local_mediatime(double samples, int rate); uint32_t get_std_video_local_mediatime(void); #ifdef __cplusplus diff --git a/src/video_rxtx/h264_sdp.cpp b/src/video_rxtx/h264_sdp.cpp index 34b8cf4ae..a1db8b718 100644 --- a/src/video_rxtx/h264_sdp.cpp +++ b/src/video_rxtx/h264_sdp.cpp @@ -50,6 +50,7 @@ #include "lib_common.h" #include "transmit.h" #include "rtp/rtp.h" +#include "rtp/rtp_callback.h" // PCMA/PCMU packet types #include "rtp/rtpenc_h264.h" #include "utils/sdp.h" #include "video_rxtx.h" @@ -65,24 +66,28 @@ h264_sdp_video_rxtx::h264_sdp_video_rxtx(std::map const &p m_sdp = new_sdp(std_H264, params.at("tx_port").i); if (params.at("a_tx_port").i) { new_stream(m_sdp); - sprintf(m_sdp->stream[1].media_info, "m=audio %d RTP/AVP 97\n", params.at("a_tx_port").i); - 
const char *audio_codec = NULL; - switch (params.at("audio_codec").l) { - case AC_ALAW: - audio_codec = "PCMA"; - break; - case AC_MULAW: - audio_codec = "PCMU"; - break; - case AC_OPUS: - audio_codec = "OPUS"; - break; + if (params.at("audio_sample_rate").i == 8000 && params.at("audio_channels").i == 1 && (params.at("audio_codec").l == AC_ALAW || params.at("audio_codec").l == AC_MULAW)) { + sprintf(m_sdp->stream[1].media_info, "m=audio %d RTP/AVP %d\n", params.at("a_tx_port").i, params.at("audio_codec").l == AC_MULAW ? PT_ITU_T_G711_PCMU : PT_ITU_T_G711_PCMA); + } else { + sprintf(m_sdp->stream[1].media_info, "m=audio %d RTP/AVP 97\n", params.at("a_tx_port").i); + const char *audio_codec = NULL; + switch (params.at("audio_codec").l) { + case AC_ALAW: + audio_codec = "PCMA"; + break; + case AC_MULAW: + audio_codec = "PCMU"; + break; + case AC_OPUS: + audio_codec = "OPUS"; + break; + } + + assert(audio_codec); + + sprintf(m_sdp->stream[1].rtpmap, "a=rtpmap:97 %s/%i/%i", audio_codec, + params.at("audio_codec").l == AC_OPUS ? 48000 : params.at("audio_sample_rate").i, params.at("audio_channels").i); } - - assert(audio_codec); - - sprintf(m_sdp->stream[1].rtpmap, "a=rtpmap:97 %s/%i/%i", audio_codec, - params.at("audio_sample_rate").i, params.at("audio_channels").i); } if (m_sdp == NULL) { throw string("[SDP] SDP creation failed\n");