From 899ca0b56f6729971cf89263f25bcd683bc83bb0 Mon Sep 17 00:00:00 2001 From: Martin Pulec Date: Thu, 23 Jul 2015 19:40:12 +0200 Subject: [PATCH] Audio sender: don't auto resample to 48000 + some rework - audio_frame2 has now methods for changing bps and resampling --- src/audio/audio.cpp | 128 ++++++--------------------------- src/audio/audio.h | 29 ++++++++ src/audio/codec.cpp | 10 ++- src/audio/codec.h | 2 - src/audio/codec/dummy_pcm.c | 1 - src/audio/codec/libavcodec.cpp | 2 - src/audio/utils.cpp | 102 +++++++++++++++++++++++++- 7 files changed, 155 insertions(+), 119 deletions(-) diff --git a/src/audio/audio.cpp b/src/audio/audio.cpp index 4ca308b8c..522c54e6a 100644 --- a/src/audio/audio.cpp +++ b/src/audio/audio.cpp @@ -52,7 +52,6 @@ #include "config_win32.h" #endif -#include #include #include #include @@ -242,7 +241,7 @@ struct state_audio * audio_cfg_init(struct module *parent, const char *addrs, in return NULL; } - s = (struct state_audio *) calloc(1, sizeof(struct state_audio)); + s = new state_audio(); s->start_time = *start_time; if (strcmp("none", send_cfg) == 0 && strcmp("none", recv_cfg) == 0) { @@ -285,7 +284,7 @@ struct state_audio * audio_cfg_init(struct module *parent, const char *addrs, in goto error; #else fprintf(stderr, "Speex not compiled in. Could not enable echo cancellation.\n"); - free(s); + delete s; goto error; #endif /* HAVE_SPEEX */ } else { @@ -449,7 +448,7 @@ error: delete s->captured; audio_codec_done(s->audio_coder); - free(s); + delete s; exit_uv(1); return NULL; } @@ -499,7 +498,7 @@ void audio_done(struct state_audio *s) delete s->captured; - free(s); + delete s; } } @@ -728,82 +727,6 @@ echo_play(s->echo_state, &pbuf_data.buffer); return NULL; } -struct state_resample { - struct audio_frame resampled; - char *resample_buffer; - SpeexResamplerState *resampler; - int resample_from, resample_ch_count; - int resample_to; - const int *codec_supported_bytes_per_sample; -}; - -static void resample(struct state_resample *s, struct audio_frame *buffer); -static bool set_contains(const int *vals, int needle); - -static bool set_contains(const int *vals, int needle) -{ - if(!vals) - return true; - while(*vals != 0) { - if(*vals == needle) { - return true; - } - ++vals; - } - return false; -} - -static void resample(struct state_resample *s, struct audio_frame *buffer) -{ - memcpy(&s->resampled, buffer, sizeof(struct audio_frame)); - - if (buffer->sample_rate == s->resample_to && - set_contains(s->codec_supported_bytes_per_sample, buffer->bps)) { - memcpy(&s->resampled, buffer, sizeof(s->resampled)); - s->resampled.data = (char *) malloc(buffer->data_len); - memcpy(s->resampled.data, buffer->data, buffer->data_len); - } else { - // resampler is able only to resample 16-bit samples - assert(set_contains(s->codec_supported_bytes_per_sample, 2)); - // expect that we may got as much as 12-times more data (eg. 8 kHz to 96 kHz) - uint32_t write_frames = 12 * (buffer->data_len / buffer->ch_count / buffer->bps); - s->resampled.data = (char *) malloc(write_frames * 2 * buffer->ch_count); - if(s->resample_from != buffer->sample_rate || s->resample_ch_count != buffer->ch_count) { - s->resample_from = buffer->sample_rate; - s->resample_ch_count = buffer->ch_count; - if(s->resampler) { - speex_resampler_destroy(s->resampler); - } - int err; - s->resampler = speex_resampler_init(buffer->ch_count, s->resample_from, - s->resample_to, 10, &err); - if(err) { - abort(); - } - } - char *in_buf; - int data_len; - if(buffer->bps != 2) { - change_bps(s->resample_buffer, 2, buffer->data, buffer->bps, buffer->data_len); - in_buf = s->resample_buffer; - data_len = buffer->data_len / buffer->bps * 2; - } else { - in_buf = buffer->data; - data_len = buffer->data_len; - } - - uint32_t in_frames = data_len / buffer->ch_count / 2; - uint32_t in_frames_orig = in_frames; - speex_resampler_process_interleaved_int(s->resampler, (spx_int16_t *)(void *) in_buf, &in_frames, - (spx_int16_t *)(void *) s->resampled.data, &write_frames); - assert (in_frames == in_frames_orig); - - s->resampled.data_len = write_frames * 2 /* bps */ * buffer->ch_count; - s->resampled.sample_rate = s->resample_to; - s->resampled.bps = 2; - } -} - static void audio_sender_process_message(struct state_audio *s, struct msg_sender *msg) { assert(s->audio_tx_mode == MODE_SENDER); @@ -876,14 +799,8 @@ static void *audio_sender_thread(void *arg) { struct state_audio *s = (struct state_audio *) arg; struct audio_frame *buffer = NULL; - struct state_resample resample_state; + audio_frame2_resampler resampler_state; - memset(&resample_state, 0, sizeof(resample_state)); - resample_state.resample_to = s->resample_to; - resample_state.resample_buffer = (char *) malloc(1024 * 1024); - resample_state.codec_supported_bytes_per_sample = - audio_codec_get_supported_bps(s->audio_coder); - printf("Audio sending started.\n"); while (!should_exit_audio) { struct message *msg; @@ -920,27 +837,29 @@ static void *audio_sender_thread(void *arg) if (s->paused) { continue; } - audio_frame2 buffer_new; + + audio_frame2 bf_n(buffer); + + // RESAMPLE + if (s->resample_to != 0 && bf_n.get_sample_rate() != s->resample_to) { + if (bf_n.get_bps() != 2) { + bf_n.change_bps(2); + } + + bf_n.resample(resampler_state, s->resample_to); + } + // COMPRESS + process_statistics(s, &bf_n); + // SEND if(s->sender == NET_NATIVE) { - // RESAMPLE - resample(&resample_state, buffer); - // COMPRESS - buffer_new = audio_frame2(&resample_state.resampled); - process_statistics(s, &buffer_new); - free(resample_state.resampled.data); - audio_frame2 *uncompressed = &buffer_new; + audio_frame2 *uncompressed = &bf_n; const audio_frame2 *compressed = NULL; while((compressed = audio_codec_compress(s->audio_coder, uncompressed))) { audio_tx_send(s->tx_session, s->audio_network_device, compressed); uncompressed = NULL; } }else if(s->sender == NET_STANDARD){ - // RESAMPLE - resample(&resample_state, buffer); - // COMPRESS - buffer_new = audio_frame2(&resample_state.resampled); - free(resample_state.resampled.data); - audio_frame2 *uncompressed = &buffer_new; + audio_frame2 *uncompressed = &bf_n; const audio_frame2 *compressed = NULL; while((compressed = audio_codec_compress(s->audio_coder, uncompressed))) { //TODO to be dynamic as a function of the selected codec, now only accepting mulaw without checking errors @@ -955,11 +874,6 @@ static void *audio_sender_thread(void *arg) } } - if(resample_state.resampler) { - speex_resampler_destroy(resample_state.resampler); - } - free(resample_state.resample_buffer); - return NULL; } diff --git a/src/audio/audio.h b/src/audio/audio.h index 7dd0e29b2..250672313 100644 --- a/src/audio/audio.h +++ b/src/audio/audio.h @@ -111,6 +111,21 @@ struct module; #include #include +class audio_frame2; + +class audio_frame2_resampler { +public: + audio_frame2_resampler(); + ~audio_frame2_resampler(); +private: + void *resampler; // type is (SpeexResamplerState *) + int resample_from; + size_t resample_ch_count; + int resample_to; + + friend class audio_frame2; +}; + class audio_frame2 { public: @@ -135,6 +150,20 @@ public: bool has_same_prop_as(audio_frame2 const &frame) const; void set_duration(double duration); static audio_frame2 copy_with_bps_change(audio_frame2 const &frame, int new_bps); + void change_bps(int new_bps); + /** + * @note + * bps of the frame needs to be 16 bits! + * + * @param resampler_state opaque state that can holds resampler that dosn't need + * to be reinitalized during calls on various audio frames. + * It reinitializes itself when needed (when source or new + * sample rate changes). Therefore, it is very recommended + * to use it only in a stream that may change sometimes but + * do not eg. share it between two streams that has different + * properties. + */ + void resample(audio_frame2_resampler &resampler_state, int new_sample_rate); private: int bps; /* bytes per sample */ int sample_rate; diff --git a/src/audio/codec.cpp b/src/audio/codec.cpp index 7109a75f3..f9fbf1df4 100644 --- a/src/audio/codec.cpp +++ b/src/audio/codec.cpp @@ -343,11 +343,6 @@ void audio_codec_done(struct audio_codec_state *s) free(s); } -const int *audio_codec_get_supported_bps(struct audio_codec_state *s) -{ - return audio_codecs[s->index]->supported_bytes_per_second; -} - audio_codec_t get_audio_codec(const char *codec_str) { char *codec = strdup(codec_str); if (strchr(codec, ':')) { @@ -383,6 +378,9 @@ static char *get_val_from_cfg(const char *audio_codec_cfg, const char *key) return NULL; } +/** + * @returns user specified sample rate or 0 if unspecified + */ int get_audio_codec_sample_rate(const char *audio_codec_cfg) { char *val = get_val_from_cfg(audio_codec_cfg, "sample_rate="); @@ -391,7 +389,7 @@ int get_audio_codec_sample_rate(const char *audio_codec_cfg) free(val); return ret; } else { - return 48000; + return 0; } } diff --git a/src/audio/codec.h b/src/audio/codec.h index 619495b25..781a5d1c6 100644 --- a/src/audio/codec.h +++ b/src/audio/codec.h @@ -59,7 +59,6 @@ typedef enum { struct audio_codec { const audio_codec_t *supported_codecs; - const int *supported_bytes_per_second; void *(*init)(audio_codec_t, audio_codec_direction_t, bool, int bitrate); audio_channel *(*compress)(void *, audio_channel *); audio_channel *(*decompress)(void *, audio_channel *); @@ -84,7 +83,6 @@ struct audio_codec_state *audio_codec_reconfigure(struct audio_codec_state *old, audio_codec_t audio_codec, audio_codec_direction_t); const audio_frame2 *audio_codec_compress(struct audio_codec_state *, const audio_frame2 *); audio_frame2 *audio_codec_decompress(struct audio_codec_state *, audio_frame2 *); -const int *audio_codec_get_supported_bps(struct audio_codec_state *); void audio_codec_done(struct audio_codec_state *); void list_audio_codecs(void); diff --git a/src/audio/codec/dummy_pcm.c b/src/audio/codec/dummy_pcm.c index 45abce13b..e2965b9f1 100644 --- a/src/audio/codec/dummy_pcm.c +++ b/src/audio/codec/dummy_pcm.c @@ -107,7 +107,6 @@ static void dummy_pcm_done(void *state) struct audio_codec dummy_pcm_audio_codec = { .supported_codecs = (audio_codec_t[]){ AC_PCM, AC_NONE }, - .supported_bytes_per_second = NULL, .init = dummy_pcm_init, .compress = dummy_pcm_compress, .decompress = dummy_pcm_decompress, diff --git a/src/audio/codec/libavcodec.cpp b/src/audio/codec/libavcodec.cpp index d124f829e..88731d3b8 100644 --- a/src/audio/codec/libavcodec.cpp +++ b/src/audio/codec/libavcodec.cpp @@ -539,11 +539,9 @@ static void libavcodec_done(void *state) } static audio_codec_t supported_codecs[] = { AC_ALAW, AC_MULAW, AC_SPEEX, AC_OPUS, AC_G722, AC_FLAC, AC_MP3, AC_AAC, AC_NONE }; -static int supported_bytes_per_second[] = { 1, 2, 3, 4, 0 }; struct audio_codec libavcodec_audio_codec = { supported_codecs, - supported_bytes_per_second, libavcodec_init, libavcodec_compress, libavcodec_decompress, diff --git a/src/audio/utils.cpp b/src/audio/utils.cpp index 50ea4c9bb..5e9e88028 100644 --- a/src/audio/utils.cpp +++ b/src/audio/utils.cpp @@ -45,9 +45,11 @@ #include "audio/audio.h" #include "audio/codec.h" #include "audio/utils.h" +#include "debug.h" #include #include #include +#include #include #include @@ -59,6 +61,17 @@ using namespace std; +audio_frame2_resampler::audio_frame2_resampler() : resampler(nullptr), resample_from(0), + resample_ch_count(0), resample_to(0) +{ +} + +audio_frame2_resampler::~audio_frame2_resampler() { + if (resampler) { + speex_resampler_destroy((SpeexResamplerState *) resampler); + } +} + /** * @brief Creates empty audio_frame2 */ @@ -237,13 +250,100 @@ audio_frame2 audio_frame2::copy_with_bps_change(audio_frame2 const &frame, int n for (size_t i = 0; i < ret.channels.size(); i++) { ret.channels[i].second = frame.get_data_len(i) / frame.get_bps() * new_bps; ret.channels[i].first = unique_ptr(new char[ret.channels[i].second]); - change_bps(ret.channels[i].first.get(), new_bps, frame.get_data(i), frame.get_bps(), + ::change_bps(ret.channels[i].first.get(), new_bps, frame.get_data(i), frame.get_bps(), frame.get_data_len(i)); } return ret; } +void audio_frame2::change_bps(int new_bps) +{ + if (new_bps == bps) { + return; + } + + std::vector, size_t> > new_channels(channels.size()); + + for (size_t i = 0; i < channels.size(); i++) { + size_t new_size = channels[i].second / bps * new_bps; + new_channels[i] = make_pair(unique_ptr(new char[new_size]), new_size); + } + + for (size_t i = 0; i < channels.size(); i++) { + ::change_bps(new_channels[i].first.get(), new_bps, get_data(i), get_bps(), + get_data_len(i)); + } + + bps = new_bps; + channels = move(new_channels); +} + +void audio_frame2::resample(audio_frame2_resampler & resampler_state, int new_sample_rate) +{ + if (new_sample_rate == sample_rate) { + return; + } + + /// @todo + /// speex supports also floats so there could be possibility also to add support for more bps + if (bps != 2) { + throw logic_error("Only 16 bits per sample are currently for resamling supported!"); + } + + std::vector, size_t> > new_channels(channels.size()); + + if (sample_rate != resampler_state.resample_from || new_sample_rate != resampler_state.resample_to || channels.size() != resampler_state.resample_ch_count) { + if (resampler_state.resampler) { + speex_resampler_destroy((SpeexResamplerState *) resampler_state.resampler); + } + resampler_state.resampler = nullptr; + + int err; + /// @todo + /// Consider lower quality than 10 (max). This will improve both latency and + /// performance. + resampler_state.resampler = speex_resampler_init(channels.size(), sample_rate, + new_sample_rate, 10, &err); + if(err) { + abort(); + } + resampler_state.resample_from = sample_rate; + resampler_state.resample_to = new_sample_rate; + resampler_state.resample_ch_count = channels.size(); + } + + for (size_t i = 0; i < channels.size(); i++) { + // allocate new storage + 10 ms headroom + size_t new_size = channels[i].second * new_sample_rate / sample_rate + new_sample_rate * sizeof(int16_t) / 100; + new_channels[i] = make_pair(unique_ptr(new char[new_size]), new_size); + } + + /// @todo + /// Consider doing this in parallel - complex resampling requires some milliseconds. + /// Parallel resampling would reduce latency (and improve performance if there is not + /// enough single-core power). + for (size_t i = 0; i < channels.size(); i++) { + uint32_t in_frames = get_data_len(i) / sizeof(int16_t); + uint32_t in_frames_orig = in_frames; + uint32_t write_frames = new_channels[i].second; + + speex_resampler_process_int( + (SpeexResamplerState *) resampler_state.resampler, + i, + (spx_int16_t *)get_data(i), &in_frames, + (spx_int16_t *)(void *) new_channels[i].first.get(), &write_frames); + if (in_frames != in_frames_orig) { + LOG(LOG_LEVEL_WARNING) << "Audio frame resampler: not all samples resampled!\n"; + } + new_channels[i].second = write_frames * sizeof(int16_t); + } + + sample_rate = new_sample_rate; + channels = move(new_channels); +} + + static double get_normalized(const char *in, int bps) { int64_t sample = 0; bool negative = false;