Audio sender: don't auto resample to 48000

+ some rework - audio_frame2 now has methods for changing bps and resampling
2026-03-21 17:40:23 +00:00 · 2015-07-23 19:40:12 +02:00
parent d1fc89981c
commit 899ca0b56f
7 changed files with 155 additions and 119 deletions
--- a/src/audio/audio.cpp
+++ b/src/audio/audio.cpp
@@ -52,7 +52,6 @@
 #include "config_win32.h"
 #endif

-#include <speex/speex_resampler.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
@@ -242,7 +241,7 @@ struct state_audio * audio_cfg_init(struct module *parent, const char *addrs, in
                return NULL;
        }
        
-        s = (struct state_audio *) calloc(1, sizeof(struct state_audio));
+        s = new state_audio();
        s->start_time = *start_time;

        if (strcmp("none", send_cfg) == 0 && strcmp("none", recv_cfg) == 0) {
@@ -285,7 +284,7 @@ struct state_audio * audio_cfg_init(struct module *parent, const char *addrs, in
                goto error;
 #else
                fprintf(stderr, "Speex not compiled in. Could not enable echo cancellation.\n");
-                free(s);
+                delete s;
                goto error;
 #endif /* HAVE_SPEEX */
        } else {
@@ -449,7 +448,7 @@ error:
        delete s->captured;

        audio_codec_done(s->audio_coder);
-        free(s);
+        delete s;
        exit_uv(1);
        return NULL;
 }
@@ -499,7 +498,7 @@ void audio_done(struct state_audio *s)

                delete s->captured;

-                free(s);
+                delete s;
        }
 }

@@ -728,82 +727,6 @@ echo_play(s->echo_state, &pbuf_data.buffer);
        return NULL;
 }

-struct state_resample {
-        struct audio_frame resampled;
-        char *resample_buffer;
-        SpeexResamplerState *resampler;
-        int resample_from, resample_ch_count;
-        int resample_to;
-        const int *codec_supported_bytes_per_sample;
-};
-
-static void resample(struct state_resample *s, struct audio_frame *buffer);
-static bool set_contains(const int *vals, int needle);
-
-static bool set_contains(const int *vals, int needle)
-{
-        if(!vals)
-                return true;
-        while(*vals != 0) {
-                if(*vals == needle) {
-                        return true;
-                }
-                ++vals;
-        }
-        return false;
-}
-
-static void resample(struct state_resample *s, struct audio_frame *buffer)
-{
-        memcpy(&s->resampled, buffer, sizeof(struct audio_frame));
-
-        if (buffer->sample_rate == s->resample_to &&
-                        set_contains(s->codec_supported_bytes_per_sample, buffer->bps)) {
-                memcpy(&s->resampled, buffer, sizeof(s->resampled));
-                s->resampled.data = (char *) malloc(buffer->data_len);
-                memcpy(s->resampled.data, buffer->data, buffer->data_len);
-        } else {
-                // resampler is able only to resample 16-bit samples
-                assert(set_contains(s->codec_supported_bytes_per_sample, 2));
-                // expect that we may got as much as 12-times more data (eg. 8 kHz to 96 kHz)
-                uint32_t write_frames = 12 * (buffer->data_len / buffer->ch_count / buffer->bps);
-                s->resampled.data = (char *) malloc(write_frames * 2 * buffer->ch_count);
-                if(s->resample_from != buffer->sample_rate || s->resample_ch_count != buffer->ch_count) {
-                        s->resample_from = buffer->sample_rate;
-                        s->resample_ch_count = buffer->ch_count;
-                        if(s->resampler) {
-                                speex_resampler_destroy(s->resampler);
-                        }
-                        int err;
-                        s->resampler = speex_resampler_init(buffer->ch_count, s->resample_from,
-                                        s->resample_to, 10, &err);
-                        if(err) {
-                                abort();
-                        }
-                }
-                char *in_buf;
-                int data_len;
-                if(buffer->bps != 2) {
-                        change_bps(s->resample_buffer, 2, buffer->data, buffer->bps, buffer->data_len);
-                        in_buf = s->resample_buffer;
-                        data_len = buffer->data_len / buffer->bps * 2;
-                } else {
-                        in_buf = buffer->data;
-                        data_len = buffer->data_len;
-                }
-
-                uint32_t in_frames = data_len /  buffer->ch_count / 2;
-                uint32_t in_frames_orig = in_frames;
-                speex_resampler_process_interleaved_int(s->resampler, (spx_int16_t *)(void *) in_buf, &in_frames,
-                                (spx_int16_t *)(void *) s->resampled.data, &write_frames);
-                assert (in_frames == in_frames_orig);
-
-                s->resampled.data_len = write_frames * 2 /* bps */ * buffer->ch_count;
-                s->resampled.sample_rate = s->resample_to;
-                s->resampled.bps = 2;
-        }
-}
-
 static void audio_sender_process_message(struct state_audio *s, struct msg_sender *msg)
 {
        assert(s->audio_tx_mode == MODE_SENDER);
@@ -876,14 +799,8 @@ static void *audio_sender_thread(void *arg)
 {
        struct state_audio *s = (struct state_audio *) arg;
        struct audio_frame *buffer = NULL;
-        struct state_resample resample_state;
+        audio_frame2_resampler resampler_state;

-        memset(&resample_state, 0, sizeof(resample_state));
-        resample_state.resample_to = s->resample_to;
-        resample_state.resample_buffer = (char *) malloc(1024 * 1024);
-        resample_state.codec_supported_bytes_per_sample =
-                audio_codec_get_supported_bps(s->audio_coder);
-        
        printf("Audio sending started.\n");
        while (!should_exit_audio) {
                struct message *msg;
@@ -920,27 +837,29 @@ static void *audio_sender_thread(void *arg)
                        if (s->paused) {
                                continue;
                        }
-                        audio_frame2 buffer_new;
+
+                        audio_frame2 bf_n(buffer);
+
+                        // RESAMPLE
+                        if (s->resample_to != 0 && bf_n.get_sample_rate() != s->resample_to) {
+                                if (bf_n.get_bps() != 2) {
+                                        bf_n.change_bps(2);
+                                }
+
+                                bf_n.resample(resampler_state, s->resample_to);
+                        }
+                        // COMPRESS
+                        process_statistics(s, &bf_n);
+                        // SEND
                        if(s->sender == NET_NATIVE) {
-                                // RESAMPLE
-                                resample(&resample_state, buffer);
-                                // COMPRESS
-                                buffer_new = audio_frame2(&resample_state.resampled);
-                                process_statistics(s, &buffer_new);
-                                free(resample_state.resampled.data);
-                                audio_frame2 *uncompressed = &buffer_new;
+                                audio_frame2 *uncompressed = &bf_n;
                                const audio_frame2 *compressed = NULL;
                                while((compressed = audio_codec_compress(s->audio_coder, uncompressed))) {
                                        audio_tx_send(s->tx_session, s->audio_network_device, compressed);
                                        uncompressed = NULL;
                                }
                        }else if(s->sender == NET_STANDARD){
-                            // RESAMPLE
-                            resample(&resample_state, buffer);
-                            // COMPRESS
-                            buffer_new = audio_frame2(&resample_state.resampled);
-                            free(resample_state.resampled.data);
-                            audio_frame2 *uncompressed = &buffer_new;
+                            audio_frame2 *uncompressed = &bf_n;
                            const audio_frame2 *compressed = NULL;
                            while((compressed = audio_codec_compress(s->audio_coder, uncompressed))) {
                                    //TODO to be dynamic as a function of the selected codec, now only accepting mulaw without checking errors
@@ -955,11 +874,6 @@ static void *audio_sender_thread(void *arg)
                }
        }

-        if(resample_state.resampler) {
-                speex_resampler_destroy(resample_state.resampler);
-        }
-        free(resample_state.resample_buffer);
-
        return NULL;
 }

--- a/src/audio/audio.h
+++ b/src/audio/audio.h
@@ -111,6 +111,21 @@ struct module;
 #include <utility>
 #include <vector>

+class audio_frame2;
+
+class audio_frame2_resampler { // resampler state cached between audio_frame2::resample() calls
+public:
+        audio_frame2_resampler();  // starts with no resampler allocated
+        ~audio_frame2_resampler(); // destroys the resampler if one was created
+private:
+        void *resampler; // type is (SpeexResamplerState *)
+        int resample_from;         // source sample rate the resampler was created for
+        size_t resample_ch_count;  // channel count the resampler was created for
+        int resample_to;           // target sample rate the resampler was created for
+
+        friend class audio_frame2; // audio_frame2::resample() reads and updates the fields above
+};
+
 class audio_frame2
 {
 public:
@@ -135,6 +150,20 @@ public:
        bool has_same_prop_as(audio_frame2 const &frame) const;
        void set_duration(double duration);
        static audio_frame2 copy_with_bps_change(audio_frame2 const &frame, int new_bps);
+        void change_bps(int new_bps);
+        /**
+         * @note
+         * The frame must contain 16-bit samples (bps == 2)!
+         *
+         * @param resampler_state opaque state that holds a resampler so that it doesn't
+         *                        need to be reinitialized between calls on successive audio
+         *                        frames. It reinitializes itself when needed (when the source
+         *                        or new sample rate, or the channel count, changes). It is
+         *                        therefore strongly recommended to use it only within a single
+         *                        stream, whose properties may change occasionally, and not to
+         *                        share it between two streams that have different properties.
+         */
+        void resample(audio_frame2_resampler &resampler_state, int new_sample_rate);
 private:
        int bps;                /* bytes per sample */
        int sample_rate;
--- a/src/audio/codec.cpp
+++ b/src/audio/codec.cpp
@@ -343,11 +343,6 @@ void audio_codec_done(struct audio_codec_state *s)
        free(s);
 }

-const int *audio_codec_get_supported_bps(struct audio_codec_state *s)
-{
-        return audio_codecs[s->index]->supported_bytes_per_second;
-}
-
 audio_codec_t get_audio_codec(const char *codec_str) {
        char *codec = strdup(codec_str);
        if (strchr(codec, ':')) {
@@ -383,6 +378,9 @@ static char *get_val_from_cfg(const char *audio_codec_cfg, const char *key)
        return NULL;
 }

+/**
+ * @returns user specified sample rate or 0 if unspecified
+ */
 int get_audio_codec_sample_rate(const char *audio_codec_cfg)
 {
        char *val = get_val_from_cfg(audio_codec_cfg, "sample_rate=");
@@ -391,7 +389,7 @@ int get_audio_codec_sample_rate(const char *audio_codec_cfg)
                free(val);
                return ret;
        } else {
-                return 48000;
+                return 0;
        }
 }

--- a/src/audio/codec.h
+++ b/src/audio/codec.h
@@ -59,7 +59,6 @@ typedef enum {

 struct audio_codec {
        const audio_codec_t *supported_codecs;
-        const int *supported_bytes_per_second;
        void *(*init)(audio_codec_t, audio_codec_direction_t, bool, int bitrate);
        audio_channel *(*compress)(void *, audio_channel *);
        audio_channel *(*decompress)(void *, audio_channel *);
@@ -84,7 +83,6 @@ struct audio_codec_state *audio_codec_reconfigure(struct audio_codec_state *old,
                audio_codec_t audio_codec, audio_codec_direction_t);
 const audio_frame2 *audio_codec_compress(struct audio_codec_state *, const audio_frame2 *);
 audio_frame2 *audio_codec_decompress(struct audio_codec_state *, audio_frame2 *);
-const int *audio_codec_get_supported_bps(struct audio_codec_state *);
 void audio_codec_done(struct audio_codec_state *);

 void list_audio_codecs(void);
--- a/src/audio/codec/dummy_pcm.c
+++ b/src/audio/codec/dummy_pcm.c
@@ -107,7 +107,6 @@ static void dummy_pcm_done(void *state)

 struct audio_codec dummy_pcm_audio_codec = {
        .supported_codecs = (audio_codec_t[]){ AC_PCM, AC_NONE },
-        .supported_bytes_per_second = NULL,
        .init = dummy_pcm_init,
        .compress = dummy_pcm_compress,
        .decompress = dummy_pcm_decompress,
--- a/src/audio/codec/libavcodec.cpp
+++ b/src/audio/codec/libavcodec.cpp
@@ -539,11 +539,9 @@ static void libavcodec_done(void *state)
 }

 static audio_codec_t supported_codecs[] = { AC_ALAW, AC_MULAW, AC_SPEEX, AC_OPUS, AC_G722, AC_FLAC, AC_MP3, AC_AAC, AC_NONE };
-static int supported_bytes_per_second[] = { 1, 2, 3, 4, 0 };

 struct audio_codec libavcodec_audio_codec = {
        supported_codecs,
-        supported_bytes_per_second,
        libavcodec_init,
        libavcodec_compress,
        libavcodec_decompress,
--- a/src/audio/utils.cpp
+++ b/src/audio/utils.cpp
@@ -45,9 +45,11 @@
 #include "audio/audio.h"
 #include "audio/codec.h"
 #include "audio/utils.h" 
+#include "debug.h"
 #include <assert.h>
 #include <limits.h>
 #include <math.h>
+#include <speex/speex_resampler.h>
 #include <stdio.h>
 #include <string.h>

@@ -59,6 +61,17 @@

 using namespace std;

+/// Creates an empty state: no resampler yet, all cached stream properties zeroed.
+audio_frame2_resampler::audio_frame2_resampler()
+        : resampler(nullptr), resample_from(0), resample_ch_count(0), resample_to(0)
+{}
+
+/// Releases the speex resampler, if one was ever created.
+audio_frame2_resampler::~audio_frame2_resampler() {
+        auto *st = static_cast<SpeexResamplerState *>(resampler);
+        if (st != nullptr) { speex_resampler_destroy(st); }
+}
+
 /**
 * @brief Creates empty audio_frame2
 */
@@ -237,13 +250,100 @@ audio_frame2 audio_frame2::copy_with_bps_change(audio_frame2 const &frame, int n
        for (size_t i = 0; i < ret.channels.size(); i++) {
                ret.channels[i].second = frame.get_data_len(i) / frame.get_bps() * new_bps;
                ret.channels[i].first = unique_ptr<char []>(new char[ret.channels[i].second]);
-                change_bps(ret.channels[i].first.get(), new_bps, frame.get_data(i), frame.get_bps(),
+                ::change_bps(ret.channels[i].first.get(), new_bps, frame.get_data(i), frame.get_bps(),
                                frame.get_data_len(i));
        }

        return ret;
 }

+/// Converts every channel to new_bps bytes per sample, replacing the channel buffers.
+void audio_frame2::change_bps(int new_bps)
+{
+        if (bps == new_bps) {
+                return; // nothing to convert
+        }
+
+        const size_t ch_count = channels.size();
+        std::vector<pair<unique_ptr<char []>, size_t> > converted(ch_count);
+
+        for (size_t ch = 0; ch < ch_count; ch++) {
+                // same number of samples, each new_bps bytes wide
+                const size_t out_len = channels[ch].second / bps * new_bps;
+                unique_ptr<char []> out(new char[out_len]);
+                ::change_bps(out.get(), new_bps, get_data(ch), get_bps(), get_data_len(ch));
+                converted[ch] = make_pair(move(out), out_len);
+        }
+
+        bps = new_bps;
+        channels = move(converted);
+}
+
+/// Resamples every channel to new_sample_rate using the speex resampler kept in resampler_state.
+/// Requires bps == 2 (throws logic_error otherwise); aborts if the resampler cannot be created.
+void audio_frame2::resample(audio_frame2_resampler & resampler_state, int new_sample_rate)
+{
+        if (new_sample_rate == sample_rate) {
+                return;
+        }
+        /// @todo
+        /// speex supports also floats so there could be possibility also to add support for more bps
+        if (bps != 2) {
+                throw logic_error("Only 16 bits per sample are currently for resamling supported!");
+        }
+
+        std::vector<pair<unique_ptr<char []>, size_t> > new_channels(channels.size());
+        // (re)create the resampler when the rates or channel count differ from the cached ones
+        if (sample_rate != resampler_state.resample_from || new_sample_rate != resampler_state.resample_to || channels.size() != resampler_state.resample_ch_count) {
+                if (resampler_state.resampler) {
+                        speex_resampler_destroy((SpeexResamplerState *) resampler_state.resampler);
+                }
+                resampler_state.resampler = nullptr;
+                int err;
+                /// @todo
+                /// Consider lower quality than 10 (max). This will improve both latency and
+                /// performance.
+                resampler_state.resampler = speex_resampler_init(channels.size(), sample_rate,
+                                new_sample_rate, 10, &err);
+                if(err) {
+                        abort();
+                }
+                resampler_state.resample_from = sample_rate;
+                resampler_state.resample_to = new_sample_rate;
+                resampler_state.resample_ch_count = channels.size();
+        }
+
+        for (size_t i = 0; i < channels.size(); i++) {
+                // allocate new storage (size in bytes) + 10 ms headroom
+                size_t new_size = channels[i].second * new_sample_rate / sample_rate + new_sample_rate * sizeof(int16_t) / 100;
+                new_channels[i] = make_pair(unique_ptr<char []>(new char[new_size]), new_size);
+        }
+
+        /// @todo
+        /// Consider doing this in parallel - complex resampling requires some milliseconds.
+        /// Parallel resampling would reduce latency (and improve performance if there is not
+        /// enough single-core power).
+        for (size_t i = 0; i < channels.size(); i++) {
+                uint32_t in_frames = get_data_len(i) / sizeof(int16_t);
+                uint32_t in_frames_orig = in_frames;
+                // speex takes the output capacity in samples, not bytes
+                uint32_t write_frames = new_channels[i].second / sizeof(int16_t);
+                speex_resampler_process_int(
+                                (SpeexResamplerState *) resampler_state.resampler,
+                                i,
+                                (spx_int16_t *)get_data(i), &in_frames,
+                                (spx_int16_t *)(void *) new_channels[i].first.get(), &write_frames);
+                if (in_frames != in_frames_orig) {
+                        LOG(LOG_LEVEL_WARNING) << "Audio frame resampler: not all samples resampled!\n";
+                }
+                new_channels[i].second = write_frames * sizeof(int16_t); // samples actually produced -> bytes
+        }
+
+        sample_rate = new_sample_rate;
+        channels = move(new_channels);
+}
+
+
 static double get_normalized(const char *in, int bps) {
        int64_t sample = 0;
        bool negative = false;