Audio sender: don't auto resample to 48000

+ some rework - audio_frame2 now has methods for changing bps and
resampling
This commit is contained in:
Martin Pulec
2015-07-23 19:40:12 +02:00
parent d1fc89981c
commit 899ca0b56f
7 changed files with 155 additions and 119 deletions

View File

@@ -52,7 +52,6 @@
#include "config_win32.h"
#endif
#include <speex/speex_resampler.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -242,7 +241,7 @@ struct state_audio * audio_cfg_init(struct module *parent, const char *addrs, in
return NULL;
}
s = (struct state_audio *) calloc(1, sizeof(struct state_audio));
s = new state_audio();
s->start_time = *start_time;
if (strcmp("none", send_cfg) == 0 && strcmp("none", recv_cfg) == 0) {
@@ -285,7 +284,7 @@ struct state_audio * audio_cfg_init(struct module *parent, const char *addrs, in
goto error;
#else
fprintf(stderr, "Speex not compiled in. Could not enable echo cancellation.\n");
free(s);
delete s;
goto error;
#endif /* HAVE_SPEEX */
} else {
@@ -449,7 +448,7 @@ error:
delete s->captured;
audio_codec_done(s->audio_coder);
free(s);
delete s;
exit_uv(1);
return NULL;
}
@@ -499,7 +498,7 @@ void audio_done(struct state_audio *s)
delete s->captured;
free(s);
delete s;
}
}
@@ -728,82 +727,6 @@ echo_play(s->echo_state, &pbuf_data.buffer);
return NULL;
}
/**
 * Working state of the sender-side resample() helper.
 */
struct state_resample {
        /// output of the last resample() call; its data member is malloc()ed
        /// by resample() and released by the caller
        struct audio_frame resampled;
        /// scratch space used to convert the input to 16-bit samples
        char *resample_buffer;
        /// Speex resampler instance, created on demand by resample()
        SpeexResamplerState *resampler;
        /// sample rate of the input the resampler was created for
        int resample_from;
        /// channel count of the input the resampler was created for
        int resample_ch_count;
        /// target sample rate
        int resample_to;
        /// zero-terminated list of bytes-per-sample values accepted by the
        /// codec; NULL is treated by set_contains() as "accepts anything"
        const int *codec_supported_bytes_per_sample;
};
static void resample(struct state_resample *s, struct audio_frame *buffer);
static bool set_contains(const int *vals, int needle);
/**
 * Tells whether @p needle is a member of a zero-terminated list of ints.
 *
 * @param vals   zero-terminated array of values; NULL stands for an
 *               unrestricted set that contains every value
 * @param needle value to look for (the terminating 0 is never matched)
 * @returns true if @p vals is NULL or @p needle occurs before the terminator
 */
static bool set_contains(const int *vals, int needle)
{
        if (vals == NULL) {
                return true;
        }

        for (const int *it = vals; *it != 0; ++it) {
                if (*it == needle) {
                        return true;
                }
        }

        return false;
}
/**
 * Produces in s->resampled a copy of @p buffer that has the target sample
 * rate (s->resample_to) and a sample size accepted by the codec.
 *
 * If the input already satisfies both conditions, it is only duplicated.
 * Otherwise it is converted to 16-bit samples (if needed) and resampled
 * with Speex; the result is then always 2 bytes per sample.
 *
 * s->resampled.data is freshly malloc()ed on every call and has to be
 * free()d by the caller.
 *
 * @param s      resampling state (resampler is (re)created when the input
 *               sample rate or channel count changes)
 * @param buffer input audio frame, left untouched
 */
static void resample(struct state_resample *s, struct audio_frame *buffer)
{
        // start with a shallow copy of all frame properties
        memcpy(&s->resampled, buffer, sizeof s->resampled);

        if (buffer->sample_rate == s->resample_to &&
                        set_contains(s->codec_supported_bytes_per_sample, buffer->bps)) {
                // nothing to convert - just hand out a private copy of the samples
                s->resampled.data = (char *) malloc(buffer->data_len);
                memcpy(s->resampled.data, buffer->data, buffer->data_len);
                return;
        }

        // resampler is able only to resample 16-bit samples
        assert(set_contains(s->codec_supported_bytes_per_sample, 2));

        // expect that we may get as much as 12-times more data (eg. 8 kHz to 96 kHz)
        uint32_t out_frames = 12 * (buffer->data_len / buffer->ch_count / buffer->bps);
        s->resampled.data = (char *) malloc(out_frames * 2 * buffer->ch_count);

        // (re)create the resampler whenever the input format differs from the cached one
        if (s->resample_from != buffer->sample_rate ||
                        s->resample_ch_count != buffer->ch_count) {
                s->resample_from = buffer->sample_rate;
                s->resample_ch_count = buffer->ch_count;
                if (s->resampler) {
                        speex_resampler_destroy(s->resampler);
                }
                int err;
                s->resampler = speex_resampler_init(buffer->ch_count, s->resample_from,
                                s->resample_to, 10, &err);
                if (err) {
                        abort();
                }
        }

        // by default feed the resampler directly from the input frame ...
        char *src = buffer->data;
        int src_len = buffer->data_len;
        if (buffer->bps != 2) {
                // ... unless the samples first have to be converted to 16 bits
                change_bps(s->resample_buffer, 2, buffer->data, buffer->bps, buffer->data_len);
                src = s->resample_buffer;
                src_len = buffer->data_len / buffer->bps * 2;
        }

        uint32_t in_frames = src_len / buffer->ch_count / 2;
        const uint32_t in_frames_expected = in_frames;
        speex_resampler_process_interleaved_int(s->resampler,
                        (spx_int16_t *)(void *) src, &in_frames,
                        (spx_int16_t *)(void *) s->resampled.data, &out_frames);
        // the whole input is required to be consumed
        assert (in_frames == in_frames_expected);

        s->resampled.data_len = out_frames * 2 /* bps */ * buffer->ch_count;
        s->resampled.sample_rate = s->resample_to;
        s->resampled.bps = 2;
}
static void audio_sender_process_message(struct state_audio *s, struct msg_sender *msg)
{
assert(s->audio_tx_mode == MODE_SENDER);
@@ -876,14 +799,8 @@ static void *audio_sender_thread(void *arg)
{
struct state_audio *s = (struct state_audio *) arg;
struct audio_frame *buffer = NULL;
struct state_resample resample_state;
audio_frame2_resampler resampler_state;
memset(&resample_state, 0, sizeof(resample_state));
resample_state.resample_to = s->resample_to;
resample_state.resample_buffer = (char *) malloc(1024 * 1024);
resample_state.codec_supported_bytes_per_sample =
audio_codec_get_supported_bps(s->audio_coder);
printf("Audio sending started.\n");
while (!should_exit_audio) {
struct message *msg;
@@ -920,27 +837,29 @@ static void *audio_sender_thread(void *arg)
if (s->paused) {
continue;
}
audio_frame2 buffer_new;
audio_frame2 bf_n(buffer);
// RESAMPLE
if (s->resample_to != 0 && bf_n.get_sample_rate() != s->resample_to) {
if (bf_n.get_bps() != 2) {
bf_n.change_bps(2);
}
bf_n.resample(resampler_state, s->resample_to);
}
// COMPRESS
process_statistics(s, &bf_n);
// SEND
if(s->sender == NET_NATIVE) {
// RESAMPLE
resample(&resample_state, buffer);
// COMPRESS
buffer_new = audio_frame2(&resample_state.resampled);
process_statistics(s, &buffer_new);
free(resample_state.resampled.data);
audio_frame2 *uncompressed = &buffer_new;
audio_frame2 *uncompressed = &bf_n;
const audio_frame2 *compressed = NULL;
while((compressed = audio_codec_compress(s->audio_coder, uncompressed))) {
audio_tx_send(s->tx_session, s->audio_network_device, compressed);
uncompressed = NULL;
}
}else if(s->sender == NET_STANDARD){
// RESAMPLE
resample(&resample_state, buffer);
// COMPRESS
buffer_new = audio_frame2(&resample_state.resampled);
free(resample_state.resampled.data);
audio_frame2 *uncompressed = &buffer_new;
audio_frame2 *uncompressed = &bf_n;
const audio_frame2 *compressed = NULL;
while((compressed = audio_codec_compress(s->audio_coder, uncompressed))) {
//TODO to be dynamic as a function of the selected codec, now only accepting mulaw without checking errors
@@ -955,11 +874,6 @@ static void *audio_sender_thread(void *arg)
}
}
if(resample_state.resampler) {
speex_resampler_destroy(resample_state.resampler);
}
free(resample_state.resample_buffer);
return NULL;
}

View File

@@ -111,6 +111,21 @@ struct module;
#include <utility>
#include <vector>
class audio_frame2;
/**
 * Holder of the resampler state used by audio_frame2::resample().
 *
 * The object owns the underlying Speex resampler: the destructor destroys
 * it if one was created. All members are private and manipulated only by
 * audio_frame2 (declared as a friend below).
 */
class audio_frame2_resampler {
public:
audio_frame2_resampler();
~audio_frame2_resampler();
private:
// NOTE(review): presumably kept as void * so that this header does not need
// to include the Speex headers - confirm
void *resampler; // type is (SpeexResamplerState *)
// source sample rate the resampler was created for
int resample_from;
// channel count the resampler was created for
size_t resample_ch_count;
// target sample rate the resampler was created for
int resample_to;
// audio_frame2::resample() reads and updates the members above
friend class audio_frame2;
};
class audio_frame2
{
public:
@@ -135,6 +150,20 @@ public:
bool has_same_prop_as(audio_frame2 const &frame) const;
void set_duration(double duration);
static audio_frame2 copy_with_bps_change(audio_frame2 const &frame, int new_bps);
void change_bps(int new_bps);
/**
* Resamples the frame to the given sample rate.
*
* @note
* bps of the frame needs to be 16 bits (ie. 2 bytes per sample)!
*
* @param resampler_state opaque state that holds the resampler so that it doesn't need
* to be reinitialized between calls on successive audio frames.
* It reinitializes itself when needed (when the source or the new
* sample rate or the channel count changes). Therefore, it is
* strongly recommended to use it only for a single stream (whose
* properties may change occasionally) and not to share it eg.
* between two streams that have different properties.
* @param new_sample_rate requested sample rate of the frame
*/
void resample(audio_frame2_resampler &resampler_state, int new_sample_rate);
private:
int bps; /* bytes per sample */
int sample_rate;

View File

@@ -343,11 +343,6 @@ void audio_codec_done(struct audio_codec_state *s)
free(s);
}
/**
 * Returns the supported_bytes_per_second list of the codec implementation
 * selected by @p s (looked up in audio_codecs by s->index).
 *
 * @param s audio codec state
 * @returns the list as stored in the codec descriptor
 */
const int *audio_codec_get_supported_bps(struct audio_codec_state *s)
{
        return (*audio_codecs[s->index]).supported_bytes_per_second;
}
audio_codec_t get_audio_codec(const char *codec_str) {
char *codec = strdup(codec_str);
if (strchr(codec, ':')) {
@@ -383,6 +378,9 @@ static char *get_val_from_cfg(const char *audio_codec_cfg, const char *key)
return NULL;
}
/**
* @returns user specified sample rate or 0 if unspecified
*/
int get_audio_codec_sample_rate(const char *audio_codec_cfg)
{
char *val = get_val_from_cfg(audio_codec_cfg, "sample_rate=");
@@ -391,7 +389,7 @@ int get_audio_codec_sample_rate(const char *audio_codec_cfg)
free(val);
return ret;
} else {
return 48000;
return 0;
}
}

View File

@@ -59,7 +59,6 @@ typedef enum {
struct audio_codec {
const audio_codec_t *supported_codecs;
const int *supported_bytes_per_second;
void *(*init)(audio_codec_t, audio_codec_direction_t, bool, int bitrate);
audio_channel *(*compress)(void *, audio_channel *);
audio_channel *(*decompress)(void *, audio_channel *);
@@ -84,7 +83,6 @@ struct audio_codec_state *audio_codec_reconfigure(struct audio_codec_state *old,
audio_codec_t audio_codec, audio_codec_direction_t);
const audio_frame2 *audio_codec_compress(struct audio_codec_state *, const audio_frame2 *);
audio_frame2 *audio_codec_decompress(struct audio_codec_state *, audio_frame2 *);
const int *audio_codec_get_supported_bps(struct audio_codec_state *);
void audio_codec_done(struct audio_codec_state *);
void list_audio_codecs(void);

View File

@@ -107,7 +107,6 @@ static void dummy_pcm_done(void *state)
struct audio_codec dummy_pcm_audio_codec = {
.supported_codecs = (audio_codec_t[]){ AC_PCM, AC_NONE },
.supported_bytes_per_second = NULL,
.init = dummy_pcm_init,
.compress = dummy_pcm_compress,
.decompress = dummy_pcm_decompress,

View File

@@ -539,11 +539,9 @@ static void libavcodec_done(void *state)
}
static audio_codec_t supported_codecs[] = { AC_ALAW, AC_MULAW, AC_SPEEX, AC_OPUS, AC_G722, AC_FLAC, AC_MP3, AC_AAC, AC_NONE };
static int supported_bytes_per_second[] = { 1, 2, 3, 4, 0 };
struct audio_codec libavcodec_audio_codec = {
supported_codecs,
supported_bytes_per_second,
libavcodec_init,
libavcodec_compress,
libavcodec_decompress,

View File

@@ -45,9 +45,11 @@
#include "audio/audio.h"
#include "audio/codec.h"
#include "audio/utils.h"
#include "debug.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <speex/speex_resampler.h>
#include <stdio.h>
#include <string.h>
@@ -59,6 +61,17 @@
using namespace std;
/**
 * @brief Creates an empty resampler state
 *
 * No Speex resampler exists yet and all cached stream properties are zero.
 */
audio_frame2_resampler::audio_frame2_resampler() :
        resampler(nullptr),
        resample_from(0),
        resample_ch_count(0),
        resample_to(0)
{
}
/**
 * @brief Destroys the held Speex resampler, if there is one
 */
audio_frame2_resampler::~audio_frame2_resampler()
{
        if (resampler == nullptr) {
                return;
        }
        speex_resampler_destroy(static_cast<SpeexResamplerState *>(resampler));
}
/**
* @brief Creates empty audio_frame2
*/
@@ -237,13 +250,100 @@ audio_frame2 audio_frame2::copy_with_bps_change(audio_frame2 const &frame, int n
for (size_t i = 0; i < ret.channels.size(); i++) {
ret.channels[i].second = frame.get_data_len(i) / frame.get_bps() * new_bps;
ret.channels[i].first = unique_ptr<char []>(new char[ret.channels[i].second]);
change_bps(ret.channels[i].first.get(), new_bps, frame.get_data(i), frame.get_bps(),
::change_bps(ret.channels[i].first.get(), new_bps, frame.get_data(i), frame.get_bps(),
frame.get_data_len(i));
}
return ret;
}
void audio_frame2::change_bps(int new_bps)
{
if (new_bps == bps) {
return;
}
std::vector<pair<unique_ptr<char []>, size_t> > new_channels(channels.size());
for (size_t i = 0; i < channels.size(); i++) {
size_t new_size = channels[i].second / bps * new_bps;
new_channels[i] = make_pair(unique_ptr<char []>(new char[new_size]), new_size);
}
for (size_t i = 0; i < channels.size(); i++) {
::change_bps(new_channels[i].first.get(), new_bps, get_data(i), get_bps(),
get_data_len(i));
}
bps = new_bps;
channels = move(new_channels);
}
/**
 * @brief Resamples all channels of the frame in place
 *
 * Does nothing if the frame already has the requested sample rate. The Speex
 * resampler held in @p resampler_state is (re)created whenever the source
 * sample rate, the target sample rate or the channel count differs from what
 * the state was created for; otherwise it is reused.
 *
 * @param resampler_state state holding the resampler between calls
 * @param new_sample_rate requested sample rate
 * @throws logic_error if the frame does not have 2 bytes per sample
 */
void audio_frame2::resample(audio_frame2_resampler & resampler_state, int new_sample_rate)
{
        if (new_sample_rate == sample_rate) {
                return;
        }

        /// @todo
        /// speex supports also floats so there could be possibility also to add support for more bps
        if (bps != 2) {
                throw logic_error("Only 16 bits per sample are currently for resamling supported!");
        }

        std::vector<pair<unique_ptr<char []>, size_t> > new_channels(channels.size());

        if (sample_rate != resampler_state.resample_from
                        || new_sample_rate != resampler_state.resample_to
                        || channels.size() != resampler_state.resample_ch_count) {
                if (resampler_state.resampler) {
                        speex_resampler_destroy((SpeexResamplerState *) resampler_state.resampler);
                }
                resampler_state.resampler = nullptr;

                int err;
                /// @todo
                /// Consider lower quality than 10 (max). This will improve both latency and
                /// performance.
                resampler_state.resampler = speex_resampler_init(channels.size(), sample_rate,
                                new_sample_rate, 10, &err);
                if (err) {
                        abort();
                }
                // remember what the resampler was created for
                resampler_state.resample_from = sample_rate;
                resampler_state.resample_to = new_sample_rate;
                resampler_state.resample_ch_count = channels.size();
        }

        for (size_t i = 0; i < channels.size(); i++) {
                // allocate new storage + 10 ms headroom
                size_t new_size = channels[i].second * new_sample_rate / sample_rate
                        + new_sample_rate * sizeof(int16_t) / 100;
                new_channels[i] = make_pair(unique_ptr<char []>(new char[new_size]), new_size);
        }

        /// @todo
        /// Consider doing this in parallel - complex resampling requires some milliseconds.
        /// Parallel resampling would reduce latency (and improve performance if there is not
        /// enough single-core power).
        for (size_t i = 0; i < channels.size(); i++) {
                uint32_t in_frames = get_data_len(i) / sizeof(int16_t);
                uint32_t in_frames_orig = in_frames;
                // Speex takes the output capacity in samples, not in bytes - passing the
                // byte size would allow it to write past the end of the buffer
                uint32_t write_frames = new_channels[i].second / sizeof(int16_t);

                speex_resampler_process_int(
                                (SpeexResamplerState *) resampler_state.resampler,
                                i,
                                (spx_int16_t *)get_data(i), &in_frames,
                                (spx_int16_t *)(void *) new_channels[i].first.get(), &write_frames);
                // in_frames now holds the number of input samples actually consumed
                if (in_frames != in_frames_orig) {
                        LOG(LOG_LEVEL_WARNING) << "Audio frame resampler: not all samples resampled!\n";
                }
                // write_frames now holds the number of samples produced
                new_channels[i].second = write_frames * sizeof(int16_t);
        }

        sample_rate = new_sample_rate;
        channels = move(new_channels);
}
static double get_normalized(const char *in, int bps) {
int64_t sample = 0;
bool negative = false;