UltraGrid/src/audio/codec/libavcodec.cpp

/**
 * @file   audio/codec/libavcodec.cpp
 * @author Martin Pulec     <pulec@cesnet.cz>
 */
/*
 * Copyright (c) 2012-2015 CESNET z.s.p.o.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, is permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of CESNET nor the names of its contributors may be
 *    used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#define __STDC_CONSTANT_MACROS

#ifdef HAVE_CONFIG_H
#include "config.h"
#include "config_unix.h"
#include "config_win32.h"
#endif /* HAVE_CONFIG_H */

#include "audio/codec/libavcodec.h"
#include "debug.h"

#include <memory>

extern "C" {
#include <libavcodec/avcodec.h>
#if LIBAVCODEC_VERSION_MAJOR >= 54
#include <libavutil/channel_layout.h>
#endif
#include <libavutil/mem.h>
}

#include <vector>
#include <unordered_map>
#include "audio/audio.h"
#include "audio/codec.h"
#include "audio/utils.h"
#include "libavcodec_common.h"
#include "utils/resource_manager.h"

/* Marker stored in libavcodec_codec_state::magic and asserted at every entry
 * point to catch a wrong or corrupted state pointer. */
#define MAGIC 0xb135ca11

/* For libavcodec major versions below 54, map the AV_CODEC_ID_* names used in
 * this file to the CODEC_ID_* names. */
#if LIBAVCODEC_VERSION_MAJOR < 54
#define AV_CODEC_ID_AAC CODEC_ID_AAC
#define AV_CODEC_ID_PCM_ALAW CODEC_ID_PCM_ALAW
#define AV_CODEC_ID_PCM_MULAW CODEC_ID_PCM_MULAW
#define AV_CODEC_ID_SPEEX CODEC_ID_SPEEX
#define AV_CODEC_ID_OPUS CODEC_ID_OPUS
#define AV_CODEC_ID_ADPCM_G722 CODEC_ID_ADPCM_G722
#define AV_CODEC_ID_FLAC CODEC_ID_FLAC
#define AV_CODEC_ID_MP3 CODEC_ID_MP3
#endif

using namespace std;

/* Forward declarations of the codec entry points; they are collected in the
 * libavcodec_audio_codec table at the end of this file. */
static void *libavcodec_init(audio_codec_t audio_codec, audio_codec_direction_t direction,
                bool try_init, int bitrate);
static audio_channel *libavcodec_compress(void *, audio_channel *);
static audio_channel *libavcodec_decompress(void *, audio_channel *);
static void libavcodec_done(void *);

/* Declared with the constructor attribute so that it runs automatically when
 * the object is loaded, without an explicit call. */
static void register_module(void) __attribute__((constructor));

/**
 * Registers this module's codec table (libavcodec_audio_codec) with the
 * audio codec framework via register_audio_codec().
 */
static void register_module(void)
{
        register_audio_codec(&libavcodec_audio_codec);
}

/**
 * Translation table from UltraGrid audio codec identifiers to libavcodec
 * codec IDs. libavcodec_init() looks the requested codec up here and fails
 * when there is no entry. std::hash<int> is supplied explicitly as the hasher
 * for the audio_codec_t key.
 */
static std::unordered_map<audio_codec_t, AVCodecID, std::hash<int>> mapping {
        { AC_ALAW, AV_CODEC_ID_PCM_ALAW },
        { AC_MULAW, AV_CODEC_ID_PCM_MULAW },
        { AC_SPEEX, AV_CODEC_ID_SPEEX },
        /* Opus is only mapped for libavcodec major version 54 and newer */
#if LIBAVCODEC_VERSION_MAJOR >= 54
        { AC_OPUS, AV_CODEC_ID_OPUS },
#endif
        { AC_G722, AV_CODEC_ID_ADPCM_G722 },
        { AC_FLAC, AV_CODEC_ID_FLAC },
        { AC_MP3, AV_CODEC_ID_MP3 },
        { AC_AAC, AV_CODEC_ID_AAC },
};

/**
 * Per-instance state of one libavcodec audio encoder or decoder.
 * Allocated zero-filled by libavcodec_init() and released by libavcodec_done().
 */
struct libavcodec_codec_state {
        uint32_t magic;                        ///< set to MAGIC; asserted by compress/decompress/done
        pthread_mutex_t    *libav_global_lock; ///< shared lock (LAVCD_LOCK_NAME) held around avcodec_open2()/avcodec_close()
        AVCodecContext     *codec_ctx;         ///< context from avcodec_alloc_context3()
        AVCodec            *codec;             ///< encoder or decoder found for the requested codec

        AVPacket            pkt;               ///< encoder output packet / decoder input packet
        AVFrame            *av_frame;          ///< frame fed to the encoder / filled by the decoder

        struct audio_desc   saved_desc;        ///< description the codec was last (re)initialized for

        audio_channel       tmp;               ///< encoder only: converted input samples waiting to be encoded
        audio_channel       output_channel;    ///< result returned by compress/decompress

        void               *samples;           ///< encoder only: av_malloc()ed buffer that av_frame's data points to

        int                 bitrate;           ///< requested bitrate; applied to codec_ctx->bit_rate when > 0
};

/**
 * Initializes the selected audio codec.
 *
 * Only looks up the codec and allocates the state, codec context, frame and
 * working buffers. The codec itself is opened lazily by reinitialize_coder()
 * or reinitialize_decoder() once the first audio description is known.
 *
 * @param audio_codec requested audio codec
 * @param direction   which direction will be used (encoding or decoding)
 * @param try_init    if true no error messages will be printed.
 *                    This is intended for checking which codecs are present
 * @param bitrate     requested bitrate; stored in the state and applied to the
 *                    encoder context when greater than 0
 * @retval NULL if initialization failed
 * @retval !=NULL codec state, to be released with libavcodec_done()
 */
static void *libavcodec_init(audio_codec_t audio_codec, audio_codec_direction_t direction, bool try_init,
                int bitrate)
{
        auto it = mapping.find(audio_codec);
        if (it == mapping.end()) {
                if (!try_init) {
                        fprintf(stderr, "[Libavcodec] Cannot find mapping for codec \"%s\"!\n",
                                        get_name_to_audio_codec(audio_codec));
                }
                return NULL;
        }
        enum AVCodecID codec_id = it->second;

        avcodec_register_all();

        /* calloc() zero-fills the state, so saved_desc, tmp, output_channel,
         * samples etc. all start out cleared */
        struct libavcodec_codec_state *s = (struct libavcodec_codec_state *)
                calloc(1, sizeof(struct libavcodec_codec_state));
        if (!s) {
                if (!try_init) {
                        fprintf(stderr, "[Libavcodec] Could not allocate codec state\n");
                }
                return NULL;
        }

        if(direction == AUDIO_CODER) {
                s->codec = avcodec_find_encoder(codec_id);
        } else {
                s->codec = avcodec_find_decoder(codec_id);
        }
        if(!s->codec) {
                if (!try_init) {
                        fprintf(stderr, "Your Libavcodec build doesn't contain codec \"%s\".\n",
                                get_name_to_audio_codec(audio_codec));
                }
                free(s);
                return NULL;
        }

        s->magic = MAGIC;
        s->libav_global_lock = rm_acquire_shared_lock(LAVCD_LOCK_NAME);
        s->codec_ctx = avcodec_alloc_context3(s->codec);
        if(!s->codec_ctx) { // not likely :)
                if (!try_init) {
                        fprintf(stderr, "Could not allocate audio codec context\n");
                }
                /* give back the lock reference taken above */
                rm_release_shared_lock(LAVCD_LOCK_NAME);
                free(s);
                return NULL;
        }

        s->codec_ctx->strict_std_compliance = -2;

        s->bitrate = bitrate;

        av_init_packet(&s->pkt);
        s->pkt.size = 0;
        s->pkt.data = NULL;

        s->av_frame = av_frame_alloc();
        s->tmp.data = (char *) malloc(1024*1024);
        s->output_channel.data = (char *) malloc(1024*1024);

        if (!s->av_frame || !s->tmp.data || !s->output_channel.data) {
                if (!try_init) {
                        fprintf(stderr, "[Libavcodec] Could not allocate audio buffers\n");
                }
                /* undo everything acquired so far; free() and av_frame_free()
                 * accept NULL, so partial failures are handled as well */
                av_frame_free(&s->av_frame);
                free((void *) s->tmp.data);
                free((void *) s->output_channel.data);
                pthread_mutex_lock(s->libav_global_lock);
                avcodec_close(s->codec_ctx);
                pthread_mutex_unlock(s->libav_global_lock);
                av_free(s->codec_ctx);
                rm_release_shared_lock(LAVCD_LOCK_NAME);
                free(s);
                return NULL;
        }

        if(direction == AUDIO_CODER) {
                s->output_channel.codec = audio_codec;
        } else {
                s->output_channel.codec = AC_PCM;
        }

        return s;
}

/**
 * Checks whether a given sample format is supported by the codec.
 *
 * @param codec      codec whose sample_fmts list is searched; the list is
 *                   terminated by AV_SAMPLE_FMT_NONE
 * @param sample_fmt sample format to look for
 * @retval 1 the format is listed
 * @retval 0 the format is not listed, or the codec does not publish a list
 *           (sample_fmts is NULL)
 */
static int check_sample_fmt(AVCodec *codec, enum AVSampleFormat sample_fmt)
{
    /* libavcodec documents sample_fmts as "NULL if unknown" */
    if (codec->sample_fmts == NULL)
        return 0;

    for (const enum AVSampleFormat *p = codec->sample_fmts;
            *p != AV_SAMPLE_FMT_NONE; p++) {
        if (*p == sample_fmt)
            return 1;
    }
    return 0;
}

/**
 * (Re)opens the encoder for a new input audio description.
 *
 * Closes the codec context, picks a sample format supported by the encoder
 * that matches desc.bps (falling back to the first non-double format the
 * encoder lists), opens the codec for mono audio and allocates the frame
 * sample buffer.
 *
 * @param s    codec state
 * @param desc description of the audio that is going to be compressed
 * @retval true  the encoder is open and s->saved_desc equals desc
 * @retval false the encoder could not be set up; s->saved_desc is cleared so
 *               that the next libavcodec_compress() call retries
 */
static bool reinitialize_coder(struct libavcodec_codec_state *s, struct audio_desc desc)
{
        /* The codec gets closed below. Forget the old description first so that
         * a failure cannot leave a closed codec paired with a description that
         * still compares equal to later input. */
        memset(&s->saved_desc, 0, sizeof s->saved_desc);

        av_freep(&s->samples);
        pthread_mutex_lock(s->libav_global_lock);
        avcodec_close(s->codec_ctx);
        pthread_mutex_unlock(s->libav_global_lock);

        /*  put sample parameters */
        if (s->bitrate > 0) {
                s->codec_ctx->bit_rate = s->bitrate;
        }
        s->codec_ctx->sample_rate = desc.sample_rate;

        /* candidate formats for the input sample width, in order of preference */
        vector<enum AVSampleFormat> sample_fmts;

        switch(desc.bps) {
                case 1:
                        sample_fmts.push_back(AV_SAMPLE_FMT_U8);
                        sample_fmts.push_back(AV_SAMPLE_FMT_U8P);
                        break;
                case 2:
                        sample_fmts.push_back(AV_SAMPLE_FMT_S16);
                        sample_fmts.push_back(AV_SAMPLE_FMT_S16P);
                        break;
                case 3:
                case 4:
                        sample_fmts.push_back(AV_SAMPLE_FMT_S32);
                        sample_fmts.push_back(AV_SAMPLE_FMT_S32P);
                        sample_fmts.push_back(AV_SAMPLE_FMT_FLT);
                        sample_fmts.push_back(AV_SAMPLE_FMT_FLTP);
                        break;
                default:
                        /* no preferred format, the fallback below decides */
                        break;
        }

        s->codec_ctx->sample_fmt = AV_SAMPLE_FMT_NONE;

        /* sample_fmts may be NULL when the codec does not publish its formats */
        const enum AVSampleFormat *supported = s->codec->sample_fmts;
        if (supported != NULL) {
                for (auto it = sample_fmts.begin(); it != sample_fmts.end(); ++it) {
                        if (check_sample_fmt(s->codec, *it)) {
                                s->codec_ctx->sample_fmt = *it;
                                break;
                        }
                }

                if (s->codec_ctx->sample_fmt == AV_SAMPLE_FMT_NONE) {
                        /* fall back to the first listed format that is not
                         * double, which libavcodec_compress() cannot produce */
                        for (int i = 0; supported[i] != AV_SAMPLE_FMT_NONE; i++) {
                                if (supported[i] != AV_SAMPLE_FMT_DBL &&
                                                supported[i] != AV_SAMPLE_FMT_DBLP) {
                                        s->codec_ctx->sample_fmt = supported[i];
                                        break;
                                }
                        }
                }
        }

        if (s->codec_ctx->sample_fmt == AV_SAMPLE_FMT_NONE) {
                log_msg(LOG_LEVEL_ERROR, "[Libavcodec] Unsupported audio sample!\n");
                return false;
        }

        s->codec_ctx->channels = 1;
#if LIBAVCODEC_VERSION_MAJOR >= 54
        s->codec_ctx->channel_layout = AV_CH_LAYOUT_MONO;
#endif

        pthread_mutex_lock(s->libav_global_lock);
        /* open it */
        if (avcodec_open2(s->codec_ctx, s->codec, NULL) < 0) {
                fprintf(stderr, "Could not open codec\n");
                pthread_mutex_unlock(s->libav_global_lock);
                return false;
        }
        pthread_mutex_unlock(s->libav_global_lock);

        /* codecs accepting any frame size are fed one sample per frame */
        if(s->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE) {
                s->codec_ctx->frame_size = 1;
        }

        s->av_frame->nb_samples     = s->codec_ctx->frame_size;
        s->av_frame->format         = s->codec_ctx->sample_fmt;
#if LIBAVCODEC_VERSION_MAJOR >= 54
        s->av_frame->channel_layout = AV_CH_LAYOUT_MONO;
        s->av_frame->sample_rate    = s->codec_ctx->sample_rate;
#endif

        int channels = 1;
        /* the codec gives us the frame size, in samples,
         * we calculate the size of the samples buffer in bytes */
        int buffer_size = av_samples_get_buffer_size(NULL, channels, s->codec_ctx->frame_size,
                        s->codec_ctx->sample_fmt, 1);
        if (buffer_size < 0) {
                fprintf(stderr, "could not determine samples buffer size\n");
                return false;
        }

        s->samples = av_malloc(buffer_size);
        if (!s->samples) {
                fprintf(stderr, "could not allocate %d bytes for samples buffer\n",
                                buffer_size);
                return false;
        }
        /* setup the data pointers in the AVFrame */
        int ret = avcodec_fill_audio_frame(s->av_frame, channels, s->codec_ctx->sample_fmt,
                        (const uint8_t*)s->samples, buffer_size, 1);
        if (ret < 0) {
                fprintf(stderr, "could not setup audio frame\n");
                return false;
        }

        s->output_channel.sample_rate = desc.sample_rate;
        s->output_channel.bps = av_get_bytes_per_sample(s->codec_ctx->sample_fmt);
        s->saved_desc = desc;

        return true;
}

/**
 * (Re)opens the decoder for a new input audio description.
 *
 * Closes the codec context, configures it for mono audio with the sample
 * rate from desc and opens it again.
 *
 * @param s    codec state
 * @param desc description of the compressed audio that is going to be decoded
 * @retval true  the decoder is open and s->saved_desc equals desc
 * @retval false the decoder could not be opened; s->saved_desc is cleared so
 *               that the next libavcodec_decompress() call retries
 */
static bool reinitialize_decoder(struct libavcodec_codec_state *s, struct audio_desc desc)
{
        /* The codec gets closed below. Forget the old description first so that
         * a failed reopen cannot leave a closed codec paired with a description
         * that still compares equal to later input. */
        memset(&s->saved_desc, 0, sizeof s->saved_desc);

        pthread_mutex_lock(s->libav_global_lock);
        avcodec_close(s->codec_ctx);
        pthread_mutex_unlock(s->libav_global_lock);

        s->codec_ctx->channels = 1;

        /* NOTE(review): set for every codec, although the original comment
         * refers to ADPCM only - confirm other decoders ignore it */
        s->codec_ctx->bits_per_coded_sample = 4; // ADPCM
        s->codec_ctx->sample_rate = desc.sample_rate;

        pthread_mutex_lock(s->libav_global_lock);
        /* open it */
        if (avcodec_open2(s->codec_ctx, s->codec, NULL) < 0) {
                fprintf(stderr, "Could not open codec\n");
                pthread_mutex_unlock(s->libav_global_lock);
                return false;
        }
        pthread_mutex_unlock(s->libav_global_lock);

        s->saved_desc = desc;

        return true;
}

/**
 * Compresses audio with the encoder held in the state.
 *
 * New input (if any) is converted to the encoder's sample format and appended
 * to the internal staging buffer; then frames are encoded from that buffer.
 * Encoders with a fixed frame size produce at most one frame per call, so the
 * caller may call again with channel == NULL to obtain the remaining data.
 *
 * @param state   codec state returned by libavcodec_init()
 * @param channel audio to compress, or NULL to only process buffered samples
 * @return pointer to the state's output channel holding the compressed data
 *         (overwritten by the next call), or NULL when no packet was produced
 *         or the encoder could not be (re)initialized
 */
static audio_channel *libavcodec_compress(void *state, audio_channel * channel)
{
        struct libavcodec_codec_state *s = (struct libavcodec_codec_state *) state;
        assert(s->magic == MAGIC);

        assert(s->codec_ctx->sample_fmt != AV_SAMPLE_FMT_DBL && // not supported yet
                        s->codec_ctx->sample_fmt != AV_SAMPLE_FMT_DBLP);

        /* size of s->tmp.data and s->output_channel.data as allocated in libavcodec_init() */
        const int buf_size = 1024*1024;

        if(channel) {
                if(!audio_desc_eq(s->saved_desc, audio_desc_from_audio_channel(channel))) {
                        if(!reinitialize_coder(s, audio_desc_from_audio_channel(channel))) {
                                fprintf(stderr, "Unable to reinitialize audio compress!\n");
                                return NULL;
                        }
                }

                const bool is_float = s->codec_ctx->sample_fmt == AV_SAMPLE_FMT_FLT ||
                        s->codec_ctx->sample_fmt == AV_SAMPLE_FMT_FLTP;
                const bool same_bps = s->output_channel.bps == channel->bps;

                /* number of bytes the input occupies after conversion */
                size_t converted_len;
                if (is_float && !same_bps) {
                        converted_len = (size_t) (channel->data_len / channel->bps * 4);
                } else if (!is_float && !same_bps) {
                        converted_len = (size_t) (channel->data_len / s->saved_desc.bps * s->output_channel.bps);
                } else {
                        converted_len = (size_t) channel->data_len;
                }

                if ((size_t) s->tmp.data_len + converted_len > (size_t) buf_size) {
                        /* the staging buffer has a fixed size; never write past its end */
                        fprintf(stderr, "[Libavcodec] Audio compress buffer overflow, dropping input!\n");
                } else {
                        char *dst = (char *) s->tmp.data + s->tmp.data_len;
                        if (is_float && same_bps) {
                                int2float(dst, channel->data, channel->data_len);
                        } else if (is_float) {
                                /* widen to 32-bit integers first, then convert to float */
                                unique_ptr<char []> widened(new char[converted_len]);
                                change_bps((char *) widened.get(), 4, channel->data, channel->bps, channel->data_len);
                                int2float(dst, widened.get(), converted_len);
                        } else if (!same_bps) {
                                change_bps(dst, s->output_channel.bps,
                                                channel->data, s->saved_desc.bps, channel->data_len);
                        } else {
                                memcpy(dst, channel->data, channel->data_len);
                        }
                        s->tmp.data_len += converted_len;
                }
        }

        s->output_channel.data_len = 0;
        s->output_channel.duration = 0.0;

        const int bps = s->output_channel.bps;
        const int chunk_size = s->codec_ctx->frame_size * bps;
        if (chunk_size <= 0 || s->samples == NULL) {
                /* encoder has not been opened successfully yet (e.g. a flush
                 * before any input arrived) - there is nothing to encode */
                return NULL;
        }

        int offset = 0;
        while(offset + chunk_size <= s->tmp.data_len) {
                /* let the encoder write right behind the packets produced so far */
                s->pkt.data = (unsigned char *) s->output_channel.data + s->output_channel.data_len;
                s->pkt.size = buf_size - s->output_channel.data_len;
                int got_packet = 0;
                /* av_frame's data pointers reference s->samples */
                memcpy(s->samples, s->tmp.data + offset, chunk_size);
                int ret = avcodec_encode_audio2(s->codec_ctx, &s->pkt, s->av_frame,
                                &got_packet);
                if(ret < 0) {
                        char errbuf[1024];
                        av_strerror(ret, errbuf, sizeof(errbuf));
                        fprintf(stderr, "Warning: unable to compress audio: %s\n",
                                        errbuf);
                }
                if(got_packet) {
                        s->output_channel.data_len += s->pkt.size;
                        ///@ todo
                        /// well, this is wrong, denominator should be actually AVStream::time_base. Where do
                        /// we get this?? Anyway, seems like it equals sample rate.
                        s->output_channel.duration += s->pkt.duration / (double) s->output_channel.sample_rate;
                }
                offset += chunk_size;
                /* fixed frame size encoders are given a single frame per call */
                if(!(s->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE))
                        break;
        }

        /* keep the not yet encoded remainder at the start of the staging buffer */
        s->tmp.data_len -= offset;
        memmove((char *) s->tmp.data, s->tmp.data + offset, s->tmp.data_len);

        if(s->output_channel.data_len) {
                return &s->output_channel;
        } else {
                return NULL;
        }
}

/**
 * Decompresses audio with the decoder held in the state.
 *
 * The input is copied to a zero-padded temporary buffer, decoded frame by
 * frame into the state's output channel and, for float sample formats,
 * converted to 32-bit integers.
 *
 * @param state   codec state returned by libavcodec_init()
 * @param channel compressed audio to decode
 * @return pointer to the state's output channel holding the decoded samples
 *         (overwritten by the next call), or NULL if the decoder could not be
 *         (re)initialized, decoding failed or the decoded data does not fit
 *         into the output buffer
 */
static audio_channel *libavcodec_decompress(void *state, audio_channel * channel)
{
        struct libavcodec_codec_state *s = (struct libavcodec_codec_state *) state;
        assert(s->magic == MAGIC);

        if(!audio_desc_eq(s->saved_desc, audio_desc_from_audio_channel(channel))) {
                if(!reinitialize_decoder(s, audio_desc_from_audio_channel(channel))) {
                        fprintf(stderr, "Unable to reinitialize audio decompress!\n");
                        return NULL;
                }
        }

        /* size of s->output_channel.data as allocated in libavcodec_init() */
        const int out_buf_size = 1024*1024;

        // FFMPEG buffer needs to be FF_INPUT_BUFFER_PADDING_SIZE longer than data
        // and the padding has to be zeroed
        unique_ptr<unsigned char []> tmp_buffer(new unsigned char[channel->data_len + FF_INPUT_BUFFER_PADDING_SIZE]);
        memcpy(tmp_buffer.get(), channel->data, channel->data_len);
        memset(tmp_buffer.get() + channel->data_len, 0, FF_INPUT_BUFFER_PADDING_SIZE);

        s->pkt.data = tmp_buffer.get();
        s->pkt.size = channel->data_len;
        s->output_channel.data_len = 0;
        bool failed = false;
        while (s->pkt.size > 0) {
                int got_frame = 0;

                av_frame_unref(s->av_frame);

                int len = avcodec_decode_audio4(s->codec_ctx, s->av_frame, &got_frame,
                                &s->pkt);
                if (len < 0) {
                        fprintf(stderr, "Error while decoding\n");
                        failed = true;
                        break;
                }
                if (got_frame) {
                        int channels = 1;
                        /* if a frame has been decoded, output it */
                        int data_size = av_samples_get_buffer_size(NULL, channels,
                                        s->av_frame->nb_samples,
                                        s->codec_ctx->sample_fmt, 1);
                        if (data_size < 0 ||
                                        data_size > out_buf_size - (int) s->output_channel.data_len) {
                                fprintf(stderr, "[Libavcodec] Decoded audio does not fit into output buffer!\n");
                                failed = true;
                                break;
                        }
                        /* append behind the samples decoded so far; the write
                         * position advances by bytes written, not by input
                         * bytes consumed */
                        memcpy((char *) s->output_channel.data + s->output_channel.data_len,
                                        s->av_frame->data[0], data_size);
                        s->output_channel.data_len += data_size;
                } else if (len == 0) {
                        /* nothing consumed and nothing produced - retrying with
                         * the same input would loop forever */
                        break;
                }
                s->pkt.size -= len;
                s->pkt.data += len;
                s->pkt.dts = s->pkt.pts = AV_NOPTS_VALUE;
        }

        /* the packet pointed into tmp_buffer, which is released on return */
        s->pkt.data = NULL;
        s->pkt.size = 0;

        if (failed) {
                return NULL;
        }

        //
        // perform needed conversions (float->int32, int32->dest_bps)
        //
        assert(s->codec_ctx->sample_fmt != AV_SAMPLE_FMT_DBL && // not supported yet
                        s->codec_ctx->sample_fmt != AV_SAMPLE_FMT_DBLP);

        // convert from float if needed
        if (s->codec_ctx->sample_fmt == AV_SAMPLE_FMT_FLT ||
                        s->codec_ctx->sample_fmt == AV_SAMPLE_FMT_FLTP) {
                unique_ptr<char []> int32_data(new char [s->output_channel.data_len]);
                float2int(int32_data.get(), s->output_channel.data, s->output_channel.data_len);
                memcpy((char *) s->output_channel.data, int32_data.get(), s->output_channel.data_len);
                s->output_channel.bps = 4;
        } else {
                s->output_channel.bps =
                        av_get_bytes_per_sample(s->codec_ctx->sample_fmt);
        }

        s->output_channel.sample_rate = s->codec_ctx->sample_rate;

        return &s->output_channel;
}

static void libavcodec_done(void *state)
{
        struct libavcodec_codec_state *s = (struct libavcodec_codec_state *) state;
        assert(s->magic == MAGIC);

        pthread_mutex_lock(s->libav_global_lock);
        avcodec_close(s->codec_ctx);
        pthread_mutex_unlock(s->libav_global_lock);

        rm_release_shared_lock(LAVCD_LOCK_NAME);
        free((void *) s->output_channel.data);
        free((void *) s->tmp.data);
        av_free_packet(&s->pkt);
        av_freep(&s->samples);
        av_frame_free(&s->av_frame);

        free(s);
}

/* Codecs offered by this module; the list is terminated by AC_NONE. Whether a
 * codec is really usable is decided at run time by libavcodec_init(). */
static audio_codec_t supported_codecs[] = { AC_ALAW, AC_MULAW, AC_SPEEX, AC_OPUS, AC_G722, AC_FLAC, AC_MP3, AC_AAC, AC_NONE };
/* Zero-terminated list of accepted sample widths.
 * NOTE(review): despite the name, the values look like bytes per sample
 * (cf. the desc.bps switch in reinitialize_coder()) - confirm. */
static int supported_bytes_per_second[] = { 1, 2, 3, 4, 0 };

/* Codec table handed to register_audio_codec() by register_module(). */
struct audio_codec libavcodec_audio_codec = {
        supported_codecs,
        supported_bytes_per_second,
        libavcodec_init,
        libavcodec_compress,
        libavcodec_decompress,
        libavcodec_done
};