Files
UltraGrid/src/video_display/vrg.cpp
2021-05-18 11:13:44 +02:00

429 lines
15 KiB
C++

/**
* @file video_display/vrg.cpp
* @author Martin Pulec <pulec@cesnet.cz>
*/
/*
* Copyright (c) 2020-2021 CESNET, z. s. p. o.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of CESNET nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "config_win32.h"
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <rtp/rtp.h>
#include <queue>
#include <string>
#include <unordered_map>
// VrgInputFormat::RGBA conflicts with codec_t::RGBA
#define RGBA VR_RGBA
#ifdef HAVE_VRG_H
#include <vrgstream.h>
#else
#include "vrgstream-fallback.h"
#endif
#undef RGBA
#include "vrgstream-serialize.hpp"
#include "debug.h"
#include "lib_common.h"
#include "utils/misc.h"
#include "utils/video_frame_pool.h"
#include "video.h"
#include "video_display.h"
#define MAX_QUEUE_SIZE 1
#define MOD_NAME "[VRG] "
#define MAGIC_VRG to_fourcc('V', 'R', 'G', ' ')
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#endif
using std::chrono::duration_cast;
using std::chrono::high_resolution_clock;
using std::chrono::microseconds;
using std::chrono::seconds;
using std::condition_variable;
using std::cout;
using std::mutex;
using std::queue;
using std::unique_lock;
using std::unordered_map;
using namespace std::string_literals;
#ifdef HAVE_CUDA
/**
 * Allocation policy for page-locked host memory (cudaMallocHost).
 *
 * Each policy carries its own release() because the matching free function
 * differs per allocation call: buffers from cudaMallocHost() have to be
 * returned with cudaFreeHost(), not cudaFree().
 */
struct cuda_malloc_host_allocate {
        cudaError_t operator()(void **ptr, size_t size) {
                return cudaMallocHost(ptr, size);
        }
        static cudaError_t release(void *ptr) {
                return cudaFreeHost(ptr);
        }
};

/**
 * Allocation policy for CUDA managed memory (cudaMallocManaged), released
 * with cudaFree().
 */
struct cuda_malloc_managed_allocate {
        cudaError_t operator()(void **ptr, size_t size) {
                return cudaMallocManaged(ptr, size);
        }
        static cudaError_t release(void *ptr) {
                return cudaFree(ptr);
        }
};
#endif

/**
 * video_frame_pool allocator backed by a CUDA allocation policy.
 *
 * @tparam cuda_allocator policy type; with HAVE_CUDA it must be callable as
 *                        `cudaError_t (void **ptr, size_t size)` and provide
 *                        a static `release(void *)`. Without HAVE_CUDA the
 *                        parameter is not used and plain malloc()/free() is
 *                        used instead.
 */
template<typename cuda_allocator>
struct vrg_cuda_allocator : public video_frame_pool_allocator {
        /// @returns buffer of at least @p size bytes or nullptr on failure
        void *allocate(size_t size) override {
#ifdef HAVE_CUDA
                void *ptr = nullptr;
                if (cuda_allocator()(&ptr, size) != cudaSuccess) {
                        LOG(LOG_LEVEL_ERROR) << MOD_NAME << "Cannot alloc CUDA buffer!\n";
                        return nullptr;
                }
                return ptr;
#else
                LOG(LOG_LEVEL_WARNING) << MOD_NAME << "CUDA not compiled in, falling back to plain malloc!\n";
                return malloc(size);
#endif
        }

        /// Releases a buffer previously returned by allocate().
        void deallocate(void *ptr) override {
#ifdef HAVE_CUDA
                if (ptr == nullptr) {
                        return;
                }
                // use the free function that matches the policy's allocation call
                if (cuda_allocator::release(ptr) != cudaSuccess) {
                        LOG(LOG_LEVEL_WARNING) << MOD_NAME << "Cannot free CUDA buffer!\n";
                }
#else
                free(ptr);
#endif
        }

        video_frame_pool_allocator *clone() const override {
                return new vrg_cuda_allocator(*this);
        }
};
/**
 * Per-instance state of the VRG display.
 *
 * Every plain member carries a default initializer so that the object is
 * fully defined no matter how it gets constructed (display_vrg_run() reads
 * the frame counters and the rtp pointer).
 */
struct state_vrg {
        uint32_t magic = 0;              ///< set to MAGIC_VRG by display_vrg_init()
        struct video_desc saved_desc{};  ///< last description passed to display_vrg_reconfigure()
#ifdef HAVE_CUDA
        video_frame_pool pool{0, vrg_cuda_allocator<cuda_malloc_host_allocate>()};
#else
        video_frame_pool pool{0, vrg_cuda_allocator<default_data_allocator>()};
#endif

        high_resolution_clock::time_point t0 = high_resolution_clock::now(); ///< start of the current FPS window
        long long int frames = 0;        ///< frames submitted so far
        long long int frames_last = 0;   ///< value of @ref frames at the start of the current FPS window

        struct rtp *rtp = nullptr;       ///< stored by DISPLAY_PROPERTY_S_RTP

        // type qualified explicitly - the member is named the same as the type
        std::queue<struct video_frame *> queue; ///< frames handed over by display_vrg_putf(); nullptr = poison pill
        mutex lock;                      ///< guards @ref queue
        condition_variable cv;           ///< signalled when a frame is pushed to @ref queue

        /// time when a RenderPacket for the given frame number was requested (used for RTT logging)
        unordered_map<uint32_t, high_resolution_clock::time_point> render_packet_ts;
};
/**
 * Device probe callback - reports an empty device list.
 *
 * @param[out] available_cards set to a null pointer
 * @param[out] count           set to 0
 * @param[out] deleter         set to free()
 */
static void display_vrg_probe(struct device_info **available_cards, int *count, void (**deleter)(void *))
{
        *count = 0;
        *available_cards = nullptr;
        *deleter = free;
}
/**
 * Creates the VRG display state.
 *
 * @param parent unused
 * @param fmt    configuration string: "help", "managed", "malloc", empty or
 *               NULL (the last two keep the default allocator)
 * @param flags  unused
 * @returns newly allocated state_vrg, or a null pointer if help was printed
 *          or the option was not recognized
 */
static void *display_vrg_init(struct module *parent, const char *fmt, unsigned int flags)
{
        UNUSED(flags);
        UNUSED(parent);

        // fmt may be NULL - comparing a std::string with a null pointer is
        // undefined, so normalize it to an empty string first
        const std::string cfg = fmt != nullptr ? fmt : "";

        if (cfg == "help") {
                cout << "Usage:\n\t-d vrg[:managed|:malloc]\n";
                cout << "where\n\tmanaged - use managed memory\n";
                return NULL;
        }

        // plain new throws on failure, so there is no null result to check
        struct state_vrg *s = new state_vrg();
        s->magic = MAGIC_VRG;

        if (cfg == "managed") {
                // without HAVE_CUDA the option is accepted but changes nothing
#ifdef HAVE_CUDA
                s->pool.replace_allocator(vrg_cuda_allocator<cuda_malloc_managed_allocate>());
#endif
        } else if (cfg == "malloc") {
                s->pool.replace_allocator(default_data_allocator());
        } else if (!cfg.empty()) {
                LOG(LOG_LEVEL_ERROR) << MOD_NAME << "Wrong option: " << cfg << "\n";
                delete s;
                return nullptr;
        }

        return s;
}
/**
 * Removes entries of state_vrg::render_packet_ts whose timestamp is more
 * than 60 whole seconds in the past.
 */
static void render_packet_ts_gc(struct state_vrg *s)
{
        const auto now = high_resolution_clock::now();
        auto &timestamps = s->render_packet_ts;

        auto entry = timestamps.begin();
        while (entry != timestamps.end()) {
                const auto age = duration_cast<seconds>(now - entry->second);
                if (age.count() > 60) {
                        // erase() returns the iterator following the removed element
                        entry = timestamps.erase(entry);
                        continue;
                }
                ++entry;
        }
}
/**
 * Worker loop of the display.
 *
 * For every frame taken from state_vrg::queue it
 *  -# (re)initializes the VRG stream if the frame codec differs from the
 *     configured one,
 *  -# asks VRG for a RenderPacket and, if its dimensions are within limits,
 *     passes it to rtp_send_rtcp_app() under the name "VIEW",
 *  -# submits the frame to VRG unless its own render packet has a zero
 *     dimension,
 *  -# logs the frame rate once at least 5 seconds have elapsed and prunes old
 *     request timestamps.
 *
 * The loop ends when a nullptr frame (poison pill) is dequeued.
 */
static void display_vrg_run(void *state)
{
        struct state_vrg *s = (struct state_vrg *) state;
        codec_t configured_codec = VIDEO_CODEC_NONE;

        while (1) {
                // block until a frame is available
                unique_lock<mutex> lk(s->lock);
                s->cv.wait(lk, [s]{ return s->queue.size() > 0; });
                struct video_frame *f = s->queue.front();
                s->queue.pop();
                lk.unlock();

                if (f == nullptr) { // poison pill
                        break;
                }

                if (configured_codec != f->color_spec) {
                        enum VrgInputFormat vrg_format{};
                        switch (f->color_spec) {
                        case CUDA_I420:
                        case I420:
                                vrg_format = YUV420;
                                break;
                        case CUDA_RGBA:
                        case RGBA:
                                vrg_format = VR_RGBA;
                                break;
                        default:
                                abort();
                        }
                        enum VrgStreamApiError ret = vrgStreamInit(vrg_format);
                        if (ret != Ok) {
                                LOG(LOG_LEVEL_ERROR) << MOD_NAME "Initialization failed: " << ret << "\n";
                                vf_free(f);
                                continue;
                        }
                        configured_codec = f->color_spec;
                }

                // obtain a RenderPacket from VRG
                struct RenderPacket render_packet{};
                enum VrgStreamApiError ret;
                high_resolution_clock::time_point render_frame_start = high_resolution_clock::now();
                ret = vrgStreamRenderFrame(&render_packet);
                high_resolution_clock::time_point render_frame_end = high_resolution_clock::now();
                if (ret != Ok) {
                        LOG(LOG_LEVEL_ERROR) << MOD_NAME "Render Frame failed: " << ret << "\n";
                } else {
                        double render_dur = duration_cast<microseconds>(render_frame_end - render_frame_start).count() / 1000000.0;
                        LOG(LOG_LEVEL_DEBUG) << MOD_NAME "Received RenderPacket for frame " << render_packet.frame << ": " << render_packet << ", dur: " << render_dur << " s.\n";
                        if (render_packet.pix_width_eye > 0 && render_packet.pix_height_eye > 0 &&
                                        render_packet.pix_width_eye <= 7680 && render_packet.pix_height_eye <= 4320) {
                                // remember when the frame was requested to report the RTT later
                                s->render_packet_ts.emplace(render_packet.frame, high_resolution_clock::now());
                                rtp_send_rtcp_app(s->rtp, "VIEW", sizeof render_packet, (char *) &render_packet);
                        } else {
                                LOG(LOG_LEVEL_ERROR) << MOD_NAME "Wrong RenderPacket dimensions: " << render_packet << "\n";
                        }
                }

                if (f->render_packet.pix_width_eye != 0 && f->render_packet.pix_height_eye != 0) {
                        high_resolution_clock::time_point t_start = high_resolution_clock::now();
                        ret = vrgStreamSubmitFrame(&f->render_packet, f->tiles[0].data, CPU);
                        if (ret != Ok) {
                                LOG(LOG_LEVEL_ERROR) << MOD_NAME "Submit Frame failed: " << ret << "\n";
                        }
                        high_resolution_clock::time_point now = high_resolution_clock::now();
                        const double submit_dur = duration_cast<microseconds>(now - t_start).count() / 1000000.0;

                        // The RTT is reported only if the request timestamp is
                        // known - measuring against a default-constructed
                        // time_point would print a meaningless number.
                        const auto request_ts = s->render_packet_ts.find(f->render_packet.frame);
                        if (request_ts != s->render_packet_ts.end()) {
                                const double render_packet_rtt = duration_cast<microseconds>(now - request_ts->second).count() / 1000000.0;
                                LOG(LOG_LEVEL_DEBUG) << "[VRG] Frame submit took " << submit_dur
                                        << " seconds, RTT: " << render_packet_rtt << "s\n";
                        } else {
                                LOG(LOG_LEVEL_DEBUG) << "[VRG] Frame submit took " << submit_dur
                                        << " seconds, RTT: unknown\n";
                        }
                        s->frames += 1;
                } else {
                        LOG(LOG_LEVEL_VERBOSE) << MOD_NAME << "Dismissed frame with zero dimensions!\n";
                }

                // periodic statistics
                high_resolution_clock::time_point now = high_resolution_clock::now();
                double elapsed = duration_cast<microseconds>(now - s->t0).count() / 1000000.0;
                if (elapsed >= 5) {
                        long long frames = s->frames - s->frames_last;
                        LOG(LOG_LEVEL_INFO) << "[VRG] " << frames << " frames in "
                                << elapsed << " seconds = " << frames / elapsed << " FPS\n";
                        render_packet_ts_gc(s);
                        s->t0 = now;
                        s->frames_last = s->frames;
                }

                vf_free(f);
        }
}
/**
 * Destroys the state created by display_vrg_init().
 *
 * @param state pointer to state_vrg (checked against MAGIC_VRG by assert)
 */
static void display_vrg_done(void *state)
{
        auto *vrg = static_cast<struct state_vrg *>(state);
        assert(vrg->magic == MAGIC_VRG);
        delete vrg;
}
/**
 * Hands out a frame obtained from the module's frame pool.
 *
 * @param state pointer to state_vrg (checked against MAGIC_VRG by assert)
 * @returns result of video_frame_pool::get_pod_frame()
 */
static struct video_frame *display_vrg_getf(void *state)
{
        auto *vrg = static_cast<struct state_vrg *>(state);
        assert(vrg->magic == MAGIC_VRG);

        struct video_frame *frame = vrg->pool.get_pod_frame();
        return frame;
}
/**
 * Enqueues a frame for display_vrg_run().
 *
 * @param state pointer to state_vrg (checked against MAGIC_VRG by assert)
 * @param frame frame to be displayed; nullptr is the poison pill that makes
 *              display_vrg_run() leave its loop
 * @param flags PUTF_DISCARD frees the frame right away; with PUTF_NONBLOCK
 *              set the frame is dropped if the queue already holds
 *              MAX_QUEUE_SIZE items
 * @retval 0 frame was queued (or discarded on request)
 * @retval 1 frame was dropped because the queue was full
 */
static int display_vrg_putf(void *state, struct video_frame *frame, int flags)
{
        struct state_vrg *s = (struct state_vrg *) state;
        assert(s->magic == MAGIC_VRG);

        if (flags == PUTF_DISCARD) {
                vf_free(frame);
                return 0;
        }

        {
                unique_lock<mutex> lk(s->lock);
                // The poison pill must never be dropped - it is the only thing
                // that ends the loop in display_vrg_run().
                const bool poison_pill = frame == nullptr;
                if (!poison_pill && (flags & PUTF_NONBLOCK) != 0u && s->queue.size() >= MAX_QUEUE_SIZE) {
                        LOG(LOG_LEVEL_DEBUG) << MOD_NAME << "Dropping frame - queue full!\n";
                        vf_free(frame);
                        return 1;
                }
                s->queue.push(frame);
        }
        // notify after the lock was released
        s->cv.notify_one();

        return 0;
}
static int display_vrg_ctl_property(void *state, int property, void *val, size_t *len)
{
struct state_vrg *s = (struct state_vrg *) state;
codec_t codecs[] = {
#ifdef HAVE_CUDA
CUDA_I420,
CUDA_RGBA,
#endif
I420,
RGBA,
};
int rgb_shift[] = {0, 8, 16};
switch (property) {
case DISPLAY_PROPERTY_CODECS:
if(sizeof(codecs) > *len) {
return FALSE;
}
memcpy(val, codecs, sizeof(codecs));
*len = sizeof(codecs);
break;
case DISPLAY_PROPERTY_RGB_SHIFT:
if(sizeof(rgb_shift) > *len) {
return FALSE;
}
memcpy(val, rgb_shift, sizeof(rgb_shift));
*len = sizeof(rgb_shift);
break;
case DISPLAY_PROPERTY_BUF_PITCH:
*(int *) val = PITCH_DEFAULT;
*len = sizeof(int);
break;
case DISPLAY_PROPERTY_S_RTP:
s->rtp = *(struct rtp **) val;
break;
default:
return FALSE;
}
return TRUE;
}
/**
 * Stores the new video description and reconfigures the frame pool.
 *
 * @param state pointer to state_vrg (checked against MAGIC_VRG by assert)
 * @param desc  new video description; its color_spec is asserted to be one
 *              of CUDA_I420, CUDA_RGBA, RGBA or I420
 * @returns TRUE
 */
static int display_vrg_reconfigure(void *state, struct video_desc desc)
{
        auto *vrg = static_cast<struct state_vrg *>(state);
        assert(vrg->magic == MAGIC_VRG);
        assert(desc.color_spec == CUDA_I420 || desc.color_spec == CUDA_RGBA
                        || desc.color_spec == RGBA || desc.color_spec == I420);

        vrg->saved_desc = desc;
#if 0
        if (desc.color_spec == CUDA_I420 || desc.color_spec == CUDA_RGBA) {
                s->pool.replace_allocator(vrg_cuda_allocator());
        } else {
                s->pool.replace_allocator(default_data_allocator());
        }
#endif

        // use headroom - VRG uses pitches; the buffer length is therefore
        // computed for a line that is 2048 pixels wider
        const auto buffer_len = vc_get_datalen(desc.width + 2048, desc.height, desc.color_spec);
        vrg->pool.reconfigure(desc, buffer_len);

        return TRUE;
}
/**
 * Audio playback callback - does nothing, both arguments are ignored.
 */
static void display_vrg_put_audio_frame(void *state, struct audio_frame *frame)
{
        UNUSED(frame);
        UNUSED(state);
}
/**
 * Audio reconfiguration callback - all arguments are ignored.
 *
 * @returns FALSE for every request
 */
static int display_vrg_reconfigure_audio(void *state, int quant_samples, int channels,
                int sample_rate)
{
        UNUSED(sample_rate);
        UNUSED(channels);
        UNUSED(quant_samples);
        UNUSED(state);

        return FALSE;
}
/**
 * Descriptor of the VRG display: the module's callbacks listed positionally
 * (probe, init, run, done, getf, putf, reconfigure, ctl_property,
 * put_audio_frame, reconfigure_audio) followed by the
 * DISPLAY_DOESNT_NEED_MAINLOOP flag.
 */
static const struct video_display_info display_vrg_info = {
display_vrg_probe,
display_vrg_init,
display_vrg_run,
display_vrg_done,
display_vrg_getf,
display_vrg_putf,
display_vrg_reconfigure,
display_vrg_ctl_property,
display_vrg_put_audio_frame,
display_vrg_reconfigure_audio,
DISPLAY_DOESNT_NEED_MAINLOOP,
};
// Passes the descriptor to REGISTER_MODULE under the name "vrg", with the
// video display library class and the video display ABI version.
REGISTER_MODULE(vrg, &display_vrg_info, LIBRARY_CLASS_VIDEO_DISPLAY, VIDEO_DISPLAY_ABI_VERSION);