mirror of
https://github.com/outbackdingo/UltraGrid.git
synced 2026-03-20 04:09:03 +00:00
Added CUDA DXT compression
This commit is contained in:
@@ -602,6 +602,10 @@ libavcodec: @LIBAVCODEC_DECOMPRESS_LIB_TARGET@ @LIBAVCODEC_COMPRESS_LIB_TARGET@
|
||||
mkdir -p lib/ultragrid
|
||||
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_jpeg.so.@video_compress_abi_version@ $^ @JPEG_LIB@ -o $@
|
||||
|
||||
@CUDA_DXT_COMPRESS_LIB_TARGET@: @CUDA_DXT_OBJ@
|
||||
mkdir -p lib/ultragrid
|
||||
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_cuda_dxt.so.@video_compress_abi_version@ $^ @CUDA_DXT_LIB@ -o $@
|
||||
|
||||
@RTDXT_DECOMPRESS_LIB_TARGET@: @GL_COMMON_OBJ@ @X_OBJ@ @RTDXT_COMMON_OBJ@ @RTDXT_DECOMPRESS_OBJ@ @RTDXT_COMMON_HEADERS@
|
||||
mkdir -p lib/ultragrid
|
||||
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vdecompress_rtdxt.so.@video_decompress_abi_version@ @GL_COMMON_OBJ@ @X_OBJ@ @RTDXT_COMMON_OBJ@ @RTDXT_DECOMPRESS_OBJ@ @RTDXT_LIB@ -o $@
|
||||
|
||||
53
configure.ac
53
configure.ac
@@ -1962,6 +1962,50 @@ AC_SUBST(JPEG_DECOMPRESS_OBJ)
|
||||
AC_SUBST(JPEG_INC)
|
||||
AC_SUBST(JPEG_LIB)
|
||||
|
||||
# -------------------------------------------------------------------------------------------------
|
||||
# CUDA DXT
|
||||
# -------------------------------------------------------------------------------------------------
|
||||
CUDA_DXT_OBJ=
|
||||
CUDA_DXT_INC=
|
||||
CUDA_DXT_LIB=
|
||||
|
||||
cuda_dxt=no
|
||||
|
||||
AC_ARG_ENABLE(cuda-dxt,
|
||||
AS_HELP_STRING([--disable-cuda-dxt], [disable CUDA DXT compression (auto)]),
|
||||
[cuda_dxt_req=$enableval],
|
||||
[cuda_dxt_req=auto])
|
||||
|
||||
LIBS=$SAVED_LIBS
|
||||
|
||||
if test "$jpeg_req" != no -a $HAVE_CUDA = yes
|
||||
then
|
||||
cuda_dxt=yes
|
||||
|
||||
CUDA_DXT_LIB=" $CUDA_LIB"
|
||||
CUDA_DXT_INC=$CUDA_INC
|
||||
CUDA_DXT_OBJ="src/video_compress/cuda_dxt.o cuda_dxt/cuda_dxt.cu.o"
|
||||
AC_DEFINE([HAVE_CUDA_DXT], [1], [Build with CUDA DXT support])
|
||||
AC_SUBST(CUDA_DXT_COMPRESS_LIB_TARGET, "lib/ultragrid/vcompress_cuda_dxt.so.$video_compress_abi_version")
|
||||
LIB_TARGETS="$LIB_TARGETS $CUDA_DXT_COMPRESS_LIB_TARGET"
|
||||
LIB_OBJS="$LIB_OBJS src/video_compress/cuda_dxt.o"
|
||||
if test -z "$included_shared_cuda_dxt_cu"; then
|
||||
LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.cu.o"
|
||||
included_shared_cuda_dxt_cu=yes
|
||||
fi
|
||||
DEFINE_CUDA
|
||||
fi
|
||||
|
||||
if test $cuda_dxt_req = yes -a $cuda_dxt = no; then
|
||||
AC_MSG_ERROR([CUDA DXT not found]);
|
||||
fi
|
||||
|
||||
LIB_MODULES="$LIB_MODULES $CUDA_DXT_LIB"
|
||||
|
||||
AC_SUBST(CUDA_DXT_OBJ)
|
||||
AC_SUBST(CUDA_DXT_INC)
|
||||
AC_SUBST(CUDA_DXT_LIB)
|
||||
|
||||
# -------------------------------------------------------------------------------------------------
|
||||
# JPEG to DXT plugin
|
||||
# -------------------------------------------------------------------------------------------------
|
||||
@@ -1973,14 +2017,19 @@ AC_ARG_ENABLE(jpeg_to_dxt,
|
||||
|
||||
if test $jpeg_env_ok = yes -a $jpeg_to_dxt_req != no
|
||||
then
|
||||
jpeg_to_dxt=yes
|
||||
JPEG_TO_DXT_INC=" $CUDA_INC"
|
||||
JPEG_TO_DXT_LIB=" $CUDA_LIB -lgpujpeg"
|
||||
JPEG_TO_DXT_OBJ="src/video_decompress/jpeg_to_dxt.o cuda_dxt/cuda_dxt.cu.o"
|
||||
AC_SUBST(JPEG_TO_DXT_DECOMPRESS_LIB_TARGET, "lib/ultragrid/vdecompress_jpeg_to_dxt.so.$video_decompress_abi_version")
|
||||
LIB_TARGETS="$LIB_TARGETS $JPEG_TO_DXT_DECOMPRESS_LIB_TARGET"
|
||||
LIB_OBJS="$LIB_OBJS $JPEG_TO_DXT_OBJ"
|
||||
LIB_OBJS="$LIB_OBJS src/video_decompress/jpeg_to_dxt.o"
|
||||
AC_DEFINE([HAVE_JPEG_TO_DXT], [1], [Build with JPEG to DXT transcode support])
|
||||
jpeg_to_dxt=yes
|
||||
if test -z "$included_shared_cuda_dxt_cu"; then
|
||||
LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.cu.o"
|
||||
included_shared_cuda_dxt_cu=yes
|
||||
fi
|
||||
DEFINE_CUDA
|
||||
fi
|
||||
|
||||
if test $jpeg_to_dxt = no -a $jpeg_to_dxt_req = yes
|
||||
|
||||
@@ -693,6 +693,42 @@ __global__ static void dxt_kernel(const void * src, void * out, int size_x, int
|
||||
dxt_encode<DXT_TYPE>(out, block_idx, r, g, b);
|
||||
}
|
||||
|
||||
__global__ static void yuv422_to_yuv444_kernel(const void * src, void * out, int pix_count) {
|
||||
// coordinates of this thread
|
||||
const int block_idx_x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
// skip if out of bounds
|
||||
if(block_idx_x >= pix_count / 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
uchar4 *this_src = ((uchar4 *) src) + block_idx_x * 2;
|
||||
uchar4 *this_out = ((uchar4 *) out) + block_idx_x * 3;
|
||||
|
||||
uchar4 pix12 = this_src[0];
|
||||
uchar4 pix34 = this_src[1];
|
||||
|
||||
uchar4 out_pix[3];
|
||||
out_pix[0].x = pix12.y;
|
||||
out_pix[0].y = pix12.x;
|
||||
out_pix[0].z = pix12.z;
|
||||
|
||||
out_pix[0].w = pix12.w;
|
||||
out_pix[1].x = pix12.x;
|
||||
out_pix[1].y = pix12.z;
|
||||
|
||||
out_pix[1].z = pix34.y;
|
||||
out_pix[1].w = pix34.x;
|
||||
out_pix[2].x = pix34.z;
|
||||
|
||||
out_pix[2].z = pix34.w;
|
||||
out_pix[2].w = pix34.x;
|
||||
out_pix[2].x = pix34.z;
|
||||
|
||||
this_out[0] = out_pix[0];
|
||||
this_out[1] = out_pix[1];
|
||||
this_out[2] = out_pix[2];
|
||||
}
|
||||
|
||||
/// Compute grid size and launch DXT kernel.
|
||||
template <bool YUV_TO_RGB, int DXT_TYPE>
|
||||
@@ -722,6 +758,14 @@ static int dxt_launch(const void * src, void * out, int sx, int sy, cudaStream_t
|
||||
return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0;
|
||||
}
|
||||
|
||||
int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStream_t str) {
|
||||
// grid and threadblock sizes
|
||||
const dim3 tsiz(64, 1);
|
||||
int thread_count = pix_count / 4; // we process block of 4 pixels
|
||||
const dim3 gsiz((thread_count + tsiz.x - 1) / tsiz.x, 1);
|
||||
yuv422_to_yuv444_kernel<<<gsiz, tsiz, 0, str>>>(src, out, pix_count);
|
||||
return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0;
|
||||
}
|
||||
|
||||
/// CUDA DXT1 compression (only RGB without alpha).
|
||||
/// @param src Pointer to top-left source pixel in device-memory buffer.
|
||||
@@ -768,3 +812,8 @@ int cuda_yuv_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaS
|
||||
int cuda_rgb_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) {
|
||||
return dxt_launch<false, 6>(src, out, size_x, size_y, stream);
|
||||
}
|
||||
|
||||
int cuda_yuv_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) {
|
||||
return dxt_launch<true, 6>(src, out, size_x, size_y, stream);
|
||||
}
|
||||
|
||||
|
||||
@@ -84,6 +84,8 @@ int cuda_rgb_to_dxt6
|
||||
cudaStream_t stream
|
||||
);
|
||||
|
||||
int cuda_yuv_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream);
|
||||
int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStream_t str);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end of extern "C" */
|
||||
|
||||
@@ -1100,3 +1100,45 @@ int codec_is_a_rgb(codec_t codec)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns line decoder for specifiedn input and output codec.
|
||||
*/
|
||||
bool get_decoder_from_to(codec_t in, codec_t out, decoder_t *decoder)
|
||||
{
|
||||
struct item {
|
||||
decoder_t decoder;
|
||||
codec_t in;
|
||||
codec_t out;
|
||||
};
|
||||
|
||||
struct item decoders[] = {
|
||||
{ (decoder_t) vc_copylineDVS10, DVS10, UYVY },
|
||||
{ (decoder_t) vc_copylinev210, v210, UYVY },
|
||||
{ (decoder_t) vc_copylineYUYV, YUYV, UYVY },
|
||||
{ (decoder_t) vc_copyliner10k, R10k, RGBA },
|
||||
//{ vc_copylineRGBA, RGBA, RGBA },
|
||||
{ (decoder_t) vc_copylineDVS10toV210, DVS10, v210 },
|
||||
{ (decoder_t) vc_copylineRGBAtoRGB, RGBA, RGB },
|
||||
{ (decoder_t) vc_copylineRGBtoRGBA, RGB, RGBA },
|
||||
// following is disabled - shouldn't be senected automatically
|
||||
//{ (decoder_t) vc_copylineRGBtoUYVY, RGB, UYVY },
|
||||
//{ (decoder_t) vc_copylineUYVYtoRGB, UYVY, RGB },
|
||||
//{ vc_copylineBGRtoUYVY, BGR, UYVY },
|
||||
//{ vc_copylineRGBAtoUYVY, RGBA, UYVY },
|
||||
{ (decoder_t) vc_copylineBGRtoRGB, BGR, RGB },
|
||||
{ (decoder_t) vc_copylineDPX10toRGBA, DPX10, RGBA },
|
||||
{ (decoder_t) vc_copylineDPX10toRGB, DPX10, RGB },
|
||||
//{ vc_copylineRGB, RGB, RGB }.
|
||||
};
|
||||
|
||||
for (unsigned int i = 0; i < sizeof(decoders)/sizeof(struct item); ++i) {
|
||||
if (decoders[i].in == in && decoders[i].out == out) {
|
||||
*decoder = decoders[i].decoder;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -139,6 +139,8 @@ void vc_copylineDPX10toRGB(unsigned char *dst, const unsigned char *src, int dst
|
||||
void vc_copylineRGB(unsigned char *dst, const unsigned char *src, int dst_len,
|
||||
int rshift, int gshift, int bshift);
|
||||
|
||||
bool get_decoder_from_to(codec_t in, codec_t out, decoder_t *decoder);
|
||||
|
||||
int codec_is_a_rgb(codec_t codec);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -49,6 +49,7 @@
|
||||
#include "module.h"
|
||||
#include "video.h"
|
||||
#include "video_compress.h"
|
||||
#include "video_compress/cuda_dxt.h"
|
||||
#include "video_compress/dxt_glsl.h"
|
||||
#include "video_compress/fastdxt.h"
|
||||
#include "video_compress/libavcodec.h"
|
||||
@@ -184,6 +185,16 @@ struct compress_t compress_modules[] = {
|
||||
MK_NAME(libavcodec_compress_tile),
|
||||
NULL
|
||||
},
|
||||
#endif
|
||||
#if defined HAVE_CUDA_DXT || defined BUILD_LIBRARIES
|
||||
{
|
||||
"cuda_dxt",
|
||||
"cuda_dxt",
|
||||
MK_NAME(cuda_dxt_compress_init),
|
||||
MK_NAME(NULL),
|
||||
MK_NAME(cuda_dxt_compress_tile),
|
||||
NULL
|
||||
},
|
||||
#endif
|
||||
{
|
||||
"none",
|
||||
|
||||
260
src/video_compress/cuda_dxt.cpp
Normal file
260
src/video_compress/cuda_dxt.cpp
Normal file
@@ -0,0 +1,260 @@
|
||||
/**
|
||||
* @file video_compress/cuda_dxt.cpp
|
||||
* @author Martin Pulec <pulec@cesnet.cz>
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2012-2013 CESNET z.s.p.o.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, is permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of CESNET nor the names of its contributors may be
|
||||
* used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
|
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#include "config_unix.h"
|
||||
#include "config_win32.h"
|
||||
#endif // HAVE_CONFIG_H
|
||||
|
||||
#include "video_compress/cuda_dxt.h"
|
||||
|
||||
#include "cuda_dxt/cuda_dxt.h"
|
||||
|
||||
#include "host.h"
|
||||
#include "module.h"
|
||||
#include "video.h"
|
||||
|
||||
struct state_video_compress_cuda_dxt {
|
||||
state_video_compress_cuda_dxt() {
|
||||
memset(&saved_desc, 0, sizeof(saved_desc));
|
||||
out[0] = out[1] = NULL;
|
||||
in_buffer = NULL;
|
||||
cuda_in_buffer = NULL;
|
||||
cuda_uyvy_buffer = NULL;
|
||||
}
|
||||
struct module module_data;
|
||||
struct video_desc saved_desc;
|
||||
char *in_buffer; ///< for decoded data
|
||||
char *cuda_uyvy_buffer; ///< same as in_buffer but in device memory
|
||||
char *cuda_in_buffer; ///< same as in_buffer but in device memory
|
||||
struct tile *out[2];
|
||||
codec_t in_codec;
|
||||
codec_t out_codec;
|
||||
decoder_t decoder;
|
||||
};
|
||||
|
||||
static void cuda_dxt_compress_done(struct module *mod);
|
||||
|
||||
struct module *cuda_dxt_compress_init(struct module *parent, char *fmt)
|
||||
{
|
||||
state_video_compress_cuda_dxt *s =
|
||||
new state_video_compress_cuda_dxt;
|
||||
|
||||
s->out_codec = DXT1;
|
||||
|
||||
if (fmt && fmt[0] != '\0') {
|
||||
if (strcasecmp(fmt, "DXT5") == 0) {
|
||||
s->out_codec = DXT5;
|
||||
} else if (strcasecmp(fmt, "DXT1") == 0) {
|
||||
s->out_codec = DXT1;
|
||||
} else {
|
||||
printf("usage:\n"
|
||||
"\t-c cuda_dxt[:DXT1|:DXT5]\n");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
module_init_default(&s->module_data);
|
||||
s->module_data.cls = MODULE_CLASS_DATA;
|
||||
s->module_data.priv_data = s;
|
||||
s->module_data.deleter = cuda_dxt_compress_done;
|
||||
module_register(&s->module_data, parent);
|
||||
|
||||
return &s->module_data;
|
||||
}
|
||||
|
||||
static void cleanup(struct state_video_compress_cuda_dxt *s)
|
||||
{
|
||||
if (s->in_buffer) {
|
||||
free(s->in_buffer);
|
||||
s->in_buffer = NULL;
|
||||
}
|
||||
if (s->cuda_uyvy_buffer) {
|
||||
cudaFree(s->cuda_uyvy_buffer);
|
||||
s->cuda_uyvy_buffer = NULL;
|
||||
}
|
||||
if (s->cuda_in_buffer) {
|
||||
cudaFree(s->cuda_in_buffer);
|
||||
s->cuda_in_buffer = NULL;
|
||||
}
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
if (s->out[i] != NULL) {
|
||||
cudaFree(s->out[i]->data);
|
||||
s->out[i]->data = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool configure_with(struct state_video_compress_cuda_dxt *s, struct video_desc desc)
|
||||
{
|
||||
cleanup(s);
|
||||
|
||||
if (desc.color_spec == RGB || desc.color_spec == UYVY) {
|
||||
s->in_codec = desc.color_spec;
|
||||
} else if (get_decoder_from_to(desc.color_spec, RGB, &s->decoder)) {
|
||||
s->in_codec = RGB;
|
||||
} else if (get_decoder_from_to(desc.color_spec, UYVY, &s->decoder)) {
|
||||
s->in_codec = UYVY;
|
||||
} else {
|
||||
fprintf(stderr, "Unsupported codec: %s\n", get_codec_name(desc.color_spec));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (s->in_codec == UYVY) {
|
||||
if (cudaSuccess != cudaMalloc((void **) &s->cuda_uyvy_buffer,
|
||||
desc.width * desc.height * 2)) {
|
||||
fprintf(stderr, "Could not allocate CUDA UYVY buffer.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
s->in_buffer = (char *) malloc(desc.width * desc.height * 3);
|
||||
|
||||
if (cudaSuccess != cudaMalloc((void **) &s->cuda_in_buffer,
|
||||
desc.width * desc.height * 3)) {
|
||||
fprintf(stderr, "Could not allocate CUDA output buffer.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
struct video_desc compressed_desc = desc;
|
||||
compressed_desc.color_spec = s->out_codec;
|
||||
|
||||
s->out[i] = tile_alloc_desc(compressed_desc);
|
||||
s->out[i]->data_len = desc.width * desc.height / (s->out_codec == DXT1 ? 2 : 1);
|
||||
if (cudaSuccess != cudaMallocHost((void **) &s->out[i]->data,
|
||||
s->out[i]->data_len)) {
|
||||
fprintf(stderr, "Could not allocate CUDA output buffer.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct tile *cuda_dxt_compress_tile(struct module *mod, struct tile *tx, struct video_desc *desc,
|
||||
int buffer)
|
||||
{
|
||||
struct state_video_compress_cuda_dxt *s =
|
||||
(struct state_video_compress_cuda_dxt *) mod->priv_data;
|
||||
|
||||
cudaSetDevice(cuda_devices[0]);
|
||||
|
||||
if (!video_desc_eq(*desc, s->saved_desc)) {
|
||||
if(configure_with(s, *desc)) {
|
||||
s->saved_desc = *desc;
|
||||
} else {
|
||||
fprintf(stderr, "[CUDA DXT] Reconfiguration failed!\n");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
char *in_buffer;
|
||||
if (desc->color_spec == s->in_codec) {
|
||||
in_buffer = tx->data;
|
||||
} else {
|
||||
unsigned char *line1 = (unsigned char *) tx->data;
|
||||
unsigned char *line2 = (unsigned char *) s->in_buffer;
|
||||
|
||||
for (int i = 0; i < (int) tx->height; ++i) {
|
||||
s->decoder(line2, line1, vc_get_linesize(tx->width, s->in_codec),
|
||||
0, 8, 16);
|
||||
line1 += vc_get_linesize(tx->width, desc->color_spec);
|
||||
line2 += vc_get_linesize(tx->width, s->in_codec);
|
||||
}
|
||||
in_buffer = s->in_buffer;
|
||||
}
|
||||
|
||||
if (s->in_codec == UYVY) {
|
||||
if (cudaMemcpy(s->cuda_uyvy_buffer, in_buffer, desc->width * desc->height * 2,
|
||||
cudaMemcpyHostToDevice) != cudaSuccess) {
|
||||
fprintf(stderr, "Memcpy failed: %s\n",
|
||||
cudaGetErrorString(cudaGetLastError()));
|
||||
return NULL;
|
||||
}
|
||||
if (cuda_yuv422_to_yuv444(s->cuda_uyvy_buffer, s->cuda_in_buffer,
|
||||
desc->width * desc->height, 0) != 0) {
|
||||
fprintf(stderr, "UYVY kernel failed: %s\n",
|
||||
cudaGetErrorString(cudaGetLastError()));
|
||||
}
|
||||
} else {
|
||||
if (cudaMemcpy(s->cuda_in_buffer, in_buffer, desc->width * desc->height * 3,
|
||||
cudaMemcpyHostToDevice) != cudaSuccess) {
|
||||
fprintf(stderr, "Memcpy failed: %s\n",
|
||||
cudaGetErrorString(cudaGetLastError()));
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int (*cuda_dxt_enc_func)(const void * src, void * out, int size_x, int size_y, cudaStream_t stream);
|
||||
|
||||
if (s->out_codec == DXT1) {
|
||||
if (s->in_codec == RGB) {
|
||||
cuda_dxt_enc_func = cuda_rgb_to_dxt1;
|
||||
} else {
|
||||
cuda_dxt_enc_func = cuda_yuv_to_dxt1;
|
||||
}
|
||||
} else {
|
||||
if (s->in_codec == RGB) {
|
||||
cuda_dxt_enc_func = cuda_rgb_to_dxt6;
|
||||
} else {
|
||||
cuda_dxt_enc_func = cuda_yuv_to_dxt6;
|
||||
}
|
||||
}
|
||||
int ret = cuda_dxt_enc_func(s->cuda_in_buffer, s->out[buffer]->data, s->saved_desc.width,
|
||||
s->saved_desc.height, 0);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "Encoding failed: %s\n",
|
||||
cudaGetErrorString(cudaGetLastError()));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
desc->color_spec = s->out_codec;
|
||||
return s->out[buffer];
|
||||
}
|
||||
|
||||
static void cuda_dxt_compress_done(struct module *mod)
|
||||
{
|
||||
struct state_video_compress_cuda_dxt *s =
|
||||
(struct state_video_compress_cuda_dxt *) mod->priv_data;
|
||||
|
||||
cleanup(s);
|
||||
|
||||
delete s;
|
||||
}
|
||||
|
||||
55
src/video_compress/cuda_dxt.h
Normal file
55
src/video_compress/cuda_dxt.h
Normal file
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* @file video_compress/cuda_dxt.h
|
||||
* @author Martin Pulec <pulec@cesnet.cz>
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013 CESNET z.s.p.o.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, is permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of CESNET nor the names of its contributors may be
|
||||
* used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
|
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#define JPEG_TO_DXT_MAGIC 0x20BF0088
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
struct module;
|
||||
|
||||
struct module *cuda_dxt_compress_init(struct module *parent, char * opts);
|
||||
struct tile *cuda_dxt_compress_tile(struct module *mod, struct tile *tx, struct video_desc *desc,
|
||||
int buffer);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
Reference in New Issue
Block a user