Windows: enable CUDA stuff

+ make VS project for for GPUJPEG
This commit is contained in:
Martin Pulec
2013-08-23 15:16:44 +02:00
parent dc40fc274b
commit ac28e9eb17
45 changed files with 1048 additions and 362 deletions

View File

@@ -153,7 +153,8 @@ modules: @LIB_TARGETS@
$(TARGET): $(OBJS) $(ULTRAGRID_OBJS) $(HEADERS)
@if [ ! -d bin ]; then mkdir bin; fi
$(LINKER) -rdynamic $(LDFLAGS) $(OBJS) $(ULTRAGRID_OBJS) $(LIBS) -o $(TARGET)
$(LINKER) $(LDFLAGS) $(OBJS) $(ULTRAGRID_OBJS) $(LIBS) -o $(TARGET)
if [ -n "@DLL_LIBS@" ]; then $(INSTALL) -m 644 @DLL_LIBS@ bin; fi
bin/import_control_keyboard: src/import_control_keyboard.o
$(LINKER) $(LDFLAGS) $< @IMPORT_CONTROL_KEYBOARD_LIBS@ -o $@
@@ -173,7 +174,9 @@ $(REFLECTOR_TARGET): $(OBJS) $(HEADERS) $(REFLECTOR_OBJS)
# Pattern rule for compiling CUDA files
%.cu.o: %.cu
$(NVCC) $(NVCCFLAGS) -Xcompiler -fPIC -c $< -o $@
$(NVCC) $(NVCCFLAGS) $(INC) -c $< -o $@
%.cu.lib: %.cu
$(NVCC) $(NVCCFLAGS) -DEXPORT_DLL_SYMBOLS $(INC) --shared $< -o $<.dll
src/audio/resample.o:
$(CC) $(CFLAGS) $(INC) -DEXPORT="" -DRANDOM_PREFIX=speex -DFLOATING_POINT -DOUTSIDE_SPEEX -I. -I speex-1.2rc1/include/speex -Iinclude -fvisibility=hidden -c speex-1.2rc1/libspeex/resample.c -fPIC -DPIC -o $@
@@ -415,7 +418,7 @@ clean:
-rm -rf $(BUNDLE)
-rm -rf $(PERF) src/uv_perf.o
-rm -rf $(REFLECTOR_TARGET) $(REFLECTOR_OBJS)
-rm -rf @LIB_OBJS@ @LIB_TARGETS@ @LIB_HEADERS@ @X_OBJ@ @GL_COMMON_OBJ@
-rm -rf @LIB_OBJS@ @LIB_TARGETS@ @LIB_HEADERS@ @X_OBJ@ @GL_COMMON_OBJ@ @CUDA_COMMON_OBJ@
-rm -rf bin/import_control_keyboard
[ ! -f gpujpeg/Makefile ] || make -C gpujpeg/ clean
[ -z "@FASTDXT_PATH@" ] || make -C @FASTDXT_PATH@/ clean
@@ -581,9 +584,9 @@ libavcodec: @LIBAVCODEC_DECOMPRESS_LIB_TARGET@ @LIBAVCODEC_COMPRESS_LIB_TARGET@
mkdir -p lib/ultragrid
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_jpeg.so.@video_compress_abi_version@ $^ @JPEG_LIB@ -o $@
@CUDA_DXT_COMPRESS_LIB_TARGET@: @CUDA_DXT_OBJ@
@CUDA_DXT_COMPRESS_LIB_TARGET@: @CUDA_DXT_OBJ@ @CUDA_COMMON_OBJ@
mkdir -p lib/ultragrid
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_cuda_dxt.so.@video_compress_abi_version@ $^ @CUDA_DXT_LIB@ -o $@
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_cuda_dxt.so.@video_compress_abi_version@ $^ @CUDA_DXT_LIB@ @CUDA_COMMON_OBJ@ -o $@
@RTDXT_DECOMPRESS_LIB_TARGET@: @GL_COMMON_OBJ@ @X_OBJ@ @RTDXT_COMMON_OBJ@ @RTDXT_DECOMPRESS_OBJ@ @RTDXT_COMMON_HEADERS@
mkdir -p lib/ultragrid
@@ -593,9 +596,9 @@ libavcodec: @LIBAVCODEC_DECOMPRESS_LIB_TARGET@ @LIBAVCODEC_COMPRESS_LIB_TARGET@
mkdir -p lib/ultragrid
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vdecompress_jpeg.so.@video_decompress_abi_version@ $^ @JPEG_LIB@ -o $@
@JPEG_TO_DXT_DECOMPRESS_LIB_TARGET@: @JPEG_TO_DXT_OBJ@ @JPEG_DECOMPRESS_OBJ@ @JPEG_COMMON_OBJ@
@JPEG_TO_DXT_DECOMPRESS_LIB_TARGET@: @JPEG_TO_DXT_OBJ@ @JPEG_DECOMPRESS_OBJ@ @JPEG_COMMON_OBJ@ @CUDA_COMMON_OBJ@
mkdir -p lib/ultragrid
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vdecompress_jpeg_to_dxt.so.@video_decompress_abi_version@ $^ @JPEG_TO_DXT_LIB@ -o $@
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vdecompress_jpeg_to_dxt.so.@video_decompress_abi_version@ $^ @JPEG_TO_DXT_LIB@ @CUDA_COMMON_OBJ@ -o $@
@ALSA_PLAY_LIB_TARGET@: @ALSA_PLAY_OBJ@
@@ -647,6 +650,7 @@ install: all
$(INSTALL) -m 755 data/ultragrid-bugreport-collect.sh $(DESTDIR)/$(uv_datadir)
$(INSTALL) -d -m 755 $(DESTDIR)/$(docdir)
$(INSTALL) -m 644 $(DOCS) $(DESTDIR)/$(docdir)
$(INSTALL) -m 644 @DLL_LIBS@ $(DESTDIR)/$(bindir)
uninstall:
$(RM) $(DESTDIR)/$(bindir)/uv

View File

@@ -3,17 +3,6 @@ set -e
[ -d m4 ] || mkdir m4
# variables
if [ `uname -s` = "Darwin" ]; then
LIBTOOLIZE=glibtoolize
else if [ `uname -s` = "Linux" ]; then
LIBTOOLIZE=libtoolize
else # Windows
LIBTOOLIZE=true
fi
fi
srcdir=`dirname $0`
test -z "$srcdir" && srcdir=.
@@ -22,7 +11,6 @@ ORIGDIR=`pwd`
cd $srcdir
aclocal
autoheader
$LIBTOOLIZE --copy
autoconf
$srcdir/configure --enable-gpl $@

View File

@@ -5,7 +5,6 @@ AM_INIT_AUTOMAKE([1.10])
AC_PREREQ(2.61)
AC_CONFIG_SRCDIR([src/main.c])
AC_CONFIG_MACRO_DIR([m4])
LT_INIT
PARENT=`echo $PWD | sed -e 's%/[[^/]]*$%%'`
@@ -161,7 +160,12 @@ then
CFLAGS="$CFLAGS -fPIC"
CXXFLAGS="$CXXFLAGS -fPIC"
NVCCFLAGS="$NVCCFLAGS -Xcompiler -fPIC"
CU_SUFFIX="cu.o"
else
NVCCFLAGS="-m32"
CU_SUFFIX="cu.lib"
fi
AC_SUBST(NVCCFLAGS)
AH_BOTTOM([
/*
@@ -237,19 +241,30 @@ AC_DEFUN([DEFINE_GL], [
AC_SUBST(GL_COMMON_OBJ)
])
AC_DEFUN([DEFINE_CUDA], [
if test -z "$cuda_var_defined"; then
AC_DEFINE([HAVE_CUDA], [1], [CUDA is present on the system])
LINKER=$CXX
POST_COMPILE_MSG="$POST_COMPILE_MSG\n***\nYou have compiled in JPEG support.\nIn order to use use JPEG compression and \
AC_DEFUN([CUDA_MESSAGE], [
if test -z "$cuda_msg_defined"; then
POST_COMPILE_MSG="$POST_COMPILE_MSG\n***\nYou have compiled in sort of CUDA code.\nIn order to use use it compression and \
decompression, you will need to have CUDA libraries visible to your OS.\n\
If not done so, you can accomplish this by adding line:\n\
export LD_LIBRARY_PATH=$CUDA_LIB_PATH:\\\$\$LD_LIBRARY_PATH\n\
to your .bashrc file (in home directory). To take effect immediatelly, you will need to enter:\n\
exec bash\n***\n"
cuda_msg_defined=yes
fi
])
AC_DEFUN([DEFINE_CUDA], [
if test -z "$cuda_var_defined"; then
AC_DEFINE([HAVE_CUDA], [1], [CUDA is present on the system])
CUDA_COMMON_OBJ=src/cuda_wrapper.$CU_SUFFIX
if test $system = Windows; then
DLL_LIBS="$DLL_LIBS src/cuda_wrapper.cu.dll"
fi
AC_SUBST(CUDA_COMMON_OBJ)
cuda_var_defined=yes
fi
])
AC_SUBST(DLL_LIBS)
AC_MSG_CHECKING([if_nametoindex])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
@@ -402,7 +417,8 @@ if test "$build_libraries" = yes
then
if test "$system" = "Linux"
then
LIBS+=' -ldl'
LIBS="$LIBS -ldl"
LDFLAGS+="-rdynamic -ldl"
else
AC_MSG_WARN([*** Building libraries is not supported with other system than Linux])
build_libraries=no
@@ -438,7 +454,7 @@ AC_SUBST(vo_pp_abi_version)
# ------------------------------------------------------------------------------------------------
# environment variables
#
AC_ARG_VAR([CUDA_DIRECTORY], [Directory of your Nvidia toolkit instalation.])
AC_ARG_VAR([CUDA_PATH], [Directory of your Nvidia toolkit instalation.])
AC_ARG_VAR([SAGE_DIRECTORY], [Directory of your SAGE installation.])
AC_ARG_VAR([DVS_DIRECTORY], [Path to DVS installation.])
AC_ARG_VAR([DELTACAST_DIRECTORY], [Placement of VideoMasterHD directory (Deltacast).])
@@ -690,7 +706,6 @@ then
DECKLINK_COMMON="src/video_capture/DeckLinkAPIDispatch.o"
fi
AC_DEFINE([HAVE_DECKLINK], [1], [Build with DeckLink support])
LINKER=$CXX
AC_SUBST(DECKLINK_CAP_LIB_TARGET, "lib/ultragrid/vidcap_decklink.so.$video_capture_abi_version")
AC_SUBST(DECKLINK_DISP_LIB_TARGET, "lib/ultragrid/display_decklink.so.$video_display_abi_version")
AC_SUBST(DECKLINK_SOUND_PLAY_LIB_TARGET, "lib/ultragrid/aplay_decklink.so.$audio_playback_abi_version")
@@ -1655,7 +1670,6 @@ then
FASTDXT_LIB=""
FASTDXT_OBJ="$FASTDXT_OBJ src/video_compress/fastdxt.o ${FASTDXT_PATH}/libdxt.a"
AC_DEFINE([HAVE_FASTDXT], [1], [Build with support for FastDXT])
LINKER=$CXX
AC_SUBST(FASTDXT_LIB_TARGET, "lib/ultragrid/vcompress_fastdxt.so.$video_compress_abi_version")
LIB_TARGETS="$LIB_TARGETS $FASTDXT_LIB_TARGET"
LIB_OBJS="$LIB_OBJS $FASTDXT_OBJ"
@@ -1782,10 +1796,8 @@ AC_SUBST(UYVY_LIB)
CUDA_INC=
CUDA_LIB=
NVCC=
NVCCFLAGS=
CUDA_LIB_PATH=
FOUND_CUDA=no
HAVE_CUDA=no
CUDA_PATH=$CUDA_DIRECTORY
AC_ARG_WITH(cuda,
@@ -1797,18 +1809,12 @@ SAVED_LIBS=$LIBS
LIBS="$LIBS -lcudart"
if test -z "$CUDA_PATH"
then
AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <cuda_runtime_api.h>
]],
[[int devices_count;
cudaGetDeviceCount(&devices_count);
]])],FOUND_CUDA=yes,FOUND_CUDA=no)
AC_CHECK_PROG([NVCC], [nvcc], [nvcc],
[])
fi
if test $FOUND_CUDA = no -o -z "$NVCC"
if test -z "$NVCC"
then
FOUND_CUDA=no
NVCC=
if test -z "$CUDA_PATH"; then
CUDA_PATH=/usr/local/cuda
@@ -1821,11 +1827,6 @@ then
else
LIBS="$LIBS -L$CUDA_PATH/lib"
fi
AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <cuda_runtime_api.h>
]],
[[int devices_count;
cudaGetDeviceCount(&devices_count);
]])],FOUND_CUDA=yes,FOUND_CUDA=no)
CFLAGS=$SAVED_CFLAGS
AC_CHECK_PROG([NVCC], [nvcc], [$CUDA_PATH/bin/nvcc],
[],
@@ -1834,9 +1835,17 @@ fi
LIBS=$SAVED_LIBS
if test -n "$NVCC" -a $FOUND_CUDA = yes
if test -n "$NVCC"
then
HAVE_CUDA=yes
if test $system = Windows; then
AC_CHECK_PROG([CL], [cl], [cl],
[])
if test -n $CL; then
FOUND_CUDA=yes
fi
else
FOUND_CUDA=yes
fi
AC_MSG_CHECKING([CUDA Toolkit version - major])
nvcc_major=`$NVCC --version |grep release|sed 's/^.*release \(@<:@0-9@:>@@<:@0-9@:>@*\).*$/\1/'`
AC_MSG_RESULT($nvcc_major)
@@ -1859,10 +1868,8 @@ then
CUDA_LIB="-L$CUDA_LIB_PATH -lcudart"
fi
AC_SUBST(HAVE_CUDA)
AC_SUBST(NVCC)
AC_SUBST(FOUND_CUDA)
AC_SUBST(NVCCFLAGS)
AC_SUBST(CUDA_PATH)
AC_SUBST(CUDA_LIB)
AC_SUBST(CUDA_INC)
@@ -1893,46 +1900,23 @@ CFLAGS=$SAVED_CFLAGS
CXXFLAGS=$SAVED_CXXFLAGS
CPPFLAGS=$SAVED_CPPFLAGS
if test $HAVE_CUDA = yes
if test $system = Windows
then
if test \( $nvcc_major -gt 4 \) -o \( $nvcc_major -eq 4 -a $nvcc_minor -ge 1 \) -o $version_check = no
then
jpeg_cuda_version_ok=yes
else
AC_MSG_WARN([*** CUDA Toolkit older than 4.1 detected. Please install at least v4.1 to use JPEG.])
HM_VERSION_WARNING
fi
if test $version_check = no -o \
\( $system = Linux -o $os_version_major -ge 10 \) # Linux or Mac at least Snow Leopard
then
jpeg_os_version_ok=yes
else
AC_MSG_WARN([*** Detected unsupported OS version for CUDA.])
HM_VERSION_WARNING
fi
JPEG_LIB="$JPEG_LIB -Lgpujpeg/Release"
DLL_LIBS="$DLL_LIBS gpujpeg/Release/gpujpeg.dll"
fi
SAVED_LIBS=$LIBS
LIBS="$LIBS $JPEG_LIB"
AC_CHECK_LIB([gpujpeg], [gpujpeg_encoder_create])
LIBS=$SAVED_LIBS
# Used also by JPEG_TO_DXT
if test "$ac_cv_lib_gpujpeg_gpujpeg_encoder_create" = yes -a \
$HAVE_CUDA = yes -a "$jpeg_cuda_version_ok" = yes -a \
"$jpeg_os_version_ok" = yes
then
jpeg_env_ok=yes
else
jpeg_env_ok=no
fi
if test "$jpeg_req" != no -a $jpeg_env_ok = yes
if test "$jpeg_req" != no -a \
"$ac_cv_lib_gpujpeg_gpujpeg_encoder_create" = yes
then
jpeg=yes
JPEG_LIB=" $CUDA_LIB"
JPEG_INC=$CUDA_INC
JPEG_INC="$JPEG_INC -Igpujpeg"
JPEG_COMMON_OBJ="src/video_compress/jpeg.o"
JPEG_LIB="$JPEG_LIB -lgpujpeg"
JPEG_DECOMPRESS_OBJ="src/video_decompress/jpeg.o "
@@ -1941,7 +1925,7 @@ then
AC_SUBST(JPEG_DECOMPRESS_LIB_TARGET, "lib/ultragrid/vdecompress_jpeg.so.$video_decompress_abi_version")
LIB_TARGETS="$LIB_TARGETS $JPEG_COMPRESS_LIB_TARGET $JPEG_DECOMPRESS_LIB_TARGET"
LIB_OBJS="$LIB_OBJS $JPEG_DECOMPRESS_OBJ $JPEG_COMMON_OBJ"
DEFINE_CUDA
CUDA_MESSAGE
fi
if test $jpeg_req = yes -a $jpeg = no; then
@@ -1965,6 +1949,16 @@ CUDA_DXT_LIB=
cuda_dxt=no
AC_DEFUN([ADD_CUDA_DXT_OBJ], [
if test -z "$included_shared_cuda_dxt_cu"; then
LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.$CU_SUFFIX"
if test $system = Windows; then
DLL_LIBS="$DLL_LIBS cuda_dxt/cuda_dxt.cu.dll"
fi
included_shared_cuda_dxt_cu=yes
fi
])
AC_ARG_ENABLE(cuda-dxt,
AS_HELP_STRING([--disable-cuda-dxt], [disable CUDA DXT compression (auto)]),
[cuda_dxt_req=$enableval],
@@ -1972,22 +1966,20 @@ AC_ARG_ENABLE(cuda-dxt,
LIBS=$SAVED_LIBS
if test "$cuda_dxt_req" != no -a $HAVE_CUDA = yes
if test "$cuda_dxt_req" != no -a $FOUND_CUDA = yes
then
cuda_dxt=yes
CUDA_DXT_LIB=" $CUDA_LIB"
CUDA_DXT_INC=$CUDA_INC
CUDA_DXT_OBJ="src/video_compress/cuda_dxt.o cuda_dxt/cuda_dxt.cu.o"
CUDA_DXT_OBJ="src/video_compress/cuda_dxt.o cuda_dxt/cuda_dxt.$CU_SUFFIX"
AC_DEFINE([HAVE_CUDA_DXT], [1], [Build with CUDA DXT support])
AC_SUBST(CUDA_DXT_COMPRESS_LIB_TARGET, "lib/ultragrid/vcompress_cuda_dxt.so.$video_compress_abi_version")
LIB_TARGETS="$LIB_TARGETS $CUDA_DXT_COMPRESS_LIB_TARGET"
LIB_OBJS="$LIB_OBJS src/video_compress/cuda_dxt.o"
if test -z "$included_shared_cuda_dxt_cu"; then
LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.cu.o"
included_shared_cuda_dxt_cu=yes
fi
ADD_CUDA_DXT_OBJ
DEFINE_CUDA
CUDA_MESSAGE
fi
if test $cuda_dxt_req = yes -a $cuda_dxt = no; then
@@ -2009,21 +2001,20 @@ AC_ARG_ENABLE(jpeg_to_dxt,
[jpeg_to_dxt_req=$enableval],
[jpeg_to_dxt_req=auto])
if test $jpeg_env_ok = yes -a $jpeg_to_dxt_req != no
if test $jpeg_to_dxt_req != no -a $FOUND_CUDA = yes -a \
"$ac_cv_lib_gpujpeg_gpujpeg_encoder_create" = yes
then
jpeg_to_dxt=yes
JPEG_TO_DXT_INC=" $CUDA_INC"
JPEG_TO_DXT_LIB=" $CUDA_LIB -lgpujpeg"
JPEG_TO_DXT_OBJ="src/video_decompress/jpeg_to_dxt.o cuda_dxt/cuda_dxt.cu.o"
JPEG_TO_DXT_OBJ="src/video_decompress/jpeg_to_dxt.o cuda_dxt/cuda_dxt.$CU_SUFFIX"
AC_SUBST(JPEG_TO_DXT_DECOMPRESS_LIB_TARGET, "lib/ultragrid/vdecompress_jpeg_to_dxt.so.$video_decompress_abi_version")
LIB_TARGETS="$LIB_TARGETS $JPEG_TO_DXT_DECOMPRESS_LIB_TARGET"
LIB_OBJS="$LIB_OBJS src/video_decompress/jpeg_to_dxt.o"
AC_DEFINE([HAVE_JPEG_TO_DXT], [1], [Build with JPEG to DXT transcode support])
if test -z "$included_shared_cuda_dxt_cu"; then
LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.cu.o"
included_shared_cuda_dxt_cu=yes
fi
ADD_CUDA_DXT_OBJ
DEFINE_CUDA
CUDA_MESSAGE
fi
if test $jpeg_to_dxt = no -a $jpeg_to_dxt_req = yes
@@ -2083,7 +2074,6 @@ then
SAGE_LIB=-"L${SAGE_LIB} -lsail -lquanta"
SAGE_OBJ="src/video_display/sage.o"
AC_DEFINE([HAVE_SAGE], [1], [Build with SAGE support])
LINKER=$CXX
AC_SUBST(SAGE_LIB_TARGET, "lib/ultragrid/display_sage.so.$video_display_abi_version")
CXXFLAGS="$CXXFLAGS -DQUANTA_USE_PTHREADS -DQUANTA_THREAD_SAFE -DGLSL_YUV"
LIB_TARGETS="$LIB_TARGETS $SAGE_LIB_TARGET"
@@ -2563,7 +2553,7 @@ fi
if test "$build_libraries" != yes
then
LIBS="$LIBS $LIB_MODULES"
OBJS="$OBJS $LIB_OBJS $GL_COMMON_OBJ $X_OBJ"
OBJS="$OBJS $LIB_OBJS $GL_COMMON_OBJ $X_OBJ $CUDA_COMMON_OBJ"
HEADERS="$HEADERS $LIB_HEADERS"
LIB_OBJS=
LIB_TARGETS=
@@ -2663,6 +2653,7 @@ RESULT=\
Realtime DXT (OpenGL) ....... $rtdxt
JPEG ........................ $jpeg
JPEG to DXT ................. $jpeg_to_dxt
CUDA DXT .................... $cuda_dxt
UYVY dummy compression ...... $uyvy
Libavcodec .................. $libavcodec (audio: $libavcodec_audio)

View File

@@ -4,6 +4,7 @@
/// @brief CUDA implementation of DXT compression
///
#include <cuda_runtime_api.h>
#include <stdio.h>
#include "cuda_dxt.h"
@@ -721,9 +722,9 @@ __global__ static void yuv422_to_yuv444_kernel(const void * src, void * out, int
out_pix[1].w = pix34.x;
out_pix[2].x = pix34.z;
out_pix[2].z = pix34.w;
out_pix[2].w = pix34.x;
out_pix[2].x = pix34.z;
out_pix[2].y = pix34.w;
out_pix[2].z = pix34.x;
out_pix[2].w = pix34.z;
this_out[0] = out_pix[0];
this_out[1] = out_pix[1];
@@ -758,13 +759,14 @@ static int dxt_launch(const void * src, void * out, int sx, int sy, cudaStream_t
return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0;
}
int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStream_t str) {
CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out,
int pix_count, cuda_wrapper_stream_t str) {
// grid and threadblock sizes
const dim3 tsiz(64, 1);
int thread_count = pix_count / 4; // we process block of 4 pixels
const dim3 gsiz((thread_count + tsiz.x - 1) / tsiz.x, 1);
yuv422_to_yuv444_kernel<<<gsiz, tsiz, 0, str>>>(src, out, pix_count);
return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0;
yuv422_to_yuv444_kernel<<<gsiz, tsiz, 0, (cudaStream_t) str>>>(src, out, pix_count);
return cudaSuccess != cudaStreamSynchronize((cudaStream_t) str) ? -3 : 0;
}
/// CUDA DXT1 compression (only RGB without alpha).
@@ -777,8 +779,9 @@ int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStrea
/// @param size_y Height of the input image (must be divisible by 4).
/// @param stream CUDA stream to run in, or 0 for default stream.
/// @return 0 if OK, nonzero if failed.
int cuda_rgb_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) {
return dxt_launch<false, 1>(src, out, size_x, size_y, stream);
CUDA_DLL_API int cuda_rgb_to_dxt1(const void * src, void * out,
int size_x, int size_y, cuda_wrapper_stream_t stream) {
return dxt_launch<false, 1>(src, out, size_x, size_y, (cudaStream_t) stream);
}
@@ -793,8 +796,9 @@ int cuda_rgb_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaS
/// @param size_y Height of the input image (must be divisible by 4).
/// @param stream CUDA stream to run in, or 0 for default stream.
/// @return 0 if OK, nonzero if failed.
int cuda_yuv_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) {
return dxt_launch<true, 1>(src, out, size_x, size_y, stream);
CUDA_DLL_API int cuda_yuv_to_dxt1(const void * src, void * out,
int size_x, int size_y, cuda_wrapper_stream_t stream) {
return dxt_launch<true, 1>(src, out, size_x, size_y, (cudaStream_t) stream);
}
@@ -809,11 +813,13 @@ int cuda_yuv_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaS
/// (Input is read bottom up if negative)
/// @param stream CUDA stream to run in, or 0 for default stream.
/// @return 0 if OK, nonzero if failed.
int cuda_rgb_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) {
return dxt_launch<false, 6>(src, out, size_x, size_y, stream);
CUDA_DLL_API int cuda_rgb_to_dxt6(const void * src, void * out,
int size_x, int size_y, cuda_wrapper_stream_t stream) {
return dxt_launch<false, 6>(src, out, size_x, size_y, (cudaStream_t) stream);
}
int cuda_yuv_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) {
return dxt_launch<true, 6>(src, out, size_x, size_y, stream);
CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out,
int size_x, int size_y, cuda_wrapper_stream_t stream) {
return dxt_launch<true, 6>(src, out, size_x, size_y, (cudaStream_t) stream);
}

View File

@@ -8,13 +8,12 @@
#ifndef CUDA_DXT_H
#define CUDA_DXT_H
#include "cuda_wrapper.h"
#ifdef __cplusplus
extern "C" {
#endif
#include <cuda_runtime_api.h>
/**
* CUDA DXT1 compression (only RGB without alpha).
* @param src Pointer to top-left source pixel in device-memory buffer.
@@ -28,13 +27,13 @@ extern "C" {
* @param stream CUDA stream to run in, or 0 for default stream.
* @return 0 if OK, nonzero if failed.
*/
int cuda_rgb_to_dxt1
CUDA_DLL_API int cuda_rgb_to_dxt1
(
const void * src,
void * out,
int size_x,
int size_y,
cudaStream_t stream
cuda_wrapper_stream_t stream
);
@@ -52,13 +51,13 @@ int cuda_rgb_to_dxt1
* @param stream CUDA stream to run in, or 0 for default stream.
* @return 0 if OK, nonzero if failed.
*/
int cuda_yuv_to_dxt1
CUDA_DLL_API int cuda_yuv_to_dxt1
(
const void * src,
void * out,
int size_x,
int size_y,
cudaStream_t stream
cuda_wrapper_stream_t stream
);
@@ -75,17 +74,19 @@ int cuda_yuv_to_dxt1
* @param stream CUDA stream to run in, or 0 for default stream.
* @return 0 if OK, nonzero if failed.
*/
int cuda_rgb_to_dxt6
CUDA_DLL_API int cuda_rgb_to_dxt6
(
const void * src,
void * out,
int size_x,
int size_y,
cudaStream_t stream
cuda_wrapper_stream_t stream
);
int cuda_yuv_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream);
int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStream_t str);
CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out,
int size_x, int size_y, cuda_wrapper_stream_t stream);
CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out,
int pix_count, cuda_wrapper_stream_t str);
#ifdef __cplusplus
} /* end of extern "C" */

9
gpujpeg/.gitignore vendored
View File

@@ -1,4 +1,3 @@
gpujpeg
.libs
src/*.o
src/*.lo
@@ -28,3 +27,11 @@ src/.dirstamp
stamp-h1
libgpujpeg.la
# VS
*.pdb
*.sdf
*.suo
*.user
Debug
Release

View File

@@ -27,21 +27,21 @@ pkgconfig_DATA = libgpujpeg.pc
library_include_HEADERS = libgpujpeg/*.h
nodist_gpujpeg_libinclude_HEADERS = config.h
gpujpeg_SOURCES = src/main.c
gpujpeg_SOURCES = src/main.c
gpujpeg_CFLAGS = -std=c99 @COMMON_CFLAGS@
gpujpeg_LDADD = libgpujpeg.la
gpujpeg_LDFLAGS = @GPUJPEG_LDFLAGS@
# gpu jpeg library sources
libgpujpeg_la_SOURCES = src/gpujpeg_common.c \
src/gpujpeg_dct_cpu.c \
src/gpujpeg_decoder.c \
src/gpujpeg_encoder.c \
src/gpujpeg_huffman_cpu_decoder.c \
src/gpujpeg_huffman_cpu_encoder.c \
src/gpujpeg_reader.c \
src/gpujpeg_table.c \
src/gpujpeg_writer.c
libgpujpeg_la_SOURCES = src/gpujpeg_common.cpp \
src/gpujpeg_dct_cpu.cpp \
src/gpujpeg_decoder.cpp \
src/gpujpeg_encoder.cpp \
src/gpujpeg_huffman_cpu_decoder.cpp \
src/gpujpeg_huffman_cpu_encoder.cpp \
src/gpujpeg_reader.cpp \
src/gpujpeg_table.cpp \
src/gpujpeg_writer.cpp
libgpujpeg_la_DEPENDENCIES = @LIBGPUJPEG_CUDA_OBJS@

View File

@@ -10,13 +10,6 @@ else
LIBTOOLIZE=libtoolize
fi
if [ ! -x ../ltmain.sh ]
then
cd ..
$LIBTOOLIZE --copy
cd -
fi
autoheader && \
$LIBTOOLIZE --copy && \
( [ -d m4 ] || mkdir m4 ) && \

View File

@@ -2,6 +2,8 @@ AC_PREREQ([2.65])
AC_INIT([libgpujpeg], [0.0.1], [martin.srom@mail.muni.cz], [libgpujpeg], [https://sourceforge.net/p/gpujpeg/])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([src/main.c])
AC_CONFIG_AUX_DIR([.])
AM_MAINTAINER_MODE
AM_INIT_AUTOMAKE
AC_CONFIG_HEADERS([config.h])

20
gpujpeg/gpujpeg.sln Normal file
View File

@@ -0,0 +1,20 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Express 2012 for Windows Desktop
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpujpeg", "gpujpeg.vcxproj", "{B9D06885-F4F3-4B01-8C43-E131210B9F27}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{B9D06885-F4F3-4B01-8C43-E131210B9F27}.Debug|Win32.ActiveCfg = Debug|Win32
{B9D06885-F4F3-4B01-8C43-E131210B9F27}.Debug|Win32.Build.0 = Debug|Win32
{B9D06885-F4F3-4B01-8C43-E131210B9F27}.Release|Win32.ActiveCfg = Release|Win32
{B9D06885-F4F3-4B01-8C43-E131210B9F27}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

132
gpujpeg/gpujpeg.vcxproj Normal file
View File

@@ -0,0 +1,132 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{B9D06885-F4F3-4B01-8C43-E131210B9F27}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>gpujpeg</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v110</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v110</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;GPUJPEG_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>.;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>cudart.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CudaCompile>
<CodeGeneration>compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;%(CodeGeneration)</CodeGeneration>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;GPUJPEG_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>.;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>cudart.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CudaCompile>
<CodeGeneration>compute_20,sm_20;compute_30,sm_30;compute_35,sm_53;%(CodeGeneration)</CodeGeneration>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="libgpujpeg\gpujpeg.h" />
<ClInclude Include="libgpujpeg\gpujpeg_common.h" />
<ClInclude Include="libgpujpeg\gpujpeg_common_internal.h" />
<ClInclude Include="libgpujpeg\gpujpeg_decoder.h" />
<ClInclude Include="libgpujpeg\gpujpeg_decoder_internal.h" />
<ClInclude Include="libgpujpeg\gpujpeg_encoder.h" />
<ClInclude Include="libgpujpeg\gpujpeg_encoder_internal.h" />
<ClInclude Include="libgpujpeg\gpujpeg_reader.h" />
<ClInclude Include="libgpujpeg\gpujpeg_table.h" />
<ClInclude Include="libgpujpeg\gpujpeg_type.h" />
<ClInclude Include="libgpujpeg\gpujpeg_util.h" />
<ClInclude Include="libgpujpeg\gpujpeg_writer.h" />
<ClInclude Include="src\gpujpeg_colorspace.h" />
<ClInclude Include="src\gpujpeg_dct_cpu.h" />
<ClInclude Include="src\gpujpeg_dct_gpu.h" />
<ClInclude Include="src\gpujpeg_huffman_cpu_decoder.h" />
<ClInclude Include="src\gpujpeg_huffman_cpu_encoder.h" />
<ClInclude Include="src\gpujpeg_huffman_gpu_decoder.h" />
<ClInclude Include="src\gpujpeg_huffman_gpu_encoder.h" />
<ClInclude Include="src\gpujpeg_preprocessor.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\gpujpeg_common.cpp" />
<ClCompile Include="src\gpujpeg_dct_cpu.cpp" />
<ClCompile Include="src\gpujpeg_decoder.cpp" />
<ClCompile Include="src\gpujpeg_encoder.cpp" />
<ClCompile Include="src\gpujpeg_huffman_cpu_decoder.cpp" />
<ClCompile Include="src\gpujpeg_huffman_cpu_encoder.cpp" />
<ClCompile Include="src\gpujpeg_reader.cpp" />
<ClCompile Include="src\gpujpeg_table.cpp" />
<ClCompile Include="src\gpujpeg_writer.cpp" />
</ItemGroup>
<ItemGroup>
<CudaCompile Include="src\gpujpeg_dct_gpu.cu" />
<CudaCompile Include="src\gpujpeg_huffman_gpu_decoder.cu" />
<CudaCompile Include="src\gpujpeg_huffman_gpu_encoder.cu" />
<CudaCompile Include="src\gpujpeg_preprocessor.cu" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.targets" />
</ImportGroup>
</Project>

View File

@@ -0,0 +1,122 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="libgpujpeg\gpujpeg.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_common.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_common_internal.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_decoder.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_decoder_internal.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_encoder.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_encoder_internal.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_reader.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_table.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_type.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_util.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="libgpujpeg\gpujpeg_writer.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\gpujpeg_colorspace.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src\gpujpeg_dct_cpu.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src\gpujpeg_dct_gpu.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src\gpujpeg_huffman_cpu_decoder.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src\gpujpeg_huffman_cpu_encoder.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src\gpujpeg_huffman_gpu_decoder.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src\gpujpeg_huffman_gpu_encoder.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src\gpujpeg_preprocessor.h">
<Filter>Source Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\gpujpeg_common.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\gpujpeg_dct_cpu.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\gpujpeg_decoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\gpujpeg_encoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\gpujpeg_huffman_cpu_decoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\gpujpeg_huffman_cpu_encoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\gpujpeg_reader.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\gpujpeg_table.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\gpujpeg_writer.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<CudaCompile Include="src\gpujpeg_dct_gpu.cu">
<Filter>Source Files</Filter>
</CudaCompile>
<CudaCompile Include="src\gpujpeg_huffman_gpu_decoder.cu">
<Filter>Source Files</Filter>
</CudaCompile>
<CudaCompile Include="src\gpujpeg_huffman_gpu_encoder.cu">
<Filter>Source Files</Filter>
</CudaCompile>
<CudaCompile Include="src\gpujpeg_preprocessor.cu">
<Filter>Source Files</Filter>
</CudaCompile>
</ItemGroup>
</Project>

View File

@@ -37,6 +37,16 @@
extern "C" {
#endif
#if defined _MSC_VER || defined __MINGW32__
#ifdef GPUJPEG_EXPORTS
#define GPUJPEG_API __declspec(dllexport)
#else
#define GPUJPEG_API __declspec(dllimport)
#endif
#else // other platforms
#define GPUJPEG_API
#endif
/** Marker used as segment info */
#define GPUJPEG_MARKER_SEGMENT_INFO GPUJPEG_MARKER_APP13
@@ -80,7 +90,7 @@ struct gpujpeg_devices_info
*
* @return devices info
*/
struct gpujpeg_devices_info
GPUJPEG_API struct gpujpeg_devices_info
gpujpeg_get_devices_info();
/**
@@ -88,7 +98,7 @@ gpujpeg_get_devices_info();
*
* @return void
*/
void
GPUJPEG_API void
gpujpeg_print_devices_info();
/**
@@ -99,7 +109,7 @@ gpujpeg_print_devices_info();
* enable OpenGL interoperability (GPUJPEG_OPENGL_INTEROPERABILITY)
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_init_device(int device_id, int flags);
/**
@@ -144,7 +154,7 @@ struct gpujpeg_parameters
* @param param Parameters for JPEG coder
* @return void
*/
void
GPUJPEG_API void
gpujpeg_set_default_parameters(struct gpujpeg_parameters* param);
/**
@@ -153,7 +163,7 @@ gpujpeg_set_default_parameters(struct gpujpeg_parameters* param);
* @param param Parameters for coder
* @return void
*/
void
GPUJPEG_API void
gpujpeg_parameters_chroma_subsampling(struct gpujpeg_parameters* param);
/**
@@ -181,7 +191,7 @@ struct gpujpeg_image_parameters {
* @param param Parameters for image
* @return void
*/
void
GPUJPEG_API void
gpujpeg_image_set_default_parameters(struct gpujpeg_image_parameters* param);
/** Image file formats */
@@ -206,9 +216,16 @@ enum gpujpeg_image_file_format {
* @param filename Filename of image file
* @return image_file_format or GPUJPEG_IMAGE_FILE_UNKNOWN if type cannot be determined
*/
enum gpujpeg_image_file_format
GPUJPEG_API enum gpujpeg_image_file_format
gpujpeg_image_get_file_format(const char* filename);
/**
* Sets cuda device.
*
* @param index Index of the CUDA device to be activated.
*/
GPUJPEG_API void gpujpeg_set_device(int index);
/**
* JPEG segment structure. Segment is data in scan generated by huffman coder
* for N consecutive MCUs, where N is restart interval (e.g. data for MCUs between
@@ -428,7 +445,7 @@ gpujpeg_coder_deinit(struct gpujpeg_coder* coder);
* @param param Image parameters
* @return calculate size
*/
int
GPUJPEG_API int
gpujpeg_image_calculate_size(struct gpujpeg_image_parameters* param);
/**
@@ -439,7 +456,7 @@ gpujpeg_image_calculate_size(struct gpujpeg_image_parameters* param);
* @param image_size Image data buffer size (can be specified for verification or 0 for retrieval)
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_image_load_from_file(const char* filename, uint8_t** image, int* image_size);
/**
@@ -450,7 +467,7 @@ gpujpeg_image_load_from_file(const char* filename, uint8_t** image, int* image_s
* @param image_size Image data buffer size
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_image_save_to_file(const char* filename, uint8_t* image, int image_size);
/**
@@ -459,7 +476,7 @@ gpujpeg_image_save_to_file(const char* filename, uint8_t* image, int image_size)
* @param image Image data buffer
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_image_destroy(uint8_t* image);
/**
@@ -470,7 +487,7 @@ gpujpeg_image_destroy(uint8_t* image);
* @param height
* @param sampling_factor
*/
void
GPUJPEG_API void
gpujpeg_image_range_info(const char* filename, int width, int height, enum gpujpeg_sampling_factor sampling_factor);
/**
@@ -481,7 +498,7 @@ gpujpeg_image_range_info(const char* filename, int width, int height, enum gpujp
* @param param_image_from
* @param param_image_to
*/
void
GPUJPEG_API void
gpujpeg_image_convert(const char* input, const char* output, struct gpujpeg_image_parameters param_image_from,
struct gpujpeg_image_parameters param_image_to);
@@ -490,7 +507,7 @@ gpujpeg_image_convert(const char* input, const char* output, struct gpujpeg_imag
*
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_opengl_init();
/**
@@ -501,7 +518,7 @@ gpujpeg_opengl_init();
* @param data
* @return nonzero texture id if succeeds, otherwise 0
*/
int
GPUJPEG_API int
gpujpeg_opengl_texture_create(int width, int height, uint8_t* data);
/**
@@ -511,7 +528,7 @@ gpujpeg_opengl_texture_create(int width, int height, uint8_t* data);
* @param data
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_opengl_texture_set_data(int texture_id, uint8_t* data);
/**
@@ -522,7 +539,7 @@ gpujpeg_opengl_texture_set_data(int texture_id, uint8_t* data);
* @param data_size
* @return 0 data if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_opengl_texture_get_data(int texture_id, uint8_t* data, int* data_size);
/**
@@ -530,7 +547,7 @@ gpujpeg_opengl_texture_get_data(int texture_id, uint8_t* data, int* data_size);
*
* @param texture_id
*/
void
GPUJPEG_API void
gpujpeg_opengl_texture_destroy(int texture_id);
/**
@@ -592,7 +609,7 @@ struct gpujpeg_opengl_texture
* @param texture_id
* @return allocated registred texture structure
*/
struct gpujpeg_opengl_texture*
GPUJPEG_API struct gpujpeg_opengl_texture*
gpujpeg_opengl_texture_register(int texture_id, enum gpujpeg_opengl_texture_type texture_type);
/**
@@ -601,7 +618,7 @@ gpujpeg_opengl_texture_register(int texture_id, enum gpujpeg_opengl_texture_type
*
* @param texture
*/
void
GPUJPEG_API void
gpujpeg_opengl_texture_unregister(struct gpujpeg_opengl_texture* texture);
/**
@@ -627,81 +644,6 @@ gpujpeg_opengl_texture_map(struct gpujpeg_opengl_texture* texture, int* data_siz
void
gpujpeg_opengl_texture_unmap(struct gpujpeg_opengl_texture* texture);
/**
* Declare timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_DECLARE(name) \
cudaEvent_t name ## _start__; \
cudaEvent_t name ## _stop__; \
float name ## _elapsedTime__; \
/**
* Create timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_CREATE(name) \
cudaEventCreate(&name ## _start__); \
cudaEventCreate(&name ## _stop__); \
/**
* Start timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_START(name) \
cudaEventRecord(name ## _start__, 0) \
/**
* Stop timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_STOP(name) \
cudaEventRecord(name ## _stop__, 0); \
cudaEventSynchronize(name ## _stop__); \
cudaEventElapsedTime(&name ## _elapsedTime__, name ## _start__, name ## _stop__) \
/**
* Get duration for timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_DURATION(name) name ## _elapsedTime__
/**
* Stop timer and print result
*
* @param name
* @param text
*/
#define GPUJPEG_CUSTOM_TIMER_STOP_PRINT(name, text) \
GPUJPEG_CUSTOM_TIMER_STOP(name); \
printf("%s %f ms\n", text, name ## _elapsedTime__) \
/**
* Destroy timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_DESTROY(name) \
cudaEventDestroy(name ## _start__); \
cudaEventDestroy(name ## _stop__); \
/**
* Default timer implementation
*/
#define GPUJPEG_TIMER_INIT() \
GPUJPEG_CUSTOM_TIMER_DECLARE(def) \
GPUJPEG_CUSTOM_TIMER_CREATE(def)
#define GPUJPEG_TIMER_START() GPUJPEG_CUSTOM_TIMER_START(def)
#define GPUJPEG_TIMER_STOP() GPUJPEG_CUSTOM_TIMER_STOP(def)
#define GPUJPEG_TIMER_DURATION() GPUJPEG_CUSTOM_TIMER_DURATION(def)
#define GPUJPEG_TIMER_STOP_PRINT(text) GPUJPEG_CUSTOM_TIMER_STOP_PRINT(def, text)
#define GPUJPEG_TIMER_DEINIT() GPUJPEG_CUSTOM_TIMER_DESTROY(def)
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,110 @@
/**
* Copyright (c) 2011, CESNET z.s.p.o
* Copyright (c) 2011, Silicon Genome, LLC.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GPUJPEG_COMMON_INTERNAL_H
#define GPUJPEG_COMMON_INTERNAL_H
#include "cuda_runtime.h"
/**
* Declare timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_DECLARE(name) \
cudaEvent_t name ## _start__; \
cudaEvent_t name ## _stop__; \
float name ## _elapsedTime__; \
/**
* Create timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_CREATE(name) \
cudaEventCreate(&name ## _start__); \
cudaEventCreate(&name ## _stop__); \
/**
* Start timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_START(name) \
cudaEventRecord(name ## _start__, 0) \
/**
* Stop timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_STOP(name) \
cudaEventRecord(name ## _stop__, 0); \
cudaEventSynchronize(name ## _stop__); \
cudaEventElapsedTime(&name ## _elapsedTime__, name ## _start__, name ## _stop__) \
/**
* Get duration for timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_DURATION(name) name ## _elapsedTime__
/**
* Stop timer and print result
*
* @param name
* @param text
*/
#define GPUJPEG_CUSTOM_TIMER_STOP_PRINT(name, text) \
GPUJPEG_CUSTOM_TIMER_STOP(name); \
printf("%s %f ms\n", text, name ## _elapsedTime__) \
/**
* Destroy timer
*
* @param name
*/
#define GPUJPEG_CUSTOM_TIMER_DESTROY(name) \
cudaEventDestroy(name ## _start__); \
cudaEventDestroy(name ## _stop__); \
/**
* Default timer implementation
*/
#define GPUJPEG_TIMER_INIT() \
GPUJPEG_CUSTOM_TIMER_DECLARE(def) \
GPUJPEG_CUSTOM_TIMER_CREATE(def)
#define GPUJPEG_TIMER_START() GPUJPEG_CUSTOM_TIMER_START(def)
#define GPUJPEG_TIMER_STOP() GPUJPEG_CUSTOM_TIMER_STOP(def)
#define GPUJPEG_TIMER_DURATION() GPUJPEG_CUSTOM_TIMER_DURATION(def)
#define GPUJPEG_TIMER_STOP_PRINT(text) GPUJPEG_CUSTOM_TIMER_STOP_PRINT(def, text)
#define GPUJPEG_TIMER_DEINIT() GPUJPEG_CUSTOM_TIMER_DESTROY(def)
#endif // GPUJPEG_COMMON_INTERNAL_H

View File

@@ -30,14 +30,26 @@
#ifndef GPUJPEG_DECODER_H
#define GPUJPEG_DECODER_H
#include <stdint.h>
#include <libgpujpeg/gpujpeg_common.h>
#include <libgpujpeg/gpujpeg_table.h>
#include <libgpujpeg/gpujpeg_reader.h>
#include <libgpujpeg/gpujpeg_type.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined _MSC_VER || defined __MINGW32__
#ifdef GPUJPEG_EXPORTS
#define GPUJPEG_API __declspec(dllexport)
#else
#define GPUJPEG_API __declspec(dllimport)
#endif
#else // other platforms
#define GPUJPEG_API
#endif
struct gpujpeg_decoder;
/**
* Decoder output type
*/
@@ -76,7 +88,7 @@ struct gpujpeg_decoder_output
* @param output Decoder output structure
* @return void
*/
void
GPUJPEG_API void
gpujpeg_decoder_output_set_default(struct gpujpeg_decoder_output* output);
/**
@@ -86,7 +98,7 @@ gpujpeg_decoder_output_set_default(struct gpujpeg_decoder_output* output);
* @param custom_buffer Custom buffer
* @return void
*/
void
GPUJPEG_API void
gpujpeg_decoder_output_set_custom(struct gpujpeg_decoder_output* output, uint8_t* custom_buffer);
/**
@@ -95,7 +107,7 @@ gpujpeg_decoder_output_set_custom(struct gpujpeg_decoder_output* output, uint8_t
* @param output Decoder output structure
* @return void
*/
void
GPUJPEG_API void
gpujpeg_decoder_output_set_texture(struct gpujpeg_decoder_output* output, struct gpujpeg_opengl_texture* texture);
/**
@@ -103,39 +115,9 @@ gpujpeg_decoder_output_set_texture(struct gpujpeg_decoder_output* output, struct
*
* @param output Decoder output structure
*/
void
GPUJPEG_API void
gpujpeg_decoder_output_set_cuda_buffer(struct gpujpeg_decoder_output* output);
/**
* JPEG decoder structure
*/
struct gpujpeg_decoder
{
// JPEG coder structure
struct gpujpeg_coder coder;
// JPEG reader structure
struct gpujpeg_reader* reader;
// Quantization tables
struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT];
// Huffman coder tables
struct gpujpeg_table_huffman_decoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
// Huffman coder tables in device memory
struct gpujpeg_table_huffman_decoder* d_table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
// Current segment count for decoded image
int segment_count;
// Current data compressed size for decoded image
int data_compressed_size;
// Timers
GPUJPEG_CUSTOM_TIMER_DECLARE(def)
GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu)
};
/**
* Create JPEG decoder
*
@@ -143,7 +125,7 @@ struct gpujpeg_decoder
* @param param_image Parameters for image data
* @return decoder structure if succeeds, otherwise NULL
*/
struct gpujpeg_decoder*
GPUJPEG_API struct gpujpeg_decoder*
gpujpeg_decoder_create();
/**
@@ -154,7 +136,7 @@ gpujpeg_decoder_create();
* @param param_image Parameters for image data
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, struct gpujpeg_parameters* param, struct gpujpeg_image_parameters* param_image);
/**
@@ -167,7 +149,7 @@ gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, struct gpujpeg_parameters*
* @param image_decompressed_size Pointer to variable where decompressed image size will be placed
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int image_size, struct gpujpeg_decoder_output* output);
/**
@@ -176,9 +158,21 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int imag
* @param decoder Decoder structure
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder);
/**
* Sets output format
*
* @param decoder Decoder structure
* @param color_space Requested output color space
* @param sampling_factor Requestd color sampling factor
*/
GPUJPEG_API void
gpujpeg_decoder_set_output_format(struct gpujpeg_decoder* decoder,
enum gpujpeg_color_space color_space,
enum gpujpeg_sampling_factor sampling_factor);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,69 @@
/**
* Copyright (c) 2011, CESNET z.s.p.o
* Copyright (c) 2011, Silicon Genome, LLC.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GPUJPEG_DECODER_INTERNAL_H
#define GPUJPEG_DECODER_INTERNAL_H
#include <libgpujpeg/gpujpeg_common.h>
#include <libgpujpeg/gpujpeg_common_internal.h>
#include <libgpujpeg/gpujpeg_table.h>
#include <libgpujpeg/gpujpeg_reader.h>
/**
* JPEG decoder structure
*/
struct gpujpeg_decoder
{
// JPEG coder structure
struct gpujpeg_coder coder;
// JPEG reader structure
struct gpujpeg_reader* reader;
// Quantization tables
struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT];
// Huffman coder tables
struct gpujpeg_table_huffman_decoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
// Huffman coder tables in device memory
struct gpujpeg_table_huffman_decoder* d_table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
// Current segment count for decoded image
int segment_count;
// Current data compressed size for decoded image
int data_compressed_size;
// Timers
GPUJPEG_CUSTOM_TIMER_DECLARE(def)
GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu)
};
#endif // GPUJPEG_DECODER_INTERNAL_H

View File

@@ -31,13 +31,24 @@
#define GPUJPEG_ENCODER_H
#include <libgpujpeg/gpujpeg_common.h>
#include <libgpujpeg/gpujpeg_table.h>
#include <libgpujpeg/gpujpeg_writer.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined _MSC_VER || defined __MINGW32__
#ifdef GPUJPEG_EXPORTS
#define GPUJPEG_API __declspec(dllexport)
#else
#define GPUJPEG_API __declspec(dllimport)
#endif
#else // other platforms
#define GPUJPEG_API
#endif
struct gpujpeg_encoder;
/**
* Encoder input type
*/
@@ -70,7 +81,7 @@ struct gpujpeg_encoder_input
* @param image Input image data
* @return void
*/
void
GPUJPEG_API void
gpujpeg_encoder_input_set_image(struct gpujpeg_encoder_input* input, uint8_t* image);
/**
@@ -80,31 +91,9 @@ gpujpeg_encoder_input_set_image(struct gpujpeg_encoder_input* input, uint8_t* im
* @param texture_id OpenGL texture id
* @return void
*/
void
GPUJPEG_API void
gpujpeg_encoder_input_set_texture(struct gpujpeg_encoder_input* input, struct gpujpeg_opengl_texture* texture);
/**
* JPEG encoder structure
*/
struct gpujpeg_encoder
{
// JPEG coder structure
struct gpujpeg_coder coder;
// JPEG writer structure
struct gpujpeg_writer* writer;
// Quantization tables
struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT];
// Huffman coder tables
struct gpujpeg_table_huffman_encoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
// Timers
GPUJPEG_CUSTOM_TIMER_DECLARE(def)
GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu)
};
/**
* Create JPEG encoder
*
@@ -112,7 +101,7 @@ struct gpujpeg_encoder
* @param param_image Parameters for image data
* @return encoder structure if succeeds, otherwise NULL
*/
struct gpujpeg_encoder*
GPUJPEG_API struct gpujpeg_encoder*
gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct gpujpeg_image_parameters* param_image);
/**
@@ -124,7 +113,7 @@ gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct gpujpeg_image_pa
* @param image_compressed_size Pointer to variable where compressed image size will be placed
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_encoder_input* input, uint8_t** image_compressed, int* image_compressed_size);
/**
@@ -133,7 +122,7 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_encoder_i
* @param encoder Encoder structure
* @return 0 if succeeds, otherwise nonzero
*/
int
GPUJPEG_API int
gpujpeg_encoder_destroy(struct gpujpeg_encoder* encoder);
#ifdef __cplusplus

View File

@@ -0,0 +1,65 @@
/**
* Copyright (c) 2011, CESNET z.s.p.o
* Copyright (c) 2011, Silicon Genome, LLC.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GPUJPEG_ENCODER_INTERNAL_H
#define GPUJPEG_ENCODER_INTERNAL_H
#include <libgpujpeg/gpujpeg_common.h>
#include <libgpujpeg/gpujpeg_common_internal.h>
#include <libgpujpeg/gpujpeg_table.h>
#include <libgpujpeg/gpujpeg_writer.h>
#ifdef __cplusplus
extern "C" {
#endif
struct gpujpeg_encoder
{
// JPEG coder structure
struct gpujpeg_coder coder;
// JPEG writer structure
struct gpujpeg_writer* writer;
// Quantization tables
struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT];
// Huffman coder tables
struct gpujpeg_table_huffman_encoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
// Timers
GPUJPEG_CUSTOM_TIMER_DECLARE(def)
GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu)
};
#ifdef __cplusplus
}
#endif
#endif // GPUJPEG_ENCODER_INTERNAL_H

View File

@@ -31,7 +31,6 @@
#define GPUJPEG_TYPE_H
#include <stdint.h>
#include <cuda_runtime.h>
#ifdef __cplusplus
extern "C" {
@@ -71,7 +70,7 @@ enum gpujpeg_color_space {
*
* @param color_space
*/
static inline __device__ __host__ const char*
static inline const char*
gpujpeg_color_space_get_name(enum gpujpeg_color_space color_space)
{
switch ( color_space ) {
@@ -199,6 +198,9 @@ enum gpujpeg_marker_code {
GPUJPEG_MARKER_ERROR = 0x100
};
static const char*
gpujpeg_marker_name(enum gpujpeg_marker_code code) __attribute__((unused));
/**
* Get marker name from code
*

View File

@@ -34,9 +34,8 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <cuda_runtime.h>
#include "cuda_runtime.h"
#ifdef __cplusplus
extern "C" {

View File

@@ -31,6 +31,8 @@
#include "config.h"
#endif /* HAVE_CONFIG_H */
#include <algorithm>
#include <ctype.h>
#include <libgpujpeg/gpujpeg_common.h>
#include <libgpujpeg/gpujpeg_util.h>
#include "gpujpeg_preprocessor.h"
@@ -39,8 +41,8 @@
#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glx.h>
#include <cuda_gl_interop.h>
#endif
#include <cuda_gl_interop.h>
// rounds number of segment bytes up to next multiple of 128
#define SEGMENT_ALIGN(b) (((b) + 127) & ~127)
@@ -142,10 +144,12 @@ gpujpeg_init_device(int device_id, int flags)
return -1;
}
#ifdef GPUJPEG_USE_OPENGL
if ( flags & GPUJPEG_OPENGL_INTEROPERABILITY ) {
cudaGLSetGLDevice(device_id);
gpujpeg_cuda_check_error("Enabling OpenGL interoperability");
}
#endif
if ( flags & GPUJPEG_VERBOSE ) {
int cuda_driver_version = 0;
@@ -234,18 +238,26 @@ gpujpeg_image_get_file_format(const char* filename)
GPUJPEG_IMAGE_FILE_JPEG
};
char * ext = strrchr(filename, '.');
const char * ext = strrchr(filename, '.');
if ( ext == NULL )
return -1;
return GPUJPEG_IMAGE_FILE_UNKNOWN;
ext++;
char ext_lc[3];
strncpy(ext_lc, ext, 3);
std::transform(ext_lc, ext_lc + sizeof(ext_lc), ext_lc, ::tolower);
for ( int i = 0; i < sizeof(format) / sizeof(*format); i++ ) {
if ( strncasecmp(ext, extension[i], 3) == 0 ) {
if ( strncmp(ext_lc, extension[i], 3) == 0 ) {
return format[i];
}
}
return GPUJPEG_IMAGE_FILE_UNKNOWN;
}
void gpujpeg_set_device(int index)
{
cudaSetDevice(index);
}
/** Documented at declaration */
void
gpujpeg_component_print8(struct gpujpeg_component* component, uint8_t* d_data)

View File

@@ -226,7 +226,7 @@ gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder)
int index = y * width + x;
gpujpeg_idct_cpu_perform(
&component->data_quantized[index * 64],
decoder->table_quantization[type].table
(int16_t *) decoder->table_quantization[type].table
);
}
}

View File

@@ -30,8 +30,8 @@
#ifndef GPUJPEG_DCT_CPU_H
#define GPUJPEG_DCT_CPU_H
#include <libgpujpeg/gpujpeg_encoder.h>
#include <libgpujpeg/gpujpeg_decoder.h>
#include <libgpujpeg/gpujpeg_encoder_internal.h>
#include <libgpujpeg/gpujpeg_decoder_internal.h>
/**
* Peform inverse DCT on CPU

View File

@@ -30,8 +30,8 @@
#ifndef GPUJPEG_DCT_GPU_H
#define GPUJPEG_DCT_GPU_H
#include <libgpujpeg/gpujpeg_encoder.h>
#include <libgpujpeg/gpujpeg_decoder.h>
#include <libgpujpeg/gpujpeg_encoder_internal.h>
#include <libgpujpeg/gpujpeg_decoder_internal.h>
#ifdef __cplusplus
extern "C" {

View File

@@ -28,6 +28,7 @@
*/
#include <libgpujpeg/gpujpeg_decoder.h>
#include <libgpujpeg/gpujpeg_decoder_internal.h>
#include "gpujpeg_preprocessor.h"
#include "gpujpeg_dct_cpu.h"
#include "gpujpeg_dct_gpu.h"
@@ -77,7 +78,7 @@ gpujpeg_decoder_output_set_cuda_buffer(struct gpujpeg_decoder_output* output)
struct gpujpeg_decoder*
gpujpeg_decoder_create()
{
struct gpujpeg_decoder* decoder = malloc(sizeof(struct gpujpeg_decoder));
struct gpujpeg_decoder* decoder = (struct gpujpeg_decoder*) malloc(sizeof(struct gpujpeg_decoder));
if ( decoder == NULL )
return NULL;
@@ -336,6 +337,15 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int imag
return 0;
}
void
gpujpeg_decoder_set_output_format(struct gpujpeg_decoder* decoder,
enum gpujpeg_color_space color_space,
enum gpujpeg_sampling_factor sampling_factor)
{
decoder->coder.param_image.color_space = color_space;
decoder->coder.param_image.sampling_factor = sampling_factor;
}
/** Documented at declaration */
int
gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder)

View File

@@ -63,7 +63,7 @@ gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct gpujpeg_image_pa
assert(param->restart_interval >= 0);
assert(param->interleaved == 0 || param->interleaved == 1);
struct gpujpeg_encoder* encoder = malloc(sizeof(struct gpujpeg_encoder));
struct gpujpeg_encoder* encoder = (struct gpujpeg_encoder*) malloc(sizeof(struct gpujpeg_encoder));
if ( encoder == NULL )
return NULL;

View File

@@ -30,7 +30,7 @@
#ifndef GPUJPEG_HUFFMAN_CPU_DECODER_H
#define GPUJPEG_HUFFMAN_CPU_DECODER_H
#include <libgpujpeg/gpujpeg_decoder.h>
#include <libgpujpeg/gpujpeg_decoder_internal.h>
/**
* Perform huffman decoding

View File

@@ -30,7 +30,7 @@
#ifndef GPUJPEG_HUFFMAN_CPU_ENCODER_H
#define GPUJPEG_HUFFMAN_CPU_ENCODER_H
#include <libgpujpeg/gpujpeg_encoder.h>
#include <libgpujpeg/gpujpeg_encoder_internal.h>
/**
* Perform huffman encoding

View File

@@ -30,7 +30,7 @@
#ifndef GPUJPEG_HUFFMAN_GPU_DECODER_H
#define GPUJPEG_HUFFMAN_GPU_DECODER_H
#include <libgpujpeg/gpujpeg_decoder.h>
#include <libgpujpeg/gpujpeg_decoder_internal.h>
#ifdef __cplusplus
extern "C" {

View File

@@ -30,7 +30,7 @@
#ifndef GPUJPEG_HUFFMAN_GPU_ENCODER_H
#define GPUJPEG_HUFFMAN_GPU_ENCODER_H
#include <libgpujpeg/gpujpeg_encoder.h>
#include <libgpujpeg/gpujpeg_encoder_internal.h>
#ifdef __cplusplus
extern "C" {

View File

@@ -30,8 +30,8 @@
#ifndef GPUJPEG_PREPROCESSOR_H
#define GPUJPEG_PREPROCESSOR_H
#include <libgpujpeg/gpujpeg_encoder.h>
#include <libgpujpeg/gpujpeg_decoder.h>
#include <libgpujpeg/gpujpeg_encoder_internal.h>
#include <libgpujpeg/gpujpeg_decoder_internal.h>
#ifdef __cplusplus
extern "C" {

View File

@@ -29,13 +29,15 @@
#include <libgpujpeg/gpujpeg_reader.h>
#include <libgpujpeg/gpujpeg_decoder.h>
#include <libgpujpeg/gpujpeg_decoder_internal.h>
#include <libgpujpeg/gpujpeg_util.h>
/** Documented at declaration */
struct gpujpeg_reader*
gpujpeg_reader_create()
{
struct gpujpeg_reader* reader = malloc(sizeof(struct gpujpeg_reader));
struct gpujpeg_reader* reader = (struct gpujpeg_reader*)
malloc(sizeof(struct gpujpeg_reader));
if ( reader == NULL )
return NULL;
reader->comp_count = 0;
@@ -122,7 +124,7 @@ gpujpeg_reader_read_app0(uint8_t** image)
return -1;
}
char jfif[4];
char jfif[5];
jfif[0] = gpujpeg_reader_read_byte(*image);
jfif[1] = gpujpeg_reader_read_byte(*image);
jfif[2] = gpujpeg_reader_read_byte(*image);

View File

@@ -29,20 +29,21 @@
#include <libgpujpeg/gpujpeg_writer.h>
#include <libgpujpeg/gpujpeg_encoder.h>
#include <libgpujpeg/gpujpeg_encoder_internal.h>
#include <libgpujpeg/gpujpeg_util.h>
/** Documented at declaration */
struct gpujpeg_writer*
gpujpeg_writer_create(struct gpujpeg_encoder* encoder)
{
struct gpujpeg_writer* writer = malloc(sizeof(struct gpujpeg_writer));
struct gpujpeg_writer* writer = (struct gpujpeg_writer*) malloc(sizeof(struct gpujpeg_writer));
if ( writer == NULL )
return NULL;
// Allocate output buffer
int buffer_size = 1000;
buffer_size += encoder->coder.param_image.width * encoder->coder.param_image.height * encoder->coder.param_image.comp_count * 2;
writer->buffer = malloc(buffer_size * sizeof(uint8_t));
writer->buffer = (uint8_t *) malloc(buffer_size * sizeof(uint8_t));
if ( writer->buffer == NULL )
return NULL;
writer->buffer_current = NULL;

View File

@@ -27,6 +27,9 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <libgpujpeg/gpujpeg_common_internal.h> // TIMER
#include <libgpujpeg/gpujpeg_encoder_internal.h> // TIMER
#include <libgpujpeg/gpujpeg_decoder_internal.h> // TIMER
#include <libgpujpeg/gpujpeg.h>
#include <libgpujpeg/gpujpeg_util.h>
#include <getopt.h>

View File

@@ -114,6 +114,10 @@ static ssize_t write_all(fd_t fd, const void *buf, size_t count);
static void * control_thread(void *args);
static void send_response(fd_t fd, struct response *resp);
#ifndef HAVE_LINUX
#define MSG_NOSIGNAL 0
#endif
static ssize_t write_all(fd_t fd, const void *buf, size_t count)
{
char *p = (char *) buf;

119
src/cuda_wrapper.cu Normal file
View File

@@ -0,0 +1,119 @@
/**
* @file cuda_wrapper.h
* @author Martin Pulec <pulec@cesnet.cz>
*
* @brief This file contais wrapper around CUDA functions.
*/
/*
* Copyright (c) 2013 CESNET z.s.p.o.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of CESNET nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cuda_wrapper.h"
#include "cuda_runtime.h"
#include <stdlib.h>
typedef void *cuda_wrapper_stream_t;
static inline int map_cuda_error(cudaError_t cuda_error) {
struct error_mapping {
cudaError_t cuda_error;
int wrapper_error;
};
struct error_mapping mapping[] = {
{ cudaSuccess, CUDA_WRAPPER_SUCCESS },
};
int i;
for (i = 0; i < sizeof(mapping)/sizeof(struct error_mapping); ++i) {
if (cuda_error == mapping[i].cuda_error) {
return mapping[i].wrapper_error;
}
}
return CUDA_UNKNOWN_ERROR;
};
static inline enum cudaMemcpyKind map_cuda_memcpy_kind(int our_kind) {
struct kind_mapping {
enum cudaMemcpyKind kind;
int our_kind;
};
struct kind_mapping mapping[] = {
{ cudaMemcpyHostToDevice, CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE },
{ cudaMemcpyDeviceToHost, CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST },
};
int i;
for (i = 0; i < sizeof(mapping)/sizeof(struct kind_mapping); ++i) {
if (our_kind == mapping[i].our_kind) {
return mapping[i].kind;
}
}
abort(); // should not reach here
};
CUDA_DLL_API int cuda_wrapper_free(void *buffer)
{
return map_cuda_error(cudaFree(buffer));
}
CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len)
{
return map_cuda_error(cudaMalloc(buffer, data_len));
}
CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len)
{
return map_cuda_error(cudaMallocHost(buffer, data_len));
}
CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src,
size_t count, int kind)
{
return map_cuda_error(
cudaMemcpy(dst, src, count,
map_cuda_memcpy_kind(kind)));
}
CUDA_DLL_API const char *cuda_wrapper_last_error_string(void)
{
return cudaGetErrorString(cudaGetLastError());
}
CUDA_DLL_API int cuda_wrapper_set_device(int index)
{
return map_cuda_error(
cudaSetDevice(index));
}

82
src/cuda_wrapper.h Normal file
View File

@@ -0,0 +1,82 @@
/**
* @file cuda_wrapper.h
* @author Martin Pulec <pulec@cesnet.cz>
*/
/*
* Copyright (c) 2013 CESNET z.s.p.o.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of CESNET nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CUDA_WRAPPER_H_
#define CUDA_WRAPPER_H_
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
#if defined _MSC_VER || defined __MINGW32__
#ifdef EXPORT_DLL_SYMBOLS
#define CUDA_DLL_API __declspec(dllexport)
#else
#define CUDA_DLL_API __declspec(dllimport)
#endif
#else // other platforms
#define CUDA_DLL_API
#endif
/// @{
#define CUDA_WRAPPER_SUCCESS 0
#define CUDA_UNKNOWN_ERROR 1 ///< error for which there is no mapping in wrapper
/// @}
/// @{
#define CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE 0
#define CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST 1
/// @}
typedef void *cuda_wrapper_stream_t;
CUDA_DLL_API int cuda_wrapper_free(void *buffer);
CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len);
CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len);
CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src,
size_t count, int kind);
CUDA_DLL_API const char *cuda_wrapper_last_error_string(void);
CUDA_DLL_API int cuda_wrapper_set_device(int index);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // CUDA_WRAPPER_H_

View File

@@ -1127,7 +1127,7 @@ int main(int argc, char *argv[])
echo_cancellation = true;
break;
case OPT_CUDA_DEVICE:
#ifdef HAVE_CUDA
#ifdef HAVE_JPEG
if(strcmp("help", optarg) == 0) {
struct compress_state *compression;
int ret = compress_init(&root_mod, "JPEG:list_devices", &compression);

View File

@@ -44,6 +44,7 @@
#include "video_compress/cuda_dxt.h"
#include "cuda_dxt/cuda_dxt.h"
#include "cuda_wrapper.h"
#include "host.h"
#include "module.h"
@@ -57,12 +58,14 @@ struct state_video_compress_cuda_dxt {
in_buffer = NULL;
cuda_in_buffer = NULL;
cuda_uyvy_buffer = NULL;
cuda_out_buffer = NULL;
}
struct module module_data;
struct video_desc saved_desc;
char *in_buffer; ///< for decoded data
char *cuda_uyvy_buffer; ///< same as in_buffer but in device memory
char *cuda_in_buffer; ///< same as in_buffer but in device memory
char *cuda_out_buffer; ///< same as in_buffer but in device memory
struct video_frame *out[2];
codec_t in_codec;
codec_t out_codec;
@@ -107,16 +110,20 @@ static void cleanup(struct state_video_compress_cuda_dxt *s)
s->in_buffer = NULL;
}
if (s->cuda_uyvy_buffer) {
cudaFree(s->cuda_uyvy_buffer);
cuda_wrapper_free(s->cuda_uyvy_buffer);
s->cuda_uyvy_buffer = NULL;
}
if (s->cuda_in_buffer) {
cudaFree(s->cuda_in_buffer);
cuda_wrapper_free(s->cuda_in_buffer);
s->cuda_in_buffer = NULL;
}
if (s->cuda_out_buffer) {
cuda_wrapper_free(s->cuda_out_buffer);
s->cuda_out_buffer = NULL;
}
for (int i = 0; i < 2; ++i) {
if (s->out[i] != NULL) {
cudaFree(s->out[i]->tiles[0].data);
cuda_wrapper_free(s->out[i]->tiles[0].data);
s->out[i]->tiles[0].data = NULL;
}
}
@@ -138,7 +145,7 @@ static bool configure_with(struct state_video_compress_cuda_dxt *s, struct video
}
if (s->in_codec == UYVY) {
if (cudaSuccess != cudaMalloc((void **) &s->cuda_uyvy_buffer,
if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc((void **) &s->cuda_uyvy_buffer,
desc.width * desc.height * 2)) {
fprintf(stderr, "Could not allocate CUDA UYVY buffer.\n");
return false;
@@ -147,7 +154,7 @@ static bool configure_with(struct state_video_compress_cuda_dxt *s, struct video
s->in_buffer = (char *) malloc(desc.width * desc.height * 3);
if (cudaSuccess != cudaMalloc((void **) &s->cuda_in_buffer,
if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc((void **) &s->cuda_in_buffer,
desc.width * desc.height * 3)) {
fprintf(stderr, "Could not allocate CUDA output buffer.\n");
return false;
@@ -160,12 +167,18 @@ static bool configure_with(struct state_video_compress_cuda_dxt *s, struct video
s->out[i] = vf_alloc_desc(compressed_desc);
s->out[i]->tiles[0].data_len = desc.width * desc.height / (s->out_codec == DXT1 ? 2 : 1);
if (cudaSuccess != cudaMallocHost((void **) &s->out[i]->tiles[0].data,
if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc_host((void **) &s->out[i]->tiles[0].data,
s->out[i]->tiles[0].data_len)) {
fprintf(stderr, "Could not allocate CUDA output buffer.\n");
fprintf(stderr, "Could not allocate CUDA output host buffer.\n");
return false;
}
}
if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc((void **)
&s->cuda_out_buffer,
s->out[0]->tiles[0].data_len)) {
fprintf(stderr, "Could not allocate CUDA output buffer.\n");
return false;
}
return true;
}
@@ -176,7 +189,7 @@ struct video_frame *cuda_dxt_compress_tile(struct module *mod, struct video_fram
struct state_video_compress_cuda_dxt *s =
(struct state_video_compress_cuda_dxt *) mod->priv_data;
cudaSetDevice(cuda_devices[0]);
cuda_wrapper_set_device(cuda_devices[0]);
if (!video_desc_eq_excl_param(video_desc_from_frame(tx),
s->saved_desc, PARAM_TILE_COUNT)) {
@@ -205,30 +218,28 @@ struct video_frame *cuda_dxt_compress_tile(struct module *mod, struct video_fram
}
if (s->in_codec == UYVY) {
if (cudaMemcpy(s->cuda_uyvy_buffer, in_buffer, tx->tiles[tile_idx].width *
if (cuda_wrapper_memcpy(s->cuda_uyvy_buffer, in_buffer, tx->tiles[tile_idx].width *
tx->tiles[tile_idx].height * 2,
cudaMemcpyHostToDevice) != cudaSuccess) {
fprintf(stderr, "Memcpy failed: %s\n",
cudaGetErrorString(cudaGetLastError()));
CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE) != CUDA_WRAPPER_SUCCESS) {
fprintf(stderr, "Memcpy failed: %s\n", cuda_wrapper_last_error_string());
return NULL;
}
if (cuda_yuv422_to_yuv444(s->cuda_uyvy_buffer, s->cuda_in_buffer,
tx->tiles[tile_idx].width *
tx->tiles[tile_idx].height, 0) != 0) {
fprintf(stderr, "UYVY kernel failed: %s\n",
cudaGetErrorString(cudaGetLastError()));
tx->tiles[tile_idx].height, 0) != CUDA_WRAPPER_SUCCESS) {
fprintf(stderr, "Kernel failed: %s\n", cuda_wrapper_last_error_string());
}
} else {
if (cudaMemcpy(s->cuda_in_buffer, in_buffer, tx->tiles[tile_idx].width *
if (cuda_wrapper_memcpy(s->cuda_in_buffer, in_buffer, tx->tiles[tile_idx].width *
tx->tiles[tile_idx].height * 3,
cudaMemcpyHostToDevice) != cudaSuccess) {
fprintf(stderr, "Memcpy failed: %s\n",
cudaGetErrorString(cudaGetLastError()));
CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE) != CUDA_WRAPPER_SUCCESS) {
fprintf(stderr, "Memcpy failed: %s\n", cuda_wrapper_last_error_string());
return NULL;
}
}
int (*cuda_dxt_enc_func)(const void * src, void * out, int size_x, int size_y, cudaStream_t stream);
int (*cuda_dxt_enc_func)(const void * src, void * out, int size_x, int size_y,
cuda_wrapper_stream_t stream);
if (s->out_codec == DXT1) {
if (s->in_codec == RGB) {
@@ -243,11 +254,18 @@ struct video_frame *cuda_dxt_compress_tile(struct module *mod, struct video_fram
cuda_dxt_enc_func = cuda_yuv_to_dxt6;
}
}
int ret = cuda_dxt_enc_func(s->cuda_in_buffer, s->out[buffer]->tiles[0].data,
int ret = cuda_dxt_enc_func(s->cuda_in_buffer, s->cuda_out_buffer,
s->saved_desc.width, s->saved_desc.height, 0);
if (ret != 0) {
fprintf(stderr, "Encoding failed: %s\n",
cudaGetErrorString(cudaGetLastError()));
fprintf(stderr, "Encoding failed: %s\n", cuda_wrapper_last_error_string());
return NULL;
}
if (cuda_wrapper_memcpy(s->out[buffer]->tiles[0].data,
s->cuda_out_buffer,
s->out[buffer]->tiles[0].data_len,
CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST) != CUDA_WRAPPER_SUCCESS) {
fprintf(stderr, "Memcpy failed: %s\n", cuda_wrapper_last_error_string());
return NULL;
}

View File

@@ -58,7 +58,6 @@
#include "module.h"
#include "video_compress/jpeg.h"
#include "libgpujpeg/gpujpeg_encoder.h"
#include "libgpujpeg/gpujpeg_common.h"
#include "video.h"
#include <pthread.h>
#include <stdlib.h>
@@ -346,7 +345,7 @@ struct video_frame * jpeg_compress(struct module *mod, struct video_frame * tx,
unsigned int x;
cudaSetDevice(cuda_devices[0]);
gpujpeg_set_device(cuda_devices[0]);
if(!s->encoder) {
int ret;

View File

@@ -57,7 +57,6 @@
#include "libgpujpeg/gpujpeg_decoder.h"
//#include "compat/platform_semaphore.h"
#include <cuda_runtime.h>
#include <pthread.h>
#include <stdlib.h>
#include "video_decompress/jpeg.h"
@@ -82,11 +81,11 @@ static int configure_with(struct state_decompress_jpeg *s, struct video_desc des
return FALSE;
}
if(s->out_codec == RGB) {
s->decoder->coder.param_image.color_space = GPUJPEG_RGB;
s->decoder->coder.param_image.sampling_factor = GPUJPEG_4_4_4;
gpujpeg_decoder_set_output_format(s->decoder, GPUJPEG_RGB,
GPUJPEG_4_4_4);
} else {
s->decoder->coder.param_image.color_space = GPUJPEG_YCBCR_BT709;
s->decoder->coder.param_image.sampling_factor = GPUJPEG_4_2_2;
gpujpeg_decoder_set_output_format(s->decoder, GPUJPEG_YCBCR_BT709,
GPUJPEG_4_2_2);
}
return TRUE;
@@ -118,7 +117,6 @@ int jpeg_decompress_reconfigure(void *state, struct video_desc desc,
int rshift, int gshift, int bshift, int pitch, codec_t out_codec)
{
struct state_decompress_jpeg *s = (struct state_decompress_jpeg *) state;
int ret;
assert(out_codec == RGB || out_codec == UYVY);
@@ -157,7 +155,7 @@ int jpeg_decompress(void *state, unsigned char *dst, unsigned char *buffer,
linesize = s->desc.width * 2;
}
cudaSetDevice(cuda_devices[0]);
gpujpeg_set_device(cuda_devices[0]);
if((s->out_codec != RGB || (s->rshift == 0 && s->gshift == 8 && s->bshift == 16)) &&
s->pitch == linesize) {

View File

@@ -158,9 +158,10 @@ static void *worker_thread(void *arg)
msg_frame *output_frame = new msg_frame(s->desc.width * s->desc.height / s->ppb);
if (cudaSuccess != cudaMemcpy((char*) output_frame->data, s->dxt_out_buff,
if (cuda_wrapper_memcpy((char*) output_frame->data, s->dxt_out_buff,
output_frame->data_len,
cudaMemcpyDeviceToHost)) {
CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST) !=
CUDA_WRAPPER_SUCCESS) {
fprintf(stderr, "[jpeg_to_dxt] unable to copy from device.");
}
s->m_out.push(output_frame);
@@ -173,7 +174,7 @@ static void *worker_thread(void *arg)
gpujpeg_decoder_destroy(s->jpeg_decoder);
}
cudaFree(s->dxt_out_buff);
cuda_wrapper_free(s->dxt_out_buff);
return NULL;
}
@@ -272,11 +273,12 @@ static int reconfigure_thread(struct thread_data *s, struct video_desc desc, int
}
if(s->dxt_out_buff != NULL) {
cudaFree(s->dxt_out_buff);
cuda_wrapper_free(s->dxt_out_buff);
s->dxt_out_buff = NULL;
}
if(cudaSuccess != cudaMallocHost((void **) &s->dxt_out_buff, desc.width * desc.height / ppb)) {
if(cuda_wrapper_malloc_host((void **) &s->dxt_out_buff, desc.width * desc.height / ppb)
!= CUDA_WRAPPER_SUCCESS) {
fprintf(stderr, "Could not allocate CUDA output buffer.\n");
return false;
}