mirror of
https://github.com/outbackdingo/UltraGrid.git
synced 2026-03-21 14:40:22 +00:00
Windows: enable CUDA stuff
+ make VS project for GPUJPEG
This commit is contained in:
18
Makefile.in
18
Makefile.in
@@ -153,7 +153,8 @@ modules: @LIB_TARGETS@
|
||||
|
||||
$(TARGET): $(OBJS) $(ULTRAGRID_OBJS) $(HEADERS)
|
||||
@if [ ! -d bin ]; then mkdir bin; fi
|
||||
$(LINKER) -rdynamic $(LDFLAGS) $(OBJS) $(ULTRAGRID_OBJS) $(LIBS) -o $(TARGET)
|
||||
$(LINKER) $(LDFLAGS) $(OBJS) $(ULTRAGRID_OBJS) $(LIBS) -o $(TARGET)
|
||||
if [ -n "@DLL_LIBS@" ]; then $(INSTALL) -m 644 @DLL_LIBS@ bin; fi
|
||||
|
||||
bin/import_control_keyboard: src/import_control_keyboard.o
|
||||
$(LINKER) $(LDFLAGS) $< @IMPORT_CONTROL_KEYBOARD_LIBS@ -o $@
|
||||
@@ -173,7 +174,9 @@ $(REFLECTOR_TARGET): $(OBJS) $(HEADERS) $(REFLECTOR_OBJS)
|
||||
|
||||
# Pattern rule for compiling CUDA files
|
||||
%.cu.o: %.cu
|
||||
$(NVCC) $(NVCCFLAGS) -Xcompiler -fPIC -c $< -o $@
|
||||
$(NVCC) $(NVCCFLAGS) $(INC) -c $< -o $@
|
||||
%.cu.lib: %.cu
|
||||
$(NVCC) $(NVCCFLAGS) -DEXPORT_DLL_SYMBOLS $(INC) --shared $< -o $<.dll
|
||||
|
||||
src/audio/resample.o:
|
||||
$(CC) $(CFLAGS) $(INC) -DEXPORT="" -DRANDOM_PREFIX=speex -DFLOATING_POINT -DOUTSIDE_SPEEX -I. -I speex-1.2rc1/include/speex -Iinclude -fvisibility=hidden -c speex-1.2rc1/libspeex/resample.c -fPIC -DPIC -o $@
|
||||
@@ -415,7 +418,7 @@ clean:
|
||||
-rm -rf $(BUNDLE)
|
||||
-rm -rf $(PERF) src/uv_perf.o
|
||||
-rm -rf $(REFLECTOR_TARGET) $(REFLECTOR_OBJS)
|
||||
-rm -rf @LIB_OBJS@ @LIB_TARGETS@ @LIB_HEADERS@ @X_OBJ@ @GL_COMMON_OBJ@
|
||||
-rm -rf @LIB_OBJS@ @LIB_TARGETS@ @LIB_HEADERS@ @X_OBJ@ @GL_COMMON_OBJ@ @CUDA_COMMON_OBJ@
|
||||
-rm -rf bin/import_control_keyboard
|
||||
[ ! -f gpujpeg/Makefile ] || make -C gpujpeg/ clean
|
||||
[ -z "@FASTDXT_PATH@" ] || make -C @FASTDXT_PATH@/ clean
|
||||
@@ -581,9 +584,9 @@ libavcodec: @LIBAVCODEC_DECOMPRESS_LIB_TARGET@ @LIBAVCODEC_COMPRESS_LIB_TARGET@
|
||||
mkdir -p lib/ultragrid
|
||||
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_jpeg.so.@video_compress_abi_version@ $^ @JPEG_LIB@ -o $@
|
||||
|
||||
@CUDA_DXT_COMPRESS_LIB_TARGET@: @CUDA_DXT_OBJ@
|
||||
@CUDA_DXT_COMPRESS_LIB_TARGET@: @CUDA_DXT_OBJ@ @CUDA_COMMON_OBJ@
|
||||
mkdir -p lib/ultragrid
|
||||
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_cuda_dxt.so.@video_compress_abi_version@ $^ @CUDA_DXT_LIB@ -o $@
|
||||
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_cuda_dxt.so.@video_compress_abi_version@ $^ @CUDA_DXT_LIB@ @CUDA_COMMON_OBJ@ -o $@
|
||||
|
||||
@RTDXT_DECOMPRESS_LIB_TARGET@: @GL_COMMON_OBJ@ @X_OBJ@ @RTDXT_COMMON_OBJ@ @RTDXT_DECOMPRESS_OBJ@ @RTDXT_COMMON_HEADERS@
|
||||
mkdir -p lib/ultragrid
|
||||
@@ -593,9 +596,9 @@ libavcodec: @LIBAVCODEC_DECOMPRESS_LIB_TARGET@ @LIBAVCODEC_COMPRESS_LIB_TARGET@
|
||||
mkdir -p lib/ultragrid
|
||||
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vdecompress_jpeg.so.@video_decompress_abi_version@ $^ @JPEG_LIB@ -o $@
|
||||
|
||||
@JPEG_TO_DXT_DECOMPRESS_LIB_TARGET@: @JPEG_TO_DXT_OBJ@ @JPEG_DECOMPRESS_OBJ@ @JPEG_COMMON_OBJ@
|
||||
@JPEG_TO_DXT_DECOMPRESS_LIB_TARGET@: @JPEG_TO_DXT_OBJ@ @JPEG_DECOMPRESS_OBJ@ @JPEG_COMMON_OBJ@ @CUDA_COMMON_OBJ@
|
||||
mkdir -p lib/ultragrid
|
||||
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vdecompress_jpeg_to_dxt.so.@video_decompress_abi_version@ $^ @JPEG_TO_DXT_LIB@ -o $@
|
||||
$(LINKER) $(LDFLAGS) -shared -Wl,-soname,vdecompress_jpeg_to_dxt.so.@video_decompress_abi_version@ $^ @JPEG_TO_DXT_LIB@ @CUDA_COMMON_OBJ@ -o $@
|
||||
|
||||
|
||||
@ALSA_PLAY_LIB_TARGET@: @ALSA_PLAY_OBJ@
|
||||
@@ -647,6 +650,7 @@ install: all
|
||||
$(INSTALL) -m 755 data/ultragrid-bugreport-collect.sh $(DESTDIR)/$(uv_datadir)
|
||||
$(INSTALL) -d -m 755 $(DESTDIR)/$(docdir)
|
||||
$(INSTALL) -m 644 $(DOCS) $(DESTDIR)/$(docdir)
|
||||
$(INSTALL) -m 644 @DLL_LIBS@ $(DESTDIR)/$(bindir)
|
||||
|
||||
uninstall:
|
||||
$(RM) $(DESTDIR)/$(bindir)/uv
|
||||
|
||||
12
autogen.sh
12
autogen.sh
@@ -3,17 +3,6 @@ set -e
|
||||
|
||||
[ -d m4 ] || mkdir m4
|
||||
|
||||
# variables
|
||||
if [ `uname -s` = "Darwin" ]; then
|
||||
LIBTOOLIZE=glibtoolize
|
||||
else if [ `uname -s` = "Linux" ]; then
|
||||
LIBTOOLIZE=libtoolize
|
||||
else # Windows
|
||||
LIBTOOLIZE=true
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
srcdir=`dirname $0`
|
||||
test -z "$srcdir" && srcdir=.
|
||||
|
||||
@@ -22,7 +11,6 @@ ORIGDIR=`pwd`
|
||||
cd $srcdir
|
||||
aclocal
|
||||
autoheader
|
||||
$LIBTOOLIZE --copy
|
||||
autoconf
|
||||
|
||||
$srcdir/configure --enable-gpl $@
|
||||
|
||||
137
configure.ac
137
configure.ac
@@ -5,7 +5,6 @@ AM_INIT_AUTOMAKE([1.10])
|
||||
AC_PREREQ(2.61)
|
||||
AC_CONFIG_SRCDIR([src/main.c])
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
LT_INIT
|
||||
|
||||
PARENT=`echo $PWD | sed -e 's%/[[^/]]*$%%'`
|
||||
|
||||
@@ -161,7 +160,12 @@ then
|
||||
CFLAGS="$CFLAGS -fPIC"
|
||||
CXXFLAGS="$CXXFLAGS -fPIC"
|
||||
NVCCFLAGS="$NVCCFLAGS -Xcompiler -fPIC"
|
||||
CU_SUFFIX="cu.o"
|
||||
else
|
||||
NVCCFLAGS="-m32"
|
||||
CU_SUFFIX="cu.lib"
|
||||
fi
|
||||
AC_SUBST(NVCCFLAGS)
|
||||
|
||||
AH_BOTTOM([
|
||||
/*
|
||||
@@ -237,19 +241,30 @@ AC_DEFUN([DEFINE_GL], [
|
||||
AC_SUBST(GL_COMMON_OBJ)
|
||||
])
|
||||
|
||||
AC_DEFUN([DEFINE_CUDA], [
|
||||
if test -z "$cuda_var_defined"; then
|
||||
AC_DEFINE([HAVE_CUDA], [1], [CUDA is present on the system])
|
||||
LINKER=$CXX
|
||||
POST_COMPILE_MSG="$POST_COMPILE_MSG\n***\nYou have compiled in JPEG support.\nIn order to use use JPEG compression and \
|
||||
AC_DEFUN([CUDA_MESSAGE], [
dnl Appends a note to POST_COMPILE_MSG explaining how to make the CUDA
dnl libraries visible to the OS. The cuda_msg_defined guard ensures the note
dnl is appended only once even when several modules expand this macro.
	if test -z "$cuda_msg_defined"; then
		POST_COMPILE_MSG="$POST_COMPILE_MSG\n***\nYou have compiled in CUDA-based code.\nIn order to use it, \
you will need to have CUDA libraries visible to your OS.\n\
If not done so, you can accomplish this by adding line:\n\
export LD_LIBRARY_PATH=$CUDA_LIB_PATH:\\\$\$LD_LIBRARY_PATH\n\
to your .bashrc file (in home directory). To take effect immediately, you will need to enter:\n\
exec bash\n***\n"
		cuda_msg_defined=yes
	fi
])
|
||||
|
||||
AC_DEFUN([DEFINE_CUDA], [
|
||||
if test -z "$cuda_var_defined"; then
|
||||
AC_DEFINE([HAVE_CUDA], [1], [CUDA is present on the system])
|
||||
CUDA_COMMON_OBJ=src/cuda_wrapper.$CU_SUFFIX
|
||||
if test $system = Windows; then
|
||||
DLL_LIBS="$DLL_LIBS src/cuda_wrapper.cu.dll"
|
||||
fi
|
||||
AC_SUBST(CUDA_COMMON_OBJ)
|
||||
cuda_var_defined=yes
|
||||
fi
|
||||
])
|
||||
AC_SUBST(DLL_LIBS)
|
||||
|
||||
AC_MSG_CHECKING([if_nametoindex])
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||
@@ -402,7 +417,8 @@ if test "$build_libraries" = yes
|
||||
then
|
||||
if test "$system" = "Linux"
|
||||
then
|
||||
LIBS+=' -ldl'
|
||||
LIBS="$LIBS -ldl"
|
||||
# Append portably: configure may run under a POSIX sh without the += operator,
# and the leading space keeps the new flags separate from existing LDFLAGS.
LDFLAGS="$LDFLAGS -rdynamic -ldl"
|
||||
else
|
||||
AC_MSG_WARN([*** Building libraries is not supported with other system than Linux])
|
||||
build_libraries=no
|
||||
@@ -438,7 +454,7 @@ AC_SUBST(vo_pp_abi_version)
|
||||
# ------------------------------------------------------------------------------------------------
|
||||
# environment variables
|
||||
#
|
||||
AC_ARG_VAR([CUDA_DIRECTORY], [Directory of your Nvidia toolkit instalation.])
|
||||
AC_ARG_VAR([CUDA_PATH], [Directory of your Nvidia toolkit instalation.])
|
||||
AC_ARG_VAR([SAGE_DIRECTORY], [Directory of your SAGE installation.])
|
||||
AC_ARG_VAR([DVS_DIRECTORY], [Path to DVS installation.])
|
||||
AC_ARG_VAR([DELTACAST_DIRECTORY], [Placement of VideoMasterHD directory (Deltacast).])
|
||||
@@ -690,7 +706,6 @@ then
|
||||
DECKLINK_COMMON="src/video_capture/DeckLinkAPIDispatch.o"
|
||||
fi
|
||||
AC_DEFINE([HAVE_DECKLINK], [1], [Build with DeckLink support])
|
||||
LINKER=$CXX
|
||||
AC_SUBST(DECKLINK_CAP_LIB_TARGET, "lib/ultragrid/vidcap_decklink.so.$video_capture_abi_version")
|
||||
AC_SUBST(DECKLINK_DISP_LIB_TARGET, "lib/ultragrid/display_decklink.so.$video_display_abi_version")
|
||||
AC_SUBST(DECKLINK_SOUND_PLAY_LIB_TARGET, "lib/ultragrid/aplay_decklink.so.$audio_playback_abi_version")
|
||||
@@ -1655,7 +1670,6 @@ then
|
||||
FASTDXT_LIB=""
|
||||
FASTDXT_OBJ="$FASTDXT_OBJ src/video_compress/fastdxt.o ${FASTDXT_PATH}/libdxt.a"
|
||||
AC_DEFINE([HAVE_FASTDXT], [1], [Build with support for FastDXT])
|
||||
LINKER=$CXX
|
||||
AC_SUBST(FASTDXT_LIB_TARGET, "lib/ultragrid/vcompress_fastdxt.so.$video_compress_abi_version")
|
||||
LIB_TARGETS="$LIB_TARGETS $FASTDXT_LIB_TARGET"
|
||||
LIB_OBJS="$LIB_OBJS $FASTDXT_OBJ"
|
||||
@@ -1782,10 +1796,8 @@ AC_SUBST(UYVY_LIB)
|
||||
CUDA_INC=
|
||||
CUDA_LIB=
|
||||
NVCC=
|
||||
NVCCFLAGS=
|
||||
CUDA_LIB_PATH=
|
||||
FOUND_CUDA=no
|
||||
HAVE_CUDA=no
|
||||
CUDA_PATH=$CUDA_DIRECTORY
|
||||
|
||||
AC_ARG_WITH(cuda,
|
||||
@@ -1797,18 +1809,12 @@ SAVED_LIBS=$LIBS
|
||||
LIBS="$LIBS -lcudart"
|
||||
if test -z "$CUDA_PATH"
|
||||
then
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <cuda_runtime_api.h>
|
||||
]],
|
||||
[[int devices_count;
|
||||
cudaGetDeviceCount(&devices_count);
|
||||
]])],FOUND_CUDA=yes,FOUND_CUDA=no)
|
||||
AC_CHECK_PROG([NVCC], [nvcc], [nvcc],
|
||||
[])
|
||||
fi
|
||||
|
||||
if test $FOUND_CUDA = no -o -z "$NVCC"
|
||||
if test -z "$NVCC"
|
||||
then
|
||||
FOUND_CUDA=no
|
||||
NVCC=
|
||||
if test -z "$CUDA_PATH"; then
|
||||
CUDA_PATH=/usr/local/cuda
|
||||
@@ -1821,11 +1827,6 @@ then
|
||||
else
|
||||
LIBS="$LIBS -L$CUDA_PATH/lib"
|
||||
fi
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <cuda_runtime_api.h>
|
||||
]],
|
||||
[[int devices_count;
|
||||
cudaGetDeviceCount(&devices_count);
|
||||
]])],FOUND_CUDA=yes,FOUND_CUDA=no)
|
||||
CFLAGS=$SAVED_CFLAGS
|
||||
AC_CHECK_PROG([NVCC], [nvcc], [$CUDA_PATH/bin/nvcc],
|
||||
[],
|
||||
@@ -1834,9 +1835,17 @@ fi
|
||||
|
||||
LIBS=$SAVED_LIBS
|
||||
|
||||
if test -n "$NVCC" -a $FOUND_CUDA = yes
|
||||
if test -n "$NVCC"
|
||||
then
|
||||
HAVE_CUDA=yes
|
||||
# On Windows, CUDA counts as found only when the cl program is found as well;
# on other systems having nvcc is sufficient.
if test "$system" = Windows; then
	AC_CHECK_PROG([CL], [cl], [cl],
		[])
	# CL must be quoted: unquoted and empty, the test collapses to a
	# single-operand form that is always true.
	if test -n "$CL"; then
		FOUND_CUDA=yes
	fi
else
	FOUND_CUDA=yes
fi
|
||||
AC_MSG_CHECKING([CUDA Toolkit version - major])
|
||||
nvcc_major=`$NVCC --version |grep release|sed 's/^.*release \(@<:@0-9@:>@@<:@0-9@:>@*\).*$/\1/'`
|
||||
AC_MSG_RESULT($nvcc_major)
|
||||
@@ -1859,10 +1868,8 @@ then
|
||||
CUDA_LIB="-L$CUDA_LIB_PATH -lcudart"
|
||||
fi
|
||||
|
||||
AC_SUBST(HAVE_CUDA)
|
||||
AC_SUBST(NVCC)
|
||||
AC_SUBST(FOUND_CUDA)
|
||||
AC_SUBST(NVCCFLAGS)
|
||||
AC_SUBST(CUDA_PATH)
|
||||
AC_SUBST(CUDA_LIB)
|
||||
AC_SUBST(CUDA_INC)
|
||||
@@ -1893,46 +1900,23 @@ CFLAGS=$SAVED_CFLAGS
|
||||
CXXFLAGS=$SAVED_CXXFLAGS
|
||||
CPPFLAGS=$SAVED_CPPFLAGS
|
||||
|
||||
if test $HAVE_CUDA = yes
|
||||
if test $system = Windows
|
||||
then
|
||||
if test \( $nvcc_major -gt 4 \) -o \( $nvcc_major -eq 4 -a $nvcc_minor -ge 1 \) -o $version_check = no
|
||||
then
|
||||
jpeg_cuda_version_ok=yes
|
||||
else
|
||||
AC_MSG_WARN([*** CUDA Toolkit older than 4.1 detected. Please install at least v4.1 to use JPEG.])
|
||||
HM_VERSION_WARNING
|
||||
fi
|
||||
|
||||
if test $version_check = no -o \
|
||||
\( $system = Linux -o $os_version_major -ge 10 \) # Linux or Mac at least Snow Leopard
|
||||
then
|
||||
jpeg_os_version_ok=yes
|
||||
else
|
||||
AC_MSG_WARN([*** Detected unsupported OS version for CUDA.])
|
||||
HM_VERSION_WARNING
|
||||
fi
|
||||
JPEG_LIB="$JPEG_LIB -Lgpujpeg/Release"
|
||||
DLL_LIBS="$DLL_LIBS gpujpeg/Release/gpujpeg.dll"
|
||||
fi
|
||||
|
||||
SAVED_LIBS=$LIBS
|
||||
LIBS="$LIBS $JPEG_LIB"
|
||||
AC_CHECK_LIB([gpujpeg], [gpujpeg_encoder_create])
|
||||
LIBS=$SAVED_LIBS
|
||||
|
||||
# Used also by JPEG_TO_DXT
|
||||
if test "$ac_cv_lib_gpujpeg_gpujpeg_encoder_create" = yes -a \
|
||||
$HAVE_CUDA = yes -a "$jpeg_cuda_version_ok" = yes -a \
|
||||
"$jpeg_os_version_ok" = yes
|
||||
then
|
||||
jpeg_env_ok=yes
|
||||
else
|
||||
jpeg_env_ok=no
|
||||
fi
|
||||
|
||||
if test "$jpeg_req" != no -a $jpeg_env_ok = yes
|
||||
if test "$jpeg_req" != no -a \
|
||||
"$ac_cv_lib_gpujpeg_gpujpeg_encoder_create" = yes
|
||||
then
|
||||
jpeg=yes
|
||||
|
||||
JPEG_LIB=" $CUDA_LIB"
|
||||
JPEG_INC=$CUDA_INC
|
||||
JPEG_INC="$JPEG_INC -Igpujpeg"
|
||||
JPEG_COMMON_OBJ="src/video_compress/jpeg.o"
|
||||
JPEG_LIB="$JPEG_LIB -lgpujpeg"
|
||||
JPEG_DECOMPRESS_OBJ="src/video_decompress/jpeg.o "
|
||||
@@ -1941,7 +1925,7 @@ then
|
||||
AC_SUBST(JPEG_DECOMPRESS_LIB_TARGET, "lib/ultragrid/vdecompress_jpeg.so.$video_decompress_abi_version")
|
||||
LIB_TARGETS="$LIB_TARGETS $JPEG_COMPRESS_LIB_TARGET $JPEG_DECOMPRESS_LIB_TARGET"
|
||||
LIB_OBJS="$LIB_OBJS $JPEG_DECOMPRESS_OBJ $JPEG_COMMON_OBJ"
|
||||
DEFINE_CUDA
|
||||
CUDA_MESSAGE
|
||||
fi
|
||||
|
||||
if test $jpeg_req = yes -a $jpeg = no; then
|
||||
@@ -1965,6 +1949,16 @@ CUDA_DXT_LIB=
|
||||
|
||||
cuda_dxt=no
|
||||
|
||||
AC_DEFUN([ADD_CUDA_DXT_OBJ], [
|
||||
if test -z "$included_shared_cuda_dxt_cu"; then
|
||||
LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.$CU_SUFFIX"
|
||||
if test $system = Windows; then
|
||||
DLL_LIBS="$DLL_LIBS cuda_dxt/cuda_dxt.cu.dll"
|
||||
fi
|
||||
included_shared_cuda_dxt_cu=yes
|
||||
fi
|
||||
])
|
||||
|
||||
AC_ARG_ENABLE(cuda-dxt,
|
||||
AS_HELP_STRING([--disable-cuda-dxt], [disable CUDA DXT compression (auto)]),
|
||||
[cuda_dxt_req=$enableval],
|
||||
@@ -1972,22 +1966,20 @@ AC_ARG_ENABLE(cuda-dxt,
|
||||
|
||||
LIBS=$SAVED_LIBS
|
||||
|
||||
if test "$cuda_dxt_req" != no -a $HAVE_CUDA = yes
|
||||
if test "$cuda_dxt_req" != no -a $FOUND_CUDA = yes
|
||||
then
|
||||
cuda_dxt=yes
|
||||
|
||||
CUDA_DXT_LIB=" $CUDA_LIB"
|
||||
CUDA_DXT_INC=$CUDA_INC
|
||||
CUDA_DXT_OBJ="src/video_compress/cuda_dxt.o cuda_dxt/cuda_dxt.cu.o"
|
||||
CUDA_DXT_OBJ="src/video_compress/cuda_dxt.o cuda_dxt/cuda_dxt.$CU_SUFFIX"
|
||||
AC_DEFINE([HAVE_CUDA_DXT], [1], [Build with CUDA DXT support])
|
||||
AC_SUBST(CUDA_DXT_COMPRESS_LIB_TARGET, "lib/ultragrid/vcompress_cuda_dxt.so.$video_compress_abi_version")
|
||||
LIB_TARGETS="$LIB_TARGETS $CUDA_DXT_COMPRESS_LIB_TARGET"
|
||||
LIB_OBJS="$LIB_OBJS src/video_compress/cuda_dxt.o"
|
||||
if test -z "$included_shared_cuda_dxt_cu"; then
|
||||
LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.cu.o"
|
||||
included_shared_cuda_dxt_cu=yes
|
||||
fi
|
||||
ADD_CUDA_DXT_OBJ
|
||||
DEFINE_CUDA
|
||||
CUDA_MESSAGE
|
||||
fi
|
||||
|
||||
if test $cuda_dxt_req = yes -a $cuda_dxt = no; then
|
||||
@@ -2009,21 +2001,20 @@ AC_ARG_ENABLE(jpeg_to_dxt,
|
||||
[jpeg_to_dxt_req=$enableval],
|
||||
[jpeg_to_dxt_req=auto])
|
||||
|
||||
if test $jpeg_env_ok = yes -a $jpeg_to_dxt_req != no
|
||||
if test $jpeg_to_dxt_req != no -a $FOUND_CUDA = yes -a \
|
||||
"$ac_cv_lib_gpujpeg_gpujpeg_encoder_create" = yes
|
||||
then
|
||||
jpeg_to_dxt=yes
|
||||
JPEG_TO_DXT_INC=" $CUDA_INC"
|
||||
JPEG_TO_DXT_LIB=" $CUDA_LIB -lgpujpeg"
|
||||
JPEG_TO_DXT_OBJ="src/video_decompress/jpeg_to_dxt.o cuda_dxt/cuda_dxt.cu.o"
|
||||
JPEG_TO_DXT_OBJ="src/video_decompress/jpeg_to_dxt.o cuda_dxt/cuda_dxt.$CU_SUFFIX"
|
||||
AC_SUBST(JPEG_TO_DXT_DECOMPRESS_LIB_TARGET, "lib/ultragrid/vdecompress_jpeg_to_dxt.so.$video_decompress_abi_version")
|
||||
LIB_TARGETS="$LIB_TARGETS $JPEG_TO_DXT_DECOMPRESS_LIB_TARGET"
|
||||
LIB_OBJS="$LIB_OBJS src/video_decompress/jpeg_to_dxt.o"
|
||||
AC_DEFINE([HAVE_JPEG_TO_DXT], [1], [Build with JPEG to DXT transcode support])
|
||||
if test -z "$included_shared_cuda_dxt_cu"; then
|
||||
LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.cu.o"
|
||||
included_shared_cuda_dxt_cu=yes
|
||||
fi
|
||||
ADD_CUDA_DXT_OBJ
|
||||
DEFINE_CUDA
|
||||
CUDA_MESSAGE
|
||||
fi
|
||||
|
||||
if test $jpeg_to_dxt = no -a $jpeg_to_dxt_req = yes
|
||||
@@ -2083,7 +2074,6 @@ then
|
||||
SAGE_LIB=-"L${SAGE_LIB} -lsail -lquanta"
|
||||
SAGE_OBJ="src/video_display/sage.o"
|
||||
AC_DEFINE([HAVE_SAGE], [1], [Build with SAGE support])
|
||||
LINKER=$CXX
|
||||
AC_SUBST(SAGE_LIB_TARGET, "lib/ultragrid/display_sage.so.$video_display_abi_version")
|
||||
CXXFLAGS="$CXXFLAGS -DQUANTA_USE_PTHREADS -DQUANTA_THREAD_SAFE -DGLSL_YUV"
|
||||
LIB_TARGETS="$LIB_TARGETS $SAGE_LIB_TARGET"
|
||||
@@ -2563,7 +2553,7 @@ fi
|
||||
if test "$build_libraries" != yes
|
||||
then
|
||||
LIBS="$LIBS $LIB_MODULES"
|
||||
OBJS="$OBJS $LIB_OBJS $GL_COMMON_OBJ $X_OBJ"
|
||||
OBJS="$OBJS $LIB_OBJS $GL_COMMON_OBJ $X_OBJ $CUDA_COMMON_OBJ"
|
||||
HEADERS="$HEADERS $LIB_HEADERS"
|
||||
LIB_OBJS=
|
||||
LIB_TARGETS=
|
||||
@@ -2663,6 +2653,7 @@ RESULT=\
|
||||
Realtime DXT (OpenGL) ....... $rtdxt
|
||||
JPEG ........................ $jpeg
|
||||
JPEG to DXT ................. $jpeg_to_dxt
|
||||
CUDA DXT .................... $cuda_dxt
|
||||
UYVY dummy compression ...... $uyvy
|
||||
Libavcodec .................. $libavcodec (audio: $libavcodec_audio)
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
/// @brief CUDA implementation of DXT compression
|
||||
///
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <stdio.h>
|
||||
#include "cuda_dxt.h"
|
||||
|
||||
@@ -721,9 +722,9 @@ __global__ static void yuv422_to_yuv444_kernel(const void * src, void * out, int
|
||||
out_pix[1].w = pix34.x;
|
||||
out_pix[2].x = pix34.z;
|
||||
|
||||
out_pix[2].z = pix34.w;
|
||||
out_pix[2].w = pix34.x;
|
||||
out_pix[2].x = pix34.z;
|
||||
out_pix[2].y = pix34.w;
|
||||
out_pix[2].z = pix34.x;
|
||||
out_pix[2].w = pix34.z;
|
||||
|
||||
this_out[0] = out_pix[0];
|
||||
this_out[1] = out_pix[1];
|
||||
@@ -758,13 +759,14 @@ static int dxt_launch(const void * src, void * out, int sx, int sy, cudaStream_t
|
||||
return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0;
|
||||
}
|
||||
|
||||
int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStream_t str) {
|
||||
CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out,
|
||||
int pix_count, cuda_wrapper_stream_t str) {
|
||||
// grid and threadblock sizes
|
||||
const dim3 tsiz(64, 1);
|
||||
int thread_count = pix_count / 4; // we process block of 4 pixels
|
||||
const dim3 gsiz((thread_count + tsiz.x - 1) / tsiz.x, 1);
|
||||
yuv422_to_yuv444_kernel<<<gsiz, tsiz, 0, str>>>(src, out, pix_count);
|
||||
return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0;
|
||||
yuv422_to_yuv444_kernel<<<gsiz, tsiz, 0, (cudaStream_t) str>>>(src, out, pix_count);
|
||||
return cudaSuccess != cudaStreamSynchronize((cudaStream_t) str) ? -3 : 0;
|
||||
}
|
||||
|
||||
/// CUDA DXT1 compression (only RGB without alpha).
|
||||
@@ -777,8 +779,9 @@ int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStrea
|
||||
/// @param size_y Height of the input image (must be divisible by 4).
|
||||
/// @param stream CUDA stream to run in, or 0 for default stream.
|
||||
/// @return 0 if OK, nonzero if failed.
|
||||
int cuda_rgb_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) {
|
||||
return dxt_launch<false, 1>(src, out, size_x, size_y, stream);
|
||||
CUDA_DLL_API int cuda_rgb_to_dxt1(const void * src, void * out,
|
||||
int size_x, int size_y, cuda_wrapper_stream_t stream) {
|
||||
return dxt_launch<false, 1>(src, out, size_x, size_y, (cudaStream_t) stream);
|
||||
}
|
||||
|
||||
|
||||
@@ -793,8 +796,9 @@ int cuda_rgb_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaS
|
||||
/// @param size_y Height of the input image (must be divisible by 4).
|
||||
/// @param stream CUDA stream to run in, or 0 for default stream.
|
||||
/// @return 0 if OK, nonzero if failed.
|
||||
int cuda_yuv_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) {
|
||||
return dxt_launch<true, 1>(src, out, size_x, size_y, stream);
|
||||
CUDA_DLL_API int cuda_yuv_to_dxt1(const void * src, void * out,
|
||||
int size_x, int size_y, cuda_wrapper_stream_t stream) {
|
||||
return dxt_launch<true, 1>(src, out, size_x, size_y, (cudaStream_t) stream);
|
||||
}
|
||||
|
||||
|
||||
@@ -809,11 +813,13 @@ int cuda_yuv_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaS
|
||||
/// (Input is read bottom up if negative)
|
||||
/// @param stream CUDA stream to run in, or 0 for default stream.
|
||||
/// @return 0 if OK, nonzero if failed.
|
||||
int cuda_rgb_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) {
|
||||
return dxt_launch<false, 6>(src, out, size_x, size_y, stream);
|
||||
CUDA_DLL_API int cuda_rgb_to_dxt6(const void * src, void * out,
|
||||
int size_x, int size_y, cuda_wrapper_stream_t stream) {
|
||||
return dxt_launch<false, 6>(src, out, size_x, size_y, (cudaStream_t) stream);
|
||||
}
|
||||
|
||||
int cuda_yuv_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) {
|
||||
return dxt_launch<true, 6>(src, out, size_x, size_y, stream);
|
||||
CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out,
|
||||
int size_x, int size_y, cuda_wrapper_stream_t stream) {
|
||||
return dxt_launch<true, 6>(src, out, size_x, size_y, (cudaStream_t) stream);
|
||||
}
|
||||
|
||||
|
||||
@@ -8,13 +8,12 @@
|
||||
#ifndef CUDA_DXT_H
|
||||
#define CUDA_DXT_H
|
||||
|
||||
#include "cuda_wrapper.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
|
||||
/**
|
||||
* CUDA DXT1 compression (only RGB without alpha).
|
||||
* @param src Pointer to top-left source pixel in device-memory buffer.
|
||||
@@ -28,13 +27,13 @@ extern "C" {
|
||||
* @param stream CUDA stream to run in, or 0 for default stream.
|
||||
* @return 0 if OK, nonzero if failed.
|
||||
*/
|
||||
int cuda_rgb_to_dxt1
|
||||
CUDA_DLL_API int cuda_rgb_to_dxt1
|
||||
(
|
||||
const void * src,
|
||||
void * out,
|
||||
int size_x,
|
||||
int size_y,
|
||||
cudaStream_t stream
|
||||
cuda_wrapper_stream_t stream
|
||||
);
|
||||
|
||||
|
||||
@@ -52,13 +51,13 @@ int cuda_rgb_to_dxt1
|
||||
* @param stream CUDA stream to run in, or 0 for default stream.
|
||||
* @return 0 if OK, nonzero if failed.
|
||||
*/
|
||||
int cuda_yuv_to_dxt1
|
||||
CUDA_DLL_API int cuda_yuv_to_dxt1
|
||||
(
|
||||
const void * src,
|
||||
void * out,
|
||||
int size_x,
|
||||
int size_y,
|
||||
cudaStream_t stream
|
||||
cuda_wrapper_stream_t stream
|
||||
);
|
||||
|
||||
|
||||
@@ -75,17 +74,19 @@ int cuda_yuv_to_dxt1
|
||||
* @param stream CUDA stream to run in, or 0 for default stream.
|
||||
* @return 0 if OK, nonzero if failed.
|
||||
*/
|
||||
int cuda_rgb_to_dxt6
|
||||
CUDA_DLL_API int cuda_rgb_to_dxt6
|
||||
(
|
||||
const void * src,
|
||||
void * out,
|
||||
int size_x,
|
||||
int size_y,
|
||||
cudaStream_t stream
|
||||
cuda_wrapper_stream_t stream
|
||||
);
|
||||
|
||||
int cuda_yuv_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream);
|
||||
int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStream_t str);
|
||||
CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out,
|
||||
int size_x, int size_y, cuda_wrapper_stream_t stream);
|
||||
CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out,
|
||||
int pix_count, cuda_wrapper_stream_t str);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end of extern "C" */
|
||||
|
||||
9
gpujpeg/.gitignore
vendored
9
gpujpeg/.gitignore
vendored
@@ -1,4 +1,3 @@
|
||||
gpujpeg
|
||||
.libs
|
||||
src/*.o
|
||||
src/*.lo
|
||||
@@ -28,3 +27,11 @@ src/.dirstamp
|
||||
stamp-h1
|
||||
libgpujpeg.la
|
||||
|
||||
# VS
|
||||
*.pdb
|
||||
*.sdf
|
||||
*.suo
|
||||
*.user
|
||||
Debug
|
||||
Release
|
||||
|
||||
|
||||
@@ -27,21 +27,21 @@ pkgconfig_DATA = libgpujpeg.pc
|
||||
library_include_HEADERS = libgpujpeg/*.h
|
||||
nodist_gpujpeg_libinclude_HEADERS = config.h
|
||||
|
||||
gpujpeg_SOURCES = src/main.c
|
||||
gpujpeg_SOURCES = src/main.c
|
||||
gpujpeg_CFLAGS = -std=c99 @COMMON_CFLAGS@
|
||||
gpujpeg_LDADD = libgpujpeg.la
|
||||
gpujpeg_LDFLAGS = @GPUJPEG_LDFLAGS@
|
||||
|
||||
# gpu jpeg library sources
|
||||
libgpujpeg_la_SOURCES = src/gpujpeg_common.c \
|
||||
src/gpujpeg_dct_cpu.c \
|
||||
src/gpujpeg_decoder.c \
|
||||
src/gpujpeg_encoder.c \
|
||||
src/gpujpeg_huffman_cpu_decoder.c \
|
||||
src/gpujpeg_huffman_cpu_encoder.c \
|
||||
src/gpujpeg_reader.c \
|
||||
src/gpujpeg_table.c \
|
||||
src/gpujpeg_writer.c
|
||||
libgpujpeg_la_SOURCES = src/gpujpeg_common.cpp \
|
||||
src/gpujpeg_dct_cpu.cpp \
|
||||
src/gpujpeg_decoder.cpp \
|
||||
src/gpujpeg_encoder.cpp \
|
||||
src/gpujpeg_huffman_cpu_decoder.cpp \
|
||||
src/gpujpeg_huffman_cpu_encoder.cpp \
|
||||
src/gpujpeg_reader.cpp \
|
||||
src/gpujpeg_table.cpp \
|
||||
src/gpujpeg_writer.cpp
|
||||
|
||||
libgpujpeg_la_DEPENDENCIES = @LIBGPUJPEG_CUDA_OBJS@
|
||||
|
||||
|
||||
@@ -10,13 +10,6 @@ else
|
||||
LIBTOOLIZE=libtoolize
|
||||
fi
|
||||
|
||||
if [ ! -x ../ltmain.sh ]
|
||||
then
|
||||
cd ..
|
||||
$LIBTOOLIZE --copy
|
||||
cd -
|
||||
fi
|
||||
|
||||
autoheader && \
|
||||
$LIBTOOLIZE --copy && \
|
||||
( [ -d m4 ] || mkdir m4 ) && \
|
||||
|
||||
@@ -2,6 +2,8 @@ AC_PREREQ([2.65])
|
||||
AC_INIT([libgpujpeg], [0.0.1], [martin.srom@mail.muni.cz], [libgpujpeg], [https://sourceforge.net/p/gpujpeg/])
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
AC_CONFIG_SRCDIR([src/main.c])
|
||||
AC_CONFIG_AUX_DIR([.])
|
||||
AM_MAINTAINER_MODE
|
||||
AM_INIT_AUTOMAKE
|
||||
|
||||
AC_CONFIG_HEADERS([config.h])
|
||||
|
||||
20
gpujpeg/gpujpeg.sln
Normal file
20
gpujpeg/gpujpeg.sln
Normal file
@@ -0,0 +1,20 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Express 2012 for Windows Desktop
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpujpeg", "gpujpeg.vcxproj", "{B9D06885-F4F3-4B01-8C43-E131210B9F27}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Release|Win32 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{B9D06885-F4F3-4B01-8C43-E131210B9F27}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{B9D06885-F4F3-4B01-8C43-E131210B9F27}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{B9D06885-F4F3-4B01-8C43-E131210B9F27}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{B9D06885-F4F3-4B01-8C43-E131210B9F27}.Release|Win32.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
132
gpujpeg/gpujpeg.vcxproj
Normal file
132
gpujpeg/gpujpeg.vcxproj
Normal file
@@ -0,0 +1,132 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{B9D06885-F4F3-4B01-8C43-E131210B9F27}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>gpujpeg</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;GPUJPEG_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>.;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>cudart.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;%(CodeGeneration)</CodeGeneration>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
  <!-- Release|Win32 build settings: optimized host compilation, cudart linked in. -->
  <ClCompile>
    <WarningLevel>Level3</WarningLevel>
    <PrecompiledHeader>
    </PrecompiledHeader>
    <Optimization>MaxSpeed</Optimization>
    <FunctionLevelLinking>true</FunctionLevelLinking>
    <IntrinsicFunctions>true</IntrinsicFunctions>
    <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;GPUJPEG_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    <AdditionalIncludeDirectories>.;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  </ClCompile>
  <Link>
    <SubSystem>Windows</SubSystem>
    <GenerateDebugInformation>true</GenerateDebugInformation>
    <EnableCOMDATFolding>true</EnableCOMDATFolding>
    <OptimizeReferences>true</OptimizeReferences>
    <AdditionalDependencies>cudart.lib;%(AdditionalDependencies)</AdditionalDependencies>
  </Link>
  <CudaCompile>
    <!-- Same target list as the Debug configuration; the last pair is compute_35,sm_35 (was mistyped as sm_53). -->
    <CodeGeneration>compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;%(CodeGeneration)</CodeGeneration>
  </CudaCompile>
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_common.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_common_internal.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_decoder.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_decoder_internal.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_encoder.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_encoder_internal.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_reader.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_table.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_type.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_util.h" />
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_writer.h" />
|
||||
<ClInclude Include="src\gpujpeg_colorspace.h" />
|
||||
<ClInclude Include="src\gpujpeg_dct_cpu.h" />
|
||||
<ClInclude Include="src\gpujpeg_dct_gpu.h" />
|
||||
<ClInclude Include="src\gpujpeg_huffman_cpu_decoder.h" />
|
||||
<ClInclude Include="src\gpujpeg_huffman_cpu_encoder.h" />
|
||||
<ClInclude Include="src\gpujpeg_huffman_gpu_decoder.h" />
|
||||
<ClInclude Include="src\gpujpeg_huffman_gpu_encoder.h" />
|
||||
<ClInclude Include="src\gpujpeg_preprocessor.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\gpujpeg_common.cpp" />
|
||||
<ClCompile Include="src\gpujpeg_dct_cpu.cpp" />
|
||||
<ClCompile Include="src\gpujpeg_decoder.cpp" />
|
||||
<ClCompile Include="src\gpujpeg_encoder.cpp" />
|
||||
<ClCompile Include="src\gpujpeg_huffman_cpu_decoder.cpp" />
|
||||
<ClCompile Include="src\gpujpeg_huffman_cpu_encoder.cpp" />
|
||||
<ClCompile Include="src\gpujpeg_reader.cpp" />
|
||||
<ClCompile Include="src\gpujpeg_table.cpp" />
|
||||
<ClCompile Include="src\gpujpeg_writer.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="src\gpujpeg_dct_gpu.cu" />
|
||||
<CudaCompile Include="src\gpujpeg_huffman_gpu_decoder.cu" />
|
||||
<CudaCompile Include="src\gpujpeg_huffman_gpu_encoder.cu" />
|
||||
<CudaCompile Include="src\gpujpeg_preprocessor.cu" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
122
gpujpeg/gpujpeg.vcxproj.filters
Normal file
122
gpujpeg/gpujpeg.vcxproj.filters
Normal file
@@ -0,0 +1,122 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="Source Files">
|
||||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Header Files">
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Resource Files">
|
||||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_common.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_common_internal.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_decoder.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_decoder_internal.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_encoder.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_encoder_internal.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_reader.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_table.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_type.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_util.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="libgpujpeg\gpujpeg_writer.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="src\gpujpeg_colorspace.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="src\gpujpeg_dct_cpu.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="src\gpujpeg_dct_gpu.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="src\gpujpeg_huffman_cpu_decoder.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="src\gpujpeg_huffman_cpu_encoder.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="src\gpujpeg_huffman_gpu_decoder.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="src\gpujpeg_huffman_gpu_encoder.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="src\gpujpeg_preprocessor.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="src\gpujpeg_common.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\gpujpeg_dct_cpu.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\gpujpeg_decoder.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\gpujpeg_encoder.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\gpujpeg_huffman_cpu_decoder.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\gpujpeg_huffman_cpu_encoder.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\gpujpeg_reader.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\gpujpeg_table.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\gpujpeg_writer.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="src\gpujpeg_dct_gpu.cu">
|
||||
<Filter>Source Files</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="src\gpujpeg_huffman_gpu_decoder.cu">
|
||||
<Filter>Source Files</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="src\gpujpeg_huffman_gpu_encoder.cu">
|
||||
<Filter>Source Files</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="src\gpujpeg_preprocessor.cu">
|
||||
<Filter>Source Files</Filter>
|
||||
</CudaCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -37,6 +37,16 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined _MSC_VER || defined __MINGW32__
|
||||
#ifdef GPUJPEG_EXPORTS
|
||||
#define GPUJPEG_API __declspec(dllexport)
|
||||
#else
|
||||
#define GPUJPEG_API __declspec(dllimport)
|
||||
#endif
|
||||
#else // other platforms
|
||||
#define GPUJPEG_API
|
||||
#endif
|
||||
|
||||
/** Marker used as segment info */
|
||||
#define GPUJPEG_MARKER_SEGMENT_INFO GPUJPEG_MARKER_APP13
|
||||
|
||||
@@ -80,7 +90,7 @@ struct gpujpeg_devices_info
|
||||
*
|
||||
* @return devices info
|
||||
*/
|
||||
struct gpujpeg_devices_info
|
||||
GPUJPEG_API struct gpujpeg_devices_info
|
||||
gpujpeg_get_devices_info();
|
||||
|
||||
/**
|
||||
@@ -88,7 +98,7 @@ gpujpeg_get_devices_info();
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_print_devices_info();
|
||||
|
||||
/**
|
||||
@@ -99,7 +109,7 @@ gpujpeg_print_devices_info();
|
||||
* enable OpenGL interoperability (GPUJPEG_OPENGL_INTEROPERABILITY)
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_init_device(int device_id, int flags);
|
||||
|
||||
/**
|
||||
@@ -144,7 +154,7 @@ struct gpujpeg_parameters
|
||||
* @param param Parameters for JPEG coder
|
||||
* @return void
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_set_default_parameters(struct gpujpeg_parameters* param);
|
||||
|
||||
/**
|
||||
@@ -153,7 +163,7 @@ gpujpeg_set_default_parameters(struct gpujpeg_parameters* param);
|
||||
* @param param Parameters for coder
|
||||
* @return void
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_parameters_chroma_subsampling(struct gpujpeg_parameters* param);
|
||||
|
||||
/**
|
||||
@@ -181,7 +191,7 @@ struct gpujpeg_image_parameters {
|
||||
* @param param Parameters for image
|
||||
* @return void
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_image_set_default_parameters(struct gpujpeg_image_parameters* param);
|
||||
|
||||
/** Image file formats */
|
||||
@@ -206,9 +216,16 @@ enum gpujpeg_image_file_format {
|
||||
* @param filename Filename of image file
|
||||
* @return image_file_format or GPUJPEG_IMAGE_FILE_UNKNOWN if type cannot be determined
|
||||
*/
|
||||
enum gpujpeg_image_file_format
|
||||
GPUJPEG_API enum gpujpeg_image_file_format
|
||||
gpujpeg_image_get_file_format(const char* filename);
|
||||
|
||||
/**
|
||||
* Sets cuda device.
|
||||
*
|
||||
* @param index Index of the CUDA device to be activated.
|
||||
*/
|
||||
GPUJPEG_API void gpujpeg_set_device(int index);
|
||||
|
||||
/**
|
||||
* JPEG segment structure. Segment is data in scan generated by huffman coder
|
||||
* for N consecutive MCUs, where N is restart interval (e.g. data for MCUs between
|
||||
@@ -428,7 +445,7 @@ gpujpeg_coder_deinit(struct gpujpeg_coder* coder);
|
||||
* @param param Image parameters
|
||||
* @return calculated size
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_image_calculate_size(struct gpujpeg_image_parameters* param);
|
||||
|
||||
/**
|
||||
@@ -439,7 +456,7 @@ gpujpeg_image_calculate_size(struct gpujpeg_image_parameters* param);
|
||||
* @param image_size Image data buffer size (can be specified for verification or 0 for retrieval)
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_image_load_from_file(const char* filename, uint8_t** image, int* image_size);
|
||||
|
||||
/**
|
||||
@@ -450,7 +467,7 @@ gpujpeg_image_load_from_file(const char* filename, uint8_t** image, int* image_s
|
||||
* @param image_size Image data buffer size
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_image_save_to_file(const char* filename, uint8_t* image, int image_size);
|
||||
|
||||
/**
|
||||
@@ -459,7 +476,7 @@ gpujpeg_image_save_to_file(const char* filename, uint8_t* image, int image_size)
|
||||
* @param image Image data buffer
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_image_destroy(uint8_t* image);
|
||||
|
||||
/**
|
||||
@@ -470,7 +487,7 @@ gpujpeg_image_destroy(uint8_t* image);
|
||||
* @param height
|
||||
* @param sampling_factor
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_image_range_info(const char* filename, int width, int height, enum gpujpeg_sampling_factor sampling_factor);
|
||||
|
||||
/**
|
||||
@@ -481,7 +498,7 @@ gpujpeg_image_range_info(const char* filename, int width, int height, enum gpujp
|
||||
* @param param_image_from
|
||||
* @param param_image_to
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_image_convert(const char* input, const char* output, struct gpujpeg_image_parameters param_image_from,
|
||||
struct gpujpeg_image_parameters param_image_to);
|
||||
|
||||
@@ -490,7 +507,7 @@ gpujpeg_image_convert(const char* input, const char* output, struct gpujpeg_imag
|
||||
*
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_opengl_init();
|
||||
|
||||
/**
|
||||
@@ -501,7 +518,7 @@ gpujpeg_opengl_init();
|
||||
* @param data
|
||||
* @return nonzero texture id if succeeds, otherwise 0
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_opengl_texture_create(int width, int height, uint8_t* data);
|
||||
|
||||
/**
|
||||
@@ -511,7 +528,7 @@ gpujpeg_opengl_texture_create(int width, int height, uint8_t* data);
|
||||
* @param data
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_opengl_texture_set_data(int texture_id, uint8_t* data);
|
||||
|
||||
/**
|
||||
@@ -522,7 +539,7 @@ gpujpeg_opengl_texture_set_data(int texture_id, uint8_t* data);
|
||||
* @param data_size
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_opengl_texture_get_data(int texture_id, uint8_t* data, int* data_size);
|
||||
|
||||
/**
|
||||
@@ -530,7 +547,7 @@ gpujpeg_opengl_texture_get_data(int texture_id, uint8_t* data, int* data_size);
|
||||
*
|
||||
* @param texture_id
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_opengl_texture_destroy(int texture_id);
|
||||
|
||||
/**
|
||||
@@ -592,7 +609,7 @@ struct gpujpeg_opengl_texture
|
||||
* @param texture_id
|
||||
* @return allocated registered texture structure
|
||||
*/
|
||||
struct gpujpeg_opengl_texture*
|
||||
GPUJPEG_API struct gpujpeg_opengl_texture*
|
||||
gpujpeg_opengl_texture_register(int texture_id, enum gpujpeg_opengl_texture_type texture_type);
|
||||
|
||||
/**
|
||||
@@ -601,7 +618,7 @@ gpujpeg_opengl_texture_register(int texture_id, enum gpujpeg_opengl_texture_type
|
||||
*
|
||||
* @param texture
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_opengl_texture_unregister(struct gpujpeg_opengl_texture* texture);
|
||||
|
||||
/**
|
||||
@@ -627,81 +644,6 @@ gpujpeg_opengl_texture_map(struct gpujpeg_opengl_texture* texture, int* data_siz
|
||||
void
|
||||
gpujpeg_opengl_texture_unmap(struct gpujpeg_opengl_texture* texture);
|
||||
|
||||
/**
|
||||
* Declare timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_DECLARE(name) \
|
||||
cudaEvent_t name ## _start__; \
|
||||
cudaEvent_t name ## _stop__; \
|
||||
float name ## _elapsedTime__; \
|
||||
|
||||
/**
|
||||
* Create timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_CREATE(name) \
|
||||
cudaEventCreate(&name ## _start__); \
|
||||
cudaEventCreate(&name ## _stop__); \
|
||||
|
||||
/**
|
||||
* Start timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_START(name) \
|
||||
cudaEventRecord(name ## _start__, 0) \
|
||||
|
||||
/**
|
||||
* Stop timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_STOP(name) \
|
||||
cudaEventRecord(name ## _stop__, 0); \
|
||||
cudaEventSynchronize(name ## _stop__); \
|
||||
cudaEventElapsedTime(&name ## _elapsedTime__, name ## _start__, name ## _stop__) \
|
||||
|
||||
/**
|
||||
* Get duration for timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_DURATION(name) name ## _elapsedTime__
|
||||
|
||||
/**
|
||||
* Stop timer and print result
|
||||
*
|
||||
* @param name
|
||||
* @param text
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_STOP_PRINT(name, text) \
|
||||
GPUJPEG_CUSTOM_TIMER_STOP(name); \
|
||||
printf("%s %f ms\n", text, name ## _elapsedTime__) \
|
||||
|
||||
/**
|
||||
* Destroy timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_DESTROY(name) \
|
||||
cudaEventDestroy(name ## _start__); \
|
||||
cudaEventDestroy(name ## _stop__); \
|
||||
|
||||
/**
|
||||
* Default timer implementation
|
||||
*/
|
||||
#define GPUJPEG_TIMER_INIT() \
|
||||
GPUJPEG_CUSTOM_TIMER_DECLARE(def) \
|
||||
GPUJPEG_CUSTOM_TIMER_CREATE(def)
|
||||
#define GPUJPEG_TIMER_START() GPUJPEG_CUSTOM_TIMER_START(def)
|
||||
#define GPUJPEG_TIMER_STOP() GPUJPEG_CUSTOM_TIMER_STOP(def)
|
||||
#define GPUJPEG_TIMER_DURATION() GPUJPEG_CUSTOM_TIMER_DURATION(def)
|
||||
#define GPUJPEG_TIMER_STOP_PRINT(text) GPUJPEG_CUSTOM_TIMER_STOP_PRINT(def, text)
|
||||
#define GPUJPEG_TIMER_DEINIT() GPUJPEG_CUSTOM_TIMER_DESTROY(def)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
110
gpujpeg/libgpujpeg/gpujpeg_common_internal.h
Normal file
110
gpujpeg/libgpujpeg/gpujpeg_common_internal.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/**
|
||||
* Copyright (c) 2011, CESNET z.s.p.o
|
||||
* Copyright (c) 2011, Silicon Genome, LLC.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef GPUJPEG_COMMON_INTERNAL_H
|
||||
#define GPUJPEG_COMMON_INTERNAL_H
|
||||
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
/**
|
||||
* Declare timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_DECLARE(name) \
|
||||
cudaEvent_t name ## _start__; \
|
||||
cudaEvent_t name ## _stop__; \
|
||||
float name ## _elapsedTime__; \
|
||||
|
||||
/**
|
||||
* Create timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_CREATE(name) \
|
||||
cudaEventCreate(&name ## _start__); \
|
||||
cudaEventCreate(&name ## _stop__); \
|
||||
|
||||
/**
|
||||
* Start timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_START(name) \
|
||||
cudaEventRecord(name ## _start__, 0) \
|
||||
|
||||
/**
|
||||
* Stop timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_STOP(name) \
|
||||
cudaEventRecord(name ## _stop__, 0); \
|
||||
cudaEventSynchronize(name ## _stop__); \
|
||||
cudaEventElapsedTime(&name ## _elapsedTime__, name ## _start__, name ## _stop__) \
|
||||
|
||||
/**
|
||||
* Get duration for timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_DURATION(name) name ## _elapsedTime__
|
||||
|
||||
/**
|
||||
* Stop timer and print result
|
||||
*
|
||||
* @param name
|
||||
* @param text
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_STOP_PRINT(name, text) \
|
||||
GPUJPEG_CUSTOM_TIMER_STOP(name); \
|
||||
printf("%s %f ms\n", text, name ## _elapsedTime__) \
|
||||
|
||||
/**
|
||||
* Destroy timer
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define GPUJPEG_CUSTOM_TIMER_DESTROY(name) \
|
||||
cudaEventDestroy(name ## _start__); \
|
||||
cudaEventDestroy(name ## _stop__); \
|
||||
|
||||
/**
|
||||
* Default timer implementation
|
||||
*/
|
||||
#define GPUJPEG_TIMER_INIT() \
|
||||
GPUJPEG_CUSTOM_TIMER_DECLARE(def) \
|
||||
GPUJPEG_CUSTOM_TIMER_CREATE(def)
|
||||
#define GPUJPEG_TIMER_START() GPUJPEG_CUSTOM_TIMER_START(def)
|
||||
#define GPUJPEG_TIMER_STOP() GPUJPEG_CUSTOM_TIMER_STOP(def)
|
||||
#define GPUJPEG_TIMER_DURATION() GPUJPEG_CUSTOM_TIMER_DURATION(def)
|
||||
#define GPUJPEG_TIMER_STOP_PRINT(text) GPUJPEG_CUSTOM_TIMER_STOP_PRINT(def, text)
|
||||
#define GPUJPEG_TIMER_DEINIT() GPUJPEG_CUSTOM_TIMER_DESTROY(def)
|
||||
|
||||
#endif // GPUJPEG_COMMON_INTERNAL_H
|
||||
@@ -30,14 +30,26 @@
|
||||
#ifndef GPUJPEG_DECODER_H
|
||||
#define GPUJPEG_DECODER_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <libgpujpeg/gpujpeg_common.h>
|
||||
#include <libgpujpeg/gpujpeg_table.h>
|
||||
#include <libgpujpeg/gpujpeg_reader.h>
|
||||
#include <libgpujpeg/gpujpeg_type.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined _MSC_VER || defined __MINGW32__
|
||||
#ifdef GPUJPEG_EXPORTS
|
||||
#define GPUJPEG_API __declspec(dllexport)
|
||||
#else
|
||||
#define GPUJPEG_API __declspec(dllimport)
|
||||
#endif
|
||||
#else // other platforms
|
||||
#define GPUJPEG_API
|
||||
#endif
|
||||
|
||||
struct gpujpeg_decoder;
|
||||
|
||||
/**
|
||||
* Decoder output type
|
||||
*/
|
||||
@@ -76,7 +88,7 @@ struct gpujpeg_decoder_output
|
||||
* @param output Decoder output structure
|
||||
* @return void
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_decoder_output_set_default(struct gpujpeg_decoder_output* output);
|
||||
|
||||
/**
|
||||
@@ -86,7 +98,7 @@ gpujpeg_decoder_output_set_default(struct gpujpeg_decoder_output* output);
|
||||
* @param custom_buffer Custom buffer
|
||||
* @return void
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_decoder_output_set_custom(struct gpujpeg_decoder_output* output, uint8_t* custom_buffer);
|
||||
|
||||
/**
|
||||
@@ -95,7 +107,7 @@ gpujpeg_decoder_output_set_custom(struct gpujpeg_decoder_output* output, uint8_t
|
||||
* @param output Decoder output structure
|
||||
* @return void
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_decoder_output_set_texture(struct gpujpeg_decoder_output* output, struct gpujpeg_opengl_texture* texture);
|
||||
|
||||
/**
|
||||
@@ -103,39 +115,9 @@ gpujpeg_decoder_output_set_texture(struct gpujpeg_decoder_output* output, struct
|
||||
*
|
||||
* @param output Decoder output structure
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_decoder_output_set_cuda_buffer(struct gpujpeg_decoder_output* output);
|
||||
|
||||
/**
|
||||
* JPEG decoder structure
|
||||
*/
|
||||
struct gpujpeg_decoder
|
||||
{
|
||||
// JPEG coder structure
|
||||
struct gpujpeg_coder coder;
|
||||
|
||||
// JPEG reader structure
|
||||
struct gpujpeg_reader* reader;
|
||||
|
||||
// Quantization tables
|
||||
struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT];
|
||||
|
||||
// Huffman coder tables
|
||||
struct gpujpeg_table_huffman_decoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
|
||||
// Huffman coder tables in device memory
|
||||
struct gpujpeg_table_huffman_decoder* d_table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
|
||||
|
||||
// Current segment count for decoded image
|
||||
int segment_count;
|
||||
|
||||
// Current data compressed size for decoded image
|
||||
int data_compressed_size;
|
||||
|
||||
// Timers
|
||||
GPUJPEG_CUSTOM_TIMER_DECLARE(def)
|
||||
GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu)
|
||||
};
|
||||
|
||||
/**
|
||||
* Create JPEG decoder
|
||||
*
|
||||
@@ -143,7 +125,7 @@ struct gpujpeg_decoder
|
||||
* @param param_image Parameters for image data
|
||||
* @return decoder structure if succeeds, otherwise NULL
|
||||
*/
|
||||
struct gpujpeg_decoder*
|
||||
GPUJPEG_API struct gpujpeg_decoder*
|
||||
gpujpeg_decoder_create();
|
||||
|
||||
/**
|
||||
@@ -154,7 +136,7 @@ gpujpeg_decoder_create();
|
||||
* @param param_image Parameters for image data
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, struct gpujpeg_parameters* param, struct gpujpeg_image_parameters* param_image);
|
||||
|
||||
/**
|
||||
@@ -167,7 +149,7 @@ gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, struct gpujpeg_parameters*
|
||||
* @param image_decompressed_size Pointer to variable where decompressed image size will be placed
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int image_size, struct gpujpeg_decoder_output* output);
|
||||
|
||||
/**
|
||||
@@ -176,9 +158,21 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int imag
|
||||
* @param decoder Decoder structure
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder);
|
||||
|
||||
/**
|
||||
* Sets output format
|
||||
*
|
||||
* @param decoder Decoder structure
|
||||
* @param color_space Requested output color space
|
||||
* @param sampling_factor Requested color sampling factor
|
||||
*/
|
||||
GPUJPEG_API void
|
||||
gpujpeg_decoder_set_output_format(struct gpujpeg_decoder* decoder,
|
||||
enum gpujpeg_color_space color_space,
|
||||
enum gpujpeg_sampling_factor sampling_factor);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
69
gpujpeg/libgpujpeg/gpujpeg_decoder_internal.h
Normal file
69
gpujpeg/libgpujpeg/gpujpeg_decoder_internal.h
Normal file
@@ -0,0 +1,69 @@
|
||||
/**
|
||||
* Copyright (c) 2011, CESNET z.s.p.o
|
||||
* Copyright (c) 2011, Silicon Genome, LLC.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef GPUJPEG_DECODER_INTERNAL_H
|
||||
#define GPUJPEG_DECODER_INTERNAL_H
|
||||
|
||||
#include <libgpujpeg/gpujpeg_common.h>
|
||||
#include <libgpujpeg/gpujpeg_common_internal.h>
|
||||
#include <libgpujpeg/gpujpeg_table.h>
|
||||
#include <libgpujpeg/gpujpeg_reader.h>
|
||||
|
||||
/**
|
||||
* JPEG decoder structure
|
||||
*/
|
||||
struct gpujpeg_decoder
|
||||
{
// Full definition of the decoder state. The public header gpujpeg_decoder.h
// only forward-declares `struct gpujpeg_decoder`, so code including just the
// public header sees an incomplete type.
// NOTE(review): presumably split out so public headers need not pull in CUDA
// headers (the timer members below are cudaEvent_t) -- confirm.
|
||||
// JPEG coder structure
|
||||
struct gpujpeg_coder coder;
|
||||
|
||||
// JPEG reader structure
|
||||
struct gpujpeg_reader* reader;
|
||||
|
||||
// Quantization tables, one per component type
|
||||
struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT];
|
||||
|
||||
// Huffman coder tables, indexed [component type][huffman type]
|
||||
struct gpujpeg_table_huffman_decoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
|
||||
// Huffman coder tables in device memory (same indexing, stored as pointers)
|
||||
struct gpujpeg_table_huffman_decoder* d_table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
|
||||
|
||||
// Current segment count for decoded image
|
||||
int segment_count;
|
||||
|
||||
// Current data compressed size for decoded image
|
||||
int data_compressed_size;
|
||||
|
||||
// Timers. Each GPUJPEG_CUSTOM_TIMER_DECLARE(name) (gpujpeg_common_internal.h)
// expands to three members, semicolons included: cudaEvent_t name_start__,
// cudaEvent_t name_stop__ and float name_elapsedTime__ -- hence no ';' below.
|
||||
GPUJPEG_CUSTOM_TIMER_DECLARE(def)
|
||||
GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu)
|
||||
};
|
||||
|
||||
#endif // GPUJPEG_DECODER_INTERNAL_H
|
||||
|
||||
@@ -31,13 +31,24 @@
|
||||
#define GPUJPEG_ENCODER_H
|
||||
|
||||
#include <libgpujpeg/gpujpeg_common.h>
|
||||
#include <libgpujpeg/gpujpeg_table.h>
|
||||
#include <libgpujpeg/gpujpeg_writer.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined _MSC_VER || defined __MINGW32__
|
||||
#ifdef GPUJPEG_EXPORTS
|
||||
#define GPUJPEG_API __declspec(dllexport)
|
||||
#else
|
||||
#define GPUJPEG_API __declspec(dllimport)
|
||||
#endif
|
||||
#else // other platforms
|
||||
#define GPUJPEG_API
|
||||
#endif
|
||||
|
||||
struct gpujpeg_encoder;
|
||||
|
||||
/**
|
||||
* Encoder input type
|
||||
*/
|
||||
@@ -70,7 +81,7 @@ struct gpujpeg_encoder_input
|
||||
* @param image Input image data
|
||||
* @return void
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_encoder_input_set_image(struct gpujpeg_encoder_input* input, uint8_t* image);
|
||||
|
||||
/**
|
||||
@@ -80,31 +91,9 @@ gpujpeg_encoder_input_set_image(struct gpujpeg_encoder_input* input, uint8_t* im
|
||||
* @param texture_id OpenGL texture id
|
||||
* @return void
|
||||
*/
|
||||
void
|
||||
GPUJPEG_API void
|
||||
gpujpeg_encoder_input_set_texture(struct gpujpeg_encoder_input* input, struct gpujpeg_opengl_texture* texture);
|
||||
|
||||
/**
|
||||
* JPEG encoder structure
|
||||
*/
|
||||
struct gpujpeg_encoder
|
||||
{
|
||||
// JPEG coder structure
|
||||
struct gpujpeg_coder coder;
|
||||
|
||||
// JPEG writer structure
|
||||
struct gpujpeg_writer* writer;
|
||||
|
||||
// Quantization tables
|
||||
struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT];
|
||||
|
||||
// Huffman coder tables
|
||||
struct gpujpeg_table_huffman_encoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
|
||||
|
||||
// Timers
|
||||
GPUJPEG_CUSTOM_TIMER_DECLARE(def)
|
||||
GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu)
|
||||
};
|
||||
|
||||
/**
|
||||
* Create JPEG encoder
|
||||
*
|
||||
@@ -112,7 +101,7 @@ struct gpujpeg_encoder
|
||||
* @param param_image Parameters for image data
|
||||
* @return encoder structure if succeeds, otherwise NULL
|
||||
*/
|
||||
struct gpujpeg_encoder*
|
||||
GPUJPEG_API struct gpujpeg_encoder*
|
||||
gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct gpujpeg_image_parameters* param_image);
|
||||
|
||||
/**
|
||||
@@ -124,7 +113,7 @@ gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct gpujpeg_image_pa
|
||||
* @param image_compressed_size Pointer to variable where compressed image size will be placed
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_encoder_input* input, uint8_t** image_compressed, int* image_compressed_size);
|
||||
|
||||
/**
|
||||
@@ -133,7 +122,7 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_encoder_i
|
||||
* @param encoder Encoder structure
|
||||
* @return 0 if succeeds, otherwise nonzero
|
||||
*/
|
||||
int
|
||||
GPUJPEG_API int
|
||||
gpujpeg_encoder_destroy(struct gpujpeg_encoder* encoder);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
65
gpujpeg/libgpujpeg/gpujpeg_encoder_internal.h
Normal file
65
gpujpeg/libgpujpeg/gpujpeg_encoder_internal.h
Normal file
@@ -0,0 +1,65 @@
|
||||
/**
|
||||
* Copyright (c) 2011, CESNET z.s.p.o
|
||||
* Copyright (c) 2011, Silicon Genome, LLC.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef GPUJPEG_ENCODER_INTERNAL_H
|
||||
#define GPUJPEG_ENCODER_INTERNAL_H
|
||||
|
||||
#include <libgpujpeg/gpujpeg_common.h>
|
||||
#include <libgpujpeg/gpujpeg_common_internal.h>
|
||||
#include <libgpujpeg/gpujpeg_table.h>
|
||||
#include <libgpujpeg/gpujpeg_writer.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
 * JPEG encoder structure
 *
 * Full definition of the encoder state. The public header gpujpeg_encoder.h
 * only forward-declares `struct gpujpeg_encoder`, so code including just the
 * public header sees an incomplete type.
 * NOTE(review): presumably split out so public headers need not pull in CUDA
 * headers (the timer members below are cudaEvent_t) -- confirm.
 */
struct gpujpeg_encoder
|
||||
{
|
||||
// JPEG coder structure
|
||||
struct gpujpeg_coder coder;
|
||||
|
||||
// JPEG writer structure
|
||||
struct gpujpeg_writer* writer;
|
||||
|
||||
// Quantization tables, one per component type
|
||||
struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT];
|
||||
|
||||
// Huffman coder tables, indexed [component type][huffman type]
|
||||
struct gpujpeg_table_huffman_encoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT];
|
||||
|
||||
// Timers. Each GPUJPEG_CUSTOM_TIMER_DECLARE(name) (gpujpeg_common_internal.h)
// expands to three members, semicolons included: cudaEvent_t name_start__,
// cudaEvent_t name_stop__ and float name_elapsedTime__ -- hence no ';' below.
|
||||
GPUJPEG_CUSTOM_TIMER_DECLARE(def)
|
||||
GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu)
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // GPUJPEG_ENCODER_INTERNAL_H
|
||||
@@ -31,7 +31,6 @@
|
||||
#define GPUJPEG_TYPE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -71,7 +70,7 @@ enum gpujpeg_color_space {
|
||||
*
|
||||
* @param color_space
|
||||
*/
|
||||
static inline __device__ __host__ const char*
|
||||
static inline const char*
|
||||
gpujpeg_color_space_get_name(enum gpujpeg_color_space color_space)
|
||||
{
|
||||
switch ( color_space ) {
|
||||
@@ -199,6 +198,9 @@ enum gpujpeg_marker_code {
|
||||
GPUJPEG_MARKER_ERROR = 0x100
|
||||
};
|
||||
|
||||
static const char*
|
||||
gpujpeg_marker_name(enum gpujpeg_marker_code code) __attribute__((unused));
|
||||
|
||||
/**
|
||||
* Get marker name from code
|
||||
*
|
||||
|
||||
@@ -34,9 +34,8 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <assert.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
||||
@@ -31,6 +31,8 @@
|
||||
#include "config.h"
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
|
||||
#include <algorithm>
|
||||
#include <ctype.h>
|
||||
#include <libgpujpeg/gpujpeg_common.h>
|
||||
#include <libgpujpeg/gpujpeg_util.h>
|
||||
#include "gpujpeg_preprocessor.h"
|
||||
@@ -39,8 +41,8 @@
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glx.h>
|
||||
#include <cuda_gl_interop.h>
|
||||
#endif
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
// rounds number of segment bytes up to next multiple of 128
|
||||
#define SEGMENT_ALIGN(b) (((b) + 127) & ~127)
|
||||
@@ -142,10 +144,12 @@ gpujpeg_init_device(int device_id, int flags)
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef GPUJPEG_USE_OPENGL
|
||||
if ( flags & GPUJPEG_OPENGL_INTEROPERABILITY ) {
|
||||
cudaGLSetGLDevice(device_id);
|
||||
gpujpeg_cuda_check_error("Enabling OpenGL interoperability");
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( flags & GPUJPEG_VERBOSE ) {
|
||||
int cuda_driver_version = 0;
|
||||
@@ -234,18 +238,26 @@ gpujpeg_image_get_file_format(const char* filename)
|
||||
GPUJPEG_IMAGE_FILE_JPEG
|
||||
};
|
||||
|
||||
char * ext = strrchr(filename, '.');
|
||||
const char * ext = strrchr(filename, '.');
|
||||
if ( ext == NULL )
|
||||
return -1;
|
||||
return GPUJPEG_IMAGE_FILE_UNKNOWN;
|
||||
ext++;
|
||||
char ext_lc[3];
|
||||
strncpy(ext_lc, ext, 3);
|
||||
std::transform(ext_lc, ext_lc + sizeof(ext_lc), ext_lc, ::tolower);
|
||||
for ( int i = 0; i < sizeof(format) / sizeof(*format); i++ ) {
|
||||
if ( strncasecmp(ext, extension[i], 3) == 0 ) {
|
||||
if ( strncmp(ext_lc, extension[i], 3) == 0 ) {
|
||||
return format[i];
|
||||
}
|
||||
}
|
||||
return GPUJPEG_IMAGE_FILE_UNKNOWN;
|
||||
}
|
||||
|
||||
void gpujpeg_set_device(int index)
|
||||
{
|
||||
cudaSetDevice(index);
|
||||
}
|
||||
|
||||
/** Documented at declaration */
|
||||
void
|
||||
gpujpeg_component_print8(struct gpujpeg_component* component, uint8_t* d_data)
|
||||
@@ -226,7 +226,7 @@ gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder)
|
||||
int index = y * width + x;
|
||||
gpujpeg_idct_cpu_perform(
|
||||
&component->data_quantized[index * 64],
|
||||
decoder->table_quantization[type].table
|
||||
(int16_t *) decoder->table_quantization[type].table
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -30,8 +30,8 @@
|
||||
#ifndef GPUJPEG_DCT_CPU_H
|
||||
#define GPUJPEG_DCT_CPU_H
|
||||
|
||||
#include <libgpujpeg/gpujpeg_encoder.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder.h>
|
||||
#include <libgpujpeg/gpujpeg_encoder_internal.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder_internal.h>
|
||||
|
||||
/**
|
||||
* Perform inverse DCT on CPU
|
||||
|
||||
@@ -30,8 +30,8 @@
|
||||
#ifndef GPUJPEG_DCT_GPU_H
|
||||
#define GPUJPEG_DCT_GPU_H
|
||||
|
||||
#include <libgpujpeg/gpujpeg_encoder.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder.h>
|
||||
#include <libgpujpeg/gpujpeg_encoder_internal.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder_internal.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
*/
|
||||
|
||||
#include <libgpujpeg/gpujpeg_decoder.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder_internal.h>
|
||||
#include "gpujpeg_preprocessor.h"
|
||||
#include "gpujpeg_dct_cpu.h"
|
||||
#include "gpujpeg_dct_gpu.h"
|
||||
@@ -77,7 +78,7 @@ gpujpeg_decoder_output_set_cuda_buffer(struct gpujpeg_decoder_output* output)
|
||||
struct gpujpeg_decoder*
|
||||
gpujpeg_decoder_create()
|
||||
{
|
||||
struct gpujpeg_decoder* decoder = malloc(sizeof(struct gpujpeg_decoder));
|
||||
struct gpujpeg_decoder* decoder = (struct gpujpeg_decoder*) malloc(sizeof(struct gpujpeg_decoder));
|
||||
if ( decoder == NULL )
|
||||
return NULL;
|
||||
|
||||
@@ -336,6 +337,15 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int imag
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
gpujpeg_decoder_set_output_format(struct gpujpeg_decoder* decoder,
|
||||
enum gpujpeg_color_space color_space,
|
||||
enum gpujpeg_sampling_factor sampling_factor)
|
||||
{
|
||||
decoder->coder.param_image.color_space = color_space;
|
||||
decoder->coder.param_image.sampling_factor = sampling_factor;
|
||||
}
|
||||
|
||||
/** Documented at declaration */
|
||||
int
|
||||
gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder)
|
||||
@@ -63,7 +63,7 @@ gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct gpujpeg_image_pa
|
||||
assert(param->restart_interval >= 0);
|
||||
assert(param->interleaved == 0 || param->interleaved == 1);
|
||||
|
||||
struct gpujpeg_encoder* encoder = malloc(sizeof(struct gpujpeg_encoder));
|
||||
struct gpujpeg_encoder* encoder = (struct gpujpeg_encoder*) malloc(sizeof(struct gpujpeg_encoder));
|
||||
if ( encoder == NULL )
|
||||
return NULL;
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#ifndef GPUJPEG_HUFFMAN_CPU_DECODER_H
|
||||
#define GPUJPEG_HUFFMAN_CPU_DECODER_H
|
||||
|
||||
#include <libgpujpeg/gpujpeg_decoder.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder_internal.h>
|
||||
|
||||
/**
|
||||
* Perform huffman decoding
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#ifndef GPUJPEG_HUFFMAN_CPU_ENCODER_H
|
||||
#define GPUJPEG_HUFFMAN_CPU_ENCODER_H
|
||||
|
||||
#include <libgpujpeg/gpujpeg_encoder.h>
|
||||
#include <libgpujpeg/gpujpeg_encoder_internal.h>
|
||||
|
||||
/**
|
||||
* Perform huffman encoding
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#ifndef GPUJPEG_HUFFMAN_GPU_DECODER_H
|
||||
#define GPUJPEG_HUFFMAN_GPU_DECODER_H
|
||||
|
||||
#include <libgpujpeg/gpujpeg_decoder.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder_internal.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#ifndef GPUJPEG_HUFFMAN_GPU_ENCODER_H
|
||||
#define GPUJPEG_HUFFMAN_GPU_ENCODER_H
|
||||
|
||||
#include <libgpujpeg/gpujpeg_encoder.h>
|
||||
#include <libgpujpeg/gpujpeg_encoder_internal.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
||||
@@ -30,8 +30,8 @@
|
||||
#ifndef GPUJPEG_PREPROCESSOR_H
|
||||
#define GPUJPEG_PREPROCESSOR_H
|
||||
|
||||
#include <libgpujpeg/gpujpeg_encoder.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder.h>
|
||||
#include <libgpujpeg/gpujpeg_encoder_internal.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder_internal.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
||||
@@ -29,13 +29,15 @@
|
||||
|
||||
#include <libgpujpeg/gpujpeg_reader.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder.h>
|
||||
#include <libgpujpeg/gpujpeg_decoder_internal.h>
|
||||
#include <libgpujpeg/gpujpeg_util.h>
|
||||
|
||||
/** Documented at declaration */
|
||||
struct gpujpeg_reader*
|
||||
gpujpeg_reader_create()
|
||||
{
|
||||
struct gpujpeg_reader* reader = malloc(sizeof(struct gpujpeg_reader));
|
||||
struct gpujpeg_reader* reader = (struct gpujpeg_reader*)
|
||||
malloc(sizeof(struct gpujpeg_reader));
|
||||
if ( reader == NULL )
|
||||
return NULL;
|
||||
reader->comp_count = 0;
|
||||
@@ -122,7 +124,7 @@ gpujpeg_reader_read_app0(uint8_t** image)
|
||||
return -1;
|
||||
}
|
||||
|
||||
char jfif[4];
|
||||
char jfif[5];
|
||||
jfif[0] = gpujpeg_reader_read_byte(*image);
|
||||
jfif[1] = gpujpeg_reader_read_byte(*image);
|
||||
jfif[2] = gpujpeg_reader_read_byte(*image);
|
||||
@@ -29,20 +29,21 @@
|
||||
|
||||
#include <libgpujpeg/gpujpeg_writer.h>
|
||||
#include <libgpujpeg/gpujpeg_encoder.h>
|
||||
#include <libgpujpeg/gpujpeg_encoder_internal.h>
|
||||
#include <libgpujpeg/gpujpeg_util.h>
|
||||
|
||||
/** Documented at declaration */
|
||||
struct gpujpeg_writer*
|
||||
gpujpeg_writer_create(struct gpujpeg_encoder* encoder)
|
||||
{
|
||||
struct gpujpeg_writer* writer = malloc(sizeof(struct gpujpeg_writer));
|
||||
struct gpujpeg_writer* writer = (struct gpujpeg_writer*) malloc(sizeof(struct gpujpeg_writer));
|
||||
if ( writer == NULL )
|
||||
return NULL;
|
||||
|
||||
// Allocate output buffer
|
||||
int buffer_size = 1000;
|
||||
buffer_size += encoder->coder.param_image.width * encoder->coder.param_image.height * encoder->coder.param_image.comp_count * 2;
|
||||
writer->buffer = malloc(buffer_size * sizeof(uint8_t));
|
||||
writer->buffer = (uint8_t *) malloc(buffer_size * sizeof(uint8_t));
|
||||
if ( writer->buffer == NULL )
|
||||
return NULL;
|
||||
writer->buffer_current = NULL;
|
||||
@@ -27,6 +27,9 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <libgpujpeg/gpujpeg_common_internal.h> // TIMER
|
||||
#include <libgpujpeg/gpujpeg_encoder_internal.h> // TIMER
|
||||
#include <libgpujpeg/gpujpeg_decoder_internal.h> // TIMER
|
||||
#include <libgpujpeg/gpujpeg.h>
|
||||
#include <libgpujpeg/gpujpeg_util.h>
|
||||
#include <getopt.h>
|
||||
|
||||
@@ -114,6 +114,10 @@ static ssize_t write_all(fd_t fd, const void *buf, size_t count);
|
||||
static void * control_thread(void *args);
|
||||
static void send_response(fd_t fd, struct response *resp);
|
||||
|
||||
#ifndef HAVE_LINUX
|
||||
#define MSG_NOSIGNAL 0
|
||||
#endif
|
||||
|
||||
static ssize_t write_all(fd_t fd, const void *buf, size_t count)
|
||||
{
|
||||
char *p = (char *) buf;
|
||||
|
||||
119
src/cuda_wrapper.cu
Normal file
119
src/cuda_wrapper.cu
Normal file
@@ -0,0 +1,119 @@
|
||||
/**
|
||||
* @file cuda_wrapper.cu
|
||||
* @author Martin Pulec <pulec@cesnet.cz>
|
||||
*
|
||||
* @brief This file contains wrappers around CUDA functions.
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013 CESNET z.s.p.o.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, is permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of CESNET nor the names of its contributors may be
|
||||
* used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
|
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cuda_wrapper.h"
|
||||
|
||||
#include "cuda_runtime.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef void *cuda_wrapper_stream_t;
|
||||
|
||||
/**
 * Translate a CUDA runtime status into one of the wrapper's error codes.
 *
 * Only cudaSuccess has a dedicated mapping; every other status collapses to
 * CUDA_UNKNOWN_ERROR. Add a case below when a new wrapper code is introduced.
 *
 * @param cuda_error  status returned by a CUDA runtime call
 * @return CUDA_WRAPPER_SUCCESS for cudaSuccess, CUDA_UNKNOWN_ERROR otherwise
 */
static inline int map_cuda_error(cudaError_t cuda_error) {
        switch (cuda_error) {
        case cudaSuccess:
                return CUDA_WRAPPER_SUCCESS;
        default:
                // no wrapper-level equivalent for this status
                return CUDA_UNKNOWN_ERROR;
        }
}
|
||||
|
||||
static inline enum cudaMemcpyKind map_cuda_memcpy_kind(int our_kind) {
|
||||
struct kind_mapping {
|
||||
enum cudaMemcpyKind kind;
|
||||
int our_kind;
|
||||
};
|
||||
struct kind_mapping mapping[] = {
|
||||
{ cudaMemcpyHostToDevice, CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE },
|
||||
{ cudaMemcpyDeviceToHost, CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST },
|
||||
};
|
||||
|
||||
int i;
|
||||
for (i = 0; i < sizeof(mapping)/sizeof(struct kind_mapping); ++i) {
|
||||
if (our_kind == mapping[i].our_kind) {
|
||||
return mapping[i].kind;
|
||||
}
|
||||
}
|
||||
|
||||
abort(); // should not reach here
|
||||
};
|
||||
|
||||
/**
 * Release a buffer through cudaFree().
 *
 * NOTE(review): cudaFree() is the counterpart of cudaMalloc(); memory from
 * cudaMallocHost() is documented to be released with cudaFreeHost(). Confirm
 * that callers do not pass buffers from cuda_wrapper_malloc_host() here.
 *
 * @param buffer  pointer handed to cudaFree()
 * @return CUDA_WRAPPER_SUCCESS on success, CUDA_UNKNOWN_ERROR otherwise
 */
CUDA_DLL_API int cuda_wrapper_free(void *buffer)
{
        const cudaError_t status = cudaFree(buffer);
        return map_cuda_error(status);
}
|
||||
|
||||
/**
 * Allocate a buffer through cudaMalloc().
 *
 * @param[out] buffer    receives the pointer produced by cudaMalloc()
 * @param      data_len  requested size in bytes
 * @return CUDA_WRAPPER_SUCCESS on success, CUDA_UNKNOWN_ERROR otherwise
 */
CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len)
{
        const cudaError_t status = cudaMalloc(buffer, data_len);
        return map_cuda_error(status);
}
|
||||
|
||||
/**
 * Allocate a host buffer through cudaMallocHost().
 *
 * NOTE(review): the wrapper API declared in cuda_wrapper.h has no
 * cudaFreeHost() counterpart; confirm how callers are expected to release
 * buffers obtained here.
 *
 * @param[out] buffer    receives the pointer produced by cudaMallocHost()
 * @param      data_len  requested size in bytes
 * @return CUDA_WRAPPER_SUCCESS on success, CUDA_UNKNOWN_ERROR otherwise
 */
CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len)
{
        const cudaError_t status = cudaMallocHost(buffer, data_len);
        return map_cuda_error(status);
}
|
||||
|
||||
/**
 * Copy memory through the blocking cudaMemcpy() call.
 *
 * @param dst    destination pointer
 * @param src    source pointer
 * @param count  number of bytes to copy
 * @param kind   CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE or
 *               CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST; any other value aborts
 *               the process inside map_cuda_memcpy_kind()
 * @return CUDA_WRAPPER_SUCCESS on success, CUDA_UNKNOWN_ERROR otherwise
 */
CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src,
                size_t count, int kind)
{
        // Resolve the direction first; an invalid kind never reaches cudaMemcpy.
        const enum cudaMemcpyKind direction = map_cuda_memcpy_kind(kind);
        const cudaError_t status = cudaMemcpy(dst, src, count, direction);
        return map_cuda_error(status);
}
|
||||
|
||||
/**
 * Return the textual description of the last CUDA runtime error.
 *
 * Uses cudaGetLastError(), which per the CUDA runtime documentation also
 * resets the stored error, so a second call may describe cudaSuccess.
 *
 * @return string produced by cudaGetErrorString()
 */
CUDA_DLL_API const char *cuda_wrapper_last_error_string(void)
{
        const cudaError_t last_error = cudaGetLastError();
        return cudaGetErrorString(last_error);
}
|
||||
|
||||
/**
 * Select the CUDA device with the given index through cudaSetDevice().
 *
 * @param index  CUDA device index
 * @return CUDA_WRAPPER_SUCCESS on success, CUDA_UNKNOWN_ERROR otherwise
 */
CUDA_DLL_API int cuda_wrapper_set_device(int index)
{
        const cudaError_t status = cudaSetDevice(index);
        return map_cuda_error(status);
}
|
||||
|
||||
82
src/cuda_wrapper.h
Normal file
82
src/cuda_wrapper.h
Normal file
@@ -0,0 +1,82 @@
|
||||
/**
|
||||
* @file cuda_wrapper.h
|
||||
* @author Martin Pulec <pulec@cesnet.cz>
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013 CESNET z.s.p.o.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, is permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of CESNET nor the names of its contributors may be
|
||||
* used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
|
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CUDA_WRAPPER_H_
|
||||
#define CUDA_WRAPPER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
#if defined _MSC_VER || defined __MINGW32__
|
||||
#ifdef EXPORT_DLL_SYMBOLS
|
||||
#define CUDA_DLL_API __declspec(dllexport)
|
||||
#else
|
||||
#define CUDA_DLL_API __declspec(dllimport)
|
||||
#endif
|
||||
#else // other platforms
|
||||
#define CUDA_DLL_API
|
||||
#endif
|
||||
|
||||
/// @{
|
||||
#define CUDA_WRAPPER_SUCCESS 0
|
||||
#define CUDA_UNKNOWN_ERROR 1 ///< error for which there is no mapping in wrapper
|
||||
/// @}
|
||||
|
||||
/// @{
|
||||
#define CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE 0
|
||||
#define CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST 1
|
||||
/// @}
|
||||
|
||||
typedef void *cuda_wrapper_stream_t;
|
||||
|
||||
CUDA_DLL_API int cuda_wrapper_free(void *buffer);
|
||||
CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len);
|
||||
CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len);
|
||||
CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src,
|
||||
size_t count, int kind);
|
||||
CUDA_DLL_API const char *cuda_wrapper_last_error_string(void);
|
||||
CUDA_DLL_API int cuda_wrapper_set_device(int index);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // CUDA_WRAPPER_H_
|
||||
|
||||
@@ -1127,7 +1127,7 @@ int main(int argc, char *argv[])
|
||||
echo_cancellation = true;
|
||||
break;
|
||||
case OPT_CUDA_DEVICE:
|
||||
#ifdef HAVE_CUDA
|
||||
#ifdef HAVE_JPEG
|
||||
if(strcmp("help", optarg) == 0) {
|
||||
struct compress_state *compression;
|
||||
int ret = compress_init(&root_mod, "JPEG:list_devices", &compression);
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#include "video_compress/cuda_dxt.h"
|
||||
|
||||
#include "cuda_dxt/cuda_dxt.h"
|
||||
#include "cuda_wrapper.h"
|
||||
|
||||
#include "host.h"
|
||||
#include "module.h"
|
||||
@@ -57,12 +58,14 @@ struct state_video_compress_cuda_dxt {
|
||||
in_buffer = NULL;
|
||||
cuda_in_buffer = NULL;
|
||||
cuda_uyvy_buffer = NULL;
|
||||
cuda_out_buffer = NULL;
|
||||
}
|
||||
struct module module_data;
|
||||
struct video_desc saved_desc;
|
||||
char *in_buffer; ///< for decoded data
|
||||
char *cuda_uyvy_buffer; ///< same as in_buffer but in device memory
|
||||
char *cuda_in_buffer; ///< same as in_buffer but in device memory
|
||||
char *cuda_out_buffer; ///< same as in_buffer but in device memory
|
||||
struct video_frame *out[2];
|
||||
codec_t in_codec;
|
||||
codec_t out_codec;
|
||||
@@ -107,16 +110,20 @@ static void cleanup(struct state_video_compress_cuda_dxt *s)
|
||||
s->in_buffer = NULL;
|
||||
}
|
||||
if (s->cuda_uyvy_buffer) {
|
||||
cudaFree(s->cuda_uyvy_buffer);
|
||||
cuda_wrapper_free(s->cuda_uyvy_buffer);
|
||||
s->cuda_uyvy_buffer = NULL;
|
||||
}
|
||||
if (s->cuda_in_buffer) {
|
||||
cudaFree(s->cuda_in_buffer);
|
||||
cuda_wrapper_free(s->cuda_in_buffer);
|
||||
s->cuda_in_buffer = NULL;
|
||||
}
|
||||
if (s->cuda_out_buffer) {
|
||||
cuda_wrapper_free(s->cuda_out_buffer);
|
||||
s->cuda_out_buffer = NULL;
|
||||
}
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
if (s->out[i] != NULL) {
|
||||
cudaFree(s->out[i]->tiles[0].data);
|
||||
cuda_wrapper_free(s->out[i]->tiles[0].data);
|
||||
s->out[i]->tiles[0].data = NULL;
|
||||
}
|
||||
}
|
||||
@@ -138,7 +145,7 @@ static bool configure_with(struct state_video_compress_cuda_dxt *s, struct video
|
||||
}
|
||||
|
||||
if (s->in_codec == UYVY) {
|
||||
if (cudaSuccess != cudaMalloc((void **) &s->cuda_uyvy_buffer,
|
||||
if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc((void **) &s->cuda_uyvy_buffer,
|
||||
desc.width * desc.height * 2)) {
|
||||
fprintf(stderr, "Could not allocate CUDA UYVY buffer.\n");
|
||||
return false;
|
||||
@@ -147,7 +154,7 @@ static bool configure_with(struct state_video_compress_cuda_dxt *s, struct video
|
||||
|
||||
s->in_buffer = (char *) malloc(desc.width * desc.height * 3);
|
||||
|
||||
if (cudaSuccess != cudaMalloc((void **) &s->cuda_in_buffer,
|
||||
if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc((void **) &s->cuda_in_buffer,
|
||||
desc.width * desc.height * 3)) {
|
||||
fprintf(stderr, "Could not allocate CUDA output buffer.\n");
|
||||
return false;
|
||||
@@ -160,12 +167,18 @@ static bool configure_with(struct state_video_compress_cuda_dxt *s, struct video
|
||||
|
||||
s->out[i] = vf_alloc_desc(compressed_desc);
|
||||
s->out[i]->tiles[0].data_len = desc.width * desc.height / (s->out_codec == DXT1 ? 2 : 1);
|
||||
if (cudaSuccess != cudaMallocHost((void **) &s->out[i]->tiles[0].data,
|
||||
if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc_host((void **) &s->out[i]->tiles[0].data,
|
||||
s->out[i]->tiles[0].data_len)) {
|
||||
fprintf(stderr, "Could not allocate CUDA output buffer.\n");
|
||||
fprintf(stderr, "Could not allocate CUDA output host buffer.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc((void **)
|
||||
&s->cuda_out_buffer,
|
||||
s->out[0]->tiles[0].data_len)) {
|
||||
fprintf(stderr, "Could not allocate CUDA output buffer.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -176,7 +189,7 @@ struct video_frame *cuda_dxt_compress_tile(struct module *mod, struct video_fram
|
||||
struct state_video_compress_cuda_dxt *s =
|
||||
(struct state_video_compress_cuda_dxt *) mod->priv_data;
|
||||
|
||||
cudaSetDevice(cuda_devices[0]);
|
||||
cuda_wrapper_set_device(cuda_devices[0]);
|
||||
|
||||
if (!video_desc_eq_excl_param(video_desc_from_frame(tx),
|
||||
s->saved_desc, PARAM_TILE_COUNT)) {
|
||||
@@ -205,30 +218,28 @@ struct video_frame *cuda_dxt_compress_tile(struct module *mod, struct video_fram
|
||||
}
|
||||
|
||||
if (s->in_codec == UYVY) {
|
||||
if (cudaMemcpy(s->cuda_uyvy_buffer, in_buffer, tx->tiles[tile_idx].width *
|
||||
if (cuda_wrapper_memcpy(s->cuda_uyvy_buffer, in_buffer, tx->tiles[tile_idx].width *
|
||||
tx->tiles[tile_idx].height * 2,
|
||||
cudaMemcpyHostToDevice) != cudaSuccess) {
|
||||
fprintf(stderr, "Memcpy failed: %s\n",
|
||||
cudaGetErrorString(cudaGetLastError()));
|
||||
CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE) != CUDA_WRAPPER_SUCCESS) {
|
||||
fprintf(stderr, "Memcpy failed: %s\n", cuda_wrapper_last_error_string());
|
||||
return NULL;
|
||||
}
|
||||
if (cuda_yuv422_to_yuv444(s->cuda_uyvy_buffer, s->cuda_in_buffer,
|
||||
tx->tiles[tile_idx].width *
|
||||
tx->tiles[tile_idx].height, 0) != 0) {
|
||||
fprintf(stderr, "UYVY kernel failed: %s\n",
|
||||
cudaGetErrorString(cudaGetLastError()));
|
||||
tx->tiles[tile_idx].height, 0) != CUDA_WRAPPER_SUCCESS) {
|
||||
fprintf(stderr, "Kernel failed: %s\n", cuda_wrapper_last_error_string());
|
||||
}
|
||||
} else {
|
||||
if (cudaMemcpy(s->cuda_in_buffer, in_buffer, tx->tiles[tile_idx].width *
|
||||
if (cuda_wrapper_memcpy(s->cuda_in_buffer, in_buffer, tx->tiles[tile_idx].width *
|
||||
tx->tiles[tile_idx].height * 3,
|
||||
cudaMemcpyHostToDevice) != cudaSuccess) {
|
||||
fprintf(stderr, "Memcpy failed: %s\n",
|
||||
cudaGetErrorString(cudaGetLastError()));
|
||||
CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE) != CUDA_WRAPPER_SUCCESS) {
|
||||
fprintf(stderr, "Memcpy failed: %s\n", cuda_wrapper_last_error_string());
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int (*cuda_dxt_enc_func)(const void * src, void * out, int size_x, int size_y, cudaStream_t stream);
|
||||
int (*cuda_dxt_enc_func)(const void * src, void * out, int size_x, int size_y,
|
||||
cuda_wrapper_stream_t stream);
|
||||
|
||||
if (s->out_codec == DXT1) {
|
||||
if (s->in_codec == RGB) {
|
||||
@@ -243,11 +254,18 @@ struct video_frame *cuda_dxt_compress_tile(struct module *mod, struct video_fram
|
||||
cuda_dxt_enc_func = cuda_yuv_to_dxt6;
|
||||
}
|
||||
}
|
||||
int ret = cuda_dxt_enc_func(s->cuda_in_buffer, s->out[buffer]->tiles[0].data,
|
||||
int ret = cuda_dxt_enc_func(s->cuda_in_buffer, s->cuda_out_buffer,
|
||||
s->saved_desc.width, s->saved_desc.height, 0);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "Encoding failed: %s\n",
|
||||
cudaGetErrorString(cudaGetLastError()));
|
||||
fprintf(stderr, "Encoding failed: %s\n", cuda_wrapper_last_error_string());
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (cuda_wrapper_memcpy(s->out[buffer]->tiles[0].data,
|
||||
s->cuda_out_buffer,
|
||||
s->out[buffer]->tiles[0].data_len,
|
||||
CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST) != CUDA_WRAPPER_SUCCESS) {
|
||||
fprintf(stderr, "Memcpy failed: %s\n", cuda_wrapper_last_error_string());
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -58,7 +58,6 @@
|
||||
#include "module.h"
|
||||
#include "video_compress/jpeg.h"
|
||||
#include "libgpujpeg/gpujpeg_encoder.h"
|
||||
#include "libgpujpeg/gpujpeg_common.h"
|
||||
#include "video.h"
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
@@ -346,7 +345,7 @@ struct video_frame * jpeg_compress(struct module *mod, struct video_frame * tx,
|
||||
|
||||
unsigned int x;
|
||||
|
||||
cudaSetDevice(cuda_devices[0]);
|
||||
gpujpeg_set_device(cuda_devices[0]);
|
||||
|
||||
if(!s->encoder) {
|
||||
int ret;
|
||||
|
||||
@@ -57,7 +57,6 @@
|
||||
|
||||
#include "libgpujpeg/gpujpeg_decoder.h"
|
||||
//#include "compat/platform_semaphore.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
#include "video_decompress/jpeg.h"
|
||||
@@ -82,11 +81,11 @@ static int configure_with(struct state_decompress_jpeg *s, struct video_desc des
|
||||
return FALSE;
|
||||
}
|
||||
if(s->out_codec == RGB) {
|
||||
s->decoder->coder.param_image.color_space = GPUJPEG_RGB;
|
||||
s->decoder->coder.param_image.sampling_factor = GPUJPEG_4_4_4;
|
||||
gpujpeg_decoder_set_output_format(s->decoder, GPUJPEG_RGB,
|
||||
GPUJPEG_4_4_4);
|
||||
} else {
|
||||
s->decoder->coder.param_image.color_space = GPUJPEG_YCBCR_BT709;
|
||||
s->decoder->coder.param_image.sampling_factor = GPUJPEG_4_2_2;
|
||||
gpujpeg_decoder_set_output_format(s->decoder, GPUJPEG_YCBCR_BT709,
|
||||
GPUJPEG_4_2_2);
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
@@ -118,7 +117,6 @@ int jpeg_decompress_reconfigure(void *state, struct video_desc desc,
|
||||
int rshift, int gshift, int bshift, int pitch, codec_t out_codec)
|
||||
{
|
||||
struct state_decompress_jpeg *s = (struct state_decompress_jpeg *) state;
|
||||
int ret;
|
||||
|
||||
assert(out_codec == RGB || out_codec == UYVY);
|
||||
|
||||
@@ -157,7 +155,7 @@ int jpeg_decompress(void *state, unsigned char *dst, unsigned char *buffer,
|
||||
linesize = s->desc.width * 2;
|
||||
}
|
||||
|
||||
cudaSetDevice(cuda_devices[0]);
|
||||
gpujpeg_set_device(cuda_devices[0]);
|
||||
|
||||
if((s->out_codec != RGB || (s->rshift == 0 && s->gshift == 8 && s->bshift == 16)) &&
|
||||
s->pitch == linesize) {
|
||||
|
||||
@@ -158,9 +158,10 @@ static void *worker_thread(void *arg)
|
||||
|
||||
msg_frame *output_frame = new msg_frame(s->desc.width * s->desc.height / s->ppb);
|
||||
|
||||
if (cudaSuccess != cudaMemcpy((char*) output_frame->data, s->dxt_out_buff,
|
||||
if (cuda_wrapper_memcpy((char*) output_frame->data, s->dxt_out_buff,
|
||||
output_frame->data_len,
|
||||
cudaMemcpyDeviceToHost)) {
|
||||
CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST) !=
|
||||
CUDA_WRAPPER_SUCCESS) {
|
||||
fprintf(stderr, "[jpeg_to_dxt] unable to copy from device.");
|
||||
}
|
||||
s->m_out.push(output_frame);
|
||||
@@ -173,7 +174,7 @@ static void *worker_thread(void *arg)
|
||||
gpujpeg_decoder_destroy(s->jpeg_decoder);
|
||||
}
|
||||
|
||||
cudaFree(s->dxt_out_buff);
|
||||
cuda_wrapper_free(s->dxt_out_buff);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@@ -272,11 +273,12 @@ static int reconfigure_thread(struct thread_data *s, struct video_desc desc, int
|
||||
}
|
||||
|
||||
if(s->dxt_out_buff != NULL) {
|
||||
cudaFree(s->dxt_out_buff);
|
||||
cuda_wrapper_free(s->dxt_out_buff);
|
||||
s->dxt_out_buff = NULL;
|
||||
}
|
||||
|
||||
if(cudaSuccess != cudaMallocHost((void **) &s->dxt_out_buff, desc.width * desc.height / ppb)) {
|
||||
if(cuda_wrapper_malloc_host((void **) &s->dxt_out_buff, desc.width * desc.height / ppb)
|
||||
!= CUDA_WRAPPER_SUCCESS) {
|
||||
fprintf(stderr, "Could not allocate CUDA output buffer.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user