diff --git a/Makefile.in b/Makefile.in index 2cb420c5b..d6a5381ae 100644 --- a/Makefile.in +++ b/Makefile.in @@ -153,7 +153,8 @@ modules: @LIB_TARGETS@ $(TARGET): $(OBJS) $(ULTRAGRID_OBJS) $(HEADERS) @if [ ! -d bin ]; then mkdir bin; fi - $(LINKER) -rdynamic $(LDFLAGS) $(OBJS) $(ULTRAGRID_OBJS) $(LIBS) -o $(TARGET) + $(LINKER) $(LDFLAGS) $(OBJS) $(ULTRAGRID_OBJS) $(LIBS) -o $(TARGET) + if [ -n "@DLL_LIBS@" ]; then $(INSTALL) -m 644 @DLL_LIBS@ bin; fi bin/import_control_keyboard: src/import_control_keyboard.o $(LINKER) $(LDFLAGS) $< @IMPORT_CONTROL_KEYBOARD_LIBS@ -o $@ @@ -173,7 +174,9 @@ $(REFLECTOR_TARGET): $(OBJS) $(HEADERS) $(REFLECTOR_OBJS) # Pattern rule for compiling CUDA files %.cu.o: %.cu - $(NVCC) $(NVCCFLAGS) -Xcompiler -fPIC -c $< -o $@ + $(NVCC) $(NVCCFLAGS) $(INC) -c $< -o $@ +%.cu.lib: %.cu + $(NVCC) $(NVCCFLAGS) -DEXPORT_DLL_SYMBOLS $(INC) --shared $< -o $<.dll src/audio/resample.o: $(CC) $(CFLAGS) $(INC) -DEXPORT="" -DRANDOM_PREFIX=speex -DFLOATING_POINT -DOUTSIDE_SPEEX -I. -I speex-1.2rc1/include/speex -Iinclude -fvisibility=hidden -c speex-1.2rc1/libspeex/resample.c -fPIC -DPIC -o $@ @@ -415,7 +418,7 @@ clean: -rm -rf $(BUNDLE) -rm -rf $(PERF) src/uv_perf.o -rm -rf $(REFLECTOR_TARGET) $(REFLECTOR_OBJS) - -rm -rf @LIB_OBJS@ @LIB_TARGETS@ @LIB_HEADERS@ @X_OBJ@ @GL_COMMON_OBJ@ + -rm -rf @LIB_OBJS@ @LIB_TARGETS@ @LIB_HEADERS@ @X_OBJ@ @GL_COMMON_OBJ@ @CUDA_COMMON_OBJ@ -rm -rf bin/import_control_keyboard [ ! -f gpujpeg/Makefile ] || make -C gpujpeg/ clean [ -z "@FASTDXT_PATH@" ] || make -C @FASTDXT_PATH@/ clean @@ -581,9 +584,9 @@ libavcodec: @LIBAVCODEC_DECOMPRESS_LIB_TARGET@ @LIBAVCODEC_COMPRESS_LIB_TARGET@ mkdir -p lib/ultragrid $(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_jpeg.so.@video_compress_abi_version@ $^ @JPEG_LIB@ -o $@ -@CUDA_DXT_COMPRESS_LIB_TARGET@: @CUDA_DXT_OBJ@ +@CUDA_DXT_COMPRESS_LIB_TARGET@: @CUDA_DXT_OBJ@ @CUDA_COMMON_OBJ@ mkdir -p lib/ultragrid - $(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_cuda_dxt.so.@video_compress_abi_version@ $^ @CUDA_DXT_LIB@ -o $@ + $(LINKER) $(LDFLAGS) -shared -Wl,-soname,vcompress_cuda_dxt.so.@video_compress_abi_version@ $^ @CUDA_DXT_LIB@ @CUDA_COMMON_OBJ@ -o $@ @RTDXT_DECOMPRESS_LIB_TARGET@: @GL_COMMON_OBJ@ @X_OBJ@ @RTDXT_COMMON_OBJ@ @RTDXT_DECOMPRESS_OBJ@ @RTDXT_COMMON_HEADERS@ mkdir -p lib/ultragrid @@ -593,9 +596,9 @@ libavcodec: @LIBAVCODEC_DECOMPRESS_LIB_TARGET@ @LIBAVCODEC_COMPRESS_LIB_TARGET@ mkdir -p lib/ultragrid $(LINKER) $(LDFLAGS) -shared -Wl,-soname,vdecompress_jpeg.so.@video_decompress_abi_version@ $^ @JPEG_LIB@ -o $@ -@JPEG_TO_DXT_DECOMPRESS_LIB_TARGET@: @JPEG_TO_DXT_OBJ@ @JPEG_DECOMPRESS_OBJ@ @JPEG_COMMON_OBJ@ +@JPEG_TO_DXT_DECOMPRESS_LIB_TARGET@: @JPEG_TO_DXT_OBJ@ @JPEG_DECOMPRESS_OBJ@ @JPEG_COMMON_OBJ@ @CUDA_COMMON_OBJ@ mkdir -p lib/ultragrid - $(LINKER) $(LDFLAGS) -shared -Wl,-soname,vdecompress_jpeg_to_dxt.so.@video_decompress_abi_version@ $^ @JPEG_TO_DXT_LIB@ -o $@ + $(LINKER) $(LDFLAGS) -shared -Wl,-soname,vdecompress_jpeg_to_dxt.so.@video_decompress_abi_version@ $^ @JPEG_TO_DXT_LIB@ @CUDA_COMMON_OBJ@ -o $@ @ALSA_PLAY_LIB_TARGET@: @ALSA_PLAY_OBJ@ @@ -647,6 +650,7 @@ install: all $(INSTALL) -m 755 data/ultragrid-bugreport-collect.sh $(DESTDIR)/$(uv_datadir) $(INSTALL) -d -m 755 $(DESTDIR)/$(docdir) $(INSTALL) -m 644 $(DOCS) $(DESTDIR)/$(docdir) + $(INSTALL) -m 644 @DLL_LIBS@ $(DESTDIR)/$(bindir) uninstall: $(RM) $(DESTDIR)/$(bindir)/uv diff --git a/autogen.sh b/autogen.sh index 6e4599419..8d7e2beb6 100755 --- a/autogen.sh +++ b/autogen.sh @@ -3,17 +3,6 @@ set -e [ -d m4 ] || mkdir m4 -# variables -if [ `uname -s` = "Darwin" ]; then - LIBTOOLIZE=glibtoolize -else if [ `uname -s` = "Linux" ]; then - LIBTOOLIZE=libtoolize -else # Windows - LIBTOOLIZE=true -fi -fi - - srcdir=`dirname $0` test -z "$srcdir" && srcdir=. @@ -22,7 +11,6 @@ ORIGDIR=`pwd` cd $srcdir aclocal autoheader -$LIBTOOLIZE --copy autoconf $srcdir/configure --enable-gpl $@ diff --git a/configure.ac b/configure.ac index ed05af17a..6a7a85718 100644 --- a/configure.ac +++ b/configure.ac @@ -5,7 +5,6 @@ AM_INIT_AUTOMAKE([1.10]) AC_PREREQ(2.61) AC_CONFIG_SRCDIR([src/main.c]) AC_CONFIG_MACRO_DIR([m4]) -LT_INIT PARENT=`echo $PWD | sed -e 's%/[[^/]]*$%%'` @@ -161,7 +160,12 @@ then CFLAGS="$CFLAGS -fPIC" CXXFLAGS="$CXXFLAGS -fPIC" NVCCFLAGS="$NVCCFLAGS -Xcompiler -fPIC" + CU_SUFFIX="cu.o" +else + NVCCFLAGS="-m32" + CU_SUFFIX="cu.lib" fi +AC_SUBST(NVCCFLAGS) AH_BOTTOM([ /* @@ -237,19 +241,30 @@ AC_DEFUN([DEFINE_GL], [ AC_SUBST(GL_COMMON_OBJ) ]) -AC_DEFUN([DEFINE_CUDA], [ - if test -z "$cuda_var_defined"; then - AC_DEFINE([HAVE_CUDA], [1], [CUDA is present on the system]) - LINKER=$CXX - POST_COMPILE_MSG="$POST_COMPILE_MSG\n***\nYou have compiled in JPEG support.\nIn order to use use JPEG compression and \ +AC_DEFUN([CUDA_MESSAGE], [ + if test -z "$cuda_msg_defined"; then + POST_COMPILE_MSG="$POST_COMPILE_MSG\n***\nYou have compiled in sort of CUDA code.\nIn order to use use it compression and \ decompression, you will need to have CUDA libraries visible to your OS.\n\ If not done so, you can accomplish this by adding line:\n\ export LD_LIBRARY_PATH=$CUDA_LIB_PATH:\\\$\$LD_LIBRARY_PATH\n\ to your .bashrc file (in home directory). To take effect immediatelly, you will need to enter:\n\ exec bash\n***\n" + cuda_msg_defined=yes + fi + ]) + +AC_DEFUN([DEFINE_CUDA], [ + if test -z "$cuda_var_defined"; then + AC_DEFINE([HAVE_CUDA], [1], [CUDA is present on the system]) + CUDA_COMMON_OBJ=src/cuda_wrapper.$CU_SUFFIX + if test $system = Windows; then + DLL_LIBS="$DLL_LIBS src/cuda_wrapper.cu.dll" + fi + AC_SUBST(CUDA_COMMON_OBJ) cuda_var_defined=yes fi ]) +AC_SUBST(DLL_LIBS) AC_MSG_CHECKING([if_nametoindex]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ @@ -402,7 +417,8 @@ if test "$build_libraries" = yes then if test "$system" = "Linux" then - LIBS+=' -ldl' + LIBS="$LIBS -ldl" + LDFLAGS+="-rdynamic -ldl" else AC_MSG_WARN([*** Building libraries is not supported with other system than Linux]) build_libraries=no @@ -438,7 +454,7 @@ AC_SUBST(vo_pp_abi_version) # ------------------------------------------------------------------------------------------------ # environment variables # -AC_ARG_VAR([CUDA_DIRECTORY], [Directory of your Nvidia toolkit instalation.]) +AC_ARG_VAR([CUDA_PATH], [Directory of your Nvidia toolkit instalation.]) AC_ARG_VAR([SAGE_DIRECTORY], [Directory of your SAGE installation.]) AC_ARG_VAR([DVS_DIRECTORY], [Path to DVS installation.]) AC_ARG_VAR([DELTACAST_DIRECTORY], [Placement of VideoMasterHD directory (Deltacast).]) @@ -690,7 +706,6 @@ then DECKLINK_COMMON="src/video_capture/DeckLinkAPIDispatch.o" fi AC_DEFINE([HAVE_DECKLINK], [1], [Build with DeckLink support]) - LINKER=$CXX AC_SUBST(DECKLINK_CAP_LIB_TARGET, "lib/ultragrid/vidcap_decklink.so.$video_capture_abi_version") AC_SUBST(DECKLINK_DISP_LIB_TARGET, "lib/ultragrid/display_decklink.so.$video_display_abi_version") AC_SUBST(DECKLINK_SOUND_PLAY_LIB_TARGET, "lib/ultragrid/aplay_decklink.so.$audio_playback_abi_version") @@ -1655,7 +1670,6 @@ then FASTDXT_LIB="" FASTDXT_OBJ="$FASTDXT_OBJ src/video_compress/fastdxt.o ${FASTDXT_PATH}/libdxt.a" AC_DEFINE([HAVE_FASTDXT], [1], [Build with support for FastDXT]) - LINKER=$CXX AC_SUBST(FASTDXT_LIB_TARGET, "lib/ultragrid/vcompress_fastdxt.so.$video_compress_abi_version") LIB_TARGETS="$LIB_TARGETS $FASTDXT_LIB_TARGET" LIB_OBJS="$LIB_OBJS $FASTDXT_OBJ" @@ -1782,10 +1796,8 @@ AC_SUBST(UYVY_LIB) CUDA_INC= CUDA_LIB= NVCC= -NVCCFLAGS= CUDA_LIB_PATH= FOUND_CUDA=no -HAVE_CUDA=no CUDA_PATH=$CUDA_DIRECTORY AC_ARG_WITH(cuda, @@ -1797,18 +1809,12 @@ SAVED_LIBS=$LIBS LIBS="$LIBS -lcudart" if test -z "$CUDA_PATH" then - AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include - ]], - [[int devices_count; - cudaGetDeviceCount(&devices_count); - ]])],FOUND_CUDA=yes,FOUND_CUDA=no) AC_CHECK_PROG([NVCC], [nvcc], [nvcc], []) fi -if test $FOUND_CUDA = no -o -z "$NVCC" +if test -z "$NVCC" then - FOUND_CUDA=no NVCC= if test -z "$CUDA_PATH"; then CUDA_PATH=/usr/local/cuda @@ -1821,11 +1827,6 @@ then else LIBS="$LIBS -L$CUDA_PATH/lib" fi - AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include - ]], - [[int devices_count; - cudaGetDeviceCount(&devices_count); - ]])],FOUND_CUDA=yes,FOUND_CUDA=no) CFLAGS=$SAVED_CFLAGS AC_CHECK_PROG([NVCC], [nvcc], [$CUDA_PATH/bin/nvcc], [], @@ -1834,9 +1835,17 @@ fi LIBS=$SAVED_LIBS -if test -n "$NVCC" -a $FOUND_CUDA = yes +if test -n "$NVCC" then - HAVE_CUDA=yes + if test $system = Windows; then + AC_CHECK_PROG([CL], [cl], [cl], + []) + if test -n $CL; then + FOUND_CUDA=yes + fi + else + FOUND_CUDA=yes + fi AC_MSG_CHECKING([CUDA Toolkit version - major]) nvcc_major=`$NVCC --version |grep release|sed 's/^.*release \(@<:@0-9@:>@@<:@0-9@:>@*\).*$/\1/'` AC_MSG_RESULT($nvcc_major) @@ -1859,10 +1868,8 @@ then CUDA_LIB="-L$CUDA_LIB_PATH -lcudart" fi -AC_SUBST(HAVE_CUDA) AC_SUBST(NVCC) AC_SUBST(FOUND_CUDA) -AC_SUBST(NVCCFLAGS) AC_SUBST(CUDA_PATH) AC_SUBST(CUDA_LIB) AC_SUBST(CUDA_INC) @@ -1893,46 +1900,23 @@ CFLAGS=$SAVED_CFLAGS CXXFLAGS=$SAVED_CXXFLAGS CPPFLAGS=$SAVED_CPPFLAGS -if test $HAVE_CUDA = yes +if test $system = Windows then - if test \( $nvcc_major -gt 4 \) -o \( $nvcc_major -eq 4 -a $nvcc_minor -ge 1 \) -o $version_check = no - then - jpeg_cuda_version_ok=yes - else - AC_MSG_WARN([*** CUDA Toolkit older than 4.1 detected. Please install at least v4.1 to use JPEG.]) - HM_VERSION_WARNING - fi - - if test $version_check = no -o \ - \( $system = Linux -o $os_version_major -ge 10 \) # Linux or Mac at least Snow Leopard - then - jpeg_os_version_ok=yes - else - AC_MSG_WARN([*** Detected unsupported OS version for CUDA.]) - HM_VERSION_WARNING - fi + JPEG_LIB="$JPEG_LIB -Lgpujpeg/Release" + DLL_LIBS="$DLL_LIBS gpujpeg/Release/gpujpeg.dll" fi SAVED_LIBS=$LIBS +LIBS="$LIBS $JPEG_LIB" AC_CHECK_LIB([gpujpeg], [gpujpeg_encoder_create]) LIBS=$SAVED_LIBS -# Used also by JPEG_TO_DXT -if test "$ac_cv_lib_gpujpeg_gpujpeg_encoder_create" = yes -a \ - $HAVE_CUDA = yes -a "$jpeg_cuda_version_ok" = yes -a \ - "$jpeg_os_version_ok" = yes -then - jpeg_env_ok=yes -else - jpeg_env_ok=no -fi - -if test "$jpeg_req" != no -a $jpeg_env_ok = yes +if test "$jpeg_req" != no -a \ + "$ac_cv_lib_gpujpeg_gpujpeg_encoder_create" = yes then jpeg=yes - JPEG_LIB=" $CUDA_LIB" - JPEG_INC=$CUDA_INC + JPEG_INC="$JPEG_INC -Igpujpeg" JPEG_COMMON_OBJ="src/video_compress/jpeg.o" JPEG_LIB="$JPEG_LIB -lgpujpeg" JPEG_DECOMPRESS_OBJ="src/video_decompress/jpeg.o " @@ -1941,7 +1925,7 @@ then AC_SUBST(JPEG_DECOMPRESS_LIB_TARGET, "lib/ultragrid/vdecompress_jpeg.so.$video_decompress_abi_version") LIB_TARGETS="$LIB_TARGETS $JPEG_COMPRESS_LIB_TARGET $JPEG_DECOMPRESS_LIB_TARGET" LIB_OBJS="$LIB_OBJS $JPEG_DECOMPRESS_OBJ $JPEG_COMMON_OBJ" - DEFINE_CUDA + CUDA_MESSAGE fi if test $jpeg_req = yes -a $jpeg = no; then @@ -1965,6 +1949,16 @@ CUDA_DXT_LIB= cuda_dxt=no +AC_DEFUN([ADD_CUDA_DXT_OBJ], [ + if test -z "$included_shared_cuda_dxt_cu"; then + LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.$CU_SUFFIX" + if test $system = Windows; then + DLL_LIBS="$DLL_LIBS cuda_dxt/cuda_dxt.cu.dll" + fi + included_shared_cuda_dxt_cu=yes + fi + ]) + AC_ARG_ENABLE(cuda-dxt, AS_HELP_STRING([--disable-cuda-dxt], [disable CUDA DXT compression (auto)]), [cuda_dxt_req=$enableval], @@ -1972,22 +1966,20 @@ AC_ARG_ENABLE(cuda-dxt, LIBS=$SAVED_LIBS -if test "$cuda_dxt_req" != no -a $HAVE_CUDA = yes +if test "$cuda_dxt_req" != no -a $FOUND_CUDA = yes then cuda_dxt=yes CUDA_DXT_LIB=" $CUDA_LIB" CUDA_DXT_INC=$CUDA_INC - CUDA_DXT_OBJ="src/video_compress/cuda_dxt.o cuda_dxt/cuda_dxt.cu.o" + CUDA_DXT_OBJ="src/video_compress/cuda_dxt.o cuda_dxt/cuda_dxt.$CU_SUFFIX" AC_DEFINE([HAVE_CUDA_DXT], [1], [Build with CUDA DXT support]) AC_SUBST(CUDA_DXT_COMPRESS_LIB_TARGET, "lib/ultragrid/vcompress_cuda_dxt.so.$video_compress_abi_version") LIB_TARGETS="$LIB_TARGETS $CUDA_DXT_COMPRESS_LIB_TARGET" LIB_OBJS="$LIB_OBJS src/video_compress/cuda_dxt.o" - if test -z "$included_shared_cuda_dxt_cu"; then - LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.cu.o" - included_shared_cuda_dxt_cu=yes - fi + ADD_CUDA_DXT_OBJ DEFINE_CUDA + CUDA_MESSAGE fi if test $cuda_dxt_req = yes -a $cuda_dxt = no; then @@ -2009,21 +2001,20 @@ AC_ARG_ENABLE(jpeg_to_dxt, [jpeg_to_dxt_req=$enableval], [jpeg_to_dxt_req=auto]) -if test $jpeg_env_ok = yes -a $jpeg_to_dxt_req != no +if test $jpeg_to_dxt_req != no -a $FOUND_CUDA = yes -a \ + "$ac_cv_lib_gpujpeg_gpujpeg_encoder_create" = yes then jpeg_to_dxt=yes JPEG_TO_DXT_INC=" $CUDA_INC" JPEG_TO_DXT_LIB=" $CUDA_LIB -lgpujpeg" - JPEG_TO_DXT_OBJ="src/video_decompress/jpeg_to_dxt.o cuda_dxt/cuda_dxt.cu.o" + JPEG_TO_DXT_OBJ="src/video_decompress/jpeg_to_dxt.o cuda_dxt/cuda_dxt.$CU_SUFFIX" AC_SUBST(JPEG_TO_DXT_DECOMPRESS_LIB_TARGET, "lib/ultragrid/vdecompress_jpeg_to_dxt.so.$video_decompress_abi_version") LIB_TARGETS="$LIB_TARGETS $JPEG_TO_DXT_DECOMPRESS_LIB_TARGET" LIB_OBJS="$LIB_OBJS src/video_decompress/jpeg_to_dxt.o" AC_DEFINE([HAVE_JPEG_TO_DXT], [1], [Build with JPEG to DXT transcode support]) - if test -z "$included_shared_cuda_dxt_cu"; then - LIB_OBJS="$LIB_OBJS cuda_dxt/cuda_dxt.cu.o" - included_shared_cuda_dxt_cu=yes - fi + ADD_CUDA_DXT_OBJ DEFINE_CUDA + CUDA_MESSAGE fi if test $jpeg_to_dxt = no -a $jpeg_to_dxt_req = yes @@ -2083,7 +2074,6 @@ then SAGE_LIB=-"L${SAGE_LIB} -lsail -lquanta" SAGE_OBJ="src/video_display/sage.o" AC_DEFINE([HAVE_SAGE], [1], [Build with SAGE support]) - LINKER=$CXX AC_SUBST(SAGE_LIB_TARGET, "lib/ultragrid/display_sage.so.$video_display_abi_version") CXXFLAGS="$CXXFLAGS -DQUANTA_USE_PTHREADS -DQUANTA_THREAD_SAFE -DGLSL_YUV" LIB_TARGETS="$LIB_TARGETS $SAGE_LIB_TARGET" @@ -2563,7 +2553,7 @@ fi if test "$build_libraries" != yes then LIBS="$LIBS $LIB_MODULES" - OBJS="$OBJS $LIB_OBJS $GL_COMMON_OBJ $X_OBJ" + OBJS="$OBJS $LIB_OBJS $GL_COMMON_OBJ $X_OBJ $CUDA_COMMON_OBJ" HEADERS="$HEADERS $LIB_HEADERS" LIB_OBJS= LIB_TARGETS= @@ -2663,6 +2653,7 @@ RESULT=\ Realtime DXT (OpenGL) ....... $rtdxt JPEG ........................ $jpeg JPEG to DXT ................. $jpeg_to_dxt + CUDA DXT .................... $cuda_dxt UYVY dummy compression ...... $uyvy Libavcodec .................. $libavcodec (audio: $libavcodec_audio) diff --git a/cuda_dxt/cuda_dxt.cu b/cuda_dxt/cuda_dxt.cu index a4d5d7c97..d9067b323 100644 --- a/cuda_dxt/cuda_dxt.cu +++ b/cuda_dxt/cuda_dxt.cu @@ -4,6 +4,7 @@ /// @brief CUDA implementation of DXT compression /// +#include #include #include "cuda_dxt.h" @@ -721,9 +722,9 @@ __global__ static void yuv422_to_yuv444_kernel(const void * src, void * out, int out_pix[1].w = pix34.x; out_pix[2].x = pix34.z; - out_pix[2].z = pix34.w; - out_pix[2].w = pix34.x; - out_pix[2].x = pix34.z; + out_pix[2].y = pix34.w; + out_pix[2].z = pix34.x; + out_pix[2].w = pix34.z; this_out[0] = out_pix[0]; this_out[1] = out_pix[1]; @@ -758,13 +759,14 @@ static int dxt_launch(const void * src, void * out, int sx, int sy, cudaStream_t return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0; } -int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStream_t str) { +CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out, + int pix_count, cuda_wrapper_stream_t str) { // grid and threadblock sizes const dim3 tsiz(64, 1); int thread_count = pix_count / 4; // we process block of 4 pixels const dim3 gsiz((thread_count + tsiz.x - 1) / tsiz.x, 1); - yuv422_to_yuv444_kernel<<>>(src, out, pix_count); - return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0; + yuv422_to_yuv444_kernel<<>>(src, out, pix_count); + return cudaSuccess != cudaStreamSynchronize((cudaStream_t) str) ? -3 : 0; } /// CUDA DXT1 compression (only RGB without alpha). @@ -777,8 +779,9 @@ int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStrea /// @param size_y Height of the input image (must be divisible by 4). /// @param stream CUDA stream to run in, or 0 for default stream. /// @return 0 if OK, nonzero if failed. -int cuda_rgb_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) { - return dxt_launch(src, out, size_x, size_y, stream); +CUDA_DLL_API int cuda_rgb_to_dxt1(const void * src, void * out, + int size_x, int size_y, cuda_wrapper_stream_t stream) { + return dxt_launch(src, out, size_x, size_y, (cudaStream_t) stream); } @@ -793,8 +796,9 @@ int cuda_rgb_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaS /// @param size_y Height of the input image (must be divisible by 4). /// @param stream CUDA stream to run in, or 0 for default stream. /// @return 0 if OK, nonzero if failed. -int cuda_yuv_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) { - return dxt_launch(src, out, size_x, size_y, stream); +CUDA_DLL_API int cuda_yuv_to_dxt1(const void * src, void * out, + int size_x, int size_y, cuda_wrapper_stream_t stream) { + return dxt_launch(src, out, size_x, size_y, (cudaStream_t) stream); } @@ -809,11 +813,13 @@ int cuda_yuv_to_dxt1(const void * src, void * out, int size_x, int size_y, cudaS /// (Input is read bottom up if negative) /// @param stream CUDA stream to run in, or 0 for default stream. /// @return 0 if OK, nonzero if failed. -int cuda_rgb_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) { - return dxt_launch(src, out, size_x, size_y, stream); +CUDA_DLL_API int cuda_rgb_to_dxt6(const void * src, void * out, + int size_x, int size_y, cuda_wrapper_stream_t stream) { + return dxt_launch(src, out, size_x, size_y, (cudaStream_t) stream); } -int cuda_yuv_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream) { - return dxt_launch(src, out, size_x, size_y, stream); +CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out, + int size_x, int size_y, cuda_wrapper_stream_t stream) { + return dxt_launch(src, out, size_x, size_y, (cudaStream_t) stream); } diff --git a/cuda_dxt/cuda_dxt.h b/cuda_dxt/cuda_dxt.h index 348dfba6f..c1a3e3858 100644 --- a/cuda_dxt/cuda_dxt.h +++ b/cuda_dxt/cuda_dxt.h @@ -8,13 +8,12 @@ #ifndef CUDA_DXT_H #define CUDA_DXT_H +#include "cuda_wrapper.h" + #ifdef __cplusplus extern "C" { #endif -#include - - /** * CUDA DXT1 compression (only RGB without alpha). * @param src Pointer to top-left source pixel in device-memory buffer. @@ -28,13 +27,13 @@ extern "C" { * @param stream CUDA stream to run in, or 0 for default stream. * @return 0 if OK, nonzero if failed. */ -int cuda_rgb_to_dxt1 +CUDA_DLL_API int cuda_rgb_to_dxt1 ( const void * src, void * out, int size_x, int size_y, - cudaStream_t stream + cuda_wrapper_stream_t stream ); @@ -52,13 +51,13 @@ int cuda_rgb_to_dxt1 * @param stream CUDA stream to run in, or 0 for default stream. * @return 0 if OK, nonzero if failed. */ -int cuda_yuv_to_dxt1 +CUDA_DLL_API int cuda_yuv_to_dxt1 ( const void * src, void * out, int size_x, int size_y, - cudaStream_t stream + cuda_wrapper_stream_t stream ); @@ -75,17 +74,19 @@ int cuda_yuv_to_dxt1 * @param stream CUDA stream to run in, or 0 for default stream. * @return 0 if OK, nonzero if failed. */ -int cuda_rgb_to_dxt6 +CUDA_DLL_API int cuda_rgb_to_dxt6 ( const void * src, void * out, int size_x, int size_y, - cudaStream_t stream + cuda_wrapper_stream_t stream ); -int cuda_yuv_to_dxt6(const void * src, void * out, int size_x, int size_y, cudaStream_t stream); -int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cudaStream_t str); +CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out, + int size_x, int size_y, cuda_wrapper_stream_t stream); +CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out, + int pix_count, cuda_wrapper_stream_t str); #ifdef __cplusplus } /* end of extern "C" */ diff --git a/gpujpeg/.gitignore b/gpujpeg/.gitignore index b717c7193..4ec57fdf1 100644 --- a/gpujpeg/.gitignore +++ b/gpujpeg/.gitignore @@ -1,4 +1,3 @@ -gpujpeg .libs src/*.o src/*.lo @@ -28,3 +27,11 @@ src/.dirstamp stamp-h1 libgpujpeg.la +# VS +*.pdb +*.sdf +*.suo +*.user +Debug +Release + diff --git a/gpujpeg/Makefile.am b/gpujpeg/Makefile.am index d42d1227b..a52fc2e40 100644 --- a/gpujpeg/Makefile.am +++ b/gpujpeg/Makefile.am @@ -27,21 +27,21 @@ pkgconfig_DATA = libgpujpeg.pc library_include_HEADERS = libgpujpeg/*.h nodist_gpujpeg_libinclude_HEADERS = config.h -gpujpeg_SOURCES = src/main.c +gpujpeg_SOURCES = src/main.c gpujpeg_CFLAGS = -std=c99 @COMMON_CFLAGS@ gpujpeg_LDADD = libgpujpeg.la gpujpeg_LDFLAGS = @GPUJPEG_LDFLAGS@ # gpu jpeg library sources -libgpujpeg_la_SOURCES = src/gpujpeg_common.c \ - src/gpujpeg_dct_cpu.c \ - src/gpujpeg_decoder.c \ - src/gpujpeg_encoder.c \ - src/gpujpeg_huffman_cpu_decoder.c \ - src/gpujpeg_huffman_cpu_encoder.c \ - src/gpujpeg_reader.c \ - src/gpujpeg_table.c \ - src/gpujpeg_writer.c +libgpujpeg_la_SOURCES = src/gpujpeg_common.cpp \ + src/gpujpeg_dct_cpu.cpp \ + src/gpujpeg_decoder.cpp \ + src/gpujpeg_encoder.cpp \ + src/gpujpeg_huffman_cpu_decoder.cpp \ + src/gpujpeg_huffman_cpu_encoder.cpp \ + src/gpujpeg_reader.cpp \ + src/gpujpeg_table.cpp \ + src/gpujpeg_writer.cpp libgpujpeg_la_DEPENDENCIES = @LIBGPUJPEG_CUDA_OBJS@ diff --git a/gpujpeg/autogen.sh b/gpujpeg/autogen.sh index 5fd11b0d6..b713e239d 100755 --- a/gpujpeg/autogen.sh +++ b/gpujpeg/autogen.sh @@ -10,13 +10,6 @@ else LIBTOOLIZE=libtoolize fi -if [ ! -x ../ltmain.sh ] -then - cd .. - $LIBTOOLIZE --copy - cd - -fi - autoheader && \ $LIBTOOLIZE --copy && \ ( [ -d m4 ] || mkdir m4 ) && \ diff --git a/gpujpeg/configure.ac b/gpujpeg/configure.ac index b25ece8d6..17ace1524 100644 --- a/gpujpeg/configure.ac +++ b/gpujpeg/configure.ac @@ -2,6 +2,8 @@ AC_PREREQ([2.65]) AC_INIT([libgpujpeg], [0.0.1], [martin.srom@mail.muni.cz], [libgpujpeg], [https://sourceforge.net/p/gpujpeg/]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_SRCDIR([src/main.c]) +AC_CONFIG_AUX_DIR([.]) +AM_MAINTAINER_MODE AM_INIT_AUTOMAKE AC_CONFIG_HEADERS([config.h]) diff --git a/gpujpeg/gpujpeg.sln b/gpujpeg/gpujpeg.sln new file mode 100644 index 000000000..34fafd55d --- /dev/null +++ b/gpujpeg/gpujpeg.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Express 2012 for Windows Desktop +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpujpeg", "gpujpeg.vcxproj", "{B9D06885-F4F3-4B01-8C43-E131210B9F27}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {B9D06885-F4F3-4B01-8C43-E131210B9F27}.Debug|Win32.ActiveCfg = Debug|Win32 + {B9D06885-F4F3-4B01-8C43-E131210B9F27}.Debug|Win32.Build.0 = Debug|Win32 + {B9D06885-F4F3-4B01-8C43-E131210B9F27}.Release|Win32.ActiveCfg = Release|Win32 + {B9D06885-F4F3-4B01-8C43-E131210B9F27}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/gpujpeg/gpujpeg.vcxproj b/gpujpeg/gpujpeg.vcxproj new file mode 100644 index 000000000..5d8860661 --- /dev/null +++ b/gpujpeg/gpujpeg.vcxproj @@ -0,0 +1,132 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {B9D06885-F4F3-4B01-8C43-E131210B9F27} + Win32Proj + gpujpeg + + + + DynamicLibrary + true + v110 + Unicode + + + DynamicLibrary + false + v110 + true + Unicode + + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_WINDOWS;_USRDLL;GPUJPEG_EXPORTS;%(PreprocessorDefinitions) + .;%(AdditionalIncludeDirectories) + + + Windows + true + cudart.lib;%(AdditionalDependencies) + + + compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;%(CodeGeneration) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_WINDOWS;_USRDLL;GPUJPEG_EXPORTS;%(PreprocessorDefinitions) + .;%(AdditionalIncludeDirectories) + + + Windows + true + true + true + cudart.lib;%(AdditionalDependencies) + + + compute_20,sm_20;compute_30,sm_30;compute_35,sm_53;%(CodeGeneration) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/gpujpeg/gpujpeg.vcxproj.filters b/gpujpeg/gpujpeg.vcxproj.filters new file mode 100644 index 000000000..7018eb7bf --- /dev/null +++ b/gpujpeg/gpujpeg.vcxproj.filters @@ -0,0 +1,122 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/gpujpeg/libgpujpeg/gpujpeg_common.h b/gpujpeg/libgpujpeg/gpujpeg_common.h index 4c2f9044c..dc31bd226 100644 --- a/gpujpeg/libgpujpeg/gpujpeg_common.h +++ b/gpujpeg/libgpujpeg/gpujpeg_common.h @@ -37,6 +37,16 @@ extern "C" { #endif +#if defined _MSC_VER || defined __MINGW32__ +#ifdef GPUJPEG_EXPORTS +#define GPUJPEG_API __declspec(dllexport) +#else +#define GPUJPEG_API __declspec(dllimport) +#endif +#else // other platforms +#define GPUJPEG_API +#endif + /** Marker used as segment info */ #define GPUJPEG_MARKER_SEGMENT_INFO GPUJPEG_MARKER_APP13 @@ -80,7 +90,7 @@ struct gpujpeg_devices_info * * @return devices info */ -struct gpujpeg_devices_info +GPUJPEG_API struct gpujpeg_devices_info gpujpeg_get_devices_info(); /** @@ -88,7 +98,7 @@ gpujpeg_get_devices_info(); * * @return void */ -void +GPUJPEG_API void gpujpeg_print_devices_info(); /** @@ -99,7 +109,7 @@ gpujpeg_print_devices_info(); * enable OpenGL interoperability (GPUJPEG_OPENGL_INTEROPERABILITY) * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_init_device(int device_id, int flags); /** @@ -144,7 +154,7 @@ struct gpujpeg_parameters * @param param Parameters for JPEG coder * @return void */ -void +GPUJPEG_API void gpujpeg_set_default_parameters(struct gpujpeg_parameters* param); /** @@ -153,7 +163,7 @@ gpujpeg_set_default_parameters(struct gpujpeg_parameters* param); * @param param Parameters for coder * @return void */ -void +GPUJPEG_API void gpujpeg_parameters_chroma_subsampling(struct gpujpeg_parameters* param); /** @@ -181,7 +191,7 @@ struct gpujpeg_image_parameters { * @param param Parameters for image * @return void */ -void +GPUJPEG_API void gpujpeg_image_set_default_parameters(struct gpujpeg_image_parameters* param); /** Image file formats */ @@ -206,9 +216,16 @@ enum gpujpeg_image_file_format { * @param filename Filename of image file * @return image_file_format or GPUJPEG_IMAGE_FILE_UNKNOWN if type cannot be determined */ -enum gpujpeg_image_file_format +GPUJPEG_API enum gpujpeg_image_file_format gpujpeg_image_get_file_format(const char* filename); +/** + * Sets cuda device. + * + * @param index Index of the CUDA device to be activated. + */ +GPUJPEG_API void gpujpeg_set_device(int index); + /** * JPEG segment structure. Segment is data in scan generated by huffman coder * for N consecutive MCUs, where N is restart interval (e.g. data for MCUs between @@ -428,7 +445,7 @@ gpujpeg_coder_deinit(struct gpujpeg_coder* coder); * @param param Image parameters * @return calculate size */ -int +GPUJPEG_API int gpujpeg_image_calculate_size(struct gpujpeg_image_parameters* param); /** @@ -439,7 +456,7 @@ gpujpeg_image_calculate_size(struct gpujpeg_image_parameters* param); * @param image_size Image data buffer size (can be specified for verification or 0 for retrieval) * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_image_load_from_file(const char* filename, uint8_t** image, int* image_size); /** @@ -450,7 +467,7 @@ gpujpeg_image_load_from_file(const char* filename, uint8_t** image, int* image_s * @param image_size Image data buffer size * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_image_save_to_file(const char* filename, uint8_t* image, int image_size); /** @@ -459,7 +476,7 @@ gpujpeg_image_save_to_file(const char* filename, uint8_t* image, int image_size) * @param image Image data buffer * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_image_destroy(uint8_t* image); /** @@ -470,7 +487,7 @@ gpujpeg_image_destroy(uint8_t* image); * @param height * @param sampling_factor */ -void +GPUJPEG_API void gpujpeg_image_range_info(const char* filename, int width, int height, enum gpujpeg_sampling_factor sampling_factor); /** @@ -481,7 +498,7 @@ gpujpeg_image_range_info(const char* filename, int width, int height, enum gpujp * @param param_image_from * @param param_image_to */ -void +GPUJPEG_API void gpujpeg_image_convert(const char* input, const char* output, struct gpujpeg_image_parameters param_image_from, struct gpujpeg_image_parameters param_image_to); @@ -490,7 +507,7 @@ gpujpeg_image_convert(const char* input, const char* output, struct gpujpeg_imag * * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_opengl_init(); /** @@ -501,7 +518,7 @@ gpujpeg_opengl_init(); * @param data * @return nonzero texture id if succeeds, otherwise 0 */ -int +GPUJPEG_API int gpujpeg_opengl_texture_create(int width, int height, uint8_t* data); /** @@ -511,7 +528,7 @@ gpujpeg_opengl_texture_create(int width, int height, uint8_t* data); * @param data * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_opengl_texture_set_data(int texture_id, uint8_t* data); /** @@ -522,7 +539,7 @@ gpujpeg_opengl_texture_set_data(int texture_id, uint8_t* data); * @param data_size * @return 0 data if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_opengl_texture_get_data(int texture_id, uint8_t* data, int* data_size); /** @@ -530,7 +547,7 @@ gpujpeg_opengl_texture_get_data(int texture_id, uint8_t* data, int* data_size); * * @param texture_id */ -void +GPUJPEG_API void gpujpeg_opengl_texture_destroy(int texture_id); /** @@ -592,7 +609,7 @@ struct gpujpeg_opengl_texture * @param texture_id * @return allocated registred texture structure */ -struct gpujpeg_opengl_texture* +GPUJPEG_API struct gpujpeg_opengl_texture* gpujpeg_opengl_texture_register(int texture_id, enum gpujpeg_opengl_texture_type texture_type); /** @@ -601,7 +618,7 @@ gpujpeg_opengl_texture_register(int texture_id, enum gpujpeg_opengl_texture_type * * @param texture */ -void +GPUJPEG_API void gpujpeg_opengl_texture_unregister(struct gpujpeg_opengl_texture* texture); /** @@ -627,81 +644,6 @@ gpujpeg_opengl_texture_map(struct gpujpeg_opengl_texture* texture, int* data_siz void gpujpeg_opengl_texture_unmap(struct gpujpeg_opengl_texture* texture); -/** - * Declare timer - * - * @param name - */ -#define GPUJPEG_CUSTOM_TIMER_DECLARE(name) \ - cudaEvent_t name ## _start__; \ - cudaEvent_t name ## _stop__; \ - float name ## _elapsedTime__; \ - -/** - * Create timer - * - * @param name - */ -#define GPUJPEG_CUSTOM_TIMER_CREATE(name) \ - cudaEventCreate(&name ## _start__); \ - cudaEventCreate(&name ## _stop__); \ - -/** - * Start timer - * - * @param name - */ -#define GPUJPEG_CUSTOM_TIMER_START(name) \ - cudaEventRecord(name ## _start__, 0) \ - -/** - * Stop timer - * - * @param name - */ -#define GPUJPEG_CUSTOM_TIMER_STOP(name) \ - cudaEventRecord(name ## _stop__, 0); \ - cudaEventSynchronize(name ## _stop__); \ - cudaEventElapsedTime(&name ## _elapsedTime__, name ## _start__, name ## _stop__) \ - -/** - * Get duration for timer - * - * @param name - */ -#define GPUJPEG_CUSTOM_TIMER_DURATION(name) name ## _elapsedTime__ - -/** - * Stop timer and print result - * - * @param name - * @param text - */ -#define GPUJPEG_CUSTOM_TIMER_STOP_PRINT(name, text) \ - GPUJPEG_CUSTOM_TIMER_STOP(name); \ - printf("%s %f ms\n", text, name ## _elapsedTime__) \ - -/** - * Destroy timer - * - * @param name - */ -#define GPUJPEG_CUSTOM_TIMER_DESTROY(name) \ - cudaEventDestroy(name ## _start__); \ - cudaEventDestroy(name ## _stop__); \ - -/** - * Default timer implementation - */ -#define GPUJPEG_TIMER_INIT() \ - GPUJPEG_CUSTOM_TIMER_DECLARE(def) \ - GPUJPEG_CUSTOM_TIMER_CREATE(def) -#define GPUJPEG_TIMER_START() GPUJPEG_CUSTOM_TIMER_START(def) -#define GPUJPEG_TIMER_STOP() GPUJPEG_CUSTOM_TIMER_STOP(def) -#define GPUJPEG_TIMER_DURATION() GPUJPEG_CUSTOM_TIMER_DURATION(def) -#define GPUJPEG_TIMER_STOP_PRINT(text) GPUJPEG_CUSTOM_TIMER_STOP_PRINT(def, text) -#define GPUJPEG_TIMER_DEINIT() GPUJPEG_CUSTOM_TIMER_DESTROY(def) - #ifdef __cplusplus } #endif diff --git a/gpujpeg/libgpujpeg/gpujpeg_common_internal.h b/gpujpeg/libgpujpeg/gpujpeg_common_internal.h new file mode 100644 index 000000000..3f6603cbc --- /dev/null +++ b/gpujpeg/libgpujpeg/gpujpeg_common_internal.h @@ -0,0 +1,110 @@ +/** + * Copyright (c) 2011, CESNET z.s.p.o + * Copyright (c) 2011, Silicon Genome, LLC. + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef GPUJPEG_COMMON_INTERNAL_H +#define GPUJPEG_COMMON_INTERNAL_H + +#include "cuda_runtime.h" + +/** + * Declare timer + * + * @param name + */ +#define GPUJPEG_CUSTOM_TIMER_DECLARE(name) \ + cudaEvent_t name ## _start__; \ + cudaEvent_t name ## _stop__; \ + float name ## _elapsedTime__; \ + +/** + * Create timer + * + * @param name + */ +#define GPUJPEG_CUSTOM_TIMER_CREATE(name) \ + cudaEventCreate(&name ## _start__); \ + cudaEventCreate(&name ## _stop__); \ + +/** + * Start timer + * + * @param name + */ +#define GPUJPEG_CUSTOM_TIMER_START(name) \ + cudaEventRecord(name ## _start__, 0) \ + +/** + * Stop timer + * + * @param name + */ +#define GPUJPEG_CUSTOM_TIMER_STOP(name) \ + cudaEventRecord(name ## _stop__, 0); \ + cudaEventSynchronize(name ## _stop__); \ + cudaEventElapsedTime(&name ## _elapsedTime__, name ## _start__, name ## _stop__) \ + +/** + * Get duration for timer + * + * @param name + */ +#define GPUJPEG_CUSTOM_TIMER_DURATION(name) name ## _elapsedTime__ + +/** + * Stop timer and print result + * + * @param name + * @param text + */ +#define GPUJPEG_CUSTOM_TIMER_STOP_PRINT(name, text) \ + GPUJPEG_CUSTOM_TIMER_STOP(name); \ + printf("%s %f ms\n", text, name ## _elapsedTime__) \ + +/** + * Destroy timer + * + * @param name + */ +#define GPUJPEG_CUSTOM_TIMER_DESTROY(name) \ + cudaEventDestroy(name ## _start__); \ + cudaEventDestroy(name ## _stop__); \ + +/** + * Default timer implementation + */ +#define GPUJPEG_TIMER_INIT() \ + GPUJPEG_CUSTOM_TIMER_DECLARE(def) \ + GPUJPEG_CUSTOM_TIMER_CREATE(def) +#define GPUJPEG_TIMER_START() GPUJPEG_CUSTOM_TIMER_START(def) +#define GPUJPEG_TIMER_STOP() GPUJPEG_CUSTOM_TIMER_STOP(def) +#define GPUJPEG_TIMER_DURATION() GPUJPEG_CUSTOM_TIMER_DURATION(def) +#define GPUJPEG_TIMER_STOP_PRINT(text) GPUJPEG_CUSTOM_TIMER_STOP_PRINT(def, text) +#define GPUJPEG_TIMER_DEINIT() GPUJPEG_CUSTOM_TIMER_DESTROY(def) + +#endif // GPUJPEG_COMMON_INTERNAL_H diff --git a/gpujpeg/libgpujpeg/gpujpeg_decoder.h b/gpujpeg/libgpujpeg/gpujpeg_decoder.h index 53af520c5..6ea20377f 100644 --- a/gpujpeg/libgpujpeg/gpujpeg_decoder.h +++ b/gpujpeg/libgpujpeg/gpujpeg_decoder.h @@ -30,14 +30,26 @@ #ifndef GPUJPEG_DECODER_H #define GPUJPEG_DECODER_H +#include #include -#include -#include +#include #ifdef __cplusplus extern "C" { #endif +#if defined _MSC_VER || defined __MINGW32__ +#ifdef GPUJPEG_EXPORTS +#define GPUJPEG_API __declspec(dllexport) +#else +#define GPUJPEG_API __declspec(dllimport) +#endif +#else // other platforms +#define GPUJPEG_API +#endif + +struct gpujpeg_decoder; + /** * Decoder output type */ @@ -76,7 +88,7 @@ struct gpujpeg_decoder_output * @param output Decoder output structure * @return void */ -void +GPUJPEG_API void gpujpeg_decoder_output_set_default(struct gpujpeg_decoder_output* output); /** @@ -86,7 +98,7 @@ gpujpeg_decoder_output_set_default(struct gpujpeg_decoder_output* output); * @param custom_buffer Custom buffer * @return void */ -void +GPUJPEG_API void gpujpeg_decoder_output_set_custom(struct gpujpeg_decoder_output* output, uint8_t* custom_buffer); /** @@ -95,7 +107,7 @@ gpujpeg_decoder_output_set_custom(struct gpujpeg_decoder_output* output, uint8_t * @param output Decoder output structure * @return void */ -void +GPUJPEG_API void gpujpeg_decoder_output_set_texture(struct gpujpeg_decoder_output* output, struct gpujpeg_opengl_texture* texture); /** @@ -103,39 +115,9 @@ gpujpeg_decoder_output_set_texture(struct gpujpeg_decoder_output* output, struct * * @param output Decoder output structure */ -void +GPUJPEG_API void gpujpeg_decoder_output_set_cuda_buffer(struct gpujpeg_decoder_output* output); -/** - * JPEG decoder structure - */ -struct gpujpeg_decoder -{ - // JPEG coder structure - struct gpujpeg_coder coder; - - // JPEG reader structure - struct gpujpeg_reader* reader; - - // Quantization tables - struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT]; - - // Huffman coder tables - struct gpujpeg_table_huffman_decoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT]; - // Huffman coder tables in device memory - struct gpujpeg_table_huffman_decoder* d_table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT]; - - // Current segment count for decoded image - int segment_count; - - // Current data compressed size for decoded image - int data_compressed_size; - - // Timers - GPUJPEG_CUSTOM_TIMER_DECLARE(def) - GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu) -}; - /** * Create JPEG decoder * @@ -143,7 +125,7 @@ struct gpujpeg_decoder * @param param_image Parameters for image data * @return decoder structure if succeeds, otherwise NULL */ -struct gpujpeg_decoder* +GPUJPEG_API struct gpujpeg_decoder* gpujpeg_decoder_create(); /** @@ -154,7 +136,7 @@ gpujpeg_decoder_create(); * @param param_image Parameters for image data * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, struct gpujpeg_parameters* param, struct gpujpeg_image_parameters* param_image); /** @@ -167,7 +149,7 @@ gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, struct gpujpeg_parameters* * @param image_decompressed_size Pointer to variable where decompressed image size will be placed * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int image_size, struct gpujpeg_decoder_output* output); /** @@ -176,9 +158,21 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int imag * @param decoder Decoder structure * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder); +/** + * Sets output format + * + * @param decoder Decoder structure + * @param color_space Requested output color space + * @param sampling_factor Requestd color sampling factor + */ +GPUJPEG_API void +gpujpeg_decoder_set_output_format(struct gpujpeg_decoder* decoder, + enum gpujpeg_color_space color_space, + enum gpujpeg_sampling_factor sampling_factor); + #ifdef __cplusplus } #endif diff --git a/gpujpeg/libgpujpeg/gpujpeg_decoder_internal.h b/gpujpeg/libgpujpeg/gpujpeg_decoder_internal.h new file mode 100644 index 000000000..1c37265f4 --- /dev/null +++ b/gpujpeg/libgpujpeg/gpujpeg_decoder_internal.h @@ -0,0 +1,69 @@ +/** + * Copyright (c) 2011, CESNET z.s.p.o + * Copyright (c) 2011, Silicon Genome, LLC. + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef GPUJPEG_DECODER_INTERNAL_H +#define GPUJPEG_DECODER_INTERNAL_H + +#include +#include +#include +#include + +/** + * JPEG decoder structure + */ +struct gpujpeg_decoder +{ + // JPEG coder structure + struct gpujpeg_coder coder; + + // JPEG reader structure + struct gpujpeg_reader* reader; + + // Quantization tables + struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT]; + + // Huffman coder tables + struct gpujpeg_table_huffman_decoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT]; + // Huffman coder tables in device memory + struct gpujpeg_table_huffman_decoder* d_table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT]; + + // Current segment count for decoded image + int segment_count; + + // Current data compressed size for decoded image + int data_compressed_size; + + // Timers + GPUJPEG_CUSTOM_TIMER_DECLARE(def) + GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu) +}; + +#endif // GPUJPEG_DECODER_INTERNAL_H + diff --git a/gpujpeg/libgpujpeg/gpujpeg_encoder.h b/gpujpeg/libgpujpeg/gpujpeg_encoder.h index 5282ecc20..f35adca10 100644 --- a/gpujpeg/libgpujpeg/gpujpeg_encoder.h +++ b/gpujpeg/libgpujpeg/gpujpeg_encoder.h @@ -31,13 +31,24 @@ #define GPUJPEG_ENCODER_H #include -#include -#include +#include #ifdef __cplusplus extern "C" { #endif +#if defined _MSC_VER || defined __MINGW32__ +#ifdef GPUJPEG_EXPORTS +#define GPUJPEG_API __declspec(dllexport) +#else +#define GPUJPEG_API __declspec(dllimport) +#endif +#else // other platforms +#define GPUJPEG_API +#endif + +struct gpujpeg_encoder; + /** * Encoder input type */ @@ -70,7 +81,7 @@ struct gpujpeg_encoder_input * @param image Input image data * @return void */ -void +GPUJPEG_API void gpujpeg_encoder_input_set_image(struct gpujpeg_encoder_input* input, uint8_t* image); /** @@ -80,31 +91,9 @@ gpujpeg_encoder_input_set_image(struct gpujpeg_encoder_input* input, uint8_t* im * @param texture_id OpenGL texture id * @return void */ -void +GPUJPEG_API void gpujpeg_encoder_input_set_texture(struct gpujpeg_encoder_input* input, struct gpujpeg_opengl_texture* texture); -/** - * JPEG encoder structure - */ -struct gpujpeg_encoder -{ - // JPEG coder structure - struct gpujpeg_coder coder; - - // JPEG writer structure - struct gpujpeg_writer* writer; - - // Quantization tables - struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT]; - - // Huffman coder tables - struct gpujpeg_table_huffman_encoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT]; - - // Timers - GPUJPEG_CUSTOM_TIMER_DECLARE(def) - GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu) -}; - /** * Create JPEG encoder * @@ -112,7 +101,7 @@ struct gpujpeg_encoder * @param param_image Parameters for image data * @return encoder structure if succeeds, otherwise NULL */ -struct gpujpeg_encoder* +GPUJPEG_API struct gpujpeg_encoder* gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct gpujpeg_image_parameters* param_image); /** @@ -124,7 +113,7 @@ gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct gpujpeg_image_pa * @param image_compressed_size Pointer to variable where compressed image size will be placed * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_encoder_input* input, uint8_t** image_compressed, int* image_compressed_size); /** @@ -133,7 +122,7 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_encoder_i * @param encoder Encoder structure * @return 0 if succeeds, otherwise nonzero */ -int +GPUJPEG_API int gpujpeg_encoder_destroy(struct gpujpeg_encoder* encoder); #ifdef __cplusplus diff --git a/gpujpeg/libgpujpeg/gpujpeg_encoder_internal.h b/gpujpeg/libgpujpeg/gpujpeg_encoder_internal.h new file mode 100644 index 000000000..cc175db80 --- /dev/null +++ b/gpujpeg/libgpujpeg/gpujpeg_encoder_internal.h @@ -0,0 +1,65 @@ +/** + * Copyright (c) 2011, CESNET z.s.p.o + * Copyright (c) 2011, Silicon Genome, LLC. + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef GPUJPEG_ENCODER_INTERNAL_H +#define GPUJPEG_ENCODER_INTERNAL_H + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct gpujpeg_encoder +{ + // JPEG coder structure + struct gpujpeg_coder coder; + + // JPEG writer structure + struct gpujpeg_writer* writer; + + // Quantization tables + struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT]; + + // Huffman coder tables + struct gpujpeg_table_huffman_encoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT]; + + // Timers + GPUJPEG_CUSTOM_TIMER_DECLARE(def) + GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu) +}; + +#ifdef __cplusplus +} +#endif + +#endif // GPUJPEG_ENCODER_INTERNAL_H diff --git a/gpujpeg/libgpujpeg/gpujpeg_type.h b/gpujpeg/libgpujpeg/gpujpeg_type.h index 7fc611984..7ce0f2afd 100644 --- a/gpujpeg/libgpujpeg/gpujpeg_type.h +++ b/gpujpeg/libgpujpeg/gpujpeg_type.h @@ -31,7 +31,6 @@ #define GPUJPEG_TYPE_H #include -#include #ifdef __cplusplus extern "C" { @@ -71,7 +70,7 @@ enum gpujpeg_color_space { * * @param color_space */ -static inline __device__ __host__ const char* +static inline const char* gpujpeg_color_space_get_name(enum gpujpeg_color_space color_space) { switch ( color_space ) { @@ -199,6 +198,9 @@ enum gpujpeg_marker_code { GPUJPEG_MARKER_ERROR = 0x100 }; +static const char* +gpujpeg_marker_name(enum gpujpeg_marker_code code) __attribute__((unused)); + /** * Get marker name from code * diff --git a/gpujpeg/libgpujpeg/gpujpeg_util.h b/gpujpeg/libgpujpeg/gpujpeg_util.h index 55929310d..6d23eb03d 100644 --- a/gpujpeg/libgpujpeg/gpujpeg_util.h +++ b/gpujpeg/libgpujpeg/gpujpeg_util.h @@ -34,9 +34,8 @@ #include #include #include -#include #include -#include +#include "cuda_runtime.h" #ifdef __cplusplus extern "C" { diff --git a/gpujpeg/src/gpujpeg_common.c b/gpujpeg/src/gpujpeg_common.cpp similarity index 99% rename from gpujpeg/src/gpujpeg_common.c rename to gpujpeg/src/gpujpeg_common.cpp index 5ed252fc9..7e72bd3c9 100644 --- a/gpujpeg/src/gpujpeg_common.c +++ b/gpujpeg/src/gpujpeg_common.cpp @@ -31,6 +31,8 @@ #include "config.h" #endif /* HAVE_CONFIG_H */ +#include +#include #include #include #include "gpujpeg_preprocessor.h" @@ -39,8 +41,8 @@ #define GL_GLEXT_PROTOTYPES #include #include + #include #endif -#include // rounds number of segment bytes up to next multiple of 128 #define SEGMENT_ALIGN(b) (((b) + 127) & ~127) @@ -142,10 +144,12 @@ gpujpeg_init_device(int device_id, int flags) return -1; } +#ifdef GPUJPEG_USE_OPENGL if ( flags & GPUJPEG_OPENGL_INTEROPERABILITY ) { cudaGLSetGLDevice(device_id); gpujpeg_cuda_check_error("Enabling OpenGL interoperability"); } +#endif if ( flags & GPUJPEG_VERBOSE ) { int cuda_driver_version = 0; @@ -234,18 +238,26 @@ gpujpeg_image_get_file_format(const char* filename) GPUJPEG_IMAGE_FILE_JPEG }; - char * ext = strrchr(filename, '.'); + const char * ext = strrchr(filename, '.'); if ( ext == NULL ) - return -1; + return GPUJPEG_IMAGE_FILE_UNKNOWN; ext++; + char ext_lc[3]; + strncpy(ext_lc, ext, 3); + std::transform(ext_lc, ext_lc + sizeof(ext_lc), ext_lc, ::tolower); for ( int i = 0; i < sizeof(format) / sizeof(*format); i++ ) { - if ( strncasecmp(ext, extension[i], 3) == 0 ) { + if ( strncmp(ext_lc, extension[i], 3) == 0 ) { return format[i]; } } return GPUJPEG_IMAGE_FILE_UNKNOWN; } +void gpujpeg_set_device(int index) +{ + cudaSetDevice(index); +} + /** Documented at declaration */ void gpujpeg_component_print8(struct gpujpeg_component* component, uint8_t* d_data) diff --git a/gpujpeg/src/gpujpeg_dct_cpu.c b/gpujpeg/src/gpujpeg_dct_cpu.cpp similarity index 99% rename from gpujpeg/src/gpujpeg_dct_cpu.c rename to gpujpeg/src/gpujpeg_dct_cpu.cpp index 447424ffa..af9a977c7 100644 --- a/gpujpeg/src/gpujpeg_dct_cpu.c +++ b/gpujpeg/src/gpujpeg_dct_cpu.cpp @@ -226,7 +226,7 @@ gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder) int index = y * width + x; gpujpeg_idct_cpu_perform( &component->data_quantized[index * 64], - decoder->table_quantization[type].table + (int16_t *) decoder->table_quantization[type].table ); } } diff --git a/gpujpeg/src/gpujpeg_dct_cpu.h b/gpujpeg/src/gpujpeg_dct_cpu.h index 527645e8b..4dff40eb7 100644 --- a/gpujpeg/src/gpujpeg_dct_cpu.h +++ b/gpujpeg/src/gpujpeg_dct_cpu.h @@ -30,8 +30,8 @@ #ifndef GPUJPEG_DCT_CPU_H #define GPUJPEG_DCT_CPU_H -#include -#include +#include +#include /** * Peform inverse DCT on CPU diff --git a/gpujpeg/src/gpujpeg_dct_gpu.h b/gpujpeg/src/gpujpeg_dct_gpu.h index 88c806fbd..757149a88 100644 --- a/gpujpeg/src/gpujpeg_dct_gpu.h +++ b/gpujpeg/src/gpujpeg_dct_gpu.h @@ -30,8 +30,8 @@ #ifndef GPUJPEG_DCT_GPU_H #define GPUJPEG_DCT_GPU_H -#include -#include +#include +#include #ifdef __cplusplus extern "C" { diff --git a/gpujpeg/src/gpujpeg_decoder.c b/gpujpeg/src/gpujpeg_decoder.cpp similarity index 96% rename from gpujpeg/src/gpujpeg_decoder.c rename to gpujpeg/src/gpujpeg_decoder.cpp index c49864af5..a9535b35f 100644 --- a/gpujpeg/src/gpujpeg_decoder.c +++ b/gpujpeg/src/gpujpeg_decoder.cpp @@ -28,6 +28,7 @@ */ #include +#include #include "gpujpeg_preprocessor.h" #include "gpujpeg_dct_cpu.h" #include "gpujpeg_dct_gpu.h" @@ -77,7 +78,7 @@ gpujpeg_decoder_output_set_cuda_buffer(struct gpujpeg_decoder_output* output) struct gpujpeg_decoder* gpujpeg_decoder_create() { - struct gpujpeg_decoder* decoder = malloc(sizeof(struct gpujpeg_decoder)); + struct gpujpeg_decoder* decoder = (struct gpujpeg_decoder*) malloc(sizeof(struct gpujpeg_decoder)); if ( decoder == NULL ) return NULL; @@ -336,6 +337,15 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int imag return 0; } +void +gpujpeg_decoder_set_output_format(struct gpujpeg_decoder* decoder, + enum gpujpeg_color_space color_space, + enum gpujpeg_sampling_factor sampling_factor) +{ + decoder->coder.param_image.color_space = color_space; + decoder->coder.param_image.sampling_factor = sampling_factor; +} + /** Documented at declaration */ int gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder) diff --git a/gpujpeg/src/gpujpeg_encoder.c b/gpujpeg/src/gpujpeg_encoder.cpp similarity index 99% rename from gpujpeg/src/gpujpeg_encoder.c rename to gpujpeg/src/gpujpeg_encoder.cpp index efb615528..dbccb2956 100644 --- a/gpujpeg/src/gpujpeg_encoder.c +++ b/gpujpeg/src/gpujpeg_encoder.cpp @@ -63,7 +63,7 @@ gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct gpujpeg_image_pa assert(param->restart_interval >= 0); assert(param->interleaved == 0 || param->interleaved == 1); - struct gpujpeg_encoder* encoder = malloc(sizeof(struct gpujpeg_encoder)); + struct gpujpeg_encoder* encoder = (struct gpujpeg_encoder*) malloc(sizeof(struct gpujpeg_encoder)); if ( encoder == NULL ) return NULL; diff --git a/gpujpeg/src/gpujpeg_huffman_cpu_decoder.c b/gpujpeg/src/gpujpeg_huffman_cpu_decoder.cpp similarity index 100% rename from gpujpeg/src/gpujpeg_huffman_cpu_decoder.c rename to gpujpeg/src/gpujpeg_huffman_cpu_decoder.cpp diff --git a/gpujpeg/src/gpujpeg_huffman_cpu_decoder.h b/gpujpeg/src/gpujpeg_huffman_cpu_decoder.h index aad050b48..d63827420 100644 --- a/gpujpeg/src/gpujpeg_huffman_cpu_decoder.h +++ b/gpujpeg/src/gpujpeg_huffman_cpu_decoder.h @@ -30,7 +30,7 @@ #ifndef GPUJPEG_HUFFMAN_CPU_DECODER_H #define GPUJPEG_HUFFMAN_CPU_DECODER_H -#include +#include /** * Perform huffman decoding diff --git a/gpujpeg/src/gpujpeg_huffman_cpu_encoder.c b/gpujpeg/src/gpujpeg_huffman_cpu_encoder.cpp similarity index 100% rename from gpujpeg/src/gpujpeg_huffman_cpu_encoder.c rename to gpujpeg/src/gpujpeg_huffman_cpu_encoder.cpp diff --git a/gpujpeg/src/gpujpeg_huffman_cpu_encoder.h b/gpujpeg/src/gpujpeg_huffman_cpu_encoder.h index 6c1cc32c9..cb4e84cf0 100644 --- a/gpujpeg/src/gpujpeg_huffman_cpu_encoder.h +++ b/gpujpeg/src/gpujpeg_huffman_cpu_encoder.h @@ -30,7 +30,7 @@ #ifndef GPUJPEG_HUFFMAN_CPU_ENCODER_H #define GPUJPEG_HUFFMAN_CPU_ENCODER_H -#include +#include /** * Perform huffman encoding diff --git a/gpujpeg/src/gpujpeg_huffman_gpu_decoder.h b/gpujpeg/src/gpujpeg_huffman_gpu_decoder.h index 0ef5a1a8f..a49d43b4b 100644 --- a/gpujpeg/src/gpujpeg_huffman_gpu_decoder.h +++ b/gpujpeg/src/gpujpeg_huffman_gpu_decoder.h @@ -30,7 +30,7 @@ #ifndef GPUJPEG_HUFFMAN_GPU_DECODER_H #define GPUJPEG_HUFFMAN_GPU_DECODER_H -#include +#include #ifdef __cplusplus extern "C" { diff --git a/gpujpeg/src/gpujpeg_huffman_gpu_encoder.h b/gpujpeg/src/gpujpeg_huffman_gpu_encoder.h index 33142a458..a5bdb7970 100644 --- a/gpujpeg/src/gpujpeg_huffman_gpu_encoder.h +++ b/gpujpeg/src/gpujpeg_huffman_gpu_encoder.h @@ -30,7 +30,7 @@ #ifndef GPUJPEG_HUFFMAN_GPU_ENCODER_H #define GPUJPEG_HUFFMAN_GPU_ENCODER_H -#include +#include #ifdef __cplusplus extern "C" { diff --git a/gpujpeg/src/gpujpeg_preprocessor.h b/gpujpeg/src/gpujpeg_preprocessor.h index b0d74a52c..02c5d029b 100644 --- a/gpujpeg/src/gpujpeg_preprocessor.h +++ b/gpujpeg/src/gpujpeg_preprocessor.h @@ -30,8 +30,8 @@ #ifndef GPUJPEG_PREPROCESSOR_H #define GPUJPEG_PREPROCESSOR_H -#include -#include +#include +#include #ifdef __cplusplus extern "C" { diff --git a/gpujpeg/src/gpujpeg_reader.c b/gpujpeg/src/gpujpeg_reader.cpp similarity index 99% rename from gpujpeg/src/gpujpeg_reader.c rename to gpujpeg/src/gpujpeg_reader.cpp index 81228dcbe..52fecb5ce 100644 --- a/gpujpeg/src/gpujpeg_reader.c +++ b/gpujpeg/src/gpujpeg_reader.cpp @@ -29,13 +29,15 @@ #include #include +#include #include /** Documented at declaration */ struct gpujpeg_reader* gpujpeg_reader_create() { - struct gpujpeg_reader* reader = malloc(sizeof(struct gpujpeg_reader)); + struct gpujpeg_reader* reader = (struct gpujpeg_reader*) + malloc(sizeof(struct gpujpeg_reader)); if ( reader == NULL ) return NULL; reader->comp_count = 0; @@ -122,7 +124,7 @@ gpujpeg_reader_read_app0(uint8_t** image) return -1; } - char jfif[4]; + char jfif[5]; jfif[0] = gpujpeg_reader_read_byte(*image); jfif[1] = gpujpeg_reader_read_byte(*image); jfif[2] = gpujpeg_reader_read_byte(*image); diff --git a/gpujpeg/src/gpujpeg_table.c b/gpujpeg/src/gpujpeg_table.cpp similarity index 100% rename from gpujpeg/src/gpujpeg_table.c rename to gpujpeg/src/gpujpeg_table.cpp diff --git a/gpujpeg/src/gpujpeg_writer.c b/gpujpeg/src/gpujpeg_writer.cpp similarity index 98% rename from gpujpeg/src/gpujpeg_writer.c rename to gpujpeg/src/gpujpeg_writer.cpp index fc5e8858e..af4e5af62 100644 --- a/gpujpeg/src/gpujpeg_writer.c +++ b/gpujpeg/src/gpujpeg_writer.cpp @@ -29,20 +29,21 @@ #include #include +#include #include /** Documented at declaration */ struct gpujpeg_writer* gpujpeg_writer_create(struct gpujpeg_encoder* encoder) { - struct gpujpeg_writer* writer = malloc(sizeof(struct gpujpeg_writer)); + struct gpujpeg_writer* writer = (struct gpujpeg_writer*) malloc(sizeof(struct gpujpeg_writer)); if ( writer == NULL ) return NULL; // Allocate output buffer int buffer_size = 1000; buffer_size += encoder->coder.param_image.width * encoder->coder.param_image.height * encoder->coder.param_image.comp_count * 2; - writer->buffer = malloc(buffer_size * sizeof(uint8_t)); + writer->buffer = (uint8_t *) malloc(buffer_size * sizeof(uint8_t)); if ( writer->buffer == NULL ) return NULL; writer->buffer_current = NULL; diff --git a/gpujpeg/src/main.c b/gpujpeg/src/main.c index a8351c06d..a3d32ad19 100644 --- a/gpujpeg/src/main.c +++ b/gpujpeg/src/main.c @@ -27,6 +27,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include // TIMER +#include // TIMER +#include // TIMER #include #include #include diff --git a/src/control_socket.cpp b/src/control_socket.cpp index 749ff1f21..3dd2e826e 100644 --- a/src/control_socket.cpp +++ b/src/control_socket.cpp @@ -114,6 +114,10 @@ static ssize_t write_all(fd_t fd, const void *buf, size_t count); static void * control_thread(void *args); static void send_response(fd_t fd, struct response *resp); +#ifndef HAVE_LINUX +#define MSG_NOSIGNAL 0 +#endif + static ssize_t write_all(fd_t fd, const void *buf, size_t count) { char *p = (char *) buf; diff --git a/src/cuda_wrapper.cu b/src/cuda_wrapper.cu new file mode 100644 index 000000000..15dad6cac --- /dev/null +++ b/src/cuda_wrapper.cu @@ -0,0 +1,119 @@ +/** + * @file cuda_wrapper.h + * @author Martin Pulec + * + * @brief This file contais wrapper around CUDA functions. + */ +/* + * Copyright (c) 2013 CESNET z.s.p.o. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, is permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of CESNET nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cuda_wrapper.h" + +#include "cuda_runtime.h" +#include + +typedef void *cuda_wrapper_stream_t; + +static inline int map_cuda_error(cudaError_t cuda_error) { + struct error_mapping { + cudaError_t cuda_error; + int wrapper_error; + }; + struct error_mapping mapping[] = { + { cudaSuccess, CUDA_WRAPPER_SUCCESS }, + }; + + int i; + for (i = 0; i < sizeof(mapping)/sizeof(struct error_mapping); ++i) { + if (cuda_error == mapping[i].cuda_error) { + return mapping[i].wrapper_error; + } + } + + return CUDA_UNKNOWN_ERROR; +}; + +static inline enum cudaMemcpyKind map_cuda_memcpy_kind(int our_kind) { + struct kind_mapping { + enum cudaMemcpyKind kind; + int our_kind; + }; + struct kind_mapping mapping[] = { + { cudaMemcpyHostToDevice, CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE }, + { cudaMemcpyDeviceToHost, CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST }, + }; + + int i; + for (i = 0; i < sizeof(mapping)/sizeof(struct kind_mapping); ++i) { + if (our_kind == mapping[i].our_kind) { + return mapping[i].kind; + } + } + + abort(); // should not reach here +}; + +CUDA_DLL_API int cuda_wrapper_free(void *buffer) +{ + return map_cuda_error(cudaFree(buffer)); +} + +CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len) +{ + return map_cuda_error(cudaMalloc(buffer, data_len)); +} + +CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len) +{ + return map_cuda_error(cudaMallocHost(buffer, data_len)); +} + +CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src, + size_t count, int kind) +{ + return map_cuda_error( + cudaMemcpy(dst, src, count, + map_cuda_memcpy_kind(kind))); +} + +CUDA_DLL_API const char *cuda_wrapper_last_error_string(void) +{ + return cudaGetErrorString(cudaGetLastError()); +} + +CUDA_DLL_API int cuda_wrapper_set_device(int index) +{ + return map_cuda_error( + cudaSetDevice(index)); +} + diff --git a/src/cuda_wrapper.h b/src/cuda_wrapper.h new file mode 100644 index 000000000..2364abcad --- /dev/null +++ b/src/cuda_wrapper.h @@ -0,0 +1,82 @@ +/** + * @file cuda_wrapper.h + * @author Martin Pulec + */ +/* + * Copyright (c) 2013 CESNET z.s.p.o. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, is permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of CESNET nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CUDA_WRAPPER_H_ +#define CUDA_WRAPPER_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#if defined _MSC_VER || defined __MINGW32__ +#ifdef EXPORT_DLL_SYMBOLS +#define CUDA_DLL_API __declspec(dllexport) +#else +#define CUDA_DLL_API __declspec(dllimport) +#endif +#else // other platforms +#define CUDA_DLL_API +#endif + +/// @{ +#define CUDA_WRAPPER_SUCCESS 0 +#define CUDA_UNKNOWN_ERROR 1 ///< error for which there is no mapping in wrapper +/// @} + +/// @{ +#define CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE 0 +#define CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST 1 +/// @} + +typedef void *cuda_wrapper_stream_t; + +CUDA_DLL_API int cuda_wrapper_free(void *buffer); +CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len); +CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len); +CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src, + size_t count, int kind); +CUDA_DLL_API const char *cuda_wrapper_last_error_string(void); +CUDA_DLL_API int cuda_wrapper_set_device(int index); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // CUDA_WRAPPER_H_ + diff --git a/src/main.c b/src/main.c index 7fe391ffe..56f548895 100644 --- a/src/main.c +++ b/src/main.c @@ -1127,7 +1127,7 @@ int main(int argc, char *argv[]) echo_cancellation = true; break; case OPT_CUDA_DEVICE: -#ifdef HAVE_CUDA +#ifdef HAVE_JPEG if(strcmp("help", optarg) == 0) { struct compress_state *compression; int ret = compress_init(&root_mod, "JPEG:list_devices", &compression); diff --git a/src/video_compress/cuda_dxt.cpp b/src/video_compress/cuda_dxt.cpp index 5e77263b2..a8f5d8e5a 100644 --- a/src/video_compress/cuda_dxt.cpp +++ b/src/video_compress/cuda_dxt.cpp @@ -44,6 +44,7 @@ #include "video_compress/cuda_dxt.h" #include "cuda_dxt/cuda_dxt.h" +#include "cuda_wrapper.h" #include "host.h" #include "module.h" @@ -57,12 +58,14 @@ struct state_video_compress_cuda_dxt { in_buffer = NULL; cuda_in_buffer = NULL; cuda_uyvy_buffer = NULL; + cuda_out_buffer = NULL; } struct module module_data; struct video_desc saved_desc; char *in_buffer; ///< for decoded data char *cuda_uyvy_buffer; ///< same as in_buffer but in device memory char *cuda_in_buffer; ///< same as in_buffer but in device memory + char *cuda_out_buffer; ///< same as in_buffer but in device memory struct video_frame *out[2]; codec_t in_codec; codec_t out_codec; @@ -107,16 +110,20 @@ static void cleanup(struct state_video_compress_cuda_dxt *s) s->in_buffer = NULL; } if (s->cuda_uyvy_buffer) { - cudaFree(s->cuda_uyvy_buffer); + cuda_wrapper_free(s->cuda_uyvy_buffer); s->cuda_uyvy_buffer = NULL; } if (s->cuda_in_buffer) { - cudaFree(s->cuda_in_buffer); + cuda_wrapper_free(s->cuda_in_buffer); s->cuda_in_buffer = NULL; } + if (s->cuda_out_buffer) { + cuda_wrapper_free(s->cuda_out_buffer); + s->cuda_out_buffer = NULL; + } for (int i = 0; i < 2; ++i) { if (s->out[i] != NULL) { - cudaFree(s->out[i]->tiles[0].data); + cuda_wrapper_free(s->out[i]->tiles[0].data); s->out[i]->tiles[0].data = NULL; } } @@ -138,7 +145,7 @@ static bool configure_with(struct state_video_compress_cuda_dxt *s, struct video } if (s->in_codec == UYVY) { - if (cudaSuccess != cudaMalloc((void **) &s->cuda_uyvy_buffer, + if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc((void **) &s->cuda_uyvy_buffer, desc.width * desc.height * 2)) { fprintf(stderr, "Could not allocate CUDA UYVY buffer.\n"); return false; @@ -147,7 +154,7 @@ static bool configure_with(struct state_video_compress_cuda_dxt *s, struct video s->in_buffer = (char *) malloc(desc.width * desc.height * 3); - if (cudaSuccess != cudaMalloc((void **) &s->cuda_in_buffer, + if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc((void **) &s->cuda_in_buffer, desc.width * desc.height * 3)) { fprintf(stderr, "Could not allocate CUDA output buffer.\n"); return false; @@ -160,12 +167,18 @@ static bool configure_with(struct state_video_compress_cuda_dxt *s, struct video s->out[i] = vf_alloc_desc(compressed_desc); s->out[i]->tiles[0].data_len = desc.width * desc.height / (s->out_codec == DXT1 ? 2 : 1); - if (cudaSuccess != cudaMallocHost((void **) &s->out[i]->tiles[0].data, + if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc_host((void **) &s->out[i]->tiles[0].data, s->out[i]->tiles[0].data_len)) { - fprintf(stderr, "Could not allocate CUDA output buffer.\n"); + fprintf(stderr, "Could not allocate CUDA output host buffer.\n"); return false; } } + if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc((void **) + &s->cuda_out_buffer, + s->out[0]->tiles[0].data_len)) { + fprintf(stderr, "Could not allocate CUDA output buffer.\n"); + return false; + } return true; } @@ -176,7 +189,7 @@ struct video_frame *cuda_dxt_compress_tile(struct module *mod, struct video_fram struct state_video_compress_cuda_dxt *s = (struct state_video_compress_cuda_dxt *) mod->priv_data; - cudaSetDevice(cuda_devices[0]); + cuda_wrapper_set_device(cuda_devices[0]); if (!video_desc_eq_excl_param(video_desc_from_frame(tx), s->saved_desc, PARAM_TILE_COUNT)) { @@ -205,30 +218,28 @@ struct video_frame *cuda_dxt_compress_tile(struct module *mod, struct video_fram } if (s->in_codec == UYVY) { - if (cudaMemcpy(s->cuda_uyvy_buffer, in_buffer, tx->tiles[tile_idx].width * + if (cuda_wrapper_memcpy(s->cuda_uyvy_buffer, in_buffer, tx->tiles[tile_idx].width * tx->tiles[tile_idx].height * 2, - cudaMemcpyHostToDevice) != cudaSuccess) { - fprintf(stderr, "Memcpy failed: %s\n", - cudaGetErrorString(cudaGetLastError())); + CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE) != CUDA_WRAPPER_SUCCESS) { + fprintf(stderr, "Memcpy failed: %s\n", cuda_wrapper_last_error_string()); return NULL; } if (cuda_yuv422_to_yuv444(s->cuda_uyvy_buffer, s->cuda_in_buffer, tx->tiles[tile_idx].width * - tx->tiles[tile_idx].height, 0) != 0) { - fprintf(stderr, "UYVY kernel failed: %s\n", - cudaGetErrorString(cudaGetLastError())); + tx->tiles[tile_idx].height, 0) != CUDA_WRAPPER_SUCCESS) { + fprintf(stderr, "Kernel failed: %s\n", cuda_wrapper_last_error_string()); } } else { - if (cudaMemcpy(s->cuda_in_buffer, in_buffer, tx->tiles[tile_idx].width * + if (cuda_wrapper_memcpy(s->cuda_in_buffer, in_buffer, tx->tiles[tile_idx].width * tx->tiles[tile_idx].height * 3, - cudaMemcpyHostToDevice) != cudaSuccess) { - fprintf(stderr, "Memcpy failed: %s\n", - cudaGetErrorString(cudaGetLastError())); + CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE) != CUDA_WRAPPER_SUCCESS) { + fprintf(stderr, "Memcpy failed: %s\n", cuda_wrapper_last_error_string()); return NULL; } } - int (*cuda_dxt_enc_func)(const void * src, void * out, int size_x, int size_y, cudaStream_t stream); + int (*cuda_dxt_enc_func)(const void * src, void * out, int size_x, int size_y, + cuda_wrapper_stream_t stream); if (s->out_codec == DXT1) { if (s->in_codec == RGB) { @@ -243,11 +254,18 @@ struct video_frame *cuda_dxt_compress_tile(struct module *mod, struct video_fram cuda_dxt_enc_func = cuda_yuv_to_dxt6; } } - int ret = cuda_dxt_enc_func(s->cuda_in_buffer, s->out[buffer]->tiles[0].data, + int ret = cuda_dxt_enc_func(s->cuda_in_buffer, s->cuda_out_buffer, s->saved_desc.width, s->saved_desc.height, 0); if (ret != 0) { - fprintf(stderr, "Encoding failed: %s\n", - cudaGetErrorString(cudaGetLastError())); + fprintf(stderr, "Encoding failed: %s\n", cuda_wrapper_last_error_string()); + return NULL; + } + + if (cuda_wrapper_memcpy(s->out[buffer]->tiles[0].data, + s->cuda_out_buffer, + s->out[buffer]->tiles[0].data_len, + CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST) != CUDA_WRAPPER_SUCCESS) { + fprintf(stderr, "Memcpy failed: %s\n", cuda_wrapper_last_error_string()); return NULL; } diff --git a/src/video_compress/jpeg.c b/src/video_compress/jpeg.c index 57d4fce7d..36f03559d 100644 --- a/src/video_compress/jpeg.c +++ b/src/video_compress/jpeg.c @@ -58,7 +58,6 @@ #include "module.h" #include "video_compress/jpeg.h" #include "libgpujpeg/gpujpeg_encoder.h" -#include "libgpujpeg/gpujpeg_common.h" #include "video.h" #include #include @@ -346,7 +345,7 @@ struct video_frame * jpeg_compress(struct module *mod, struct video_frame * tx, unsigned int x; - cudaSetDevice(cuda_devices[0]); + gpujpeg_set_device(cuda_devices[0]); if(!s->encoder) { int ret; diff --git a/src/video_decompress/jpeg.c b/src/video_decompress/jpeg.c index 5c96dfb15..49eefec66 100644 --- a/src/video_decompress/jpeg.c +++ b/src/video_decompress/jpeg.c @@ -57,7 +57,6 @@ #include "libgpujpeg/gpujpeg_decoder.h" //#include "compat/platform_semaphore.h" -#include #include #include #include "video_decompress/jpeg.h" @@ -82,11 +81,11 @@ static int configure_with(struct state_decompress_jpeg *s, struct video_desc des return FALSE; } if(s->out_codec == RGB) { - s->decoder->coder.param_image.color_space = GPUJPEG_RGB; - s->decoder->coder.param_image.sampling_factor = GPUJPEG_4_4_4; + gpujpeg_decoder_set_output_format(s->decoder, GPUJPEG_RGB, + GPUJPEG_4_4_4); } else { - s->decoder->coder.param_image.color_space = GPUJPEG_YCBCR_BT709; - s->decoder->coder.param_image.sampling_factor = GPUJPEG_4_2_2; + gpujpeg_decoder_set_output_format(s->decoder, GPUJPEG_YCBCR_BT709, + GPUJPEG_4_2_2); } return TRUE; @@ -118,7 +117,6 @@ int jpeg_decompress_reconfigure(void *state, struct video_desc desc, int rshift, int gshift, int bshift, int pitch, codec_t out_codec) { struct state_decompress_jpeg *s = (struct state_decompress_jpeg *) state; - int ret; assert(out_codec == RGB || out_codec == UYVY); @@ -157,7 +155,7 @@ int jpeg_decompress(void *state, unsigned char *dst, unsigned char *buffer, linesize = s->desc.width * 2; } - cudaSetDevice(cuda_devices[0]); + gpujpeg_set_device(cuda_devices[0]); if((s->out_codec != RGB || (s->rshift == 0 && s->gshift == 8 && s->bshift == 16)) && s->pitch == linesize) { diff --git a/src/video_decompress/jpeg_to_dxt.cpp b/src/video_decompress/jpeg_to_dxt.cpp index cf18cdf77..7a7db4fab 100644 --- a/src/video_decompress/jpeg_to_dxt.cpp +++ b/src/video_decompress/jpeg_to_dxt.cpp @@ -158,9 +158,10 @@ static void *worker_thread(void *arg) msg_frame *output_frame = new msg_frame(s->desc.width * s->desc.height / s->ppb); - if (cudaSuccess != cudaMemcpy((char*) output_frame->data, s->dxt_out_buff, + if (cuda_wrapper_memcpy((char*) output_frame->data, s->dxt_out_buff, output_frame->data_len, - cudaMemcpyDeviceToHost)) { + CUDA_WRAPPER_MEMCPY_DEVICE_TO_HOST) != + CUDA_WRAPPER_SUCCESS) { fprintf(stderr, "[jpeg_to_dxt] unable to copy from device."); } s->m_out.push(output_frame); @@ -173,7 +174,7 @@ static void *worker_thread(void *arg) gpujpeg_decoder_destroy(s->jpeg_decoder); } - cudaFree(s->dxt_out_buff); + cuda_wrapper_free(s->dxt_out_buff); return NULL; } @@ -272,11 +273,12 @@ static int reconfigure_thread(struct thread_data *s, struct video_desc desc, int } if(s->dxt_out_buff != NULL) { - cudaFree(s->dxt_out_buff); + cuda_wrapper_free(s->dxt_out_buff); s->dxt_out_buff = NULL; } - if(cudaSuccess != cudaMallocHost((void **) &s->dxt_out_buff, desc.width * desc.height / ppb)) { + if(cuda_wrapper_malloc_host((void **) &s->dxt_out_buff, desc.width * desc.height / ppb) + != CUDA_WRAPPER_SUCCESS) { fprintf(stderr, "Could not allocate CUDA output buffer.\n"); return false; }