diff --git a/Makefile.in b/Makefile.in index 362b4e0a3..b5eb0ec45 100644 --- a/Makefile.in +++ b/Makefile.in @@ -284,12 +284,6 @@ POSTPROCESS_DEPS = \ $(CUDA_COMPILER) $(CUDA_FLAGS) $(CUDA_INC) -c $< -o $@ @$(REAL_CUDA_COMPILER) $(CUDA_FLAGS) $(CUDA_INC) -M $< > $*.d $(POSTPROCESS_DEPS) -%.lib: %.cu - $(MKDIR_P) $(dir $@) - $(CUDA_COMPILER) $(CUDA_FLAGS) -DEXPORT_DLL_SYMBOLS $(CUDA_INC) --shared $< -o $*.dll - @$(REAL_CUDA_COMPILER) $(CUDA_FLAGS) -DEXPORT_DLL_SYMBOLS $(CUDA_INC) -M --shared $< > $*.d - touch $@ - $(POSTPROCESS_DEPS) src/libavcodec/from_lavc_vid_conv.o: src/libavcodec/from_lavc_vid_conv.c $(MKDIR_P) $(dir $@) diff --git a/configure.ac b/configure.ac index 1cab7588d..d91636cd3 100644 --- a/configure.ac +++ b/configure.ac @@ -195,12 +195,6 @@ then CFLAGS="$CFLAGS -fPIC" CXXFLAGS="$CXXFLAGS -fPIC" CUDA_FLAGS="$CUDA_FLAGS -Xcompiler -fPIC" - CU_OBJ_SUFFIX="o" -else # Windows - if test $WORD_LEN = 32; then - CUDA_FLAGS="-m32" - fi - CU_OBJ_SUFFIX="lib" fi AC_SUBST(CUDA_FLAGS) @@ -574,18 +568,9 @@ fi if test "$FOUND_CUDA" = yes; then AC_DEFINE([HAVE_CUDA], [1], [CUDA is present on the system]) - if test $system = Windows; then - # It is twice by intent - as an object to force dependency - # (== build the object), as a lib to be at the end of the - # link list (it is a library) in MSW. TODO: make it prettier - # somehow. The same applies also for other CUDA objs/libs. - CUDA_COMMON_OBJ=src/cuda_wrapper.$CU_OBJ_SUFFIX - CUDA_COMMON_LIB=src/cuda_wrapper.$CU_OBJ_SUFFIX - DLL_LIBS="$DLL_LIBS src/cuda_wrapper.dll" - else + OBJS="$OBJS src/cuda_wrapper.o" + if test $system != Windows; then LIBS="$LIBS $CUDA_LIB" - OBJS="$OBJS src/cuda_wrapper.$CU_OBJ_SUFFIX" - CUDA_COMMON_LIB= POST_COMPILE_MSG="$POST_COMPILE_MSG\n***\nYou have compiled in sort of CUDA code.\nIn order to use use it compression and \ decompression, you will need to have CUDA libraries visible to your OS.\n\ If not done so, you can accomplish this by adding line:\n\ @@ -2274,20 +2259,6 @@ ENSURE_FEATURE_PRESENT([$gpujpeg_req], [$gpujpeg], [GPUJPEG not found]) cuda_dxt=no -AC_DEFUN([DEFINE_CUDA_DXT], [ - if test -z "$included_shared_cuda_dxt_cu"; then - if test $system = Windows; then - CUDA_DXT_COMMON_OBJ="cuda_dxt/cuda_dxt.$CU_OBJ_SUFFIX" - CUDA_DXT_COMMON_LIB="cuda_dxt/cuda_dxt.$CU_OBJ_SUFFIX" - DLL_LIBS="$DLL_LIBS cuda_dxt/cuda_dxt.dll" - else - CUDA_DXT_COMMON_OBJ="cuda_dxt/cuda_dxt.$CU_OBJ_SUFFIX" - CUDA_DXT_COMMON_LIB= - fi - included_shared_cuda_dxt_cu=yes - fi - ]) - AC_ARG_ENABLE(cuda-dxt, [ --disable-cuda-dxt disable CUDA DXT compression (auto)] [ Requires: CUDA], @@ -2295,13 +2266,13 @@ AC_ARG_ENABLE(cuda-dxt, [cuda_dxt_req=$build_default]) LIBS=$SAVED_LIBS +CUDA_DXT_COMMON_OBJ="cuda_dxt/cuda_dxt.o" if test "$cuda_dxt_req" != no -a $FOUND_CUDA = yes then cuda_dxt=yes - DEFINE_CUDA_DXT - CUDA_DXT_LIB="$CUDA_COMMON_LIB $CUDA_DXT_COMMON_LIB $CUDA_LIB" + CUDA_DXT_LIB="$CUDA_DXT_COMMON_LIB $CUDA_LIB" CUDA_DXT_OBJ="src/video_compress/cuda_dxt.o $CUDA_DXT_COMMON_OBJ $CUDA_COMMON_OBJ" add_module vcompress_cuda_dxt "$CUDA_DXT_OBJ" "$CUDA_DXT_LIB" fi @@ -2321,10 +2292,9 @@ AC_ARG_ENABLE(gpujpeg_to_dxt, if test $gpujpeg_to_dxt_req != no -a $FOUND_CUDA = yes -a \ "$found_gpujpeg" = yes then - DEFINE_CUDA_DXT gpujpeg_to_dxt=yes GPUJPEG_TO_DXT_INC=" $CUDA_INC" - GPUJPEG_TO_DXT_LIB="$CUDA_DXT_COMMON_LIB $CUDA_COMMON_LIB $CUDA_LIB $GPUJPEG_LIB" + GPUJPEG_TO_DXT_LIB="$CUDA_DXT_COMMON_LIB $CUDA_LIB $GPUJPEG_LIB" GPUJPEG_TO_DXT_OBJ="src/video_decompress/gpujpeg_to_dxt.o $CUDA_COMMON_OBJ $CUDA_DXT_COMMON_OBJ" add_module vdecompress_gpujpeg_to_dxt "$GPUJPEG_TO_DXT_OBJ" "$GPUJPEG_TO_DXT_LIB" fi @@ -2366,7 +2336,7 @@ if test "$gpustitch_req" != no && test "$FOUND_CUDA" = yes; then gpustitch=yes GPUSTITCH_LIB="-lgpustitch $LIBGPUSTITCH_LIBS" - GPUSTITCH_OBJ="src/video_capture/gpustitch.o src/utils/cuda_pix_conv.$CU_OBJ_SUFFIX $CUDA_COMMON_OBJ" + GPUSTITCH_OBJ="src/video_capture/gpustitch.o src/utils/cuda_pix_conv.o $CUDA_COMMON_OBJ" add_module vidcap_gpustitch "$GPUSTITCH_OBJ" "$GPUSTITCH_LIB" INC="$INC $LIBGPUSTITCH_CFLAGS" @@ -2924,14 +2894,8 @@ AC_ARG_ENABLE(ldgm-gpu, if test $ldgm_gpu_req != no -a $FOUND_CUDA = yes then LDGM_GPU_OBJS="ldgm/src/ldgm-session-gpu.o src/rtp/ldgm_gpu.o $CUDA_COMMON_OBJ" - LDGM_GPU_LIBS="$CUDA_COMMON_LIB $CUDA_LIB" - if test $system = Windows; then - DLL_LIBS="$DLL_LIBS ldgm/src/gpu.dll" - LDGM_GPU_OBJS="$LDGM_GPU_OBJS ldgm/src/gpu.$CU_OBJ_SUFFIX" - LDGM_GPU_LIBS="$LDGM_GPU_LIBS ldgm/src/gpu.$CU_OBJ_SUFFIX" - else - LDGM_GPU_OBJS="$LDGM_GPU_OBJS ldgm/src/gpu.$CU_OBJ_SUFFIX" - fi + LDGM_GPU_LIBS="$CUDA_LIB" + LDGM_GPU_OBJS="$LDGM_GPU_OBJS ldgm/src/gpu.o" add_module ldgm_gpu "$LDGM_GPU_OBJS" "$LDGM_GPU_LIBS" if test $WORD_LEN = 32 -a $system = Linux; then CUDA_FLAGS="$CUDA_FLAGS -Xcompiler -msse2" diff --git a/cuda_dxt/cuda_dxt.cu b/cuda_dxt/cuda_dxt.cu index 22a8c0969..44787382d 100644 --- a/cuda_dxt/cuda_dxt.cu +++ b/cuda_dxt/cuda_dxt.cu @@ -759,7 +759,7 @@ static int dxt_launch(const void * src, void * out, int sx, int sy, cudaStream_t return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0; } -CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out, +int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cuda_wrapper_stream_t str) { // grid and threadblock sizes const dim3 tsiz(64, 1); @@ -779,7 +779,7 @@ CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out, /// @param size_y Height of the input image (must be divisible by 4). /// @param stream CUDA stream to run in, or 0 for default stream. /// @return 0 if OK, nonzero if failed. -CUDA_DLL_API int cuda_rgb_to_dxt1(const void * src, void * out, +int cuda_rgb_to_dxt1(const void * src, void * out, int size_x, int size_y, cuda_wrapper_stream_t stream) { return dxt_launch(src, out, size_x, size_y, (cudaStream_t) stream); } @@ -796,7 +796,7 @@ CUDA_DLL_API int cuda_rgb_to_dxt1(const void * src, void * out, /// @param size_y Height of the input image (must be divisible by 4). /// @param stream CUDA stream to run in, or 0 for default stream. /// @return 0 if OK, nonzero if failed. -CUDA_DLL_API int cuda_yuv_to_dxt1(const void * src, void * out, +int cuda_yuv_to_dxt1(const void * src, void * out, int size_x, int size_y, cuda_wrapper_stream_t stream) { return dxt_launch(src, out, size_x, size_y, (cudaStream_t) stream); } @@ -813,12 +813,12 @@ CUDA_DLL_API int cuda_yuv_to_dxt1(const void * src, void * out, /// (Input is read bottom up if negative) /// @param stream CUDA stream to run in, or 0 for default stream. /// @return 0 if OK, nonzero if failed. -CUDA_DLL_API int cuda_rgb_to_dxt6(const void * src, void * out, +int cuda_rgb_to_dxt6(const void * src, void * out, int size_x, int size_y, cuda_wrapper_stream_t stream) { return dxt_launch(src, out, size_x, size_y, (cudaStream_t) stream); } -CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out, +int cuda_yuv_to_dxt6(const void * src, void * out, int size_x, int size_y, cuda_wrapper_stream_t stream) { return dxt_launch(src, out, size_x, size_y, (cudaStream_t) stream); } diff --git a/cuda_dxt/cuda_dxt.h b/cuda_dxt/cuda_dxt.h index c1a3e3858..def965852 100644 --- a/cuda_dxt/cuda_dxt.h +++ b/cuda_dxt/cuda_dxt.h @@ -27,7 +27,7 @@ extern "C" { * @param stream CUDA stream to run in, or 0 for default stream. * @return 0 if OK, nonzero if failed. */ -CUDA_DLL_API int cuda_rgb_to_dxt1 +int cuda_rgb_to_dxt1 ( const void * src, void * out, @@ -51,7 +51,7 @@ CUDA_DLL_API int cuda_rgb_to_dxt1 * @param stream CUDA stream to run in, or 0 for default stream. * @return 0 if OK, nonzero if failed. */ -CUDA_DLL_API int cuda_yuv_to_dxt1 +int cuda_yuv_to_dxt1 ( const void * src, void * out, @@ -74,7 +74,7 @@ CUDA_DLL_API int cuda_yuv_to_dxt1 * @param stream CUDA stream to run in, or 0 for default stream. * @return 0 if OK, nonzero if failed. */ -CUDA_DLL_API int cuda_rgb_to_dxt6 +int cuda_rgb_to_dxt6 ( const void * src, void * out, @@ -83,9 +83,9 @@ CUDA_DLL_API int cuda_rgb_to_dxt6 cuda_wrapper_stream_t stream ); -CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out, +int cuda_yuv_to_dxt6(const void * src, void * out, int size_x, int size_y, cuda_wrapper_stream_t stream); -CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out, +int cuda_yuv422_to_yuv444(const void * src, void * out, int pix_count, cuda_wrapper_stream_t str); #ifdef __cplusplus diff --git a/ldgm/src/gpu.cu b/ldgm/src/gpu.cu index 18225780b..0c5f18408 100644 --- a/ldgm/src/gpu.cu +++ b/ldgm/src/gpu.cu @@ -110,7 +110,7 @@ char *xor_using_sse2 (char *source, char *dest, int packet_size) return dest; } -CUDA_DLL_API void gpu_encode_upgrade (char * source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size) +void gpu_encode_upgrade (char * source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size) { // cudaError_t error; @@ -191,7 +191,7 @@ CUDA_DLL_API void gpu_encode_upgrade (char * source_data,int *OUTBUF, int * PCM, } \ } while(0) -CUDA_DLL_API void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int * error_vec,int * sync_vec,int M,int K,int w_f,int buf_size,int packet_size) +void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int * error_vec,int * sync_vec,int M,int K,int w_f,int buf_size,int packet_size) { diff --git a/ldgm/src/gpu.cuh b/ldgm/src/gpu.cuh index 6c63cf1be..5dff6ab88 100644 --- a/ldgm/src/gpu.cuh +++ b/ldgm/src/gpu.cuh @@ -15,19 +15,9 @@ extern "C" { #endif -#if defined _MSC_VER || defined __MINGW32__ -#ifdef EXPORT_DLL_SYMBOLS -#define CUDA_DLL_API __declspec(dllexport) -#else -#define CUDA_DLL_API __declspec(dllimport) -#endif -#else // other platforms -#define CUDA_DLL_API -#endif +void gpu_encode_upgrade (char* source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size); -CUDA_DLL_API void gpu_encode_upgrade (char* source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size); - -CUDA_DLL_API void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int *, int*,int M,int K,int w_f,int buf_size,int packet_size); +void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int *, int*,int M,int K,int w_f,int buf_size,int packet_size); #ifdef __cplusplus } diff --git a/src/cuda_wrapper.cu b/src/cuda_wrapper.cu index 8ce472bf5..cbefe7f26 100644 --- a/src/cuda_wrapper.cu +++ b/src/cuda_wrapper.cu @@ -10,7 +10,7 @@ * kernels etc.) */ /* - * Copyright (c) 2013-2023 CESNET z.s.p.o. + * Copyright (c) 2013-2024 CESNET z.s.p.o. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -79,32 +79,32 @@ static inline enum cudaMemcpyKind map_cuda_memcpy_kind(int our_kind) { abort(); // should not reach here }; -CUDA_DLL_API int cuda_wrapper_free(void *buffer) +int cuda_wrapper_free(void *buffer) { return map_cuda_error(cudaFree(buffer)); } -CUDA_DLL_API int cuda_wrapper_free_host(void *buffer) +int cuda_wrapper_free_host(void *buffer) { return map_cuda_error(cudaFreeHost(buffer)); } -CUDA_DLL_API int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags) +int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags) { return map_cuda_error(cudaHostAlloc(pHost, size, flags)); } -CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len) +int cuda_wrapper_malloc(void **buffer, size_t data_len) { return map_cuda_error(cudaMalloc(buffer, data_len)); } -CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len) +int cuda_wrapper_malloc_host(void **buffer, size_t data_len) { return map_cuda_error(cudaMallocHost(buffer, data_len)); } -CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src, +int cuda_wrapper_memcpy(void *dst, const void *src, size_t count, int kind) { return map_cuda_error( @@ -112,29 +112,29 @@ CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src, map_cuda_memcpy_kind(kind))); } -CUDA_DLL_API const char *cuda_wrapper_last_error_string(void) +const char *cuda_wrapper_last_error_string(void) { return cudaGetErrorString(cudaGetLastError()); } -CUDA_DLL_API int cuda_wrapper_get_last_error(void) +int cuda_wrapper_get_last_error(void) { return map_cuda_error(cudaGetLastError()); } -CUDA_DLL_API const char *cuda_wrapper_get_error_string(int error) +const char *cuda_wrapper_get_error_string(int error) { return map_error_string(error); } -CUDA_DLL_API int cuda_wrapper_set_device(int index) +int cuda_wrapper_set_device(int index) { return map_cuda_error( cudaSetDevice(index)); } /// adapted from gpujpeg_print_devices_info() -CUDA_DLL_API void cuda_wrapper_print_devices_info(void) +void cuda_wrapper_print_devices_info(void) { int device_count = 0; if (cudaGetDeviceCount(&device_count) != cudaSuccess) { diff --git a/src/cuda_wrapper.h b/src/cuda_wrapper.h index 61f46596e..ff6fc2b67 100644 --- a/src/cuda_wrapper.h +++ b/src/cuda_wrapper.h @@ -3,7 +3,7 @@ * @author Martin Pulec */ /* - * Copyright (c) 2013-2023 CESNET z.s.p.o. + * Copyright (c) 2013-2024 CESNET z.s.p.o. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,16 +44,6 @@ extern "C" { #endif // __cplusplus -#if defined _MSC_VER || defined __MINGW32__ -#ifdef EXPORT_DLL_SYMBOLS -#define CUDA_DLL_API __declspec(dllexport) -#else -#define CUDA_DLL_API __declspec(dllimport) -#endif -#else // other platforms -#define CUDA_DLL_API -#endif - /// @{ #define CUDA_WRAPPER_SUCCESS 0 /// @} @@ -65,18 +55,18 @@ extern "C" { typedef void *cuda_wrapper_stream_t; -CUDA_DLL_API int cuda_wrapper_free(void *buffer); -CUDA_DLL_API int cuda_wrapper_free_host(void *buffer); -CUDA_DLL_API int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags); -CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len); -CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len); -CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src, +int cuda_wrapper_free(void *buffer); +int cuda_wrapper_free_host(void *buffer); +int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags); +int cuda_wrapper_malloc(void **buffer, size_t data_len); +int cuda_wrapper_malloc_host(void **buffer, size_t data_len); +int cuda_wrapper_memcpy(void *dst, const void *src, size_t count, int kind); -CUDA_DLL_API const char *cuda_wrapper_last_error_string(void); -CUDA_DLL_API int cuda_wrapper_set_device(int index); -CUDA_DLL_API int cuda_wrapper_get_last_error(void); -CUDA_DLL_API const char * cuda_wrapper_get_error_string(int error); -CUDA_DLL_API void cuda_wrapper_print_devices_info(void); +const char *cuda_wrapper_last_error_string(void); +int cuda_wrapper_set_device(int index); +int cuda_wrapper_get_last_error(void); +const char * cuda_wrapper_get_error_string(int error); +void cuda_wrapper_print_devices_info(void); #ifdef __cplusplus }