build: link Windows CUDA code directly

The workaround with linked DLLs is no longer required!
This commit is contained in:
Martin Pulec
2024-03-05 15:14:12 +01:00
parent 448c19f69b
commit a4c22c93fa
8 changed files with 46 additions and 108 deletions

View File

@@ -284,12 +284,6 @@ POSTPROCESS_DEPS = \
$(CUDA_COMPILER) $(CUDA_FLAGS) $(CUDA_INC) -c $< -o $@
@$(REAL_CUDA_COMPILER) $(CUDA_FLAGS) $(CUDA_INC) -M $< > $*.d
$(POSTPROCESS_DEPS)
%.lib: %.cu
$(MKDIR_P) $(dir $@)
$(CUDA_COMPILER) $(CUDA_FLAGS) -DEXPORT_DLL_SYMBOLS $(CUDA_INC) --shared $< -o $*.dll
@$(REAL_CUDA_COMPILER) $(CUDA_FLAGS) -DEXPORT_DLL_SYMBOLS $(CUDA_INC) -M --shared $< > $*.d
touch $@
$(POSTPROCESS_DEPS)
src/libavcodec/from_lavc_vid_conv.o: src/libavcodec/from_lavc_vid_conv.c
$(MKDIR_P) $(dir $@)

View File

@@ -195,12 +195,6 @@ then
CFLAGS="$CFLAGS -fPIC"
CXXFLAGS="$CXXFLAGS -fPIC"
CUDA_FLAGS="$CUDA_FLAGS -Xcompiler -fPIC"
CU_OBJ_SUFFIX="o"
else # Windows
if test $WORD_LEN = 32; then
CUDA_FLAGS="-m32"
fi
CU_OBJ_SUFFIX="lib"
fi
AC_SUBST(CUDA_FLAGS)
@@ -574,18 +568,9 @@ fi
if test "$FOUND_CUDA" = yes; then
AC_DEFINE([HAVE_CUDA], [1], [CUDA is present on the system])
if test $system = Windows; then
# It is twice by intent - as an object to force dependency
# (== build the object), as a lib to be at the end of the
# link list (it is a library) in MSW. TODO: make it prettier
# somehow. The same applies also for other CUDA objs/libs.
CUDA_COMMON_OBJ=src/cuda_wrapper.$CU_OBJ_SUFFIX
CUDA_COMMON_LIB=src/cuda_wrapper.$CU_OBJ_SUFFIX
DLL_LIBS="$DLL_LIBS src/cuda_wrapper.dll"
else
OBJS="$OBJS src/cuda_wrapper.o"
if test $system != Windows; then
LIBS="$LIBS $CUDA_LIB"
OBJS="$OBJS src/cuda_wrapper.$CU_OBJ_SUFFIX"
CUDA_COMMON_LIB=
POST_COMPILE_MSG="$POST_COMPILE_MSG\n***\nYou have compiled in sort of CUDA code.\nIn order to use use it compression and \
decompression, you will need to have CUDA libraries visible to your OS.\n\
If not done so, you can accomplish this by adding line:\n\
@@ -2274,20 +2259,6 @@ ENSURE_FEATURE_PRESENT([$gpujpeg_req], [$gpujpeg], [GPUJPEG not found])
cuda_dxt=no
AC_DEFUN([DEFINE_CUDA_DXT], [
if test -z "$included_shared_cuda_dxt_cu"; then
if test $system = Windows; then
CUDA_DXT_COMMON_OBJ="cuda_dxt/cuda_dxt.$CU_OBJ_SUFFIX"
CUDA_DXT_COMMON_LIB="cuda_dxt/cuda_dxt.$CU_OBJ_SUFFIX"
DLL_LIBS="$DLL_LIBS cuda_dxt/cuda_dxt.dll"
else
CUDA_DXT_COMMON_OBJ="cuda_dxt/cuda_dxt.$CU_OBJ_SUFFIX"
CUDA_DXT_COMMON_LIB=
fi
included_shared_cuda_dxt_cu=yes
fi
])
AC_ARG_ENABLE(cuda-dxt,
[ --disable-cuda-dxt disable CUDA DXT compression (auto)]
[ Requires: CUDA],
@@ -2295,13 +2266,13 @@ AC_ARG_ENABLE(cuda-dxt,
[cuda_dxt_req=$build_default])
LIBS=$SAVED_LIBS
CUDA_DXT_COMMON_OBJ="cuda_dxt/cuda_dxt.o"
if test "$cuda_dxt_req" != no -a $FOUND_CUDA = yes
then
cuda_dxt=yes
DEFINE_CUDA_DXT
CUDA_DXT_LIB="$CUDA_COMMON_LIB $CUDA_DXT_COMMON_LIB $CUDA_LIB"
CUDA_DXT_LIB="$CUDA_DXT_COMMON_LIB $CUDA_LIB"
CUDA_DXT_OBJ="src/video_compress/cuda_dxt.o $CUDA_DXT_COMMON_OBJ $CUDA_COMMON_OBJ"
add_module vcompress_cuda_dxt "$CUDA_DXT_OBJ" "$CUDA_DXT_LIB"
fi
@@ -2321,10 +2292,9 @@ AC_ARG_ENABLE(gpujpeg_to_dxt,
if test $gpujpeg_to_dxt_req != no -a $FOUND_CUDA = yes -a \
"$found_gpujpeg" = yes
then
DEFINE_CUDA_DXT
gpujpeg_to_dxt=yes
GPUJPEG_TO_DXT_INC=" $CUDA_INC"
GPUJPEG_TO_DXT_LIB="$CUDA_DXT_COMMON_LIB $CUDA_COMMON_LIB $CUDA_LIB $GPUJPEG_LIB"
GPUJPEG_TO_DXT_LIB="$CUDA_DXT_COMMON_LIB $CUDA_LIB $GPUJPEG_LIB"
GPUJPEG_TO_DXT_OBJ="src/video_decompress/gpujpeg_to_dxt.o $CUDA_COMMON_OBJ $CUDA_DXT_COMMON_OBJ"
add_module vdecompress_gpujpeg_to_dxt "$GPUJPEG_TO_DXT_OBJ" "$GPUJPEG_TO_DXT_LIB"
fi
@@ -2366,7 +2336,7 @@ if test "$gpustitch_req" != no && test "$FOUND_CUDA" = yes; then
gpustitch=yes
GPUSTITCH_LIB="-lgpustitch $LIBGPUSTITCH_LIBS"
GPUSTITCH_OBJ="src/video_capture/gpustitch.o src/utils/cuda_pix_conv.$CU_OBJ_SUFFIX $CUDA_COMMON_OBJ"
GPUSTITCH_OBJ="src/video_capture/gpustitch.o src/utils/cuda_pix_conv.o $CUDA_COMMON_OBJ"
add_module vidcap_gpustitch "$GPUSTITCH_OBJ" "$GPUSTITCH_LIB"
INC="$INC $LIBGPUSTITCH_CFLAGS"
@@ -2924,14 +2894,8 @@ AC_ARG_ENABLE(ldgm-gpu,
if test $ldgm_gpu_req != no -a $FOUND_CUDA = yes
then
LDGM_GPU_OBJS="ldgm/src/ldgm-session-gpu.o src/rtp/ldgm_gpu.o $CUDA_COMMON_OBJ"
LDGM_GPU_LIBS="$CUDA_COMMON_LIB $CUDA_LIB"
if test $system = Windows; then
DLL_LIBS="$DLL_LIBS ldgm/src/gpu.dll"
LDGM_GPU_OBJS="$LDGM_GPU_OBJS ldgm/src/gpu.$CU_OBJ_SUFFIX"
LDGM_GPU_LIBS="$LDGM_GPU_LIBS ldgm/src/gpu.$CU_OBJ_SUFFIX"
else
LDGM_GPU_OBJS="$LDGM_GPU_OBJS ldgm/src/gpu.$CU_OBJ_SUFFIX"
fi
LDGM_GPU_LIBS="$CUDA_LIB"
LDGM_GPU_OBJS="$LDGM_GPU_OBJS ldgm/src/gpu.o"
add_module ldgm_gpu "$LDGM_GPU_OBJS" "$LDGM_GPU_LIBS"
if test $WORD_LEN = 32 -a $system = Linux; then
CUDA_FLAGS="$CUDA_FLAGS -Xcompiler -msse2"

View File

@@ -759,7 +759,7 @@ static int dxt_launch(const void * src, void * out, int sx, int sy, cudaStream_t
return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0;
}
CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out,
int cuda_yuv422_to_yuv444(const void * src, void * out,
int pix_count, cuda_wrapper_stream_t str) {
// grid and threadblock sizes
const dim3 tsiz(64, 1);
@@ -779,7 +779,7 @@ CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out,
/// @param size_y Height of the input image (must be divisible by 4).
/// @param stream CUDA stream to run in, or 0 for default stream.
/// @return 0 if OK, nonzero if failed.
CUDA_DLL_API int cuda_rgb_to_dxt1(const void * src, void * out,
int cuda_rgb_to_dxt1(const void * src, void * out,
int size_x, int size_y, cuda_wrapper_stream_t stream) {
return dxt_launch<false, 1>(src, out, size_x, size_y, (cudaStream_t) stream);
}
@@ -796,7 +796,7 @@ CUDA_DLL_API int cuda_rgb_to_dxt1(const void * src, void * out,
/// @param size_y Height of the input image (must be divisible by 4).
/// @param stream CUDA stream to run in, or 0 for default stream.
/// @return 0 if OK, nonzero if failed.
CUDA_DLL_API int cuda_yuv_to_dxt1(const void * src, void * out,
int cuda_yuv_to_dxt1(const void * src, void * out,
int size_x, int size_y, cuda_wrapper_stream_t stream) {
return dxt_launch<true, 1>(src, out, size_x, size_y, (cudaStream_t) stream);
}
@@ -813,12 +813,12 @@ CUDA_DLL_API int cuda_yuv_to_dxt1(const void * src, void * out,
/// (Input is read bottom up if negative)
/// @param stream CUDA stream to run in, or 0 for default stream.
/// @return 0 if OK, nonzero if failed.
CUDA_DLL_API int cuda_rgb_to_dxt6(const void * src, void * out,
int cuda_rgb_to_dxt6(const void * src, void * out,
int size_x, int size_y, cuda_wrapper_stream_t stream) {
return dxt_launch<false, 6>(src, out, size_x, size_y, (cudaStream_t) stream);
}
CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out,
int cuda_yuv_to_dxt6(const void * src, void * out,
int size_x, int size_y, cuda_wrapper_stream_t stream) {
return dxt_launch<true, 6>(src, out, size_x, size_y, (cudaStream_t) stream);
}

View File

@@ -27,7 +27,7 @@ extern "C" {
* @param stream CUDA stream to run in, or 0 for default stream.
* @return 0 if OK, nonzero if failed.
*/
CUDA_DLL_API int cuda_rgb_to_dxt1
int cuda_rgb_to_dxt1
(
const void * src,
void * out,
@@ -51,7 +51,7 @@ CUDA_DLL_API int cuda_rgb_to_dxt1
* @param stream CUDA stream to run in, or 0 for default stream.
* @return 0 if OK, nonzero if failed.
*/
CUDA_DLL_API int cuda_yuv_to_dxt1
int cuda_yuv_to_dxt1
(
const void * src,
void * out,
@@ -74,7 +74,7 @@ CUDA_DLL_API int cuda_yuv_to_dxt1
* @param stream CUDA stream to run in, or 0 for default stream.
* @return 0 if OK, nonzero if failed.
*/
CUDA_DLL_API int cuda_rgb_to_dxt6
int cuda_rgb_to_dxt6
(
const void * src,
void * out,
@@ -83,9 +83,9 @@ CUDA_DLL_API int cuda_rgb_to_dxt6
cuda_wrapper_stream_t stream
);
CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out,
int cuda_yuv_to_dxt6(const void * src, void * out,
int size_x, int size_y, cuda_wrapper_stream_t stream);
CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out,
int cuda_yuv422_to_yuv444(const void * src, void * out,
int pix_count, cuda_wrapper_stream_t str);
#ifdef __cplusplus

View File

@@ -110,7 +110,7 @@ char *xor_using_sse2 (char *source, char *dest, int packet_size)
return dest;
}
CUDA_DLL_API void gpu_encode_upgrade (char * source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size)
void gpu_encode_upgrade (char * source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size)
{
// cudaError_t error;
@@ -191,7 +191,7 @@ CUDA_DLL_API void gpu_encode_upgrade (char * source_data,int *OUTBUF, int * PCM,
} \
} while(0)
CUDA_DLL_API void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int * error_vec,int * sync_vec,int M,int K,int w_f,int buf_size,int packet_size)
void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int * error_vec,int * sync_vec,int M,int K,int w_f,int buf_size,int packet_size)
{

View File

@@ -15,19 +15,9 @@
extern "C" {
#endif
#if defined _MSC_VER || defined __MINGW32__
#ifdef EXPORT_DLL_SYMBOLS
#define CUDA_DLL_API __declspec(dllexport)
#else
#define CUDA_DLL_API __declspec(dllimport)
#endif
#else // other platforms
#define CUDA_DLL_API
#endif
void gpu_encode_upgrade (char* source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size);
CUDA_DLL_API void gpu_encode_upgrade (char* source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size);
CUDA_DLL_API void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int *, int*,int M,int K,int w_f,int buf_size,int packet_size);
void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int *, int*,int M,int K,int w_f,int buf_size,int packet_size);
#ifdef __cplusplus
}

View File

@@ -10,7 +10,7 @@
* kernels etc.)
*/
/*
* Copyright (c) 2013-2023 CESNET z.s.p.o.
* Copyright (c) 2013-2024 CESNET z.s.p.o.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -79,32 +79,32 @@ static inline enum cudaMemcpyKind map_cuda_memcpy_kind(int our_kind) {
abort(); // should not reach here
};
CUDA_DLL_API int cuda_wrapper_free(void *buffer)
int cuda_wrapper_free(void *buffer)
{
return map_cuda_error(cudaFree(buffer));
}
CUDA_DLL_API int cuda_wrapper_free_host(void *buffer)
int cuda_wrapper_free_host(void *buffer)
{
return map_cuda_error(cudaFreeHost(buffer));
}
CUDA_DLL_API int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags)
int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags)
{
return map_cuda_error(cudaHostAlloc(pHost, size, flags));
}
CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len)
int cuda_wrapper_malloc(void **buffer, size_t data_len)
{
return map_cuda_error(cudaMalloc(buffer, data_len));
}
CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len)
int cuda_wrapper_malloc_host(void **buffer, size_t data_len)
{
return map_cuda_error(cudaMallocHost(buffer, data_len));
}
CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src,
int cuda_wrapper_memcpy(void *dst, const void *src,
size_t count, int kind)
{
return map_cuda_error(
@@ -112,29 +112,29 @@ CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src,
map_cuda_memcpy_kind(kind)));
}
CUDA_DLL_API const char *cuda_wrapper_last_error_string(void)
const char *cuda_wrapper_last_error_string(void)
{
return cudaGetErrorString(cudaGetLastError());
}
CUDA_DLL_API int cuda_wrapper_get_last_error(void)
int cuda_wrapper_get_last_error(void)
{
return map_cuda_error(cudaGetLastError());
}
CUDA_DLL_API const char *cuda_wrapper_get_error_string(int error)
const char *cuda_wrapper_get_error_string(int error)
{
return map_error_string(error);
}
CUDA_DLL_API int cuda_wrapper_set_device(int index)
int cuda_wrapper_set_device(int index)
{
return map_cuda_error(
cudaSetDevice(index));
}
/// adapted from gpujpeg_print_devices_info()
CUDA_DLL_API void cuda_wrapper_print_devices_info(void)
void cuda_wrapper_print_devices_info(void)
{
int device_count = 0;
if (cudaGetDeviceCount(&device_count) != cudaSuccess) {

View File

@@ -3,7 +3,7 @@
* @author Martin Pulec <pulec@cesnet.cz>
*/
/*
* Copyright (c) 2013-2023 CESNET z.s.p.o.
* Copyright (c) 2013-2024 CESNET z.s.p.o.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -44,16 +44,6 @@
extern "C" {
#endif // __cplusplus
#if defined _MSC_VER || defined __MINGW32__
#ifdef EXPORT_DLL_SYMBOLS
#define CUDA_DLL_API __declspec(dllexport)
#else
#define CUDA_DLL_API __declspec(dllimport)
#endif
#else // other platforms
#define CUDA_DLL_API
#endif
/// @{
#define CUDA_WRAPPER_SUCCESS 0
/// @}
@@ -65,18 +55,18 @@ extern "C" {
typedef void *cuda_wrapper_stream_t;
CUDA_DLL_API int cuda_wrapper_free(void *buffer);
CUDA_DLL_API int cuda_wrapper_free_host(void *buffer);
CUDA_DLL_API int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags);
CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len);
CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len);
CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src,
int cuda_wrapper_free(void *buffer);
int cuda_wrapper_free_host(void *buffer);
int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags);
int cuda_wrapper_malloc(void **buffer, size_t data_len);
int cuda_wrapper_malloc_host(void **buffer, size_t data_len);
int cuda_wrapper_memcpy(void *dst, const void *src,
size_t count, int kind);
CUDA_DLL_API const char *cuda_wrapper_last_error_string(void);
CUDA_DLL_API int cuda_wrapper_set_device(int index);
CUDA_DLL_API int cuda_wrapper_get_last_error(void);
CUDA_DLL_API const char * cuda_wrapper_get_error_string(int error);
CUDA_DLL_API void cuda_wrapper_print_devices_info(void);
const char *cuda_wrapper_last_error_string(void);
int cuda_wrapper_set_device(int index);
int cuda_wrapper_get_last_error(void);
const char * cuda_wrapper_get_error_string(int error);
void cuda_wrapper_print_devices_info(void);
#ifdef __cplusplus
}