mirror of
https://github.com/outbackdingo/UltraGrid.git
synced 2026-03-21 05:40:18 +00:00
build: link Windows CUDA code directly
The workaround with linked DLLs is no longer required!
This commit is contained in:
@@ -284,12 +284,6 @@ POSTPROCESS_DEPS = \
|
||||
$(CUDA_COMPILER) $(CUDA_FLAGS) $(CUDA_INC) -c $< -o $@
|
||||
@$(REAL_CUDA_COMPILER) $(CUDA_FLAGS) $(CUDA_INC) -M $< > $*.d
|
||||
$(POSTPROCESS_DEPS)
|
||||
%.lib: %.cu
|
||||
$(MKDIR_P) $(dir $@)
|
||||
$(CUDA_COMPILER) $(CUDA_FLAGS) -DEXPORT_DLL_SYMBOLS $(CUDA_INC) --shared $< -o $*.dll
|
||||
@$(REAL_CUDA_COMPILER) $(CUDA_FLAGS) -DEXPORT_DLL_SYMBOLS $(CUDA_INC) -M --shared $< > $*.d
|
||||
touch $@
|
||||
$(POSTPROCESS_DEPS)
|
||||
|
||||
src/libavcodec/from_lavc_vid_conv.o: src/libavcodec/from_lavc_vid_conv.c
|
||||
$(MKDIR_P) $(dir $@)
|
||||
|
||||
52
configure.ac
52
configure.ac
@@ -195,12 +195,6 @@ then
|
||||
CFLAGS="$CFLAGS -fPIC"
|
||||
CXXFLAGS="$CXXFLAGS -fPIC"
|
||||
CUDA_FLAGS="$CUDA_FLAGS -Xcompiler -fPIC"
|
||||
CU_OBJ_SUFFIX="o"
|
||||
else # Windows
|
||||
if test $WORD_LEN = 32; then
|
||||
CUDA_FLAGS="-m32"
|
||||
fi
|
||||
CU_OBJ_SUFFIX="lib"
|
||||
fi
|
||||
AC_SUBST(CUDA_FLAGS)
|
||||
|
||||
@@ -574,18 +568,9 @@ fi
|
||||
|
||||
if test "$FOUND_CUDA" = yes; then
|
||||
AC_DEFINE([HAVE_CUDA], [1], [CUDA is present on the system])
|
||||
if test $system = Windows; then
|
||||
# It is twice by intent - as an object to force dependency
|
||||
# (== build the object), as a lib to be at the end of the
|
||||
# link list (it is a library) in MSW. TODO: make it prettier
|
||||
# somehow. The same applies also for other CUDA objs/libs.
|
||||
CUDA_COMMON_OBJ=src/cuda_wrapper.$CU_OBJ_SUFFIX
|
||||
CUDA_COMMON_LIB=src/cuda_wrapper.$CU_OBJ_SUFFIX
|
||||
DLL_LIBS="$DLL_LIBS src/cuda_wrapper.dll"
|
||||
else
|
||||
OBJS="$OBJS src/cuda_wrapper.o"
|
||||
if test $system != Windows; then
|
||||
LIBS="$LIBS $CUDA_LIB"
|
||||
OBJS="$OBJS src/cuda_wrapper.$CU_OBJ_SUFFIX"
|
||||
CUDA_COMMON_LIB=
|
||||
POST_COMPILE_MSG="$POST_COMPILE_MSG\n***\nYou have compiled in sort of CUDA code.\nIn order to use use it compression and \
|
||||
decompression, you will need to have CUDA libraries visible to your OS.\n\
|
||||
If not done so, you can accomplish this by adding line:\n\
|
||||
@@ -2274,20 +2259,6 @@ ENSURE_FEATURE_PRESENT([$gpujpeg_req], [$gpujpeg], [GPUJPEG not found])
|
||||
|
||||
cuda_dxt=no
|
||||
|
||||
AC_DEFUN([DEFINE_CUDA_DXT], [
|
||||
if test -z "$included_shared_cuda_dxt_cu"; then
|
||||
if test $system = Windows; then
|
||||
CUDA_DXT_COMMON_OBJ="cuda_dxt/cuda_dxt.$CU_OBJ_SUFFIX"
|
||||
CUDA_DXT_COMMON_LIB="cuda_dxt/cuda_dxt.$CU_OBJ_SUFFIX"
|
||||
DLL_LIBS="$DLL_LIBS cuda_dxt/cuda_dxt.dll"
|
||||
else
|
||||
CUDA_DXT_COMMON_OBJ="cuda_dxt/cuda_dxt.$CU_OBJ_SUFFIX"
|
||||
CUDA_DXT_COMMON_LIB=
|
||||
fi
|
||||
included_shared_cuda_dxt_cu=yes
|
||||
fi
|
||||
])
|
||||
|
||||
AC_ARG_ENABLE(cuda-dxt,
|
||||
[ --disable-cuda-dxt disable CUDA DXT compression (auto)]
|
||||
[ Requires: CUDA],
|
||||
@@ -2295,13 +2266,13 @@ AC_ARG_ENABLE(cuda-dxt,
|
||||
[cuda_dxt_req=$build_default])
|
||||
|
||||
LIBS=$SAVED_LIBS
|
||||
CUDA_DXT_COMMON_OBJ="cuda_dxt/cuda_dxt.o"
|
||||
|
||||
if test "$cuda_dxt_req" != no -a $FOUND_CUDA = yes
|
||||
then
|
||||
cuda_dxt=yes
|
||||
|
||||
DEFINE_CUDA_DXT
|
||||
CUDA_DXT_LIB="$CUDA_COMMON_LIB $CUDA_DXT_COMMON_LIB $CUDA_LIB"
|
||||
CUDA_DXT_LIB="$CUDA_DXT_COMMON_LIB $CUDA_LIB"
|
||||
CUDA_DXT_OBJ="src/video_compress/cuda_dxt.o $CUDA_DXT_COMMON_OBJ $CUDA_COMMON_OBJ"
|
||||
add_module vcompress_cuda_dxt "$CUDA_DXT_OBJ" "$CUDA_DXT_LIB"
|
||||
fi
|
||||
@@ -2321,10 +2292,9 @@ AC_ARG_ENABLE(gpujpeg_to_dxt,
|
||||
if test $gpujpeg_to_dxt_req != no -a $FOUND_CUDA = yes -a \
|
||||
"$found_gpujpeg" = yes
|
||||
then
|
||||
DEFINE_CUDA_DXT
|
||||
gpujpeg_to_dxt=yes
|
||||
GPUJPEG_TO_DXT_INC=" $CUDA_INC"
|
||||
GPUJPEG_TO_DXT_LIB="$CUDA_DXT_COMMON_LIB $CUDA_COMMON_LIB $CUDA_LIB $GPUJPEG_LIB"
|
||||
GPUJPEG_TO_DXT_LIB="$CUDA_DXT_COMMON_LIB $CUDA_LIB $GPUJPEG_LIB"
|
||||
GPUJPEG_TO_DXT_OBJ="src/video_decompress/gpujpeg_to_dxt.o $CUDA_COMMON_OBJ $CUDA_DXT_COMMON_OBJ"
|
||||
add_module vdecompress_gpujpeg_to_dxt "$GPUJPEG_TO_DXT_OBJ" "$GPUJPEG_TO_DXT_LIB"
|
||||
fi
|
||||
@@ -2366,7 +2336,7 @@ if test "$gpustitch_req" != no && test "$FOUND_CUDA" = yes; then
|
||||
gpustitch=yes
|
||||
|
||||
GPUSTITCH_LIB="-lgpustitch $LIBGPUSTITCH_LIBS"
|
||||
GPUSTITCH_OBJ="src/video_capture/gpustitch.o src/utils/cuda_pix_conv.$CU_OBJ_SUFFIX $CUDA_COMMON_OBJ"
|
||||
GPUSTITCH_OBJ="src/video_capture/gpustitch.o src/utils/cuda_pix_conv.o $CUDA_COMMON_OBJ"
|
||||
add_module vidcap_gpustitch "$GPUSTITCH_OBJ" "$GPUSTITCH_LIB"
|
||||
|
||||
INC="$INC $LIBGPUSTITCH_CFLAGS"
|
||||
@@ -2924,14 +2894,8 @@ AC_ARG_ENABLE(ldgm-gpu,
|
||||
if test $ldgm_gpu_req != no -a $FOUND_CUDA = yes
|
||||
then
|
||||
LDGM_GPU_OBJS="ldgm/src/ldgm-session-gpu.o src/rtp/ldgm_gpu.o $CUDA_COMMON_OBJ"
|
||||
LDGM_GPU_LIBS="$CUDA_COMMON_LIB $CUDA_LIB"
|
||||
if test $system = Windows; then
|
||||
DLL_LIBS="$DLL_LIBS ldgm/src/gpu.dll"
|
||||
LDGM_GPU_OBJS="$LDGM_GPU_OBJS ldgm/src/gpu.$CU_OBJ_SUFFIX"
|
||||
LDGM_GPU_LIBS="$LDGM_GPU_LIBS ldgm/src/gpu.$CU_OBJ_SUFFIX"
|
||||
else
|
||||
LDGM_GPU_OBJS="$LDGM_GPU_OBJS ldgm/src/gpu.$CU_OBJ_SUFFIX"
|
||||
fi
|
||||
LDGM_GPU_LIBS="$CUDA_LIB"
|
||||
LDGM_GPU_OBJS="$LDGM_GPU_OBJS ldgm/src/gpu.o"
|
||||
add_module ldgm_gpu "$LDGM_GPU_OBJS" "$LDGM_GPU_LIBS"
|
||||
if test $WORD_LEN = 32 -a $system = Linux; then
|
||||
CUDA_FLAGS="$CUDA_FLAGS -Xcompiler -msse2"
|
||||
|
||||
@@ -759,7 +759,7 @@ static int dxt_launch(const void * src, void * out, int sx, int sy, cudaStream_t
|
||||
return cudaSuccess != cudaStreamSynchronize(str) ? -3 : 0;
|
||||
}
|
||||
|
||||
CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out,
|
||||
int cuda_yuv422_to_yuv444(const void * src, void * out,
|
||||
int pix_count, cuda_wrapper_stream_t str) {
|
||||
// grid and threadblock sizes
|
||||
const dim3 tsiz(64, 1);
|
||||
@@ -779,7 +779,7 @@ CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out,
|
||||
/// @param size_y Height of the input image (must be divisible by 4).
|
||||
/// @param stream CUDA stream to run in, or 0 for default stream.
|
||||
/// @return 0 if OK, nonzero if failed.
|
||||
CUDA_DLL_API int cuda_rgb_to_dxt1(const void * src, void * out,
|
||||
int cuda_rgb_to_dxt1(const void * src, void * out,
|
||||
int size_x, int size_y, cuda_wrapper_stream_t stream) {
|
||||
return dxt_launch<false, 1>(src, out, size_x, size_y, (cudaStream_t) stream);
|
||||
}
|
||||
@@ -796,7 +796,7 @@ CUDA_DLL_API int cuda_rgb_to_dxt1(const void * src, void * out,
|
||||
/// @param size_y Height of the input image (must be divisible by 4).
|
||||
/// @param stream CUDA stream to run in, or 0 for default stream.
|
||||
/// @return 0 if OK, nonzero if failed.
|
||||
CUDA_DLL_API int cuda_yuv_to_dxt1(const void * src, void * out,
|
||||
int cuda_yuv_to_dxt1(const void * src, void * out,
|
||||
int size_x, int size_y, cuda_wrapper_stream_t stream) {
|
||||
return dxt_launch<true, 1>(src, out, size_x, size_y, (cudaStream_t) stream);
|
||||
}
|
||||
@@ -813,12 +813,12 @@ CUDA_DLL_API int cuda_yuv_to_dxt1(const void * src, void * out,
|
||||
/// (Input is read bottom up if negative)
|
||||
/// @param stream CUDA stream to run in, or 0 for default stream.
|
||||
/// @return 0 if OK, nonzero if failed.
|
||||
CUDA_DLL_API int cuda_rgb_to_dxt6(const void * src, void * out,
|
||||
int cuda_rgb_to_dxt6(const void * src, void * out,
|
||||
int size_x, int size_y, cuda_wrapper_stream_t stream) {
|
||||
return dxt_launch<false, 6>(src, out, size_x, size_y, (cudaStream_t) stream);
|
||||
}
|
||||
|
||||
CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out,
|
||||
int cuda_yuv_to_dxt6(const void * src, void * out,
|
||||
int size_x, int size_y, cuda_wrapper_stream_t stream) {
|
||||
return dxt_launch<true, 6>(src, out, size_x, size_y, (cudaStream_t) stream);
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ extern "C" {
|
||||
* @param stream CUDA stream to run in, or 0 for default stream.
|
||||
* @return 0 if OK, nonzero if failed.
|
||||
*/
|
||||
CUDA_DLL_API int cuda_rgb_to_dxt1
|
||||
int cuda_rgb_to_dxt1
|
||||
(
|
||||
const void * src,
|
||||
void * out,
|
||||
@@ -51,7 +51,7 @@ CUDA_DLL_API int cuda_rgb_to_dxt1
|
||||
* @param stream CUDA stream to run in, or 0 for default stream.
|
||||
* @return 0 if OK, nonzero if failed.
|
||||
*/
|
||||
CUDA_DLL_API int cuda_yuv_to_dxt1
|
||||
int cuda_yuv_to_dxt1
|
||||
(
|
||||
const void * src,
|
||||
void * out,
|
||||
@@ -74,7 +74,7 @@ CUDA_DLL_API int cuda_yuv_to_dxt1
|
||||
* @param stream CUDA stream to run in, or 0 for default stream.
|
||||
* @return 0 if OK, nonzero if failed.
|
||||
*/
|
||||
CUDA_DLL_API int cuda_rgb_to_dxt6
|
||||
int cuda_rgb_to_dxt6
|
||||
(
|
||||
const void * src,
|
||||
void * out,
|
||||
@@ -83,9 +83,9 @@ CUDA_DLL_API int cuda_rgb_to_dxt6
|
||||
cuda_wrapper_stream_t stream
|
||||
);
|
||||
|
||||
CUDA_DLL_API int cuda_yuv_to_dxt6(const void * src, void * out,
|
||||
int cuda_yuv_to_dxt6(const void * src, void * out,
|
||||
int size_x, int size_y, cuda_wrapper_stream_t stream);
|
||||
CUDA_DLL_API int cuda_yuv422_to_yuv444(const void * src, void * out,
|
||||
int cuda_yuv422_to_yuv444(const void * src, void * out,
|
||||
int pix_count, cuda_wrapper_stream_t str);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -110,7 +110,7 @@ char *xor_using_sse2 (char *source, char *dest, int packet_size)
|
||||
return dest;
|
||||
}
|
||||
|
||||
CUDA_DLL_API void gpu_encode_upgrade (char * source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size)
|
||||
void gpu_encode_upgrade (char * source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size)
|
||||
{
|
||||
|
||||
// cudaError_t error;
|
||||
@@ -191,7 +191,7 @@ CUDA_DLL_API void gpu_encode_upgrade (char * source_data,int *OUTBUF, int * PCM,
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
CUDA_DLL_API void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int * error_vec,int * sync_vec,int M,int K,int w_f,int buf_size,int packet_size)
|
||||
void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int * error_vec,int * sync_vec,int M,int K,int w_f,int buf_size,int packet_size)
|
||||
{
|
||||
|
||||
|
||||
|
||||
@@ -15,19 +15,9 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined _MSC_VER || defined __MINGW32__
|
||||
#ifdef EXPORT_DLL_SYMBOLS
|
||||
#define CUDA_DLL_API __declspec(dllexport)
|
||||
#else
|
||||
#define CUDA_DLL_API __declspec(dllimport)
|
||||
#endif
|
||||
#else // other platforms
|
||||
#define CUDA_DLL_API
|
||||
#endif
|
||||
void gpu_encode_upgrade (char* source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size);
|
||||
|
||||
CUDA_DLL_API void gpu_encode_upgrade (char* source_data,int *OUTBUF, int * PCM,int param_k,int param_m,int w_f,int packet_size ,int buf_size);
|
||||
|
||||
CUDA_DLL_API void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int *, int*,int M,int K,int w_f,int buf_size,int packet_size);
|
||||
void gpu_decode_upgrade(char *data, int * PCM,int* SYNC_VEC,int* ERROR_VEC, int not_done, int *frame_size,int *, int*,int M,int K,int w_f,int buf_size,int packet_size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
* kernels etc.)
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013-2023 CESNET z.s.p.o.
|
||||
* Copyright (c) 2013-2024 CESNET z.s.p.o.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -79,32 +79,32 @@ static inline enum cudaMemcpyKind map_cuda_memcpy_kind(int our_kind) {
|
||||
abort(); // should not reach here
|
||||
};
|
||||
|
||||
CUDA_DLL_API int cuda_wrapper_free(void *buffer)
|
||||
int cuda_wrapper_free(void *buffer)
|
||||
{
|
||||
return map_cuda_error(cudaFree(buffer));
|
||||
}
|
||||
|
||||
CUDA_DLL_API int cuda_wrapper_free_host(void *buffer)
|
||||
int cuda_wrapper_free_host(void *buffer)
|
||||
{
|
||||
return map_cuda_error(cudaFreeHost(buffer));
|
||||
}
|
||||
|
||||
CUDA_DLL_API int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags)
|
||||
int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags)
|
||||
{
|
||||
return map_cuda_error(cudaHostAlloc(pHost, size, flags));
|
||||
}
|
||||
|
||||
CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len)
|
||||
int cuda_wrapper_malloc(void **buffer, size_t data_len)
|
||||
{
|
||||
return map_cuda_error(cudaMalloc(buffer, data_len));
|
||||
}
|
||||
|
||||
CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len)
|
||||
int cuda_wrapper_malloc_host(void **buffer, size_t data_len)
|
||||
{
|
||||
return map_cuda_error(cudaMallocHost(buffer, data_len));
|
||||
}
|
||||
|
||||
CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src,
|
||||
int cuda_wrapper_memcpy(void *dst, const void *src,
|
||||
size_t count, int kind)
|
||||
{
|
||||
return map_cuda_error(
|
||||
@@ -112,29 +112,29 @@ CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src,
|
||||
map_cuda_memcpy_kind(kind)));
|
||||
}
|
||||
|
||||
CUDA_DLL_API const char *cuda_wrapper_last_error_string(void)
|
||||
const char *cuda_wrapper_last_error_string(void)
|
||||
{
|
||||
return cudaGetErrorString(cudaGetLastError());
|
||||
}
|
||||
|
||||
CUDA_DLL_API int cuda_wrapper_get_last_error(void)
|
||||
int cuda_wrapper_get_last_error(void)
|
||||
{
|
||||
return map_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
CUDA_DLL_API const char *cuda_wrapper_get_error_string(int error)
|
||||
const char *cuda_wrapper_get_error_string(int error)
|
||||
{
|
||||
return map_error_string(error);
|
||||
}
|
||||
|
||||
CUDA_DLL_API int cuda_wrapper_set_device(int index)
|
||||
int cuda_wrapper_set_device(int index)
|
||||
{
|
||||
return map_cuda_error(
|
||||
cudaSetDevice(index));
|
||||
}
|
||||
|
||||
/// adapted from gpujpeg_print_devices_info()
|
||||
CUDA_DLL_API void cuda_wrapper_print_devices_info(void)
|
||||
void cuda_wrapper_print_devices_info(void)
|
||||
{
|
||||
int device_count = 0;
|
||||
if (cudaGetDeviceCount(&device_count) != cudaSuccess) {
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* @author Martin Pulec <pulec@cesnet.cz>
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013-2023 CESNET z.s.p.o.
|
||||
* Copyright (c) 2013-2024 CESNET z.s.p.o.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,16 +44,6 @@
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
#if defined _MSC_VER || defined __MINGW32__
|
||||
#ifdef EXPORT_DLL_SYMBOLS
|
||||
#define CUDA_DLL_API __declspec(dllexport)
|
||||
#else
|
||||
#define CUDA_DLL_API __declspec(dllimport)
|
||||
#endif
|
||||
#else // other platforms
|
||||
#define CUDA_DLL_API
|
||||
#endif
|
||||
|
||||
/// @{
|
||||
#define CUDA_WRAPPER_SUCCESS 0
|
||||
/// @}
|
||||
@@ -65,18 +55,18 @@ extern "C" {
|
||||
|
||||
typedef void *cuda_wrapper_stream_t;
|
||||
|
||||
CUDA_DLL_API int cuda_wrapper_free(void *buffer);
|
||||
CUDA_DLL_API int cuda_wrapper_free_host(void *buffer);
|
||||
CUDA_DLL_API int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags);
|
||||
CUDA_DLL_API int cuda_wrapper_malloc(void **buffer, size_t data_len);
|
||||
CUDA_DLL_API int cuda_wrapper_malloc_host(void **buffer, size_t data_len);
|
||||
CUDA_DLL_API int cuda_wrapper_memcpy(void *dst, const void *src,
|
||||
int cuda_wrapper_free(void *buffer);
|
||||
int cuda_wrapper_free_host(void *buffer);
|
||||
int cuda_wrapper_host_alloc(void **pHost, size_t size, unsigned int flags);
|
||||
int cuda_wrapper_malloc(void **buffer, size_t data_len);
|
||||
int cuda_wrapper_malloc_host(void **buffer, size_t data_len);
|
||||
int cuda_wrapper_memcpy(void *dst, const void *src,
|
||||
size_t count, int kind);
|
||||
CUDA_DLL_API const char *cuda_wrapper_last_error_string(void);
|
||||
CUDA_DLL_API int cuda_wrapper_set_device(int index);
|
||||
CUDA_DLL_API int cuda_wrapper_get_last_error(void);
|
||||
CUDA_DLL_API const char * cuda_wrapper_get_error_string(int error);
|
||||
CUDA_DLL_API void cuda_wrapper_print_devices_info(void);
|
||||
const char *cuda_wrapper_last_error_string(void);
|
||||
int cuda_wrapper_set_device(int index);
|
||||
int cuda_wrapper_get_last_error(void);
|
||||
const char * cuda_wrapper_get_error_string(int error);
|
||||
void cuda_wrapper_print_devices_info(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user