Skip to content

Commit 625d767

Browse files
authored
Merge pull request #1384 from boostorg/1383
Only enable our CUDA extensions when the user explicitly asks for them
2 parents cfe946f + 1d9e927 commit 625d767

9 files changed

Lines changed: 251 additions & 11 deletions

File tree

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,7 @@ jobs:
820820
run: |
821821
cd ../boost-root
822822
mkdir __build__ && cd __build__
823-
cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=86 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 ..
823+
cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCMAKE_CUDA_STANDARD=17 ..
824824
- name: Build tests
825825
run: |
826826
cd ../boost-root/__build__

include/boost/math/special_functions/lanczos.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2751,7 +2751,7 @@ struct lanczos
27512751
} // namespace math
27522752
} // namespace boost
27532753

2754-
#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
2754+
#if !defined(_CRAYC) && !defined(BOOST_MATH_ENABLE_CUDA) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
27552755
#if ((defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)) && !defined(_MANAGED) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
27562756
#include <boost/math/special_functions/detail/lanczos_sse2.hpp>
27572757
#endif

include/boost/math/special_functions/next.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#include <cfloat>
2626

2727

28-
#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
28+
#if !defined(_CRAYC) && !defined(BOOST_MATH_ENABLE_CUDA) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
2929
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__)
3030
#include "xmmintrin.h"
3131
#define BOOST_MATH_CHECK_SSE2

include/boost/math/special_functions/sign.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#pragma once
1515
#endif
1616

17-
#ifndef __CUDACC_RTC__
17+
#ifndef BOOST_MATH_HAS_NVRTC
1818

1919
#include <boost/math/tools/config.hpp>
2020
#include <boost/math/special_functions/math_fwd.hpp>
@@ -234,7 +234,7 @@ BOOST_MATH_GPU_ENABLED T sign(T z)
234234
} // namespace math
235235
} // namespace boost
236236

237-
#endif // __CUDACC_RTC__
237+
#endif // BOOST_MATH_HAS_NVRTC
238238

239239
#endif // BOOST_MATH_TOOLS_SIGN_HPP
240240

include/boost/math/tools/config.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#pragma once
1212
#endif
1313

14-
#ifndef __CUDACC_RTC__
14+
#if !(defined(__CUDACC_RTC__) && defined(BOOST_MATH_ENABLE_NVRTC))
1515

1616
#include <boost/math/tools/is_standalone.hpp>
1717

@@ -168,7 +168,7 @@
168168
# define BOOST_MATH_NOINLINE __declspec(noinline)
169169
# elif defined(__GNUC__) && __GNUC__ > 3
170170
// Clang also defines __GNUC__ (as 4)
171-
# if defined(__CUDACC__)
171+
# if defined(__CUDACC__) && defined(BOOST_MATH_ENABLE_CUDA)
172172
// nvcc doesn't always parse __noinline__,
173173
// see: https://svn.boost.org/trac/boost/ticket/9392
174174
# define BOOST_MATH_NOINLINE __attribute__ ((noinline))
@@ -678,7 +678,7 @@ namespace boost{ namespace math{
678678
// CUDA support:
679679
//
680680

681-
#ifdef __CUDACC__
681+
#if defined(__CUDACC__) && defined(BOOST_MATH_ENABLE_CUDA)
682682

683683
// We have to get our include order correct otherwise you get compilation failures
684684
#include <cuda.h>
@@ -774,7 +774,7 @@ BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_max(const T& a, const T& b) { return
774774
# define BOOST_MATH_STATIC_LOCAL_VARIABLE
775775
# else
776776
# define BOOST_MATH_INLINE_CONSTEXPR constexpr
777-
# define BOOST_MATH_STATIC constexpr
777+
# define BOOST_MATH_STATIC static
778778
# define BOOST_MATH_STATIC_LOCAL_VARIABLE static
779779
# endif
780780
#endif

test/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ if(HAVE_BOOST_TEST)
1010

1111
message(STATUS "Building boost.math with CUDA")
1212

13-
find_package(CUDA REQUIRED)
1413
enable_language(CUDA)
14+
find_package(CUDAToolkit REQUIRED)
1515
set(CMAKE_CUDA_EXTENSIONS OFF)
1616

1717
enable_testing()
1818

19-
boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::throw_exception Boost::unit_test_framework ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} )
19+
boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::throw_exception Boost::unit_test_framework CUDA::cudart COMPILE_DEFINITIONS BOOST_MATH_ENABLE_CUDA=1 )
2020

2121
elseif (BOOST_MATH_ENABLE_NVRTC)
2222

test/cuda_jamfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ project : requirements
99
[ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
1010
;
1111

12+
# Github Issues
13+
run github_issue_1383.cu ;
14+
run github_issue_1383_pt_2.cu ;
15+
1216
# Quad
1317
run test_exp_sinh_quad_float.cu ;
1418
run test_exp_sinh_quad_double.cu ;

test/github_issue_1383.cu

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// Copyright John Maddock 2016.
2+
// Copyright Matt Borland 2024 - 2026.
3+
// Use, modification and distribution are subject to the
4+
// Boost Software License, Version 1.0. (See accompanying file
5+
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6+
7+
#ifdef BOOST_MATH_ENABLE_CUDA
8+
# undef BOOST_MATH_ENABLE_CUDA
9+
#endif // BOOST_MATH_ENABLE_CUDA
10+
11+
// Purposefully pull in headers that caused errors in the linked issue
12+
#include <boost/math/special_functions.hpp>
13+
#include <boost/math/tools/config.hpp>
14+
#include <boost/math/tools/numeric_limits.hpp>
15+
#include <boost/math/constants/constants.hpp>
16+
17+
#include <iostream>
18+
#include <iomanip>
19+
#include <vector>
20+
#include <cmath>
21+
#include <random>
22+
#include "cuda_managed_ptr.hpp"
23+
#include "stopwatch.hpp"
24+
25+
#ifdef BOOST_MATH_ENABLE_CUDA
26+
# error "We should not be enabling this ourselves"
27+
#endif // BOOST_MATH_ENABLE_CUDA
28+
29+
// For the CUDA runtime routines (prefixed with "cuda_")
30+
#include <cuda_runtime.h>
31+
32+
typedef double float_type;
33+
34+
/**
35+
* CUDA Kernel Device code
36+
*
37+
*/
38+
/**
 * CUDA Kernel Device code
 *
 * One thread per element: each in-range thread writes the cosine of its
 * input slot to the matching output slot; surplus threads in the final
 * block simply return.
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    using std::cos;

    const int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx >= numElements)
    {
        return; // thread past the end of the data
    }

    out[idx] = cos(in[idx]);
}
48+
49+
/**
50+
* Host main routine
51+
*/
52+
int main()
53+
{
54+
// Error code to check return values for CUDA calls
55+
cudaError_t err = cudaSuccess;
56+
57+
// Print the vector length to be used, and compute its size
58+
int numElements = 50000;
59+
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
60+
61+
// Allocate the managed input vector A
62+
cuda_managed_ptr<float_type> input_vector(numElements);
63+
64+
// Allocate the managed output vector C
65+
cuda_managed_ptr<float_type> output_vector(numElements);
66+
67+
// Initialize the input vectors
68+
// Check some of our numeric_limits for viability
69+
std::mt19937_64 rng {42};
70+
std::uniform_real_distribution<float_type> dist(0, boost::math::constants::pi<float_type>());
71+
static_assert(boost::math::numeric_limits<float_type>::is_specialized, "Should be since it's a double");
72+
static_assert(boost::math::numeric_limits<float_type>::is_signed, "Should be since it's a double");
73+
74+
for (int i = 0; i < numElements; ++i)
75+
{
76+
input_vector[i] = dist(rng);
77+
}
78+
79+
// Launch the Vector Add CUDA Kernel
80+
int threadsPerBlock = 256;
81+
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
82+
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
83+
84+
watch w;
85+
86+
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
87+
cudaDeviceSynchronize();
88+
89+
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
90+
91+
err = cudaGetLastError();
92+
93+
if (err != cudaSuccess)
94+
{
95+
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
96+
return EXIT_FAILURE;
97+
}
98+
99+
// Verify that the result vector is correct
100+
std::vector<float_type> results;
101+
results.reserve(numElements);
102+
w.reset();
103+
for(int i = 0; i < numElements; ++i)
104+
results.push_back(std::cos(input_vector[i]));
105+
double t = w.elapsed();
106+
// check the results
107+
for(int i = 0; i < numElements; ++i)
108+
{
109+
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
110+
{
111+
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
112+
return EXIT_FAILURE;
113+
}
114+
}
115+
116+
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
117+
std::cout << "Done\n";
118+
119+
return 0;
120+
}

test/github_issue_1383_pt_2.cu

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// Copyright John Maddock 2016.
2+
// Copyright Matt Borland 2024 - 2026.
3+
// Use, modification and distribution are subject to the
4+
// Boost Software License, Version 1.0. (See accompanying file
5+
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6+
7+
// Purposefully pull in headers that caused errors in the linked issue
8+
#include <boost/math/special_functions.hpp>
9+
#include <boost/math/tools/config.hpp>
10+
#include <boost/math/tools/numeric_limits.hpp>
11+
#include <boost/math/constants/constants.hpp>
12+
13+
#include <iostream>
14+
#include <iomanip>
15+
#include <vector>
16+
#include <cmath>
17+
#include <random>
18+
#include "cuda_managed_ptr.hpp"
19+
#include "stopwatch.hpp"
20+
21+
// For the CUDA runtime routines (prefixed with "cuda_")
22+
#include <cuda_runtime.h>
23+
24+
typedef double float_type;
25+
26+
/**
27+
* CUDA Kernel Device code
28+
*
29+
*/
30+
/**
 * CUDA Kernel Device code
 *
 * Each in-range thread writes cos(in[i]) to out[i], then exercises
 * boost::math::numeric_limits on the device: if the stored value exceeds
 * max() or double reports as unsigned, the kernel traps. Out-of-range
 * threads return immediately.
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    using std::cos;

    const int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx >= numElements)
    {
        return; // thread past the end of the data
    }

    out[idx] = cos(in[idx]);

    // Device-side sanity check of numeric_limits traits
    if (out[idx] > boost::math::numeric_limits<float_type>::max() || !boost::math::numeric_limits<float_type>::is_signed)
    {
        __trap();
    }
}
44+
45+
/**
46+
* Host main routine
47+
*/
48+
int main()
49+
{
50+
// Error code to check return values for CUDA calls
51+
cudaError_t err = cudaSuccess;
52+
53+
// Print the vector length to be used, and compute its size
54+
int numElements = 50000;
55+
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
56+
57+
// Allocate the managed input vector A
58+
cuda_managed_ptr<float_type> input_vector(numElements);
59+
60+
// Allocate the managed output vector C
61+
cuda_managed_ptr<float_type> output_vector(numElements);
62+
63+
// Initialize the input vectors
64+
// Check some of our numeric_limits for viability
65+
std::mt19937_64 rng {42};
66+
std::uniform_real_distribution<float_type> dist(0, boost::math::constants::pi<float_type>());
67+
static_assert(boost::math::numeric_limits<float_type>::is_specialized, "Should be since it's a double");
68+
static_assert(boost::math::numeric_limits<float_type>::is_signed, "Should be since it's a double");
69+
70+
for (int i = 0; i < numElements; ++i)
71+
{
72+
input_vector[i] = dist(rng);
73+
}
74+
75+
// Launch the Vector Add CUDA Kernel
76+
int threadsPerBlock = 256;
77+
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
78+
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
79+
80+
watch w;
81+
82+
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
83+
cudaDeviceSynchronize();
84+
85+
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
86+
87+
err = cudaGetLastError();
88+
89+
if (err != cudaSuccess)
90+
{
91+
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
92+
return EXIT_FAILURE;
93+
}
94+
95+
// Verify that the result vector is correct
96+
std::vector<float_type> results;
97+
results.reserve(numElements);
98+
w.reset();
99+
for(int i = 0; i < numElements; ++i)
100+
results.push_back(std::cos(input_vector[i]));
101+
double t = w.elapsed();
102+
// check the results
103+
for(int i = 0; i < numElements; ++i)
104+
{
105+
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
106+
{
107+
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
108+
return EXIT_FAILURE;
109+
}
110+
}
111+
112+
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
113+
std::cout << "Done\n";
114+
115+
return 0;
116+
}

0 commit comments

Comments (0)