Advanced Multi-Physics (AMP)
On-Line Documentation
Helper_Cuda.h
Go to the documentation of this file.
1#ifndef AMP_CudaHelpers
2#define AMP_CudaHelpers
3
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7
8#include "AMP/utils/UtilityMacros.h"
9#include "AMP/utils/cuda/helper_string.h"
10
11#include "StackTrace/source_location.h"
12
13#include <cuda.h>
14#include <cuda_runtime.h>
15
16#define hostDeviceId cudaCpuDeviceId
17
18#define deviceMemAttachGlobal cudaMemAttachGlobal
19
20#define deviceMemcpyHostToDevice cudaMemcpyHostToDevice
21#define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
22#define deviceMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
23
24#define deviceInit( ... ) checkCudaErrors( cuInit( __VA_ARGS__ ) )
25#define deviceGetCount( ... ) checkCudaErrors( cudaGetDeviceCount( __VA_ARGS__ ) )
26#define deviceBind( ... ) checkCudaErrors( cudaSetDevice( __VA_ARGS__ ) )
27#define deviceId( ... ) checkCudaErrors( cudaGetDevice( __VA_ARGS__ ) )
28#define deviceSynchronize() checkCudaErrors( cudaDeviceSynchronize() )
29#define deviceMalloc( ... ) checkCudaErrors( cudaMalloc( __VA_ARGS__ ) )
30#define deviceMallocManaged( ... ) checkCudaErrors( cudaMallocManaged( __VA_ARGS__ ) )
31#define deviceMemcpy( ... ) checkCudaErrors( cudaMemcpy( __VA_ARGS__ ) )
32#define deviceMemset( ... ) checkCudaErrors( cudaMemset( __VA_ARGS__ ) )
33#define deviceFree( ... ) checkCudaErrors( cudaFree( __VA_ARGS__ ) )
34#define deviceMemPrefetchAsync( ... ) checkCudaErrors( cudaMemPrefetchAsync( __VA_ARGS__ ) )
35
36
37#ifndef EXIT_WAIVED
38 #define EXIT_WAIVED 2
39#endif
40
41
42#ifdef __DRIVER_TYPES_H__
43 #ifndef DEVICE_RESET
44 #define DEVICE_RESET cudaDeviceReset();
45 #endif
46#else
47 #ifndef DEVICE_RESET
48 #define DEVICE_RESET
49 #endif
50#endif
51
52
53namespace AMP::Utilities {
54enum class MemoryType : int8_t;
55}
56
57// Get the pointer type from cuda
58AMP::Utilities::MemoryType getCudaMemoryType( const void *ptr );
59
60
61// Get the name of a return code
62template<typename T>
63const char *cudaGetName( T result );
64
65// Check the return code
66template<typename T>
67void checkCudaErrors( T result,
68 const StackTrace::source_location &source = SOURCE_LOCATION_CURRENT() );
69
70// Get the last cuda error
71void getLastDeviceError( const char *errorMessage,
72 const StackTrace::source_location &source = SOURCE_LOCATION_CURRENT() );
73
74#ifndef MAX
75 #define MAX( a, b ) ( a > b ? a : b )
76#endif
77
78// Float To Int conversion
// Float to int conversion, rounding half away from zero
// (e.g. 1.5f -> 2, -1.5f -> -2). Uses float literals so the
// arithmetic stays in single precision instead of being silently
// promoted to double.
inline int ftoi( float value )
{
    return ( value >= 0 ? (int) ( value + 0.5f ) : (int) ( value - 0.5f ) );
}
83
84// Beginning of GPU Architecture definitions
// Beginning of GPU Architecture definitions
// Map a compute capability (major.minor) to the number of CUDA cores per SM.
// If the SM version is not in the table, warns and falls back to the last
// (newest) known entry so newer hardware still runs.
inline int _ConvertSMVer2Cores( int major, int minor )
{
    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
    typedef struct {
        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
        int Cores;
    } sSMtoCores;

    // Table mirrors NVIDIA's CUDA samples helper_cuda.h
    sSMtoCores nGpuArchCoresPerSM[] = { { 0x20, 32 },  // Fermi Generation (SM 2.0) GF100 class
                                        { 0x21, 48 },  // Fermi Generation (SM 2.1) GF10x class
                                        { 0x30, 192 }, // Kepler Generation (SM 3.0) GK10x class
                                        { 0x32, 192 }, // Kepler Generation (SM 3.2) GK10x class
                                        { 0x35, 192 }, // Kepler Generation (SM 3.5) GK11x class
                                        { 0x37, 192 }, // Kepler Generation (SM 3.7) GK21x class
                                        { 0x50, 128 }, // Maxwell Generation (SM 5.0) GM10x class
                                        { 0x52, 128 }, // Maxwell Generation (SM 5.2) GM20x class
                                        { 0x53, 128 }, // Maxwell Generation (SM 5.3) GM20x class
                                        { 0x60, 64 },  // Pascal Generation (SM 6.0) GP100 class
                                        { 0x61, 128 }, // Pascal Generation (SM 6.1) GP10x class
                                        { 0x62, 128 }, // Pascal Generation (SM 6.2) GP10x class
                                        { 0x70, 64 },  // Volta Generation (SM 7.0) GV100 class
                                        { 0x72, 64 },  // Volta Generation (SM 7.2) GV11b class
                                        { 0x75, 64 },  // Turing Generation (SM 7.5) TU10x class
                                        { 0x80, 64 },  // Ampere Generation (SM 8.0) GA100 class
                                        { 0x86, 128 }, // Ampere Generation (SM 8.6) GA10x class
                                        { 0x87, 128 }, // Ampere Generation (SM 8.7) GA10x class
                                        { 0x89, 128 }, // Ada Generation (SM 8.9) AD10x class
                                        { 0x90, 128 }, // Hopper Generation (SM 9.0) GH100 class
                                        { -1, -1 } };

    int index = 0;

    while ( nGpuArchCoresPerSM[index].SM != -1 ) {
        if ( nGpuArchCoresPerSM[index].SM == ( ( major << 4 ) + minor ) ) {
            return nGpuArchCoresPerSM[index].Cores;
        }

        index++;
    }

    // If we don't find the values, we default use the previous one to run properly
    printf( "MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n",
            major,
            minor,
            nGpuArchCoresPerSM[index - 1].Cores );
    return nGpuArchCoresPerSM[index - 1].Cores;
}
// end of GPU Architecture definitions
120// end of GPU Architecture definitions
121
122#ifdef __CUDA_RUNTIME_H__
123// General GPU Device CUDA Initialization
124int gpuDeviceInit( int devID );
125
126// This function returns the best GPU (with maximum GFLOPS)
127int gpuGetMaxGflopsDeviceId();
128
129
130// Initialization code to find the best CUDA Device
131int findCudaDevice( int argc, const char **argv );
132
133// General check for CUDA GPU SM Capabilities
134bool checkCudaCapabilities( int major_version, int minor_version );
135
136
137#endif
138
139static void inline setKernelDims( size_t n, dim3 &BlockDim, dim3 &GridDim )
140{
141 // Parameters for an NVIDIA Volta.
142 // https://images.nvidia.com/content/volta-architecture/pdf/volta-architecture-whitepaper.pdf
143 // We should move to using occupancy API
144 constexpr int warpSize = 32;
145 constexpr int maxGridSize = 32 * 80; // max 32 blocks per SM of Volta, 80 SM's
146 // this number might need to be tuned
147 // consider querying for device info
148 int warpCount = ( n / warpSize ) + ( ( ( n % warpSize ) == 0 ) ? 0 : 1 );
149 int warpPerBlock = std::max( 1, std::min( 4, warpCount ) );
150 int threadCount = warpSize * warpPerBlock;
151 int blockCount = std::min( maxGridSize, std::max( 1, warpCount / warpPerBlock ) );
152 BlockDim = dim3( threadCount, 1, 1 );
153 GridDim = dim3( blockCount, 1, 1 );
154 return;
155}
156
157// end of CUDA Helper Functions
158
159
160#endif
int _ConvertSMVer2Cores(int major, int minor)
Definition Helper_Cuda.h:85
static void setKernelDims(size_t n, dim3 &BlockDim, dim3 &GridDim)
void checkCudaErrors(T result, const StackTrace::source_location &source=SOURCE_LOCATION_CURRENT())
void getLastDeviceError(const char *errorMessage, const StackTrace::source_location &source=SOURCE_LOCATION_CURRENT())
const char * cudaGetName(T result)
AMP::Utilities::MemoryType getCudaMemoryType(const void *ptr)
int ftoi(float value)
Definition Helper_Cuda.h:79
MemoryType
Enum to store pointer type.
Definition Memory.h:21



Advanced Multi-Physics (AMP)
Oak Ridge National Laboratory
Idaho National Laboratory
Los Alamos National Laboratory
This page automatically produced from the
source code by doxygen
Last updated: Tue Mar 10 2026 13:06:41.
Comments on this page