53static void inline setKernelDims(
size_t n, dim3 &BlockDim, dim3 &GridDim )
62 constexpr int waveFrontSize = 64;
66 constexpr int maxGridSize = 228 * 10;
68 int warpCount = ( n / waveFrontSize ) + ( ( ( n % waveFrontSize ) == 0 ) ? 0 : 1 );
69 int warpPerBlock = std::max( 1, std::min( 4, warpCount ) );
70 int threadCount = waveFrontSize * warpPerBlock;
71 int blockCount = std::min( maxGridSize, std::max( 1, warpCount / warpPerBlock ) );
72 BlockDim = dim3( threadCount, 1, 1 );
73 GridDim = dim3( blockCount, 1, 1 );