82 lines
3.1 KiB
Python
82 lines
3.1 KiB
Python
# Owner(s): ["module: unknown"]
|
|
|
|
from torch.testing._internal.common_utils import TestCase, run_tests
|
|
from torch.testing._internal.check_kernel_launches import (
|
|
check_cuda_kernel_launches, check_code_for_cuda_kernel_launches
|
|
)
|
|
|
|
|
|
class AlwaysCheckCudaLaunchTest(TestCase):
|
|
def test_check_code(self):
|
|
"""Verifies that the regex works for a few different situations"""
|
|
|
|
# Try some different spacings
|
|
self.assertEqual(2, check_code_for_cuda_kernel_launches("""
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>>(arg1,arg2,arg3);
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>>(arg1,arg2,arg3);
|
|
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>>(arg1,arg2,arg3);
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>>(arg1,arg2,arg3);
|
|
some_other_stuff;
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>>(arg1,arg2,arg3);
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>> (arg1,arg2,arg3);
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>> ( arg1 , arg2 , arg3 ) ;
|
|
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
"""))
|
|
|
|
# Does it work for macros?
|
|
self.assertEqual(0, check_code_for_cuda_kernel_launches(r"""
|
|
#define SOME_MACRO(x) some_function_call<<<1,2>>> ( x ) ; \
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
|
|
#define SMALL_INDEX(TENSOR_TYPE, INDICES_TYPE, TYPE, SELF_DIM, SOURCE_DIM, IDX_DIM) \
|
|
indexAddSmallIndex<TENSOR_TYPE, INDICES_TYPE, TYPE, SELF_DIM, SOURCE_DIM, IDX_DIM> \
|
|
<<<smallIndexGrid, smallIndexBlock, 0, stream>>>( \
|
|
selfInfo, sourceInfo, indexInfo, \
|
|
selfAddDim, sourceAddDim, sliceSize, selfAddDimSize); \
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
"""))
|
|
|
|
# Does it work for lambdas?
|
|
self.assertEqual(1, check_code_for_cuda_kernel_launches(r"""
|
|
rrelu_with_noise_cuda_kernel<scalar_t, 2><<<grid, block, 0, stream>>>(
|
|
numel,
|
|
rng_engine_inputs,
|
|
output_data,
|
|
input_data,
|
|
noise_data,
|
|
lower,
|
|
upper,
|
|
[] __device__ (curandStatePhilox4_32_10_t* state) {
|
|
return curand_uniform2_double(state);
|
|
});
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
|
|
rrelu_with_noise_cuda_kernel<scalar_t, 2><<<grid, block, 0, stream>>>(
|
|
numel,
|
|
rng_engine_inputs,
|
|
output_data,
|
|
input_data,
|
|
noise_data,
|
|
lower,
|
|
upper,
|
|
[] __device__ (curandStatePhilox4_32_10_t* state) {
|
|
return curand_uniform2_double(state);
|
|
});
|
|
uh oh;
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
"""))
|
|
|
|
def test_check_cuda_launches(self):
|
|
unsafeLaunchesCount = check_cuda_kernel_launches()
|
|
self.assertTrue(unsafeLaunchesCount == 0)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
run_tests()
|