33 'cuMemcpyHtoDAsync_v2',
34 'cuMemcpyDtoHAsync_v2',
36 'cuStreamSynchronize',
44 'cudaDeviceSynchronize',
45 'cudaThreadSynchronize',
48 'cudaStreamWaitEvent',
49 'cudaStreamSynchronize',
50 'cudaEventSynchronize',
61 'cudaMemcpyToArrayAsync',
67 'cudaMemcpy3DPeerAsync',
69 'cudaMemcpyFromArray',
70 'cudaMemcpyArrayToArray',
71 'cudaMemcpy2DToArray',
72 'cudaMemcpy2DFromArray',
73 'cudaMemcpy2DArrayToArray',
75 'cudaMemcpyFromSymbol',
76 'cudaMemcpyPeerAsync',
77 'cudaMemcpyFromArrayAsync',
79 'cudaMemcpy2DToArrayAsync',
80 'cudaMemcpy2DFromArrayAsync',
81 'cudaMemcpyToSymbolAsync',
82 'cudaMemcpyFromSymbolAsync',
102 // GENERATED FILE DON'T EDIT 103 #include "gpu_blame-cuda-driver-header.h" 105 #include<cuda_runtime_api.h> 107 fp.write(
'cuDriverFunctionPointer_t cuDriverFunctionPointer[] = {\n')
108 for name
in funcNames:
109 fp.write(
'\t {{.generic = (void*)0},"' + name[1] +
'"},\n')
121 // GENERATED FILE DON'T EDIT 123 #include<cuda_runtime_api.h> 124 #include "gpu_blame-cuda-runtime-header.h" 126 fp.write(
'cudaRuntimeFunctionPointer_t cudaRuntimeFunctionPointer[] = {\n')
127 for name
in funcNames:
128 fp.write(
'\t {{.generic = (void*)0},"' + name[1] +
'"},\n')
138 result = result +
'_' 139 result = result + letter.upper()
164 // GENERATED FILE DON'T EDIT 165 #ifndef __CU_DRIVER_HEADER_H__ 166 #define __CU_DRIVER_HEADER_H__ 168 #include<cuda_runtime_api.h> 169 typedef struct cuDriverFunctionPointer { 175 fp.write(
'\t' + sig[0] +
'(*' + sig[1] +
'Real) (' + sig[2] +
');\n' )
179 const char *functionName; 180 } cuDriverFunctionPointer_t; 192 enum cuDriverAPIIndex { 200 extern cuDriverFunctionPointer_t cuDriverFunctionPointer[CU_MAX_APIS]; 220 // GENERATED FILE DON'T EDIT 221 #ifndef __CUDA_RUNTIME_HEADER_H__ 222 #define __CUDA_RUNTIME_HEADER_H__ 224 #include<cuda_runtime_api.h> 225 typedef struct cudaRuntimeFunctionPointer { 231 fp.write(
'\t' + sig[0] +
'(*' + sig[1] +
'Real) (' + sig[2] +
');\n' )
235 const char *functionName; 236 } cudaRuntimeFunctionPointer_t; 249 enum cudaRuntimeAPIIndex{ 257 extern cudaRuntimeFunctionPointer_t cudaRuntimeFunctionPointer[CUDA_MAX_APIS]; 268 // GENERATED FILE DON'T EDIT 270 #include <hpcrun/thread_data.h> 273 #include "gpu_blame-cuda-driver-header.h" 274 extern bool hpcrun_is_safe_to_sync(const char* fn); 279 if sig[1]
in driverSkipList:
continue 281 fp.write(
'\t' + sig[0] + sig[1] +
' (' + sig[2] +
') {\n' )
282 fp.write(
'if (! hpcrun_is_safe_to_sync(__func__)) {')
283 fp.write(
' return cuDriverFunctionPointer[' +
FuncNameToEnum(sig[1]) +
'].' + sig[1] +
'Real(')
284 args = sig[2].split(
',')
286 for argTypeName
in args:
291 param = argTypeName.split()[-1].split(
'*')[-1]
292 if param.strip() !=
"void":
298 fp.write(
'TD_GET(gpu_data.is_thread_at_cuda_sync) = true;\n')
299 fp.write(
'monitor_disable_new_threads();\n')
301 fp.write(
'CUresult ret = cuDriverFunctionPointer[' +
FuncNameToEnum(sig[1]) +
'].' + sig[1] +
'Real(')
303 args = sig[2].split(
',')
305 for argTypeName
in args:
310 param = argTypeName.split()[-1].split(
'*')[-1]
311 if param.strip() !=
"void":
316 fp.write(
'monitor_enable_new_threads();\n')
317 fp.write(
'TD_GET(gpu_data.is_thread_at_cuda_sync) = false;\n')
319 fp.write(
'return ret;\n')
332 // GENERATED FILE DON'T EDIT 334 #include <hpcrun/thread_data.h> 337 #include<cuda_runtime_api.h> 338 #include "gpu_blame-cuda-runtime-header.h" 339 extern bool hpcrun_is_safe_to_sync(const char* fn); 344 if sig[1]
in runtimeSkipList:
continue 346 fp.write(
'\t' + sig[0] + sig[1] +
' (' + sig[2] +
') {\n' )
347 fp.write(
'if (! hpcrun_is_safe_to_sync(__func__)) {')
348 fp.write(
' return cudaRuntimeFunctionPointer[' +
FuncNameToEnum(sig[1]) +
'].' + sig[1] +
'Real(')
349 args = sig[2].split(
',')
351 for argTypeName
in args:
356 param = argTypeName.split()[-1].split(
'*')[-1]
357 if param.strip() !=
"void":
363 fp.write(
'TD_GET(gpu_data.is_thread_at_cuda_sync) = true;\n')
364 fp.write(
'monitor_disable_new_threads();\n')
366 fp.write(
'cudaError_t ret = cudaRuntimeFunctionPointer[' +
FuncNameToEnum(sig[1]) +
'].' + sig[1] +
'Real(')
368 args = sig[2].split(
',')
370 for argTypeName
in args:
375 param = argTypeName.split()[-1].split(
'*')[-1]
376 if param.strip() !=
"void":
381 fp.write(
'monitor_enable_new_threads();\n')
382 fp.write(
'TD_GET(gpu_data.is_thread_at_cuda_sync) = false;\n')
384 fp.write(
'return ret;\n')
400 cuPattern =
'\s*(CUresult[\s\n]+)(cu[a-zA-Z0-9_]*[\s\n]*)\(([^;]*)\)[\s\n]*;' 401 cudaPattern =
'\s*extern[\s\n]+(cudaError_t[\s\n]+)(cuda[a-zA-Z0-9_]*[\s\n]*)\(([^;]*)\)[\s\n]*;' 406 inFile = open(sys.argv[2]).
read()
408 generatedHeaderFile = sys.argv[3]
409 generatedTableFile = sys.argv[4]
410 generatedWrapperFile = sys.argv[5]
413 if sys.argv[1] ==
'driver':
414 lines = re.finditer(cuPattern,inFile, re.MULTILINE)
415 elif sys.argv[1] ==
'runtime':
416 lines = re.finditer(cudaPattern,inFile, re.MULTILINE)
418 print 'Invalid pattern' 422 defaultValue = re.compile(
'__dv\s*\(.*\)')
426 funcName = line.group(2)
427 funcPrefix = line.group(1)
428 funcArgs = line.group(3)
429 noDefaultArgs = defaultValue.sub(
'',funcArgs)
431 args = noDefaultArgs.split(
',')
433 for argTypeName
in args:
434 last = argTypeName.split()[-1]
435 last = last.split(
'*')[-1]
437 signatures.append((funcPrefix, funcName, noDefaultArgs))
439 if sys.argv[1] ==
'driver':
443 elif sys.argv[1] ==
'runtime':
def WritecuDriverFunctionPointerTable(file, funcNames)
def WriteRuntimeFunctionWrapper(file, funcSig)
endif ''')
def WriteDriverFunctionPointerHeader(file, funcSig)
def WritecuRuntimeFunctionPointerTable(file, funcNames)
def FuncNameToCapitalizedEnum(name)
ssize_t MONITOR_EXT_WRAP_NAME() read(int fd, void *buf, size_t count)
def WriteDriverFunctionWrapper(file, funcSig)
def WriteRuntimeFunctionPointerHeader(file, funcSig)