HPCToolkit
make-cuda-wrappers.py
Go to the documentation of this file.
1 #!/usr/local/bin/python
2 # -*- python -*-
3 #
4 # HPCToolkit MPI Profiler
5 # this script is adapted from mpiP MPI Profiler ( http://mpip.sourceforge.net/ )
6 #
7 # Please see COPYRIGHT AND LICENSE information at the end of this file.
8 #
9 #
10 # make-wrappers.py -- parse the mpi prototype file and generate a
11 # series of output files, which include the wrappers for profiling
12 # layer and other data structures.
13 #
14 # $Id: make-wrappers.py 442 2010-03-03 17:18:04Z chcham $
15 #
16 
17 import sys
18 import string
19 import os
20 import copy
21 import re
22 import time
23 import getopt
24 import socket
25 import pdb
26 
27 
# CUDA driver API entry points for which WriteDriverFunctionWrapper emits no
# wrapper ("the manually done ones"). They still appear in the generated
# header and function-pointer table.
driverSkipList = [
    # context management
    "cuCtxCreate_v2",
    "cuCtxDestroy_v2",
    # host <-> device copies
    "cuMemcpyHtoD_v2",
    "cuMemcpyDtoH_v2",
    "cuMemcpyHtoDAsync_v2",
    "cuMemcpyDtoHAsync_v2",
    # streams and events
    "cuStreamCreate",
    "cuStreamSynchronize",
    "cuStreamDestroy_v2",
    "cuEventSynchronize",
    # kernel launch
    "cuLaunchGridAsync",
    "cuLaunchKernel",
]
41 
42 
# CUDA runtime API entry points for which WriteRuntimeFunctionWrapper emits no
# wrapper ("the manually done ones"). They still appear in the generated
# header and function-pointer table.
runtimeSkipList = [
    # synchronization, streams and events
    "cudaDeviceSynchronize",
    "cudaThreadSynchronize",
    "cudaStreamCreate",
    "cudaStreamDestroy",
    "cudaStreamWaitEvent",
    "cudaStreamSynchronize",
    "cudaEventSynchronize",
    # kernel launch
    "cudaConfigureCall",
    "cudaLaunch",
    # allocation
    "cudaMalloc",
    "cudaMallocArray",
    "cudaFree",
    "cudaFreeArray",
    # memory copies
    "cudaMemcpy",
    "cudaMemcpy2D",
    "cudaMemcpyAsync",
    "cudaMemcpyToArray",
    "cudaMemcpyToArrayAsync",
    "cudaMalloc3D",
    "cudaMalloc3DArray",
    "cudaMemcpy3D",
    "cudaMemcpy3DPeer",
    "cudaMemcpy3DAsync",
    "cudaMemcpy3DPeerAsync",
    "cudaMemcpyPeer",
    "cudaMemcpyFromArray",
    "cudaMemcpyArrayToArray",
    "cudaMemcpy2DToArray",
    "cudaMemcpy2DFromArray",
    "cudaMemcpy2DArrayToArray",
    "cudaMemcpyToSymbol",
    "cudaMemcpyFromSymbol",
    "cudaMemcpyPeerAsync",
    "cudaMemcpyFromArrayAsync",
    "cudaMemcpy2DAsync",
    "cudaMemcpy2DToArrayAsync",
    "cudaMemcpy2DFromArrayAsync",
    "cudaMemcpyToSymbolAsync",
    "cudaMemcpyFromSymbolAsync",
    # memset
    "cudaMemset",
    "cudaMemset2D",
    "cudaMemset3D",
    "cudaMemsetAsync",
    "cudaMemset2DAsync",
    "cudaMemset3DAsync",
]
89 
90 
91 
92 
def WritecuDriverFunctionPointerTable(file, funcNames):
    """Write the C source file that defines the driver function-pointer table.

    The generated file contains a table like this, one entry per signature:

        cuDriverFunctionPointer_t cuDriverFunctionPointer[] = {
            {{.generic = (void*)0},"cuStreamCreate"},
            {{.generic = (void*)0},"cuStreamDestroy"},
            ...
        };

    file      -- path of the C file to create (overwritten if it exists).
    funcNames -- iterable of signature tuples; only element [1], the
                 function name, is used.
    """
    # 'with' guarantees the file is closed even if a write raises.
    with open(file, 'w') as fp:
        fp.write('''
// GENERATED FILE DON'T EDIT
#include "gpu_blame-cuda-driver-header.h"
#include<cuda.h>
#include<cuda_runtime_api.h>
''')
        fp.write('cuDriverFunctionPointer_t cuDriverFunctionPointer[] = {\n')
        for name in funcNames:
            # Every pointer starts out null; only the name is filled in here.
            fp.write('\t {{.generic = (void*)0},"' + name[1] + '"},\n')
        fp.write('};\n')
112 
def WritecuRuntimeFunctionPointerTable(file, funcNames):
    """Write the C source file that defines the runtime function-pointer table.

    The generated file contains a table like this, one entry per signature:

        cudaRuntimeFunctionPointer_t cudaRuntimeFunctionPointer[] = {
            {{.generic = (void*)0},"cudaThreadSynchronize"},
            ...
        };

    file      -- path of the C file to create (overwritten if it exists).
    funcNames -- iterable of signature tuples; only element [1], the
                 function name, is used.
    """
    # 'with' guarantees the file is closed even if a write raises.
    with open(file, 'w') as fp:
        fp.write('''
// GENERATED FILE DON'T EDIT
#include<cuda.h>
#include<cuda_runtime_api.h>
#include "gpu_blame-cuda-runtime-header.h"
''')
        fp.write('cudaRuntimeFunctionPointer_t cudaRuntimeFunctionPointer[] = {\n')
        for name in funcNames:
            # Every pointer starts out null; only the name is filled in here.
            fp.write('\t {{.generic = (void*)0},"' + name[1] + '"},\n')
        fp.write('};\n')
131 
132 
def FuncNameToCapitalizedEnum(name):
    """Convert a camelCase API name to UPPER_SNAKE_CASE.

    Each uppercase letter is preceded by an underscore and every character
    is upper-cased, e.g. cuStreamCreate -> CU_STREAM_CREATE.
    """
    # An uppercase letter is already its own upper() form, so only the
    # underscore needs to be added for it.
    return ''.join('_' + letter if letter.isupper() else letter.upper()
                   for letter in name)
141 
142 
def FuncNameToEnum(name):
    """Return the enum constant for an API function: its name plus 'Enum'."""
    suffix = 'Enum'
    return ''.join((name, suffix))
145 
146 
147 
def WriteDriverFunctionPointerHeader(file, funcSig):
    """Write the C header declaring the driver function-pointer type and index enum.

    The generated header contains a struct like this:

        typedef struct cuDriverFunctionPointer {
            union {
                void* generic;
                CUresult (*cuStreamCreateReal) (CUstream * phStream, unsigned int Flags);
                CUresult (*cuStreamDestroyReal) (CUstream hStream);
                ...
            };
            const char *functionName;
        } cuDriverFunctionPointer_t;

    followed by an enum with one entry per function, in the same order,
    terminated by CU_MAX_APIS:

        enum cuDriverAPIIndex {
            cuStreamCreateEnum,
            cuStreamDestroyEnum,
            ...
            CU_MAX_APIS
        };

    and an extern declaration of the cuDriverFunctionPointer table.

    file    -- path of the header to create (overwritten if it exists).
    funcSig -- sequence of (return type, function name, parameter list)
               string tuples; it is iterated twice.
    """
    # 'with' guarantees the file is closed even if a write raises.
    with open(file, 'w') as fp:
        fp.write('''
// GENERATED FILE DON'T EDIT
#ifndef __CU_DRIVER_HEADER_H__
#define __CU_DRIVER_HEADER_H__
#include<cuda.h>
#include<cuda_runtime_api.h>
typedef struct cuDriverFunctionPointer {
 union {
 void* generic;
''')

        # One typed "<name>Real" pointer member per API function.
        for sig in funcSig:
            fp.write('\t' + sig[0] + '(*' + sig[1] + 'Real) (' + sig[2] + ');\n')

        fp.write(
''' };
 const char *functionName;
} cuDriverFunctionPointer_t;
''')

        fp.write('''
enum cuDriverAPIIndex {
''')
        # Enum entries follow funcSig order, the same order the table
        # generator uses for its entries.
        for sig in funcSig:
            fp.write('\t' + FuncNameToEnum(sig[1]) + ',\n')

        fp.write('''
CU_MAX_APIS
};
extern cuDriverFunctionPointer_t cuDriverFunctionPointer[CU_MAX_APIS];
''')

        fp.write('#endif\n')
205 
206 
207 
def WriteRuntimeFunctionPointerHeader(file, funcSig):
    """Write the C header declaring the runtime function-pointer type and index enum.

    The generated header contains a struct like this:

        typedef struct cudaRuntimeFunctionPointer {
            union {
                void* generic;
                cudaError_t (*cudaThreadSynchronizeReal) (void);
                ...
            };
            const char *functionName;
        } cudaRuntimeFunctionPointer_t;

    followed by an enum with one entry per function, in the same order,
    terminated by CUDA_MAX_APIS:

        enum cudaRuntimeAPIIndex{
            cudaThreadSynchronizeEnum,
            cudaStreamSynchronizeEnum,
            ...
            CUDA_MAX_APIS
        };

    and an extern declaration of the cudaRuntimeFunctionPointer table.

    file    -- path of the header to create (overwritten if it exists).
    funcSig -- sequence of (return type, function name, parameter list)
               string tuples; it is iterated twice.
    """
    # 'with' guarantees the file is closed even if a write raises.
    with open(file, 'w') as fp:
        fp.write('''
// GENERATED FILE DON'T EDIT
#ifndef __CUDA_RUNTIME_HEADER_H__
#define __CUDA_RUNTIME_HEADER_H__
#include<cuda.h>
#include<cuda_runtime_api.h>
typedef struct cudaRuntimeFunctionPointer {
 union {
 void* generic;
''')

        # One typed "<name>Real" pointer member per API function.
        for sig in funcSig:
            fp.write('\t' + sig[0] + '(*' + sig[1] + 'Real) (' + sig[2] + ');\n')

        fp.write(
''' };
 const char *functionName;
} cudaRuntimeFunctionPointer_t;
''')

        fp.write('''
enum cudaRuntimeAPIIndex{
''')
        # Enum entries follow funcSig order, the same order the table
        # generator uses for its entries.
        for sig in funcSig:
            fp.write('\t' + FuncNameToEnum(sig[1]) + ',\n')

        fp.write('''
CUDA_MAX_APIS
};
extern cudaRuntimeFunctionPointer_t cudaRuntimeFunctionPointer[CUDA_MAX_APIS];
''')

        fp.write('#endif\n')
262 
263 
264 
def WriteDriverFunctionWrapper(file, funcSig):
    """Write the C file containing generated wrappers for CUDA driver functions.

    For every signature whose name is not in driverSkipList, a C function
    with the same name and parameters is emitted. The wrapper:

      * calls the real function straight away (through the
        cuDriverFunctionPointer table) when hpcrun_is_safe_to_sync(__func__)
        is false;
      * otherwise sets is_thread_at_cuda_sync, disables new-thread
        monitoring, calls the real function, undoes both, and returns the
        result.

    file    -- path of the C file to create (overwritten if it exists).
    funcSig -- iterable of (return type, function name, parameter list)
               string tuples.
    """
    # 'with' guarantees the file is closed even if a write raises.
    with open(file, 'w') as fp:
        fp.write('''
// GENERATED FILE DON'T EDIT
#include <stdbool.h>
#include <hpcrun/thread_data.h>
#include <monitor.h>
#include<cuda.h>
#include "gpu_blame-cuda-driver-header.h"
extern bool hpcrun_is_safe_to_sync(const char* fn);
''')

        for sig in funcSig:
            # skip the manually done ones
            if sig[1] in driverSkipList:
                continue

            # Build the forwarded argument list once: the last token of each
            # parameter with any leading '*' removed is its name. A lone
            # 'void' (or an empty parameter list) forwards nothing.
            forwarded = []
            for argTypeName in sig[2].split(','):
                tokens = argTypeName.split()
                param = tokens[-1].split('*')[-1] if tokens else ''
                forwarded.append('' if param == 'void' else param)
            realCall = ('cuDriverFunctionPointer[' + FuncNameToEnum(sig[1]) + '].'
                        + sig[1] + 'Real(' + ', '.join(forwarded) + ');\n')

            fp.write('\t' + sig[0] + sig[1] + ' (' + sig[2] + ') {\n')
            # Fast path: not safe to sync, just forward the call.
            fp.write('if (! hpcrun_is_safe_to_sync(__func__)) {')
            fp.write(' return ' + realCall)
            fp.write('}\n')
            # Slow path: bracket the real call with the sync bookkeeping.
            fp.write('TD_GET(gpu_data.is_thread_at_cuda_sync) = true;\n')
            fp.write('monitor_disable_new_threads();\n')
            fp.write('CUresult ret = ' + realCall)
            fp.write('monitor_enable_new_threads();\n')
            fp.write('TD_GET(gpu_data.is_thread_at_cuda_sync) = false;\n')
            fp.write('return ret;\n')
            fp.write('}\n')
325 
326 
327 
328 
def WriteRuntimeFunctionWrapper(file, funcSig):
    """Write the C file containing generated wrappers for CUDA runtime functions.

    For every signature whose name is not in runtimeSkipList, a C function
    with the same name and parameters is emitted. The wrapper:

      * calls the real function straight away (through the
        cudaRuntimeFunctionPointer table) when
        hpcrun_is_safe_to_sync(__func__) is false;
      * otherwise sets is_thread_at_cuda_sync, disables new-thread
        monitoring, calls the real function, undoes both, and returns the
        result.

    file    -- path of the C file to create (overwritten if it exists).
    funcSig -- iterable of (return type, function name, parameter list)
               string tuples.
    """
    # 'with' guarantees the file is closed even if a write raises.
    with open(file, 'w') as fp:
        fp.write('''
// GENERATED FILE DON'T EDIT
#include <stdbool.h>
#include <hpcrun/thread_data.h>
#include <monitor.h>
#include<cuda.h>
#include<cuda_runtime_api.h>
#include "gpu_blame-cuda-runtime-header.h"
extern bool hpcrun_is_safe_to_sync(const char* fn);
''')

        for sig in funcSig:
            # skip the manually done ones
            if sig[1] in runtimeSkipList:
                continue

            # Build the forwarded argument list once: the last token of each
            # parameter with any leading '*' removed is its name. A lone
            # 'void' (or an empty parameter list) forwards nothing.
            forwarded = []
            for argTypeName in sig[2].split(','):
                tokens = argTypeName.split()
                param = tokens[-1].split('*')[-1] if tokens else ''
                forwarded.append('' if param == 'void' else param)
            realCall = ('cudaRuntimeFunctionPointer[' + FuncNameToEnum(sig[1]) + '].'
                        + sig[1] + 'Real(' + ', '.join(forwarded) + ');\n')

            fp.write('\t' + sig[0] + sig[1] + ' (' + sig[2] + ') {\n')
            # Fast path: not safe to sync, just forward the call.
            fp.write('if (! hpcrun_is_safe_to_sync(__func__)) {')
            fp.write(' return ' + realCall)
            fp.write('}\n')
            # Slow path: bracket the real call with the sync bookkeeping.
            fp.write('TD_GET(gpu_data.is_thread_at_cuda_sync) = true;\n')
            fp.write('monitor_disable_new_threads();\n')
            fp.write('cudaError_t ret = ' + realCall)
            fp.write('monitor_enable_new_threads();\n')
            fp.write('TD_GET(gpu_data.is_thread_at_cuda_sync) = false;\n')
            fp.write('return ret;\n')
            fp.write('}\n')
387 
388 
389 
390 
391 
392 
393 
394 
395 
# Earlier variants of the patterns, kept for reference. They additionally
# matched the CUDAAPI / __host__ / CUDARTAPI tokens.
# NOTE(review): the current patterns presumably expect those tokens to have
# been removed from the input beforehand -- confirm against the build rules.
#cuPattern = '\s*(CUresult[\s\n]+)(CUDAAPI[\s\n]+)(cu[a-zA-Z0-9_]*[\s\n]*)\(([^;]*)\)[\s\n]*;'
#cudaPattern = '\s*extern[\s\n]+__host__[\s\n]+(cudaError_t[\s\n]+)(CUDARTAPI[\s\n]+)(cuda[a-zA-Z0-9_]*[\s\n]*)\(([^;]*)\)[\s\n]*;'

# Prototype patterns. Groups: 1 = return type (with trailing whitespace),
# 2 = function name, 3 = raw parameter list.
cuPattern = r'\s*(CUresult[\s\n]+)(cu[a-zA-Z0-9_]*[\s\n]*)\(([^;]*)\)[\s\n]*;'
cudaPattern = r'\s*extern[\s\n]+(cudaError_t[\s\n]+)(cuda[a-zA-Z0-9_]*[\s\n]*)\(([^;]*)\)[\s\n]*;'

# Matches one "__dv(<default>)" annotation. The match stops at the first ')'
# so that a parameter lying between two defaulted arguments on the same line
# is not deleted along with them.
defaultValue = re.compile(r'__dv\s*\([^)]*\)')


def ParseSignatures(text, pattern):
    """Extract function signatures from header text.

    text    -- contents of the prototype file.
    pattern -- cuPattern or cudaPattern.

    Returns a list of (return type, function name, parameter list) string
    tuples in order of appearance. The return type keeps its trailing
    whitespace because the generators concatenate it directly with the name;
    the name is stripped; "__dv(...)" annotations are removed from the
    parameter list.
    """
    signatures = []
    for match in re.finditer(pattern, text, re.MULTILINE):
        funcPrefix = match.group(1)
        # The pattern lets whitespace follow the name; drop it so skip-list
        # lookups and the emitted C string literals see the bare identifier.
        funcName = match.group(2).strip()
        noDefaultArgs = defaultValue.sub('', match.group(3))
        signatures.append((funcPrefix, funcName, noDefaultArgs))
    return signatures


def main(argv):
    """Generate the header, table and wrapper files for one CUDA API.

    argv -- [program, 'driver' | 'runtime', prototype file, header output,
             table output, wrapper output].

    Exits with status -1 on a wrong argument count or an unknown API kind.
    """
    if len(argv) < 6:
        sys.stderr.write('usage: ' + argv[0] + ' driver|runtime <prototype file>'
                         ' <header out> <table out> <wrapper out>\n')
        sys.exit(-1)

    with open(argv[2]) as fp:
        inFile = fp.read()

    generatedHeaderFile = argv[3]
    generatedTableFile = argv[4]
    generatedWrapperFile = argv[5]

    if argv[1] == 'driver':
        pattern = cuPattern
    elif argv[1] == 'runtime':
        pattern = cudaPattern
    else:
        print('Invalid pattern')
        sys.exit(-1)

    signatures = ParseSignatures(inFile, pattern)

    if argv[1] == 'driver':
        WriteDriverFunctionPointerHeader(generatedHeaderFile, signatures)
        WritecuDriverFunctionPointerTable(generatedTableFile, signatures)
        WriteDriverFunctionWrapper(generatedWrapperFile, signatures)
    else:
        WriteRuntimeFunctionPointerHeader(generatedHeaderFile, signatures)
        WritecuRuntimeFunctionPointerTable(generatedTableFile, signatures)
        WriteRuntimeFunctionWrapper(generatedWrapperFile, signatures)


if __name__ == '__main__':
    main(sys.argv)
447 
def WritecuDriverFunctionPointerTable(file, funcNames)
def WriteRuntimeFunctionWrapper(file, funcSig)
endif ''')
def WriteDriverFunctionPointerHeader(file, funcSig)
exit
Definition: names.cpp:1
def WritecuRuntimeFunctionPointerTable(file, funcNames)
def FuncNameToCapitalizedEnum(name)
ssize_t MONITOR_EXT_WRAP_NAME() read(int fd, void *buf, size_t count)
Definition: io-over.c:152
def WriteDriverFunctionWrapper(file, funcSig)
def WriteRuntimeFunctionPointerHeader(file, funcSig)