103 #define OVERFLOW_MODE 0 104 #define NO_THRESHOLD 1L 106 #define PAPI_CUDA_COMPONENT_ID 1 107 #define CUPTI_LAUNCH_CALLBACK_DEPTH 7 116 CUpti_CallbackDomain
domain,
117 CUpti_CallbackId cbid,
118 const CUpti_CallbackData *cbInfo);
131 PAPI_set_debug(0x3ff);
136 monitor_disable_new_threads();
137 int ret = PAPI_library_init(PAPI_VER_CURRENT);
138 monitor_enable_new_threads();
140 TMSG(CUDA,
"PAPI_library_init = %d", ret);
141 TMSG(CUDA,
"PAPI_VER_CURRENT = %d", PAPI_VER_CURRENT);
142 if (ret != PAPI_VER_CURRENT){
143 STDERR_MSG(
"Fatal error: PAPI_library_init() failed with version mismatch.\n" 144 "HPCToolkit was compiled with version 0x%x but run on version 0x%x.\n" 145 "Check the HPCToolkit installation and try again.",
146 PAPI_VER_CURRENT, ret);
155 TMSG(CUDA,
"thread init");
156 int retval = PAPI_thread_init(pthread_self);
157 if (retval != PAPI_OK) {
158 EEMSG(
"PAPI_thread_init NOT ok, retval = %d", retval);
161 TMSG(CUDA,
"thread init OK");
167 TMSG(CUDA,
"register thread");
168 int retval = PAPI_register_thread();
169 if (retval != PAPI_OK) {
170 EEMSG(
"PAPI_register_thread NOT ok, retval = %d", retval);
173 TMSG(CUDA,
"register thread ok");
182 TMSG(CUDA,
"start called");
184 cuptiErr = cuptiSubscribe(&subscriber,
189 cuptiErr = cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_RUNTIME_API,
190 CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020);
199 TMSG(CUDA,
"unregister thread");
200 int rval = PAPI_unregister_thread();
201 if (rval != PAPI_OK) {
202 TMSG(CUDA,
"warning: CUDA PAPI_unregister_thread (%d): %s.",
203 rval, PAPI_strerror(rval));
212 int eventSet = td->eventSet[
self->evset_idx];
216 TMSG(CUDA,
"stop called");
218 if (my_state ==
STOP) {
219 TMSG(CUDA,
"PAPI CUDA stop called on an already stopped event set %d",eventSet);
223 if (my_state !=
START) {
224 TMSG(CUDA,
"*WARNING* PAPI CUDA stop called on event set that has not been started");
235 EEMSG(
"CUDA/PAPI shutdown from thread %d", thr_id);
237 EMSG(
"Shutdown op for cuda sample source called from thread %d", thr_id);
244 int eventSet = td->eventSet[
self->evset_idx];
249 rval = PAPI_cleanup_eventset(eventSet);
250 if (rval != PAPI_OK) {
251 TMSG(CUDA,
"warning: CUDA PAPI_cleanup_eventset (%d): %s.",
252 rval, PAPI_strerror(rval));
255 rval = PAPI_destroy_eventset(&eventSet);
256 if (rval != PAPI_OK) {
257 TMSG(CUDA,
"warning: CUDA PAPI_destroy_eventset (%d): %s.",
258 rval, PAPI_strerror(rval));
261 td->eventSet[
self->evset_idx] = PAPI_NULL;
269 #define CUDA_PREFIX "CUDA." 276 if (self->state ==
UNINIT){
289 return PAPI_event_name_to_code(evtmp, &ec) == PAPI_OK;
299 int num_lush_metrics = 0;
307 TMSG(CUDA,
"checking event spec = %s",event);
310 AMSG(
"WARNING: %s is specified with a sampling threshold. " 311 "No thresholds supported for CUDA events", name);
313 ret = PAPI_event_name_to_code(name, &evcode);
314 if (ret != PAPI_OK) {
315 EMSG(
"unexpected failure in PAPI process_event_list(): " 316 "PAPI_event_name_to_code() returned %s (%d)",
317 PAPI_strerror(ret), ret);
320 if (PAPI_query_event(evcode) != PAPI_OK) {
324 TMSG(CUDA,
"got event code = %x, thresh = %ld", evcode, thresh);
327 int nevents = (
self->evl).nevents;
328 TMSG(CUDA,
"nevents = %d", nevents);
332 for (i = 0; i < nevents; i++) {
333 char buffer[PAPI_MAX_STR_LEN];
336 PAPI_event_code_to_name(self->evl.events[i].event, buffer);
337 TMSG(CUDA,
"metric for event %d = %s", i, buffer);
340 self->evl.events[i].thresh);
351 eventSet = PAPI_NULL;
352 TMSG(CUDA,
"create event set");
353 ret = PAPI_create_eventset(&eventSet);
354 TMSG(CUDA,
"PAPI_create_eventset = %d, eventSet = %d", ret, eventSet);
355 if (ret != PAPI_OK) {
356 hpcrun_abort(
"Failure: PAPI_create_eventset.Return code = %d ==> %s",
357 ret, PAPI_strerror(ret));
360 int nevents = (
self->evl).nevents;
361 for (i = 0; i < nevents; i++) {
362 int evcode =
self->evl.events[i].event;
363 ret = PAPI_add_event(eventSet, evcode);
364 TMSG(CUDA,
"PAPI_add_event(eventSet=%d, event_code=%x)", eventSet, evcode);
365 if (ret != PAPI_OK) {
366 EMSG(
"failure in PAPI gen_event_set(): " 367 "PAPI_add_event() returned: %s (%d)",
368 PAPI_strerror(ret), ret);
374 td->eventSet[
self->evset_idx] = eventSet;
380 PAPI_event_info_t info;
382 int ev, ret, num_total;
384 printf(
"===========================================================================\n");
385 printf(
"Available CUDA events\n");
386 printf(
"===========================================================================\n");
387 printf(
"Name\t\t\t\tDescription\n");
388 printf(
"---------------------------------------------------------------------------\n");
390 #ifdef PAPI_COMPONENT_STUFF_FIGURED_OUT 391 const PAPI_component_info_t *pci = PAPI_get_component_info(1);
392 printf(
"PAPI component name '%s' '%s' '%s' '%s'\n", pci->name, pci->version,
393 pci->support_version, pci->kernel_version);
394 #endif // PAPI_COMPONENT_STUFF_FIGURED_OUT 399 #ifdef PAPI_ENUM_FIRST 400 ret = PAPI_enum_event(&ev, PAPI_ENUM_FIRST);
402 while (ret == PAPI_OK) {
403 if (PAPI_query_event(ev) == PAPI_OK) {
404 PAPI_event_code_to_name(ev, name);
406 PAPI_get_event_info(ev, &info);
408 printf(
"%-30s\t%s\n", name, info.long_descr);
411 ret = PAPI_enum_event(&ev, PAPI_ENUM_EVENTS);
413 printf(
"Total CUDA events: %d\n", num_total);
422 #define ss_cls SS_HARDWARE 435 PAPI_event_code_to_name(ev_code, name);
436 if (PAPI_query_event(ev_code) != PAPI_OK) {
439 if (papi_ret == PAPI_ECNFLCT) {
449 if (err != CUPTI_SUCCESS) {
451 cuptiGetResultString(err, &errstr);
452 #ifdef CUPTI_ERRORS_UNMYSTIFIED 454 "failed with message '%s' \n", cuptifunc, errstr);
455 #endif // CUPTI_ERRORS_UNMYSTIFIED 461 CUpti_CallbackDomain
domain,
462 CUpti_CallbackId cbid,
463 const CUpti_CallbackData *cbInfo)
468 int nevents =
self->evl.nevents;
469 int cudaEventSet = td->eventSet[
self->evset_idx];
472 if (cbid != CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020) {
474 "interface operation: callback id %d\n", cbid);
477 if (cbInfo->callbackSite == CUPTI_API_ENTER) {
478 cudaThreadSynchronize();
480 TMSG(CUDA,
"starting CUDA monitoring w event set %d",cudaEventSet);
481 int ret = PAPI_start(cudaEventSet);
483 EMSG(
"CUDA monitoring failed to start. PAPI_start failed with %s (%d)",
484 PAPI_strerror(ret), ret);
488 if (cbInfo->callbackSite == CUPTI_API_EXIT) {
489 cudaThreadSynchronize();
490 long_long *eventValues =
491 (long_long *) alloca(
sizeof(long_long) * (nevents+2));
493 TMSG(CUDA,
"stopping CUDA monitoring w event set %d",cudaEventSet);
494 PAPI_stop(cudaEventSet, eventValues);
495 TMSG(CUDA,
"stopped CUDA monitoring w event set %d",cudaEventSet);
498 TMSG(CUDA,
"getting context in CUDA event handler");
500 TMSG(CUDA,
"got context in CUDA event handler");
501 hpcrun_async_block();
502 TMSG(CUDA,
"blocked async event in CUDA event handler");
505 for (i = 0; i < nevents; i++)
509 TMSG(CUDA,
"sampling call path for metric_id = %d", metric_id);
513 TMSG(CUDA,
"sampled call path for metric_id = %d", metric_id);
516 TMSG(CUDA,
"unblocking async event in CUDA event handler");
517 hpcrun_async_unblock();
518 TMSG(CUDA,
"unblocked async event in CUDA event handler");
static void event_fatal_error(int ev_code, int papi_ret)
sample_val_t hpcrun_sample_callpath(void *context, int metricId, hpcrun_metricVal_t metricIncr, int skipInner, int isSync, sampling_info_t *data)
#define PAPI_CUDA_COMPONENT_ID
static void METHOD_FN(init)
#define hpcrun_abort(...)
static CUpti_SubscriberHandle subscriber
metric_desc_t * hpcrun_set_metric_info_and_period(int metric_id, const char *name, MetricFlags_ValFmt_t valFmt, size_t period, metric_desc_properties_t prop)
static void check_cupti_error(int err, char *cuptifunc)
#define CUPTI_LAUNCH_CALLBACK_DEPTH
void hpcrun_ssfail_unsupported(char *source, char *event)
char * start_tok(char *lst)
int hpcrun_extract_ev_thresh(const char *in, int evlen, char *ev, long *th, long def)
int hpcrun_event2metric(sample_source_t *ss, int event_idx)
#define METHOD_CALL(obj, meth,...)
int hpcrun_new_metric(void)
void monitor_real_abort(void)
void hpcrun_pre_allocate_metrics(size_t num)
thread_data_t *(* hpcrun_get_thread_data)(void)
static void hpcrun_cuda_kernel_callback(void *userdata, CUpti_CallbackDomain domain, CUpti_CallbackId cbid, const CUpti_CallbackData *cbInfo)
void hpcrun_ssfail_conflict(char *source, char *event)