HPCToolkit
linux_perf.c
Go to the documentation of this file.
1 // -*-Mode: C++;-*- // technically C99
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // --------------------------------------------------------------------------
6 // Part of HPCToolkit (hpctoolkit.org)
7 //
8 // Information about sources of support for research and development of
9 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
10 // --------------------------------------------------------------------------
11 //
12 // Copyright ((c)) 2002-2019, Rice University
13 // All rights reserved.
14 //
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met:
18 //
19 // * Redistributions of source code must retain the above copyright
20 // notice, this list of conditions and the following disclaimer.
21 //
22 // * Redistributions in binary form must reproduce the above copyright
23 // notice, this list of conditions and the following disclaimer in the
24 // documentation and/or other materials provided with the distribution.
25 //
26 // * Neither the name of Rice University (RICE) nor the names of its
27 // contributors may be used to endorse or promote products derived from
28 // this software without specific prior written permission.
29 //
30 // This software is provided by RICE and contributors "as is" and any
31 // express or implied warranties, including, but not limited to, the
32 // implied warranties of merchantability and fitness for a particular
33 // purpose are disclaimed. In no event shall RICE or contributors be
34 // liable for any direct, indirect, incidental, special, exemplary, or
35 // consequential damages (including, but not limited to, procurement of
36 // substitute goods or services; loss of use, data, or profits; or
37 // business interruption) however caused and on any theory of liability,
38 // whether in contract, strict liability, or tort (including negligence
39 // or otherwise) arising in any way out of the use of this software, even
40 // if advised of the possibility of such damage.
41 //
42 // ******************************************************* EndRiceCopyright *
43 
44 //
45 // Linux perf sample source interface
46 //
47 
48 
49 /******************************************************************************
50  * system includes
51  *****************************************************************************/
52 
53 #include <assert.h>
54 #include <errno.h>
55 #include <fcntl.h>
56 #include <signal.h>
57 #include <stdio.h>
58 #include <stdint.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <time.h>
62 #include <unistd.h>
63 #include <math.h>
64 
65 #include <sys/syscall.h>
66 #include <sys/stat.h>
67 #include <sys/ioctl.h>
68 #include <sys/wait.h>
69 
70 /******************************************************************************
71  * linux specific headers
72  *****************************************************************************/
73 #include <linux/perf_event.h>
74 #include <linux/version.h>
75 
76 
77 /******************************************************************************
78  * libmonitor
79  *****************************************************************************/
80 #include <monitor.h>
81 
82 
83 
84 /******************************************************************************
85  * local includes
86  *****************************************************************************/
87 
90 #include "sample-sources/common.h"
92 
94 #include <hpcrun/files.h>
95 #include <hpcrun/hpcrun_stats.h>
96 #include <hpcrun/loadmap.h>
98 #include <hpcrun/metrics.h>
99 #include <hpcrun/safe-sampling.h>
100 #include <hpcrun/sample_event.h>
105 
106 #include <evlist.h>
107 #include <limits.h> // PATH_MAX
108 #include <lib/prof-lean/hpcrun-metric.h> // prefix for metric helper
109 #include <lib/support-lean/OSUtil.h> // hostid
110 
111 #include <include/linux_info.h>
112 
113 #include "perfmon-util.h"
114 
115 #include "perf-util.h" // u64, u32 and perf_mmap_data_t
116 #include "perf_mmap.h" // api for parsing mmapped buffer
117 #include "perf_skid.h"
118 #include "perf_event_open.h"
119 
120 #include "event_custom.h" // api for pre-defined events
121 
122 #include "sample-sources/display.h" // api to display available events
123 
124 #include "kernel_blocking.h" // api for predefined kernel blocking event
125 #include "sample-sources/datacentric/datacentric.h" // api for datacentric
126 #include "sample-sources/datacentric/memaddress.h" // api for address centric
127 
129 
130 //******************************************************************************
131 // macros
132 //******************************************************************************
133 
134 
135 #define LINUX_PERF_DEBUG 0
136 
137 // default number of samples per second per thread
138 //
139 // linux perf has a default of 4000. this seems high, but the overhead for perf
140 // is still small. however, for some processors (e.g., KNL), overhead
141 // at such a high sampling rate is significant and as a result, the kernel
142 // will adjust the threshold to less than 100.
143 //
144 // 300 samples per sec with hpctoolkit has a similar overhead as perf
145 #define DEFAULT_THRESHOLD HPCRUN_DEFAULT_SAMPLE_RATE
146 
147 #ifndef sigev_notify_thread_id
148 #define sigev_notify_thread_id _sigev_un._tid
149 #endif
150 
151 // replace SIGIO with SIGRTMIN to support multiple events
152 // We know that:
153 // - realtime uses SIGRTMIN+3
154 // - PAPI uses SIGRTMIN+2
155 // so SIGRTMIN+4 is a safe bet (temporarily)
156 #define PERF_SIGNAL (SIGRTMIN+4)
157 
158 #define PERF_EVENT_AVAILABLE_UNKNOWN 0
159 #define PERF_EVENT_AVAILABLE_NO 1
160 #define PERF_EVENT_AVAILABLE_YES 2
161 
162 #define PERF_MULTIPLEX_RANGE 1.2
163 
164 #define FILE_BUFFER_SIZE (1024*1024)
165 
166 #define DEFAULT_COMPRESSION 5
167 
168 #define PERIOD_THRESHOLD 1
169 #define PERIOD_FREQUENCY 2
170 #define PERIOD_DEFAULT 0
171 
172 #define PERF_FD_FINALIZED (-2)
173 
174 
175 //******************************************************************************
176 // type declarations
177 //******************************************************************************
178 
179 
180 
181 //******************************************************************************
182 // forward declarations
183 //******************************************************************************
184 
185 static void
186 perf_thread_fini(int nevents, event_thread_t *event_thread);
187 
188 static int
189 perf_event_handler( int sig, siginfo_t* siginfo, void* context);
190 
191 
192 //******************************************************************************
193 // constants
194 //******************************************************************************
195 
196 static const struct timespec nowait = {0, 0};
197 
198 
199 
200 //******************************************************************************
201 // local variables
202 //******************************************************************************
203 
204 
205 
206 
207 /******************************************************************************
208  * external thread-local variables
209  *****************************************************************************/
210 extern __thread bool hpcrun_thread_suppress_sample;
211 
212 
213 //******************************************************************************
214 // private operations
215 //******************************************************************************
216 
217 
218 /*
219  * determine whether the perf sample source has been finalized for this thread
220  */
221 static int
223 (
224  int nevents,
225  event_thread_t *event_thread
226 )
227 {
228  return nevents >= 1 && event_thread[0].fd == PERF_FD_FINALIZED;
229 }
230 
231 
232 /*
233  * Enable all the counters
234  */
235 static void
236 perf_start_all(int nevents, event_thread_t *event_thread)
237 {
238  int i, ret;
239 
240  for(i=0; i<nevents; i++) {
241  int fd = event_thread[i].fd;
242  if (fd<0)
243  continue;
244 
245  ret = ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
246 
247  if (ret == -1) {
248  EMSG("Can't enable event with fd: %d: %s", fd, strerror(errno));
249  }
250  }
251 }
252 
253 /*
254  * Disable all the counters
255  */
256 static void
257 perf_stop_all(int nevents, event_thread_t *event_thread)
258 {
259  int i, ret;
260 
261  for(i=0; i<nevents; i++) {
262  int fd = event_thread[i].fd;
263  if (fd<0)
264  continue;
265 
266  ret = ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
267  if (ret == -1) {
268  EMSG("Can't disable event with fd: %d: %s", fd, strerror(errno));
269  }
270  }
271 }
272 
/*
 * Thin wrapper around pfmu_getEventAttribute.
 *
 * name:       event name to look up
 * event_attr: attribute structure handed to pfmu_getEventAttribute
 *
 * return whatever pfmu_getEventAttribute returns (callers in this file
 * treat a negative value as "unknown event").
 */
static int
perf_get_pmu_support(const char *name, struct perf_event_attr *event_attr)
{
  const int pmu_status = pfmu_getEventAttribute(name, event_attr);
  return pmu_status;
}
278 
279 /****
280  * copy /proc/kallsyms file into hpctoolkit output directory
281  * return
282  * 1 if the copy is successful
283  * 0 if the target file already exists
284  * -1 if something wrong happens
285  */
286 static int
288 {
289  char *source = LINUX_KERNEL_SYMBOL_FILE;
290 
291  FILE *infile = fopen(source, "r");
292  if (infile == NULL)
293  return -1;
294 
295  char dest[PATH_MAX], kernel_name[PATH_MAX];
296  char dest_directory[PATH_MAX];
298 
299  snprintf(dest_directory, PATH_MAX, "%s/%s", output_directory,
301 
302  OSUtil_setCustomKernelName(kernel_name, PATH_MAX);
303 
304  // we need to keep the host-id to be exactly the same template
305  // as the hpcrun file. If the filename format changes in hpcrun
306  // we need to adapt again here.
307 
308  snprintf(dest, PATH_MAX, "%s/%s", dest_directory, kernel_name);
309 
310  // test if the file already exist
311  struct stat st = {0};
312  if (stat(dest, &st) >= 0) {
313  return 0; // file already exists
314  }
315 
316  mkdir(dest_directory, S_IRWXU | S_IRGRP | S_IXGRP);
317 
318  FILE *outfile = fopen(dest, "wx");
319 
320  if (outfile == NULL)
321  return -1;
322 
323  compress_deflate(infile, outfile, DEFAULT_COMPRESSION);
324 
325  fclose(infile);
326  fclose(outfile);
327 
328  TMSG(LINUX_PERF, "copy %s into %s", source, dest);
329 
330  return 1;
331 }
332 
333 //----------------------------------------------------------
334 // initialization
335 //----------------------------------------------------------
336 
337 static void
339 {
340  // copy /proc/kallsyms file into hpctoolkit output directory
341  // only if the value of kptr_restrict is zero
342 
344  //copy the kernel symbol table
345  int ret = copy_kallsyms();
346  TMSG(LINUX_PERF, "copy_kallsyms result: %d", ret);
347  }
348 
349  perf_mmap_init();
350 
351  // initialize sigset to contain PERF_SIGNAL
352  sigset_t sig_mask;
353  sigemptyset(&sig_mask);
354  sigaddset(&sig_mask, PERF_SIGNAL);
355 
356  // arrange to block monitor shootdown signal while in perf_event_handler
357  // FIXME: this assumes that monitor's shootdown signal is SIGRTMIN+8
358  struct sigaction perf_sigaction;
359  sigemptyset(&perf_sigaction.sa_mask);
360  sigaddset(&perf_sigaction.sa_mask, SIGRTMIN+8);
361  perf_sigaction.sa_flags = 0;
362 
363  monitor_sigaction(PERF_SIGNAL, &perf_event_handler, 0, &perf_sigaction);
364  monitor_real_pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
365 }
366 
367 
368 
369 
370 //----------------------------------------------------------
371 // initialize an event
372 // event_num: event number
373 // name: name of event (has to be recognized by perf event)
374 // threshold: sampling threshold
375 //----------------------------------------------------------
376 static bool
378 {
379  // ask sys to "create" the event
380  // it returns -1 if it fails.
381  et->fd = perf_event_open(&(event->attr),
383  TMSG(LINUX_PERF, "event fd: %d, skid: %d, code: %d, type: %d, period: %d, freq: %d",
384  et->fd, event->attr.precise_ip, event->attr.config,
385  event->attr.type, event->attr.sample_freq, event->attr.freq);
386 
387  // check if perf_event_open is successful
388  if (et->fd < 0) {
389  EMSG("linux perf event open failed"
390  " fd: %d, skid: %d,"
391  " config: %d, type: %d, sample_freq: %d,"
392  " freq: %d, error: %s",
393  et->fd, event->attr.precise_ip,
394  event->attr.config, event->attr.type, event->attr.sample_freq,
395  event->attr.freq, strerror(errno));
396  return false;
397  }
398 
399  // create mmap buffer for this file
400  et->mmap = set_mmap(et->fd);
401 
402  // make sure the file i/o is asynchronous
403  int flag = fcntl(et->fd, F_GETFL, 0);
404  int ret = fcntl(et->fd, F_SETFL, flag | O_ASYNC );
405  if (ret == -1) {
406  EMSG("can't set notification for event fd: %d: %s",
407  et->fd, strerror(errno));
408  }
409 
410  // need to set perf_signal to this file descriptor
411  // to avoid poll_hup in the signal handler
412  ret = fcntl(et->fd, F_SETSIG, PERF_SIGNAL);
413  if (ret == -1) {
414  EMSG("can't set signal for event fd: %d: %s",
415  et->fd, strerror(errno));
416  }
417 
418  // set file descriptor owner to this specific thread
419  struct f_owner_ex owner;
420  owner.type = F_OWNER_TID;
421  owner.pid = syscall(SYS_gettid);
422  ret = fcntl(et->fd, F_SETOWN_EX, &owner);
423  if (ret == -1) {
424  EMSG("can't set thread owner for event fd: %d: %s",
425  et->fd, strerror(errno));
426  }
427 
428  ret = ioctl(et->fd, PERF_EVENT_IOC_RESET, 0);
429  if (ret == -1) {
430  EMSG("can't reset event fd: %d: %s",
431  et->fd, strerror(errno));
432  }
433  return (ret >= 0);
434 }
435 
436 
437 //----------------------------------------------------------
438 // actions when the program terminates:
439 // - unmap the memory
440 // - close file descriptors used by each event
441 //----------------------------------------------------------
442 static void
443 perf_thread_fini(int nevents, event_thread_t *event_thread)
444 {
445  // suppress perf signal while we shut down perf monitoring
446  sigset_t perf_sigset;
447  sigemptyset(&perf_sigset);
448  sigaddset(&perf_sigset, PERF_SIGNAL);
449  monitor_real_pthread_sigmask(SIG_BLOCK, &perf_sigset, NULL);
450 
451  for(int i=0; i<nevents; i++) {
452  if (!event_thread) {
453  continue; // in some situations, it is possible a shutdown signal is delivered
454  // while hpcrun is in the middle of abort.
455  // in this case, all information is null and we shouldn't
456  // start profiling.
457  }
458  if (event_thread[i].fd >= 0) {
459  close(event_thread[i].fd);
460  event_thread[i].fd = PERF_FD_FINALIZED;
461  }
462 
463  if (event_thread[i].mmap) {
464  perf_unmmap(event_thread[i].mmap);
465  event_thread[i].mmap = 0;
466  }
467  }
468 
469  // consume any pending perf signals for this thread
470  for (;;) {
471  siginfo_t siginfo;
472  // negative return value means no signals left pending
473  if (sigtimedwait(&perf_sigset, &siginfo, &nowait) < 0) break;
474  }
475 }
476 
477 
478 // ---------------------------------------------
479 // get the index of the file descriptor
480 // ---------------------------------------------
481 
482 static int
483 get_fd_index(int nevents, int fd, event_thread_t *event_thread)
484 {
485  for(int i=0; i<nevents; i++) {
486  if (event_thread[i].fd == fd)
487  return i;
488  }
489  return -1;
490 }
491 
492 /***
493  * record a sample.
494  * output: pointer to sample node sv if successful
495  *
496  * return 0 if the record is not valid
497  * return metric value if successful
498  */
499 static double
501  void* context, int metric, int freq, sample_val_t *sv)
502 {
503  if (current == NULL)
504  return 0.0;
505 
506  // ----------------------------------------------------------------------------
507  // for event with frequency, we need to increase the counter by its period
508  // sampling taken by perf event kernel
509  // ----------------------------------------------------------------------------
510  uint64_t metric_inc = 1;
511  if (freq==1 && mmap_data->period > 0)
512  metric_inc = mmap_data->period;
513 
514  // ----------------------------------------------------------------------------
515  // record time enabled and time running
516  // if the time enabled is not the same as running time, then it's multiplexed
517  // ----------------------------------------------------------------------------
518  u64 time_enabled = current->mmap->time_enabled;
519  u64 time_running = current->mmap->time_running;
520 
521  // ----------------------------------------------------------------------------
522  // the estimate count = raw_count * scale_factor
523  // = metric_inc * time_enabled / time running
524  // ----------------------------------------------------------------------------
525  double scale_f = (double) time_enabled / time_running;
526 
527  // for period-based sampling with no multiplexing, there is no need to adjust
528  // the scale. also for software event. for them, the value of time_enabled
529  // and time_running are incorrect (the ratio is less than 1 which doesn't make sense)
530 
531  if (scale_f < 1.0)
532  scale_f = 1.0;
533 
534  // if PERF_SAMPLE_WEIGHT is enabled, we need to consider the counter with the weight
535  // to emphasize its costness
536 
537  int weight = (mmap_data->weight < 1 ? 1 : mmap_data->weight);
538 
539  double counter = scale_f * metric_inc * (double) weight;
540 
541  // ----------------------------------------------------------------------------
542  // set additional information for the metric description
543  // ----------------------------------------------------------------------------
545  metric_aux_info_t *info_aux = &(td->core_profile_trace_data.perf_event_info[metric]);
546 
547  // check if this event is multiplexed. we need to notify the user that a multiplexed
548  // event is not accurate at all.
549  // note: perf event can report the scale to be close to 1 (like 1.02 or 0.99).
550  // we need to use a range of value to see if it's multiplexed or not
551  info_aux->is_multiplexed |= (scale_f>PERF_MULTIPLEX_RANGE);
552 
553  // case of multiplexed or frequency-based sampling, we need to store the mean and
554  // the standard deviation of the sampling period
555  info_aux->num_samples++;
556  const double delta = counter - info_aux->threshold_mean;
557  info_aux->threshold_mean += delta / info_aux->num_samples;
558 
559  // ----------------------------------------------------------------------------
560  // update the cct and add callchain if necessary
561  // ----------------------------------------------------------------------------
562  sampling_info_t info;
563 
564  info.sample_clock = 0;
566 #if kernel_sampling_enabled
568 #else
570 #endif
571  info.sample_custom_cct.data_aux = mmap_data;
572 
573  *sv = hpcrun_sample_callpath(context, metric,
574  (hpcrun_metricVal_t) {.r=counter},
575  0/*skipinner*/, 0/*issync*/, &info);
576 
577  blame_shift_apply(metric, sv->sample_node, counter /*metricincr*/);
578 
579  return counter;
580 }
581 
/*
 * check whether an event name ends with a precise_ip modifier
 *
 * original_event: NUL-terminated event name
 *
 * return the position of the ':' that introduces the modifier (which is
 * also the length of the name without the modifier) when the name ends
 * with ":p" or ":P" and has at least one character in front of it;
 * return 0 otherwise.
 */
static size_t
exist_precise_ip_modifier(const char *original_event)
{
  size_t len = strlen(original_event);

  // a bare ":p" has no event name in front of it, hence "> 2"
  if (len > 2) {
    // precise_ip modifier is either :p or :P at the end
    const char modifier = original_event[len-1];
    int is_precise = (original_event[len-2] == ':') &&
                     (modifier == 'p' || modifier == 'P');

    if (is_precise)
      return len-2;
  }
  return 0;
}
602 
603 /******************************************************************************
604  * method functions
605  *****************************************************************************/
606 
607 // --------------------------------------------------------------------------
608 // event occurs when the sample source is initialized
609 // this method is called first before others
610 // --------------------------------------------------------------------------
611 static void
613 {
614  TMSG(LINUX_PERF, "%d: init", self->sel_idx);
615 
616  pfmu_init();
617 
618  perf_util_init();
619 
620  // checking the option of multiplexing:
621  // the env variable is set by hpcrun or by user (case for static exec)
622 
623  self->state = INIT;
624 
625  // init events
628  memcentric_init();
629 
630  TMSG(LINUX_PERF, "%d: init ok", self->sel_idx);
631 }
632 
633 
634 // --------------------------------------------------------------------------
635 // when a new thread is created and has been started
636 // this method is called after "start"
637 // --------------------------------------------------------------------------
638 static void
639 METHOD_FN(thread_init)
640 {
641  TMSG(LINUX_PERF, "%d: thread init", self->sel_idx);
642 
643  TMSG(LINUX_PERF, "%d: thread init ok", self->sel_idx);
644 }
645 
646 
647 // --------------------------------------------------------------------------
648 // start of the thread
649 // --------------------------------------------------------------------------
650 static void
651 METHOD_FN(thread_init_action)
652 {
653  TMSG(LINUX_PERF, "%d: thread init action", self->sel_idx);
654 
655  TMSG(LINUX_PERF, "%d: thread init action ok", self->sel_idx);
656 }
657 
658 
659 // --------------------------------------------------------------------------
660 // start of application thread
661 // --------------------------------------------------------------------------
662 static void
663 METHOD_FN(start)
664 {
665  TMSG(LINUX_PERF, "%d: start", self->sel_idx);
666 
667  source_state_t my_state = TD_GET(ss_state)[self->sel_idx];
668 
669  // make LINUX_PERF start idempotent. the application can turn on sampling
670  // anywhere via the start-stop interface, so we can't control what
671  // state LINUX_PERF is in.
672 
673  if (my_state == START) {
674  TMSG(LINUX_PERF,"%d: *note* LINUX_PERF start called when already in state start",
675  self->sel_idx);
676  return;
677  }
678 
679  int nevents = (self->evl).nevents;
680  event_thread_t *et = (event_thread_t *)TD_GET(ss_info)[self->sel_idx].ptr;
681 
682  // enable all perf_events
683  perf_start_all(nevents, et);
684 
686  td->ss_state[self->sel_idx] = START;
687 
688  TMSG(LINUX_PERF, "%d: start ok", self->sel_idx);
689 }
690 
691 // --------------------------------------------------------------------------
692 // end of thread
693 // --------------------------------------------------------------------------
694 static void
695 METHOD_FN(thread_fini_action)
696 {
697  TMSG(LINUX_PERF, "%d: unregister thread", self->sel_idx);
698 
699  METHOD_CALL(self, stop); // stop the sample source
700 
701  event_thread_t *event_thread = TD_GET(ss_info)[self->sel_idx].ptr;
702  int nevents = self->evl.nevents;
703 
704  perf_thread_fini(nevents, event_thread);
705 
706  self->state = UNINIT;
707 
708  TMSG(LINUX_PERF, "%d: unregister thread ok", self->sel_idx);
709 }
710 
711 
712 // --------------------------------------------------------------------------
713 // end of the application
714 // --------------------------------------------------------------------------
715 static void
717 {
718  TMSG(LINUX_PERF, "%d: stop", self->sel_idx);
719 
720  source_state_t my_state = TD_GET(ss_state)[self->sel_idx];
721  if (my_state == STOP) {
722  TMSG(LINUX_PERF,"%d: *note* perf stop called when already in state stop",
723  self->sel_idx);
724  return;
725  }
726 
727  if (my_state != START) {
728  TMSG(LINUX_PERF,"%d: *warning* perf stop called when not in state start",
729  self->sel_idx);
730  return;
731  }
732 
733  event_thread_t *event_thread = TD_GET(ss_info)[self->sel_idx].ptr;
734  int nevents = self->evl.nevents;
735 
736  perf_stop_all(nevents, event_thread);
737 
739  td->ss_state[self->sel_idx] = STOP;
740 
741  TMSG(LINUX_PERF, "%d: stop ok", self->sel_idx);
742 }
743 
744 // --------------------------------------------------------------------------
745 // really end
746 // --------------------------------------------------------------------------
747 static void
748 METHOD_FN(shutdown)
749 {
750  TMSG(LINUX_PERF, "shutdown");
751 
752  METHOD_CALL(self, stop); // stop the sample source
753 
754  event_thread_t *event_thread = TD_GET(ss_info)[self->sel_idx].ptr;
755  int nevents = self->evl.nevents;
756 
757  perf_thread_fini(nevents, event_thread);
758 
759  self->state = UNINIT;
760 
761  TMSG(LINUX_PERF, "shutdown ok");
762 }
763 
764 
765 // --------------------------------------------------------------------------
766 // return true if linux perf recognizes the name, whether supported or not.
767 // we'll handle unsupported events later.
768 // --------------------------------------------------------------------------
769 static bool
770 METHOD_FN(supports_event, const char *ev_str)
771 {
772  TMSG(LINUX_PERF, "supports event %s", ev_str);
773 
774  if (self->state == UNINIT){
775  METHOD_CALL(self, init);
776  }
777 
778  // extract the event name and the threshold (unneeded in this phase)
779  long thresh;
780  char *ev_tmp;
781 
782  // check if the user specifies explicitly precise event
783  perf_skid_parse_event(ev_str, &ev_tmp);
784 
785  hpcrun_extract_ev_thresh(ev_tmp, strlen(ev_tmp), ev_tmp, &thresh, 0) ;
786 
787  // check if the event is a predefined event
788  if (event_custom_find(ev_tmp) != NULL) {
789  free(ev_tmp);
790  return true;
791  }
792 
793  size_t precise_ip_pos = exist_precise_ip_modifier(ev_tmp);
794  if (precise_ip_pos > 0) {
795  ev_tmp[precise_ip_pos] = '\0';
796  }
797  // this is not a predefined event, we need to consult to perfmon (if enabled)
798  bool retval = pfmu_isSupported(ev_tmp) >= 0;
799  free(ev_tmp);
800  return retval;
801 }
802 
803 
804 
805 // --------------------------------------------------------------------------
806 // handle a list of events
807 // --------------------------------------------------------------------------
808 static void
809 METHOD_FN(process_event_list, int lush_metrics)
810 {
811  TMSG(LINUX_PERF, "process event list");
812 
814  char *event;
815  char *evlist = METHOD_CALL(self, get_event_str);
816  int i=0;
817 
818  struct event_threshold_s default_threshold;
819  perf_util_get_default_threshold( &default_threshold );
820 
821  // ----------------------------------------------------------------------
822  // for each perf's event, create the metric descriptor which will be used later
823  // during thread initialization for perf event creation
824  // ----------------------------------------------------------------------
825  for (event = start_tok(evlist); more_tok(); event = next_tok(), i++) {
826  char *name;
827  long threshold = 1;
828 
829  TMSG(LINUX_PERF,"checking event spec = %s",event);
830 
831  perf_skid_parse_event(event, &name);
832  int period_type = hpcrun_extract_ev_thresh(name, strlen(name), name, &threshold,
833  default_threshold.threshold_num);
834 
835  // ------------------------------------------------------------
836  // need a special case if we have our own customized predefined event
837  // this "customized" event will use one or more perf events
838  // ------------------------------------------------------------
839 
840  if (event_custom_create_event(self, name) > 0) {
841  continue;
842  }
843 
844  // remove precise_ip modifier from event's name when necessary
845  size_t precise_ip_pos = exist_precise_ip_modifier(name);
846  if (precise_ip_pos > 0) {
847  name[precise_ip_pos] = '\0';
848  }
849 
851  struct perf_event_attr *event_attr = &event->attr;
852 
853  int ispmu = perf_get_pmu_support(name, event_attr);
854  if (ispmu < 0)
855  // case for unknown event
856  // it is impossible to be here, unless the code is buggy
857  continue;
858 
859  bool is_period = (period_type == PERIOD_THRESHOLD);
860 
861  // ------------------------------------------------------------
862  // initialize the generic perf event attributes for this event
863  // all threads and file descriptor will reuse the same attributes.
864  // ------------------------------------------------------------
865  perf_util_attr_init(name, event_attr, is_period, threshold, 0);
866 
867  if (precise_ip_pos>0) {
868  perf_skid_set_max_precise_ip(event_attr);
869  }
870  // ------------------------------------------------------------
871  // initialize the property of the metric
872  // if the metric's name contains "cycles", it is most likely a cycle metric.
873  // this assumption is not always true, but it is close enough
874  // ------------------------------------------------------------
875 
876  if (strcasestr(name, "cycles") != NULL) {
877  prop = metric_property_cycles;
879  } else {
880  prop = metric_property_none;
881  }
882 
883  char *name_dup = strdup(name); // we need to duplicate the name of the metric until the end
884  // since the os will free it, we don't have to do it in hpcrun
885  int metric = hpcrun_new_metric();
886 
887  // ------------------------------------------------------------
888  // if we use frequency (event_type=1) then the period is not deterministic,
889  // it can change dynamically. in this case, the period is 1
890  // ------------------------------------------------------------
891  if (!is_period) {
892  threshold = 1;
893  }
894  metric_desc_t *metric_desc = hpcrun_set_metric_info_and_period(metric, name_dup,
895  MetricFlags_ValFmt_Real, threshold, prop);
896 
897  if (metric_desc == NULL) {
898  EMSG("error: unable to create metric #%d: %s", index, name);
899  } else {
900  metric_desc->is_frequency_metric = (event->attr.freq == 1);
901  }
902 
903  int index = METHOD_CALL(self, store_event_and_info,
904  event_attr->config, threshold, metric, event);;
905  if (index < 0) {
906  EMSG("error: cannot create event %s (%d)", name, event_attr->config);
907  }
908  free(name);
909  }
910 
911  int nevents = self->evl.nevents;
912  if (nevents > 0)
913  perf_init();
914 }
915 
916 
917 // --------------------------------------------------------------------------
918 // --------------------------------------------------------------------------
919 static void
920 METHOD_FN(gen_event_set, int lush_metrics)
921 {
922  TMSG(LINUX_PERF, "gen_event_set");
923 
924  int nevents = self->evl.nevents;
925  int num_metrics = hpcrun_get_num_metrics();
926 
927  // -------------------------------------------------------------------------
928  // TODO: we need to fix this allocation.
929  // there is no need to allocate a memory if we are reusing thread data
930  // -------------------------------------------------------------------------
931 
932  // a list of event information, private for each thread
933  event_thread_t *event_thread = (event_thread_t*) hpcrun_malloc(sizeof(event_thread_t) * nevents);
934 
935  // allocate and initialize perf_event additional metric info
936 
937  size_t mem_metrics_size = num_metrics * sizeof(metric_aux_info_t);
938  metric_aux_info_t* aux_info = (metric_aux_info_t*) hpcrun_malloc(mem_metrics_size);
939  memset(aux_info, 0, mem_metrics_size);
940 
942 
944  td->ss_info[self->sel_idx].ptr = event_thread;
945 
946  // setup all requested events
947  // if an event cannot be initialized, we still keep it in our list
948  // but there will be no samples
949  for (int i=0; i<nevents; i++)
950  {
951  // initialize this event. If it's valid, we set the metric for the event
952  event_info_t *event_desc = (event_info_t*) self->evl.events[i].event_info;
953  if (!perf_thread_init( event_desc, &(event_thread[i])) ) {
954  metric_desc_t *mdesc = hpcrun_id2metric(i);
955  EEMSG("Failed to initialize event %d (%s): %s", i, mdesc->name, strerror(errno));
956  exit(1);
957  }
958  }
959 
960  TMSG(LINUX_PERF, "gen_event_set OK");
961 }
962 
963 
964 // --------------------------------------------------------------------------
965 // list events
966 // --------------------------------------------------------------------------
967 static void
968 METHOD_FN(display_events)
969 {
970  event_custom_display(stdout);
971 
972  display_header(stdout, "Available Linux perf events");
973 
975  printf("\n");
976 }
977 
978 
// --------------------------------------------------------------------------
// read a counter from the file descriptor,
// and returns the value of the counter
// Note: this function is used for debugging purpose in gdb
//
// fd: file descriptor to read from
//
// return 0 when fd is not positive,
//        -1 when the read fails or delivers no data,
//        otherwise the leading decimal number found in the bytes read
//        (0 when the data does not start with a number)
// --------------------------------------------------------------------------
long
read_fd(int fd)
{
  char buffer[1024];

  if (fd <= 0)
    return 0;

  // read() returns a signed count: -1 signals an error and must not be
  // mistaken for a large positive size. one byte is kept free so that the
  // data can always be NUL-terminated before it is parsed.
  ssize_t nbytes = read(fd, buffer, sizeof(buffer) - 1);
  if (nbytes <= 0)
    return -1;

  buffer[nbytes] = '\0';
  return strtol(buffer, NULL, 10);
}
997 
998 
999 
1000 /***************************************************************************
1001  * object
1002  ***************************************************************************/
1003 
1004 #define ss_name linux_perf
1005 #define ss_cls SS_HARDWARE
1006 #define ss_sort_order 60
1007 
1008 #include "sample-sources/ss_obj.h"
1009 
1010 // ---------------------------------------------
1011 // signal handler
1012 // ---------------------------------------------
1013 
// Signal handler for perf event samples. The cross-reference index of this
// listing names it perf_event_handler(int sig, siginfo_t *siginfo,
// void *context); the line carrying the name (listing line 1015) is missing
// from this listing.
//
// sig:     signal number (not referenced in the visible body)
// siginfo: signal details; si_code and si_fd are read
// context: interrupted context; passed to hpcrun_context_pc, record_sample
//          and the custom event handler
//
// returns: 0 to tell monitor the signal has been handled,
//          1 to tell monitor the signal has not been handled
//
// NOTE(review): several listing lines inside this function are absent
// (1021, 1032-1034, 1050, 1066, 1075-1076, 1096, 1120, 1168); confirm the
// dropped statements against the original file before editing.
1014 static int
1016  int sig,
1017  siginfo_t* siginfo,
1018  void* context
1019 )
1020 {
1022 
1023  // ----------------------------------------------------------------------------
1024  // check #0:
1025  // if the interrupt came while inside our code, then drop the sample
1026  // and return and avoid the potential for deadlock.
1027  // ----------------------------------------------------------------------------
1028 
1029  void *pc = hpcrun_context_pc(context);
1030 
1031  if (! hpcrun_safe_enter_async(pc)) {
1033 
1035 
1036  return 0; // tell monitor that the signal has been handled
1037  }
1038 
1039  // ----------------------------------------------------------------------------
1040  // disable all counters
1041  // ----------------------------------------------------------------------------
1042 
1043  sample_source_t *self = &obj_name();
1044  event_thread_t *event_thread = TD_GET(ss_info)[self->sel_idx].ptr;
1045 
1046  int nevents = self->evl.nevents;
1047 
1048  // if finalized already, refuse to handle any more samples
1049  if (perf_was_finalized(nevents, event_thread)) {
1051 
1052  return 0; // tell monitor that the signal has been handled
1053  }
1054 
1055  perf_stop_all(nevents, event_thread);
1056 
1057  // ----------------------------------------------------------------------------
1058  // check #1: check if signal generated by kernel for profiling
1059  // ----------------------------------------------------------------------------
1060 
1061  if (siginfo->si_code < 0) {
1062  TMSG(LINUX_PERF, "signal si_code %d < 0 indicates not from kernel",
1063  siginfo->si_code);
  // counters were stopped above; restart them before handing the signal back
1064  perf_start_all(nevents, event_thread);
1065 
1067 
1068  return 1; // tell monitor the signal has not been handled
1069  }
1070 
1071  // ----------------------------------------------------------------------------
1072  // check #2:
1073  // if sampling disabled explicitly for this thread, skip all processing
1074  // ----------------------------------------------------------------------------
  // NOTE(review): the `if` that opens the block closed below (listing lines
  // 1075-1076) is missing from this listing; as shown, the braces do not
  // balance. Presumably it tests a per-thread "suppress sample" flag --
  // confirm against the original file.
1077 
1078  return 0; // tell monitor that the signal has been handled
1079  }
1080 
1081  int fd = siginfo->si_fd;
1082 
1083  // ----------------------------------------------------------------------------
1084  // check #3: we expect only POLL_HUP, not POLL_IN
1085  // Sometimes we have signal code other than POLL_HUP
1086  // and still have valid information (x86 on les).
1087  // ----------------------------------------------------------------------------
1088 #if 0
1089  if (siginfo->si_code != POLL_HUP) {
1090  TMSG(LINUX_PERF, "signal si_code %d (fd: %d) not generated by signal %d",
1091  siginfo->si_code, siginfo->si_fd, PERF_SIGNAL);
1092 
1093  restart_perf_event(fd);
1094  perf_start_all(nevents, event_thread);
1095 
1097 
1098  return 0; // tell monitor the signal has not been handled.
1099  }
1100 #endif
1101 
1102  // ----------------------------------------------------------------------------
1103  // check #4:
1104  // check the index of the file descriptor (if we have multiple events)
1105  // if the file descriptor is not on the list, we shouldn't store the
1106  // metrics. Perhaps we should throw away?
1107  // ----------------------------------------------------------------------------
1108 
1109  int event_index = get_fd_index(nevents, fd, event_thread);
1110  event_thread_t *current = &(event_thread[event_index]);
1111 
  // NOTE(review): `current` is the address of an array element, so this
  // comparison is not a test of whether fd was found. Confirm what
  // get_fd_index returns for an unknown fd (its body is not visible here)
  // and check event_index before indexing event_thread.
1112  if (current == NULL) {
1113  // signal not from perf event
1114  TMSG(LINUX_PERF, "signal si_code %d with fd %d: unknown perf event",
1115  siginfo->si_code, fd);
1116  hpcrun_safe_exit();
1117 
1118  perf_start_all(nevents, event_thread);
1119 
1121 
1122  return 1; // tell monitor the signal has not been handled
1123  }
1124 
1125  // ----------------------------------------------------------------------------
1126  // parse the buffer until it finishes reading all buffers
1127  // ----------------------------------------------------------------------------
1128  event_info_t *event_info = (event_info_t *)self->evl.events[event_index].event_info;
1129  struct perf_event_attr *attr = &event_info->attr;
1130 
1131  int metric = self->evl.events[event_index].metric_id;
1132  int more_data = 0;
1133 
  // one iteration per record; read_perf_buffer's return value decides
  // whether another iteration is needed
1134  do {
1135  perf_mmap_data_t mmap_data;
1136  memset(&mmap_data, 0, sizeof(perf_mmap_data_t));
1137 
1138  // reading info from mmapped buffer
1139  more_data = read_perf_buffer(current->mmap, attr, &mmap_data);
1140 
1141  sample_val_t sv;
1142  memset(&sv, 0, sizeof(sample_val_t));
1143 
  // NOTE(review): this declaration shadows the outer `event_info` and is
  // initialized from the same expression.
1144  event_info_t *event_info = (event_info_t*) self->evl.events[event_index].event_info;
1145 
  // only PERF_RECORD_SAMPLE records are recorded and passed to the
  // custom event handler; other record types are read and ignored here
1146  if (mmap_data.header_type == PERF_RECORD_SAMPLE) {
1147 
1148  double val = record_sample(current, &mmap_data, context,
1149  metric, event_info->attr.freq, &sv);
1150 
1151  event_handler_arg_t arg;
1152  arg.context = context;
1153  arg.current = event_info;
1154  arg.data = &mmap_data;
1155  arg.metric = metric;
1156  arg.sample = &sv;
1157  arg.metric_value = val;
1158 
1159  event_custom_handler(&arg);
1160  }
1161 
1162  } while (more_data);
1163 
  // re-enable the counters stopped by perf_stop_all above
1164  perf_start_all(nevents, event_thread);
1165 
1166  hpcrun_safe_exit();
1167 
1169 
1170  return 0; // tell monitor that the signal has been handled
1171 }
1172 
#define GROUP_FD
struct event_info_s * current
Definition: event_custom.h:64
static void perf_thread_fini(int nevents, event_thread_t *event_thread)
Definition: linux_perf.c:443
bool is_frequency_metric
Definition: hpcrun-fmt.h:381
int read_perf_buffer(pe_mmap_t *current_perf_mmap, struct perf_event_attr *attr, perf_mmap_data_t *mmap_info)
Definition: perf_mmap.c:430
#define obj_name()
Definition: ss_obj.h:71
source_state_t
metric_aux_info_t * perf_event_info
static const struct timespec nowait
Definition: linux_perf.c:196
static struct perf_mem_metric metric
Definition: pmu_x86.c:114
static void perf_stop_all(int nevents, event_thread_t *event_thread)
Definition: linux_perf.c:257
void MONITOR_EXT_WRAP_NAME() free(void *ptr)
struct cct_custom_update_s sample_custom_cct
Definition: sample_event.h:121
#define PERF_SIGNAL
Definition: linux_perf.c:156
int perf_skid_set_max_precise_ip(struct perf_event_attr *attr)
Definition: perf_skid.c:176
pe_mmap_t * mmap
Definition: perf-util.h:153
static void hpcrun_safe_exit(void)
void memcentric_init()
Definition: memaddress.c:192
struct perf_event_attr attr
Definition: perf-util.h:138
cct_node_t *(* hpcrun_cct_update_after_t)(cct_node_t *path, void *data_aux)
sample_val_t hpcrun_sample_callpath(void *context, int metricId, hpcrun_metricVal_t metricIncr, int skipInner, int isSync, sampling_info_t *data)
Definition: sample_event.c:160
#define HPCTOOLKIT_APPLICATION_ERRNO_RESTORE()
Definition: ss-errno.h:64
void perf_unmmap(pe_mmap_t *mmap)
Definition: perf_mmap.c:527
const char * hpcrun_files_output_directory()
Definition: files.c:413
uint64_t sample_clock
Definition: sample_event.h:120
void blame_shift_source_register(bs_type bst)
Definition: blame-shift.c:25
int OSUtil_setCustomKernelName(char *buffer, size_t max_chars)
Definition: OSUtil.c:198
hpcrun_cct_update_after_t update_after_fn
static void perf_start_all(int nevents, event_thread_t *event_thread)
Definition: linux_perf.c:236
#define PERF_MULTIPLEX_RANGE
Definition: linux_perf.c:162
pe_mmap_t * set_mmap(int perf_fd)
Definition: perf_mmap.c:498
static size_t exist_precise_ip_modifier(const char *original_event)
Definition: linux_perf.c:587
static bool perf_thread_init(event_info_t *event, event_thread_t *et)
Definition: linux_perf.c:377
void datacentric_init()
Definition: datacentric.c:333
cct_node_t * sample_node
Definition: sample_event.h:96
int hpcrun_get_num_metrics()
Definition: metrics.c:209
#define PERF_FD_FINALIZED
Definition: linux_perf.c:172
void blame_shift_apply(int metric_id, cct_node_t *node, int metric_incr)
Definition: blame-shift.c:15
void hpcrun_stats_num_samples_blocked_async_inc(void)
Definition: hpcrun_stats.c:148
bool perf_util_is_ksym_available()
Definition: perf-util.c:339
cct_node_t * perf_util_add_kernel_callchain(cct_node_t *leaf, void *data_aux)
Definition: perf-util.c:358
char * next_tok(void)
Definition: tokenize.c:87
metric_desc_t * hpcrun_set_metric_info_and_period(int metric_id, const char *name, MetricFlags_ValFmt_t valFmt, size_t period, metric_desc_properties_t prop)
Definition: metrics.c:411
static int get_fd_index(int nevents, int fd, event_thread_t *event_thread)
Definition: linux_perf.c:483
uint64_t num_samples
Definition: hpcfmt.h:349
#define PERF_FLAGS
long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
int pfmu_isSupported(const char *eventname)
exit
Definition: names.cpp:1
int event_custom_handler(event_handler_arg_t *args)
Definition: event_custom.c:151
static void perf_init()
Definition: linux_perf.c:338
#define CPU_ANY
#define EMSG
Definition: messages.h:70
bool is_multiplexed
Definition: hpcfmt.h:346
source_state_t * ss_state
Definition: thread_data.h:150
static void METHOD_FN(init)
Definition: linux_perf.c:612
#define THREAD_SELF
#define metric_property_cycles
Definition: hpcrun-fmt.h:201
struct metric_aux_info_s metric_aux_info_t
double threshold_mean
Definition: hpcfmt.h:347
void perf_util_get_default_threshold(struct event_threshold_s *threshold)
Definition: perf-util.c:347
char * start_tok(char *lst)
Definition: tokenize.c:70
int event_custom_create_event(sample_source_t *self, char *name)
Definition: event_custom.c:136
int lush_metrics
Definition: main.c:188
core_profile_trace_data_t core_profile_trace_data
Definition: thread_data.h:168
void perf_util_init()
Definition: perf-util.c:303
#define DEFAULT_COMPRESSION
Definition: linux_perf.c:166
struct perf_mmap_data_s * data
Definition: event_custom.h:65
int pfmu_init()
void * hpcrun_malloc(size_t size)
Definition: mem.c:275
int pfmu_showEventList()
int perf_skid_parse_event(const char *event_string, char **event_string_without_skidmarks)
Definition: perf_skid.c:243
#define TD_GET(field)
Definition: thread_data.h:256
event_custom_t * event_custom_find(const char *name)
static int copy_kallsyms()
Definition: linux_perf.c:287
int perf_util_attr_init(const char *event_name, struct perf_event_attr *attr, bool usePeriod, u64 threshold, u64 sampletype)
Definition: perf-util.c:403
#define LINUX_KERNEL_SYMBOL_FILE
Definition: linux_info.h:8
hpcrun_cct_update_before_t update_before_fn
static char output_directory[PATH_MAX]
Definition: files.c:167
static int perf_get_pmu_support(const char *name, struct perf_event_attr *event_attr)
Definition: linux_perf.c:274
#define TMSG(f,...)
Definition: messages.h:93
__u64 u64
void event_custom_display(FILE *std)
Definition: event_custom.c:86
int hpcrun_extract_ev_thresh(const char *in, int evlen, char *ev, long *th, long def)
Definition: tokenize.c:157
static int hpcrun_safe_enter_async(void *pc)
int mkdir(const char *dir)
Definition: FileUtil.cpp:289
#define METHOD_CALL(obj, meth,...)
Definition: simple_oo.h:87
ssize_t MONITOR_EXT_WRAP_NAME() read(int fd, void *buf, size_t count)
Definition: io-over.c:152
__thread bool hpcrun_thread_suppress_sample
Definition: main.c:193
#define EEMSG(...)
Definition: messages.h:90
#define NULL
Definition: ElfHelper.cpp:85
static int perf_was_finalized(int nevents, event_thread_t *event_thread)
Definition: linux_perf.c:223
void perf_mmap_init()
Definition: perf_mmap.c:537
static int const threshold
int hpcrun_new_metric(void)
Definition: metrics.c:333
static double record_sample(event_thread_t *current, perf_mmap_data_t *mmap_data, void *context, int metric, int freq, sample_val_t *sv)
Definition: linux_perf.c:500
#define HPCTOOLKIT_APPLICATION_ERRNO_SAVE()
Definition: ss-errno.h:63
void display_header(FILE *output, const char *title)
Definition: display.c:127
long read_fd(int fd)
Definition: linux_perf.c:985
void * hpcrun_context_pc(void *context)
#define KERNEL_SYMBOLS_DIRECTORY
Definition: linux_info.h:14
#define PERIOD_THRESHOLD
Definition: linux_perf.c:168
static int perf_event_handler(int sig, siginfo_t *siginfo, void *context)
Definition: linux_perf.c:1015
source_info_t * ss_info
Definition: thread_data.h:151
void kernel_blocking_init()
int more_tok(void)
Definition: tokenize.c:78
enum compress_e compress_deflate(FILE *source, FILE *dest, int level)
metric_desc_t * hpcrun_id2metric(int metric_id)
Definition: metrics.c:251
thread_data_t *(* hpcrun_get_thread_data)(void)
Definition: thread_data.c:168
int pfmu_getEventAttribute(const char *eventname, struct perf_event_attr *event_attr)
#define metric_property_none
Definition: hpcrun-fmt.h:202
sample_val_t * sample
Definition: event_custom.h:62