HPCToolkit
perf_mmap.c
Go to the documentation of this file.
1 // -*-Mode: C++;-*- // technically C99
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // --------------------------------------------------------------------------
6 // Part of HPCToolkit (hpctoolkit.org)
7 //
8 // Information about sources of support for research and development of
9 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
10 // --------------------------------------------------------------------------
11 //
12 // Copyright ((c)) 2002-2019, Rice University
13 // All rights reserved.
14 //
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met:
18 //
19 // * Redistributions of source code must retain the above copyright
20 // notice, this list of conditions and the following disclaimer.
21 //
22 // * Redistributions in binary form must reproduce the above copyright
23 // notice, this list of conditions and the following disclaimer in the
24 // documentation and/or other materials provided with the distribution.
25 //
26 // * Neither the name of Rice University (RICE) nor the names of its
27 // contributors may be used to endorse or promote products derived from
28 // this software without specific prior written permission.
29 //
30 // This software is provided by RICE and contributors "as is" and any
31 // express or implied warranties, including, but not limited to, the
32 // implied warranties of merchantability and fitness for a particular
33 // purpose are disclaimed. In no event shall RICE or contributors be
34 // liable for any direct, indirect, incidental, special, exemplary, or
35 // consequential damages (including, but not limited to, procurement of
36 // substitute goods or services; loss of use, data, or profits; or
37 // business interruption) however caused and on any theory of liability,
38 // whether in contract, strict liability, or tort (including negligence
39 // or otherwise) arising in any way out of the use of this software, even
40 // if advised of the possibility of such damage.
41 //
42 // ******************************************************* EndRiceCopyright *
43 
44 //
45 // Linux perf mmaped-buffer reading interface
46 //
47 
48 /******************************************************************************
49  * system includes
50  *****************************************************************************/
51 
52 #include <assert.h>
53 #include <errno.h>
54 #include <sys/mman.h>
55 #include <string.h>
56 #include <unistd.h>
57 
58 /******************************************************************************
59  * linux specific includes
60  *****************************************************************************/
61 
62 #include <linux/perf_event.h>
63 #include <linux/version.h>
64 
65 
66 /******************************************************************************
67  * hpcrun includes
68  *****************************************************************************/
69 
71 
72 /******************************************************************************
73  * local include
74  *****************************************************************************/
75 
76 #include "perf_mmap.h"
77 #include "perf-util.h"
78 #include "perf_barrier.h"
79 
80 /******************************************************************************
81  * Constants
82  *****************************************************************************/
83 
// offset argument handed to mmap() when mapping the perf file descriptor
#define MMAP_OFFSET_0 0

// the data area spans 2^PERF_DATA_PAGE_EXP pages
#define PERF_DATA_PAGE_EXP 1
#define PERF_DATA_PAGES (1 << PERF_DATA_PAGE_EXP)

// total bytes to map: the data pages plus one extra page
#define PERF_MMAP_SIZE(pagesz) ((pagesz) * (PERF_DATA_PAGES + 1))

// bit mask covering every byte offset inside the data pages
#define PERF_TAIL_MASK(pagesz) (((pagesz) * PERF_DATA_PAGES) - 1)

91 
92 
93 
94 /******************************************************************************
95  * forward declarations
96  *****************************************************************************/
97 
98 static void
99 skip_perf_data(pe_mmap_t *current_perf_mmap, size_t sz)
100 __attribute__ ((unused));
101 
102 
103 
104 /******************************************************************************
105  * local variables
106  *****************************************************************************/
107 
// system page size; 0 until perf_mmap_init() has run
static int pagesize = 0;

// mask derived from pagesize via PERF_TAIL_MASK() in perf_mmap_init()
static size_t tail_mask = 0;
110 
111 
112 /******************************************************************************
113  * local methods
114  *****************************************************************************/
115 
116 
117 
118 static u64
120 {
121  u64 head = hdr->data_head;
122  rmb(); // required by the man page to issue a barrier for SMP-capable platforms
123  return head;
124 }
125 
126 /***
127  * number of reminder data in the buffer
128  */
129 static int
131 {
132  u64 head = perf_mmap_read_head(hdr);
133  return (head - hdr->data_tail);
134 }
135 
136 /***
137  * return true if we have more data to read
138  */
139 static int
141 {
142  return (num_of_more_perf_data(hdr) > 0);
143 }
144 
145 
146 //----------------------------------------------------------
147 // read from perf_events mmap'ed buffer
148 //----------------------------------------------------------
149 
150 static int
152  pe_mmap_t *current_perf_mmap,
153  void *buf,
154  size_t bytes_wanted
155 )
156 {
157  if (current_perf_mmap == NULL)
158  return -1;
159 
160  // front of the circular data buffer
161  char *data = BUFFER_FRONT(current_perf_mmap);
162 
163  // compute bytes available in the circular buffer
164  u64 data_head = perf_mmap_read_head(current_perf_mmap);
165 
166  size_t bytes_available = data_head - current_perf_mmap->data_tail;
167 
168  if (bytes_wanted > bytes_available) return -1;
169 
170  // compute offset of tail in the circular buffer
171  unsigned long tail = BUFFER_OFFSET(current_perf_mmap->data_tail);
172 
173  long bytes_at_right = BUFFER_SIZE - tail;
174 
175  // bytes to copy to the right of tail
176  size_t right = bytes_at_right < bytes_wanted ? bytes_at_right : bytes_wanted;
177 
178  // copy bytes from tail position
179  memcpy(buf, data + tail, right);
180 
181  // if necessary, wrap and continue copy from left edge of buffer
182  if (bytes_wanted > right) {
183  size_t left = bytes_wanted - right;
184  memcpy(buf + right, data, left);
185  }
186 
187  // update tail after consuming bytes_wanted
188  current_perf_mmap->data_tail += bytes_wanted;
189 
190  return 0;
191 }
192 
193 
194 static inline int
196  pe_mmap_t *current_perf_mmap,
197  pe_header_t *hdr
198 )
199 {
200  return perf_read(current_perf_mmap, hdr, sizeof(pe_header_t));
201 }
202 
203 
204 static inline int
206  pe_mmap_t *current_perf_mmap,
207  u32 *val
208 )
209 {
210  return perf_read(current_perf_mmap, val, sizeof(u32));
211 }
212 
213 
214 static inline int
216  pe_mmap_t *current_perf_mmap,
217  u64 *val
218 )
219 {
220  return perf_read(current_perf_mmap, val, sizeof(u64));
221 }
222 
223 
224 //----------------------------------------------------------
225 // special mmap buffer reading for PERF_SAMPLE_READ
226 //----------------------------------------------------------
227 static void
228 handle_struct_read_format( pe_mmap_t *perf_mmap, int read_format)
229 {
230  u64 value, id, nr, time_enabled, time_running;
231 
232  if (read_format & PERF_FORMAT_GROUP) {
233  perf_read_u64(perf_mmap, &nr);
234  } else {
235  perf_read_u64(perf_mmap, &value);
236  }
237 
238  if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
239  perf_read_u64(perf_mmap, &time_enabled);
240  }
241  if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
242  perf_read_u64(perf_mmap, &time_running);
243  }
244 
245  if (read_format & PERF_FORMAT_GROUP) {
246  for(int i=0;i<nr;i++) {
247  perf_read_u64(perf_mmap, &value);
248 
249  if (read_format & PERF_FORMAT_ID) {
250  perf_read_u64(perf_mmap, &id);
251  }
252  }
253  }
254  else {
255  if (read_format & PERF_FORMAT_ID) {
256  perf_read_u64(perf_mmap, &id);
257  }
258  }
259 }
260 
261 
262 
263 //----------------------------------------------------------
264 // processing of kernel callchains
265 //----------------------------------------------------------
266 
267 static int
268 perf_sample_callchain(pe_mmap_t *current_perf_mmap, perf_mmap_data_t* mmap_data)
269 {
270  mmap_data->nr = 0; // initialze the number of records to be 0
271  u64 num_records = 0;
272 
273  // determine how many frames in the call chain
274  if (perf_read_u64( current_perf_mmap, &num_records) == 0) {
275  if (num_records > 0) {
276 
277  // warning: if the number of frames is bigger than the storage (MAX_CALLCHAIN_FRAMES)
278  // we have to truncate them. This is not a good practice, but so far it's the only
279  // simplest solution I can come up.
280  mmap_data->nr = (num_records < MAX_CALLCHAIN_FRAMES ? num_records : MAX_CALLCHAIN_FRAMES);
281 
282  // read the IPs for the frames
283  if (perf_read( current_perf_mmap, mmap_data->ips, num_records * sizeof(u64)) != 0) {
284  // the data seems invalid
285  mmap_data->nr = 0;
286  TMSG(LINUX_PERF, "unable to read all %d frames", mmap_data->nr);
287  }
288  }
289  } else {
290  TMSG(LINUX_PERF, "unable to read the number of frames" );
291  }
292  return mmap_data->nr;
293 }
294 
295 #if 1
296 //----------------------------------------------------------
297 // part of the buffer to be skipped
298 //----------------------------------------------------------
299 static void
300 skip_perf_data(pe_mmap_t *current_perf_mmap, size_t sz)
301 {
302  struct perf_event_mmap_page *hdr = current_perf_mmap;
303  u64 data_head = perf_mmap_read_head(current_perf_mmap);
304  rmb();
305 
306  if ((hdr->data_tail + sz) > data_head)
307  sz = data_head - hdr->data_tail;
308 
309  hdr->data_tail += sz;
310 }
311 #endif
312 
318 static int
319 parse_buffer(int sample_type, pe_mmap_t *current_perf_mmap,
320  struct perf_event_attr *attr,
321  perf_mmap_data_t *mmap_info )
322 {
323  int data_read = 0;
324 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,0)
325  if (sample_type & PERF_SAMPLE_IDENTIFIER) {
326  perf_read_u64(current_perf_mmap, &mmap_info->sample_id);
327  data_read++;
328  }
329 #endif
330  if (sample_type & PERF_SAMPLE_IP) {
331  // to be used by datacentric event
332  perf_read_u64(current_perf_mmap, &mmap_info->ip);
333  data_read++;
334  }
335  if (sample_type & PERF_SAMPLE_TID) {
336  perf_read_u32(current_perf_mmap, &mmap_info->pid);
337  perf_read_u32(current_perf_mmap, &mmap_info->tid);
338  data_read++;
339  }
340  if (sample_type & PERF_SAMPLE_TIME) {
341  perf_read_u64(current_perf_mmap, &mmap_info->time);
342  data_read++;
343  }
344  if (sample_type & PERF_SAMPLE_ADDR) {
345  // to be used by datacentric event
346  perf_read_u64(current_perf_mmap, &mmap_info->addr);
347  data_read++;
348  }
349  if (sample_type & PERF_SAMPLE_ID) {
350  perf_read_u64(current_perf_mmap, &mmap_info->id);
351  data_read++;
352  }
353  if (sample_type & PERF_SAMPLE_STREAM_ID) {
354  perf_read_u64(current_perf_mmap, &mmap_info->stream_id);
355  data_read++;
356  }
357  if (sample_type & PERF_SAMPLE_CPU) {
358  perf_read_u32(current_perf_mmap, &mmap_info->cpu);
359  perf_read_u32(current_perf_mmap, &mmap_info->res);
360  data_read++;
361  }
362  if (sample_type & PERF_SAMPLE_PERIOD) {
363  perf_read_u64(current_perf_mmap, &mmap_info->period);
364  data_read++;
365  }
366  if (sample_type & PERF_SAMPLE_READ) {
367  // to be used by datacentric event
368  handle_struct_read_format(current_perf_mmap,
369  attr->read_format);
370  data_read++;
371  }
372  if (sample_type & PERF_SAMPLE_CALLCHAIN) {
373  // add call chain from the kernel
374  perf_sample_callchain(current_perf_mmap, mmap_info);
375  data_read++;
376  }
377  if (sample_type & PERF_SAMPLE_RAW) {
378  perf_read_u32(current_perf_mmap, &mmap_info->size);
379  mmap_info->data = alloca( sizeof(char) * mmap_info->size );
380  perf_read( current_perf_mmap, mmap_info->data, mmap_info->size) ;
381  data_read++;
382  }
383  if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
384  data_read++;
385  }
386 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
387  if (sample_type & PERF_SAMPLE_REGS_USER) {
388  data_read++;
389  }
390  if (sample_type & PERF_SAMPLE_STACK_USER) {
391  data_read++;
392  }
393 #endif
394 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
395  if (sample_type & PERF_SAMPLE_WEIGHT) {
396  data_read++;
397  }
398  if (sample_type & PERF_SAMPLE_DATA_SRC) {
399  perf_read_u64(current_perf_mmap, &mmap_info->data_src);
400  data_read++;
401  }
402 #endif
403 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0)
404  // only available since kernel 3.19
405  if (sample_type & PERF_SAMPLE_TRANSACTION) {
406  data_read++;
407  }
408 #endif
409 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
410  // only available since kernel 3.19
411  if (sample_type & PERF_SAMPLE_REGS_INTR) {
412  data_read++;
413  }
414 #endif
415  return data_read;
416 }
417 
418 //----------------------------------------------------------------------
419 // Public Interfaces
420 //----------------------------------------------------------------------
421 
422 
423 //----------------------------------------------------------
424 // reading mmap buffer from the kernel
425 // in/out: mmapped data of type perf_mmap_data_t.
426 // return true if there are more data to be read,
427 // false otherwise
428 //----------------------------------------------------------
429 int
430 read_perf_buffer(pe_mmap_t *current_perf_mmap,
431  struct perf_event_attr *attr, perf_mmap_data_t *mmap_info)
432 {
433  pe_header_t hdr;
434 
435  int read_successfully = perf_read_header(current_perf_mmap, &hdr);
436  if (read_successfully != 0) {
437  return 0;
438  }
439 
440  mmap_info->header_type = hdr.type;
441  mmap_info->header_misc = hdr.misc;
442 
443  if (hdr.type == PERF_RECORD_SAMPLE) {
444  if (hdr.size <= 0) {
445  return 0;
446  }
447  int sample_type = attr->sample_type;
448  parse_buffer(sample_type, current_perf_mmap, attr, mmap_info);
449 
450 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)
451  } else if (hdr.type == PERF_RECORD_SWITCH) {
452  // only available since kernel 4.3
453 
454  u64 type;
455  struct { uint32_t pid, tid; } pid;
456  struct { uint32_t cpu, reserved; } cpu;
457 
458  type = attr->sample_type;
459 
460  if (type & PERF_SAMPLE_TID) {
461  perf_read( current_perf_mmap, &pid, sizeof(pid)) ;
462  }
463 
464  if (type & PERF_SAMPLE_TIME) {
465  perf_read_u64( current_perf_mmap, &(mmap_info->context_switch_time) ) ;
466  }
467  if (type & PERF_SAMPLE_CPU) {
468  perf_read( current_perf_mmap, &cpu, sizeof(cpu) ) ;
469  }
470 #elif LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0)
471  } else if (hdr.type == PERF_RECORD_LOST_SAMPLES) {
472  u64 lost_samples;
473  perf_read_u64(current_perf_mmap, &lost_samples);
474  TMSG(LINUX_PERF, "[%d] lost samples %d",
475  current->fd, lost_samples);
476  skip_perf_data(current_perf_mmap, hdr.size-sizeof(lost_samples))
477 #endif
478 
479  } else {
480  // not a PERF_RECORD_SAMPLE nor PERF_RECORD_SWITCH
481  // skip it
482  if (hdr.size <= 0) {
483  return 0;
484  }
485  skip_perf_data(current_perf_mmap, hdr.size);
486  TMSG(LINUX_PERF, "[%d] skip header %d %d : %d bytes",
487  attr->config,
488  hdr.type, hdr.misc, hdr.size);
489  }
490 
491  return (has_more_perf_data(current_perf_mmap));
492 }
493 
494 //----------------------------------------------------------
495 // allocate mmap for a given file descriptor
496 //----------------------------------------------------------
497 pe_mmap_t*
498 set_mmap(int perf_fd)
499 {
500  if (pagesize == 0) {
501  perf_mmap_init();
502  }
503  void *map_result =
504  mmap(NULL, PERF_MMAP_SIZE(pagesize), PROT_WRITE | PROT_READ,
505  MAP_SHARED, perf_fd, MMAP_OFFSET_0);
506 
507  if (map_result == MAP_FAILED) {
508  EMSG("Linux perf mmap failed: %s", strerror(errno));
509  return NULL;
510  }
511 
512  pe_mmap_t *mmap = (pe_mmap_t *) map_result;
513 
514  memset(mmap, 0, sizeof(pe_mmap_t));
515  mmap->version = 0;
516  mmap->compat_version = 0;
517  mmap->data_head = 0;
518  mmap->data_tail = 0;
519 
520  return mmap;
521 }
522 
523 /***
524  * unmap buffer. need to call this at the end of the execution
525  */
526 void
528 {
529  munmap(mmap, PERF_MMAP_SIZE(pagesize));
530 }
531 
536 void
538 {
539  pagesize = sysconf(_SC_PAGESIZE);
540  tail_mask = PERF_TAIL_MASK(pagesize);
541 }
542 
static int perf_read_u64(pe_mmap_t *current_perf_mmap, u64 *val)
Definition: perf_mmap.c:215
int read_perf_buffer(pe_mmap_t *current_perf_mmap, struct perf_event_attr *attr, perf_mmap_data_t *mmap_info)
Definition: perf_mmap.c:430
#define BUFFER_SIZE
Definition: perf_mmap.h:66
static int pagesize
Definition: perf_mmap.c:108
#define MAX_CALLCHAIN_FRAMES
Definition: perf-util.h:65
void perf_unmmap(pe_mmap_t *mmap)
Definition: perf_mmap.c:527
static int perf_sample_callchain(pe_mmap_t *current_perf_mmap, perf_mmap_data_t *mmap_data)
Definition: perf_mmap.c:268
#define MMAP_OFFSET_0
Definition: perf_mmap.c:84
#define PERF_MMAP_SIZE(pagesz)
Definition: perf_mmap.c:89
Definition: fmt.c:108
pe_mmap_t * set_mmap(int perf_fd)
Definition: perf_mmap.c:498
static __thread u32 tid
static int perf_read_u32(pe_mmap_t *current_perf_mmap, u32 *val)
Definition: perf_mmap.c:205
#define BUFFER_FRONT(current_perf_mmap)
Definition: perf_mmap.h:65
static int has_more_perf_data(pe_mmap_t *hdr)
Definition: perf_mmap.c:140
static __thread u32 pid
struct perf_event_mmap_page pe_mmap_t
Definition: perf-util.h:144
static int perf_read(pe_mmap_t *current_perf_mmap, void *buf, size_t bytes_wanted)
Definition: perf_mmap.c:151
static size_t tail_mask
Definition: perf_mmap.c:109
static int parse_buffer(int sample_type, pe_mmap_t *current_perf_mmap, struct perf_event_attr *attr, perf_mmap_data_t *mmap_info)
Definition: perf_mmap.c:319
#define EMSG
Definition: messages.h:70
static int perf_read_header(pe_mmap_t *current_perf_mmap, pe_header_t *hdr)
Definition: perf_mmap.c:195
u64 context_switch_time
Definition: perf-util.h:122
__u32 u32
void __attribute__((weak))
Definition: hpctoolkit.c:64
#define BUFFER_OFFSET(tail)
Definition: perf_mmap.h:67
#define PERF_TAIL_MASK(pagesz)
Definition: perf_mmap.c:90
static u64 perf_mmap_read_head(pe_mmap_t *hdr)
Definition: perf_mmap.c:119
static int num_of_more_perf_data(pe_mmap_t *hdr)
Definition: perf_mmap.c:130
#define TMSG(f,...)
Definition: messages.h:93
__u64 u64
struct perf_event_header pe_header_t
Definition: perf_mmap.h:73
u64 ips[MAX_CALLCHAIN_FRAMES]
Definition: perf-util.h:97
static __thread u32 cpu
#define NULL
Definition: ElfHelper.cpp:85
void perf_mmap_init()
Definition: perf_mmap.c:537
static void handle_struct_read_format(pe_mmap_t *perf_mmap, int read_format)
Definition: perf_mmap.c:228
static void skip_perf_data(pe_mmap_t *current_perf_mmap, size_t sz) __attribute__((unused))
Definition: perf_mmap.c:300