HPCToolkit
perf-util.c
Go to the documentation of this file.
1 // -*-Mode: C++;-*- // technically C99
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // --------------------------------------------------------------------------
6 // Part of HPCToolkit (hpctoolkit.org)
7 //
8 // Information about sources of support for research and development of
9 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
10 // --------------------------------------------------------------------------
11 //
12 // Copyright ((c)) 2002-2019, Rice University
13 // All rights reserved.
14 //
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met:
18 //
19 // * Redistributions of source code must retain the above copyright
20 // notice, this list of conditions and the following disclaimer.
21 //
22 // * Redistributions in binary form must reproduce the above copyright
23 // notice, this list of conditions and the following disclaimer in the
24 // documentation and/or other materials provided with the distribution.
25 //
26 // * Neither the name of Rice University (RICE) nor the names of its
27 // contributors may be used to endorse or promote products derived from
28 // this software without specific prior written permission.
29 //
30 // This software is provided by RICE and contributors "as is" and any
31 // express or implied warranties, including, but not limited to, the
32 // implied warranties of merchantability and fitness for a particular
33 // purpose are disclaimed. In no event shall RICE or contributors be
34 // liable for any direct, indirect, incidental, special, exemplary, or
35 // consequential damages (including, but not limited to, procurement of
36 // substitute goods or services; loss of use, data, or profits; or
37 // business interruption) however caused and on any theory of liability,
38 // whether in contract, strict liability, or tort (including negligence
39 // or otherwise) arising in any way out of the use of this software, even
40 // if advised of the possibility of such damage.
41 //
42 // ******************************************************* EndRiceCopyright *
43 
44 
45 /******************************************************************************
46  * includes
47  *****************************************************************************/
48 
49 #include <linux/version.h>
50 #include <ctype.h>
51 
52 
53 /******************************************************************************
54  * local includes
55  *****************************************************************************/
56 
59 
60 #include <lib/support-lean/OSUtil.h> // hostid
61 
62 #include <include/linux_info.h>
63 #include "perf-util.h"
64 #include "perf_skid.h"
65 
66 
67 #define HPCRUN_OPTION_PERF_COUNT "HPCRUN_PERF_COUNT"
68 
69 // default number of samples per second.
70 // the linux perf tool has a default of 4000. that looks very high, but
71 // the visible overhead is still small for it.
72 // however, on some machines the overhead is significant, and
73 // somehow it causes the kernel to adjust the period threshold to
74 // less than 100.
75 // 300 samples per second appears to give relatively similar percentages
76 // to the perf tool
77 #define DEFAULT_THRESHOLD 300
78 #define MAX_BUFFER_LINUX_KERNEL 128
79 
80 
81 //******************************************************************************
82 // constants
83 //******************************************************************************
84 
85 const u64 anomalous_ip = 0xffffffffffffff80;
86 
87 
88 //******************************************************************************
89 // typedef, structure or enum
90 //******************************************************************************
91 
92 
93 //******************************************************************************
94 // local variables
95 //******************************************************************************
96 
97 static uint16_t perf_kernel_lm_id = 0;
98 
100 
102 
103 
104 //******************************************************************************
105 // forward declaration
106 //******************************************************************************
107 
108 
109 
110 //******************************************************************************
111 // implementation
112 //******************************************************************************
113 
114 #if KERNEL_SAMPLING_ENABLED
115 
117 
118 /***
119  * if the input is a retained (leaf) cct node, return a sibling
120  * non-retained proxy.
121  */
122 static cct_node_t *
125 )
126 {
127  if (!hpcrun_cct_retained(node)) return node;
128 
129  // node is retained. the caller of this routine would make it an
130  // interior node in the cct, which would cause trouble for hpcprof
131  // and hpctraceviewer. instead, use a sibling with that represents
132  // the machine code offset +1.
133 
134  // extract the abstract address in the node
136 
137  // create an abstract address representing the next machine code address
138  cct_addr_t sibling_addr = *addr;
139  sibling_addr.ip_norm.lm_ip++;
140 
141  // get the necessary sibling to node
143  &sibling_addr);
144 
145  return sibling;
146 }
147 
148 
149 /***
150  * insert a cct node for a PC in a kernel call path
151  */
152 static cct_node_t *
154  uint16_t lm_id,
155  cct_node_t *parent,
156  u64 ip
157 )
158 {
159  parent = perf_split_retained_node(parent);
160 
161  ip_normalized_t npc;
162  memset(&npc, 0, sizeof(ip_normalized_t));
163  npc.lm_id = lm_id;
164  npc.lm_ip = ip;
165 
166  cct_addr_t frm;
167  memset(&frm, 0, sizeof(cct_addr_t));
168  frm.ip_norm = npc;
169 
170  return hpcrun_cct_insert_addr(parent, &frm);
171 }
172 
173 
174 /***
175  * retrieve the value of kptr_restrict
176  */
177 static int
179 {
180  static int privilege = -1;
181 
182  if (privilege >= 0)
183  return privilege;
184 
185  FILE *fp = fopen(LINUX_KERNEL_KPTR_RESTICT, "r");
186  if (fp != NULL) {
187  fscanf(fp, "%d", &privilege);
188  fclose(fp);
189  }
190  return privilege;
191 }
192 
193 
194 
195 static uint16_t
197 {
199  // ensure that this is initialized only once per process
200  spinlock_lock(&perf_lock);
201  if (perf_kernel_lm_id == 0) {
202 
203  // in case of kptr_restrict = 0, we want to copy kernel symbol for each node
204  // if the value of kptr_restric != 0, all nodes has <vmlinux> module, and
205  // all calls to the kernel will be from address zero
206 
207  if (perf_util_get_kptr_restrict() == 0) {
208 
209  char buffer[MAX_BUFFER_LINUX_KERNEL];
212 
213  } else {
215 
216  }
217  }
218  spinlock_unlock(&perf_lock);
219  }
220  return perf_kernel_lm_id;
221 }
222 
223 //----------------------------------------------------------
224 // testing perf availability
225 //----------------------------------------------------------
226 static int
228 {
229  FILE *pe_paranoid = fopen(LINUX_PERF_EVENTS_FILE, "r");
230  FILE *ksyms = fopen(LINUX_KERNEL_SYMBOL_FILE, "r");
231 
232  int level = 3; // default : no access to perf event
233 
234  if (ksyms != NULL && pe_paranoid != NULL) {
235  fscanf(pe_paranoid, "%d", &level) ;
236  }
237  if (ksyms) fclose(ksyms);
238  if (pe_paranoid) fclose(pe_paranoid);
239 
240  return level;
241 }
242 #endif
243 
244 
245 
246 //----------------------------------------------------------
247 // returns the maximum sample rate of this node
248 // based on info provided by LINUX_PERF_EVENTS_MAX_RATE file
249 //----------------------------------------------------------
250 static int
252 {
253  static int initialized = 0;
254  static int max_sample_rate = HPCRUN_DEFAULT_SAMPLE_RATE; // unless otherwise limited
255  if (!initialized) {
256  FILE *perf_rate_file = fopen(LINUX_PERF_EVENTS_MAX_RATE, "r");
257 
258  if (perf_rate_file != NULL) {
259  fscanf(perf_rate_file, "%d", &max_sample_rate);
260  fclose(perf_rate_file);
261  }
262  initialized = 1;
263  }
264  return max_sample_rate;
265 }
266 
267 
268 /***
269  * (1) ensure that the default rate for frequency-based sampling is below the maximum.
270  * (2) if the environment variable HPCRUN_PERF_COUNT is set, use it to set the threshold
271  */
272 static void
274 {
275  static int initialized = 0;
276 
277  if (!initialized) {
278  int max_rate_m1 = perf_util_get_max_sample_rate() - 1;
279  if (default_threshold.threshold_num > max_rate_m1) {
280  default_threshold.threshold_num = max_rate_m1;
281  }
282  const char *val_str = getenv("HPCRUN_PERF_COUNT");
283  if (val_str != NULL) {
284  TMSG(LINUX_PERF, "HPCRUN_PERF_COUNT = %s", val_str);
285  int res = hpcrun_extract_threshold(val_str, &default_threshold.threshold_num, max_rate_m1);
286  if (res == 1) {
287  default_threshold.threshold_type = PERIOD;
288  }
289  }
290  initialized = 1;
291  }
292  TMSG(LINUX_PERF, "default threshold = %d", default_threshold.threshold_num);
293 }
294 
295 /*************************************************************
296  * Interface API
297  **************************************************************/
298 
299 //----------------------------------------------------------
300 // initialize perf_util. Need to be called as earliest as possible
301 //----------------------------------------------------------
302 void
304 {
306 
307  // perf_kernel_lm_id must be set for each process. here, we clear it
308  // because it is too early to allocate a load module. it will be set
309  // later, exactly once per process if ksym_status == PERF_AVAILABLE.
310  perf_kernel_lm_id = 0;
311 
312  // if kernel symbols are available, we will attempt to collect kernel
313  // callchains and add them to our call paths
314 
316 
317  // Conditions for the sample to include kernel if:
318  // 1. kptr_restric = 0 (zero)
319  // 2. paranoid_level < 2 (zero or one)
320  // 3. linux version > 3.7
321 
322 #if KERNEL_SAMPLING_ENABLED
323  int level = perf_util_kernel_syms_avail();
324  int krestrict = perf_util_get_kptr_restrict();
325 
326  if (krestrict == 0 && (level == 0 || level == 1)) {
328  }
329 #endif
330 }
331 
332 
333 //----------------------------------------------------------
334 // Interface to see if the kernel symbol is available
335 // this function caches the value so that we don't need
336 // enquiry the same question all the time.
337 //----------------------------------------------------------
338 bool
340 {
341  return (ksym_status == PERF_AVAILABLE);
342 }
343 
344 
345 
346 void
348 {
349  threshold->threshold_num = default_threshold.threshold_num;
350  threshold->threshold_type = default_threshold.threshold_type;
351 }
352 
353 #if KERNEL_SAMPLING_ENABLED
354 //----------------------------------------------------------
355 // extend a user-mode callchain with kernel frames (if any)
356 //----------------------------------------------------------
357 cct_node_t *
359  cct_node_t *leaf,
360  void *data_aux
361 )
362 {
363  cct_node_t *parent = leaf;
364 
365  if (data_aux == NULL) {
366  return parent;
367  }
368 
369  perf_mmap_data_t *data = (perf_mmap_data_t*) data_aux;
370  if (data->nr > 0) {
371  uint16_t kernel_lm_id = perf_get_kernel_lm_id();
372 
373  // bug #44 https://github.com/HPCToolkit/hpctoolkit/issues/44
374  // if no kernel symbols are available, collapse the kernel call
375  // chain into a single node
376  if (perf_util_get_kptr_restrict() != 0) {
377  return perf_insert_cct(kernel_lm_id, parent, 0);
378  }
379 
380  // add kernel IPs to the call chain top down, which is the
381  // reverse of the order in which they appear in ips[]
382  for (int i = data->nr - 1; i > 0; i--) {
383  parent = perf_insert_cct(kernel_lm_id, parent, data->ips[i]);
384  }
385 
386  // check ip[0] before adding as it often seems seems to be anomalous
387  if (data->ips[0] != anomalous_ip) {
388  parent = perf_insert_cct(kernel_lm_id, parent, data->ips[0]);
389  }
390  }
391  return parent;
392 }
393 #endif
394 
395 
396 
397 //----------------------------------------------------------
398 // generic default initialization for event attributes
399 // return true if the initialization is successful,
400 // false otherwise.
401 //----------------------------------------------------------
402 int
404  const char *event_name,
405  struct perf_event_attr *attr,
406  bool usePeriod, u64 threshold,
407  u64 sampletype
408 )
409 {
410  // by default, we always ask for sampling period information
411  // some PMUs is sensitive to the sample type.
412  // For instance, IDLE-CYCLES-BACKEND will fail if we set PERF_SAMPLE_ADDR.
413  // By default, we need to initialize sample_type as minimal as possible.
414  unsigned int sample_type = sampletype
415  | PERF_SAMPLE_PERIOD | PERF_SAMPLE_TIME;
416 
417  attr->size = sizeof(struct perf_event_attr); /* Size of attribute structure */
418  attr->freq = (usePeriod ? 0 : 1);
419 
420  attr->sample_period = threshold; /* Period or frequency of sampling */
421  int max_sample_rate = perf_util_get_max_sample_rate();
422 
423  if (attr->freq == 1 && threshold >= max_sample_rate) {
424  int our_rate = max_sample_rate - 1;
425  EMSG("WARNING: Lowered specified sample rate %d to %d, below max sample rate of %d.",
426  threshold, our_rate, max_sample_rate);
427  attr->sample_period = our_rate;
428  }
429 
430  attr->disabled = 1; /* the counter will be enabled later */
431  attr->sample_type = sample_type;
432  attr->exclude_kernel = EXCLUDE;
433  attr->exclude_hv = EXCLUDE;
434 
435  attr->exclude_kernel = EXCLUDE;
436  attr->exclude_hv = EXCLUDE;
437 
438 #if KERNEL_SAMPLING_ENABLED
439  attr->exclude_callchain_user = EXCLUDE_CALLCHAIN;
440  attr->exclude_callchain_kernel = EXCLUDE_CALLCHAIN;
441 #endif
442 
444  /* We have rights to record and interpret kernel callchains */
445 #if KERNEL_SAMPLING_ENABLED
446  attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
447  attr->exclude_callchain_kernel = INCLUDE_CALLCHAIN;
448 #endif
449  attr->exclude_kernel = INCLUDE;
450  }
451 
452  char *name;
453  int precise_ip_type = precise_ip_type = perf_skid_parse_event(event_name, &name);
454  free(name);
455 
456  u64 precise_ip = 0;
457 
458  switch (precise_ip_type) {
460  precise_ip = perf_skid_set_max_precise_ip(attr);
461  break;
464  // check the HPCRUN_PRECISE_IP env variable
465  precise_ip = perf_skid_get_precise_ip(attr);
466  break;
467  default:
468  precise_ip = precise_ip_type;
469  }
470 
471  attr->precise_ip = precise_ip;
472 
473  return true;
474 }
475 
int OSUtil_setCustomKernelNameWrap(char *buffer, size_t max_chars)
Definition: OSUtil.c:208
#define LINUX_KERNEL_NAME
Definition: linux_info.h:4
static cct_node_t * perf_split_retained_node(cct_node_t *node)
Definition: perf-util.c:123
static uint16_t perf_kernel_lm_id
Definition: perf-util.c:97
void MONITOR_EXT_WRAP_NAME() free(void *ptr)
int perf_skid_set_max_precise_ip(struct perf_event_attr *attr)
Definition: perf_skid.c:176
#define LINUX_PERF_EVENTS_MAX_RATE
Definition: linux_info.h:10
static int perf_util_kernel_syms_avail()
Definition: perf-util.c:227
#define HPCRUN_DEFAULT_SAMPLE_RATE
#define PERF_EVENT_SKID_ERROR
Definition: perf_skid.h:54
static void spinlock_unlock(spinlock_t *l)
Definition: spinlock.h:96
static int perf_util_get_kptr_restrict()
Definition: perf-util.c:178
static struct event_threshold_s default_threshold
Definition: perf-util.c:101
#define PERF_EVENT_AUTODETECT_SKID
Definition: perf_skid.h:49
#define INCLUDE_CALLCHAIN
#define LINUX_KERNEL_KPTR_RESTICT
Definition: linux_info.h:11
cct_node_t * node
Definition: cct.c:128
uint16_t hpcrun_loadModule_add(const char *name)
Definition: loadmap.c:476
uintptr_t lm_ip
Definition: ip-normalized.h:78
bool perf_util_is_ksym_available()
Definition: perf-util.c:339
cct_node_t * hpcrun_cct_parent(cct_node_t *x)
Definition: cct.c:357
cct_node_t * perf_util_add_kernel_callchain(cct_node_t *leaf, void *data_aux)
Definition: perf-util.c:358
#define EXCLUDE_CALLCHAIN
const u64 anomalous_ip
Definition: perf-util.c:85
cct_node_t * hpcrun_cct_insert_addr(cct_node_t *node, cct_addr_t *frm)
Definition: cct.c:405
static uint16_t perf_get_kernel_lm_id()
Definition: perf-util.c:196
#define PERF_EVENT_SKID_ARBITRARY
Definition: perf_skid.h:53
#define EMSG
Definition: messages.h:70
void perf_util_get_default_threshold(struct event_threshold_s *threshold)
Definition: perf-util.c:347
enum threshold_e threshold_type
Definition: perf-util.h:75
void perf_util_init()
Definition: perf-util.c:303
ip_normalized_t ip_norm
Definition: cct_addr.h:66
int perf_skid_parse_event(const char *event_string, char **event_string_without_skidmarks)
Definition: perf_skid.c:243
#define INCLUDE
static void spinlock_lock(spinlock_t *l)
Definition: spinlock.h:111
int perf_util_attr_init(const char *event_name, struct perf_event_attr *attr, bool usePeriod, u64 threshold, u64 sampletype)
Definition: perf-util.c:403
#define LINUX_KERNEL_SYMBOL_FILE
Definition: linux_info.h:8
int hpcrun_cct_retained(cct_node_t *x)
Definition: cct.c:576
#define TMSG(f,...)
Definition: messages.h:93
__u64 u64
static enum perf_ksym_e ksym_status
Definition: perf-util.c:99
u64 ips[MAX_CALLCHAIN_FRAMES]
Definition: perf-util.h:97
static cct_node_t * perf_insert_cct(uint16_t lm_id, cct_node_t *parent, u64 ip)
Definition: perf-util.c:153
static int perf_util_get_max_sample_rate()
Definition: perf-util.c:251
#define LINUX_PERF_EVENTS_FILE
Definition: linux_info.h:9
#define DEFAULT_THRESHOLD
Definition: perf-util.c:77
#define NULL
Definition: ElfHelper.cpp:85
#define EXCLUDE
#define MAX_BUFFER_LINUX_KERNEL
Definition: perf-util.c:78
Definition: cct.c:96
static spinlock_t perf_lock
Definition: perf-util.c:116
static int const threshold
cct_addr_t * addr
Definition: cct.c:130
int hpcrun_extract_threshold(const char *input_string, long *threshold, long default_value)
Definition: tokenize.c:103
u64 perf_skid_get_precise_ip(struct perf_event_attr *attr)
Definition: perf_skid.c:207
#define SPINLOCK_UNLOCKED
Definition: spinlock.h:84
static void set_default_threshold()
Definition: perf-util.c:273
cct_addr_t * hpcrun_cct_addr(cct_node_t *node)
Definition: cct.c:369