HPCToolkit
pmu_x86.c
Go to the documentation of this file.
1 // -*-Mode: C++;-*- // technically C99
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // $HeadURL$
6 // $Id$
7 //
8 // --------------------------------------------------------------------------
9 // Part of HPCToolkit (hpctoolkit.org)
10 //
11 // Information about sources of support for research and development of
12 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
13 // --------------------------------------------------------------------------
14 //
15 // Copyright ((c)) 2002-2018, Rice University
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions are
20 // met:
21 //
22 // * Redistributions of source code must retain the above copyright
23 // notice, this list of conditions and the following disclaimer.
24 //
25 // * Redistributions in binary form must reproduce the above copyright
26 // notice, this list of conditions and the following disclaimer in the
27 // documentation and/or other materials provided with the distribution.
28 //
29 // * Neither the name of Rice University (RICE) nor the names of its
30 // contributors may be used to endorse or promote products derived from
31 // this software without specific prior written permission.
32 //
33 // This software is provided by RICE and contributors "as is" and any
34 // express or implied warranties, including, but not limited to, the
35 // implied warranties of merchantability and fitness for a particular
36 // purpose are disclaimed. In no event shall RICE or contributors be
37 // liable for any direct, indirect, incidental, special, exemplary, or
38 // consequential damages (including, but not limited to, procurement of
39 // substitute goods or services; loss of use, data, or profits; or
40 // business interruption) however caused and on any theory of liability,
41 // whether in contract, strict liability, or tort (including negligence
42 // or otherwise) arising in any way out of the use of this software, even
43 // if advised of the possibility of such damage.
44 //
45 // ******************************************************* EndRiceCopyright *
46 
47 #include <linux/version.h>
48 
50 #include <utilities/arch/cpuid.h>
51 
57 
58 #include "datacentric.h"
59 #include "pmu_x86.h"
60 
61 
62 /******************************************************************************
63  * data structure
64  *****************************************************************************/
65 
66 typedef union perf_mem_data_src perf_mem_data_src_t;
67 
70 
71  u32 locks; /* count of 'lock' transactions */
72  u32 store; /* count of all stores in trace */
73  u32 st_uncache; /* stores to uncacheable address */
74  u32 st_noadrs; /* cacheable store with no address */
75  u32 st_l1hit; /* count of stores that hit L1D */
76  u32 st_l1miss; /* count of stores that miss L1D */
77  u32 load; /* count of all loads in trace */
78  u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */
79  u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */
80  u32 ld_uncache; /* loads to uncacheable address */
81  u32 ld_io; /* loads to io address */
82  u32 ld_miss; /* loads miss */
83  u32 ld_noadrs; /* cacheable load with no address */
84  u32 ld_fbhit; /* count of loads hitting Fill Buffer */
85  u32 ld_l1hit; /* count of loads that hit L1D */
86  u32 ld_l2hit; /* count of loads that hit L2D */
87  u32 ld_llchit; /* count of loads that hit LLC */
88  u32 lcl_hitm; /* count of loads with local HITM */
89  u32 rmt_hitm; /* count of loads with remote HITM */
90  u32 tot_hitm; /* count of loads with local and remote HITM */
91  u32 rmt_hit; /* count of loads with remote hit clean; */
92  u32 lcl_dram; /* count of loads miss to local DRAM */
93  u32 rmt_dram; /* count of loads miss to remote DRAM */
94  u32 nomap; /* count of load/stores with no phys adrs */
95  u32 noparse; /* count of unparsable data sources */
96 };
97 
99  int memload;
101 
102  int memstore;
105 
107 };
108 
109 
110 /******************************************************************************
111  * local variables
112  *****************************************************************************/
113 
114 static struct perf_mem_metric metric;
115 
116 /******************************************************************************
117  * PRIVATE Function implementation
118  *****************************************************************************/
119 
// Token-pasting shorthand for the perf memory constants:
// P(LVL, HIT) expands to PERF_MEM_LVL_HIT, P(OP, LOAD) to PERF_MEM_OP_LOAD.
120 #define P(a, b) PERF_MEM_##a##_##b
121 
122 
/*
 * Adds 'value' to metric 'metric_id' on both cct_node and cct_datacentric,
 * then increments the num_samples counter stored for that metric in
 * td->core_profile_trace_data.perf_event_info.
 */
123 static void
124 datacentric_record_metric(int metric_id, cct_node_t *cct_node, cct_node_t *cct_datacentric,
125  cct_metric_data_t value)
126 {
127  cct_metric_data_increment(metric_id, cct_node, value);
128  cct_metric_data_increment(metric_id, cct_datacentric, value);
129 
// NOTE(review): listing line 130 is absent from this extract, so the
// declaration of 'td' is not visible here; presumably it comes from
// hpcrun_get_thread_data() -- confirm against the original file.
131  metric_aux_info_t *info_aux = &(td->core_profile_trace_data.perf_event_info[metric_id]);
132  info_aux->num_samples++;
133 }
134 
/*
 * Records the metrics for one sampled memory load, decoded from the
 * mem_lvl and mem_snoop fields of 'data_src':
 *   - metric.memload      : always incremented by 1
 *   - metric.memllc_miss  : incremented by the number of DRAM / remote-cache
 *                           counters set for this sample (only when non-zero)
 *   - metric.memload_miss : incremented by 1 when the MISS level bit is set
 *
 * NOTE(review): listing line 136 (function name and first two parameters)
 * is absent from this extract; the body uses 'node' and 'datacentric_node',
 * which presumably are those two parameters -- confirm against the original.
 */
135 static void
137  perf_mem_data_src_t *data_src)
138 {
// Per-call scratch counters, zeroed below. Only lcl_dram, rmt_dram, rmt_hit
// and rmt_hitm are read back (for llc_miss); the other counters are
// incremented but never read inside this function.
139  struct perf_data_src_mem_lvl_s data_mem;
140 
141  u64 lvl = data_src->mem_lvl;
142  u64 snoop = data_src->mem_snoop;
143 
144  memset(&data_mem, 0, sizeof(struct perf_data_src_mem_lvl_s));
145 
146  // ---------------------------------------------------
147  // number of load operations
148  // ---------------------------------------------------
149  cct_metric_data_t value = (cct_metric_data_t){.i = 1};
150  datacentric_record_metric(metric.memload, node, datacentric_node, value );
151 
152  // ---------------------------------------------------
153  // local load hit
154  // ---------------------------------------------------
// Everything down to the matching brace, including the local and remote
// DRAM checks, is only evaluated when the HIT bit is set.
155  if ( lvl & P(LVL, HIT) ) {
156 
157  if (lvl & P(LVL, UNC)) data_mem.ld_uncache++; // uncached memory
158  if (lvl & P(LVL, IO)) data_mem.ld_io++; // I/O memory
159  if (lvl & P(LVL, LFB)) data_mem.ld_fbhit++; // line fill buffer
160  if (lvl & P(LVL, L1 )) data_mem.ld_l1hit++; // level 1 cache
161  if (lvl & P(LVL, L2 )) data_mem.ld_l2hit++; // level 2 cache
162  if (lvl & P(LVL, L3 )) { // level 3 cache
163  if (snoop & P(SNOOP, HITM))
164  data_mem.lcl_hitm++; // loads with local HITM
165  else
166  data_mem.ld_llchit++; // loads that hit LLC
167  }
168 
169  if (lvl & P(LVL, LOC_RAM)) {
170  data_mem.lcl_dram++; // loads miss to local DRAM
171  if (snoop & P(SNOOP, HIT))
172  data_mem.ld_shared++; // shared loads, rmt/lcl DRAM - snp hit
173  else
174  data_mem.ld_excl++; // exclusive loads, rmt/lcl DRAM - snp none/miss
175  }
176 
177  if ((lvl & P(LVL, REM_RAM1)) ||
178  (lvl & P(LVL, REM_RAM2))) {
179 
180  data_mem.rmt_dram++; // loads miss to remote DRAM
181  if (snoop & P(SNOOP, HIT))
182  data_mem.ld_shared++;
183  else
184  data_mem.ld_excl++;
185  }
186  }
187 
188  // ---------------------------------------------------
189  // remote load hit
190  // ---------------------------------------------------
// Unlike the DRAM checks above, this test does not depend on the HIT level
// bit. The snoop HIT bit is tested first, so a sample carrying both HIT and
// HITM is counted as rmt_hit only.
191  if ((lvl & P(LVL, REM_CCE1)) ||
192  (lvl & P(LVL, REM_CCE2))) {
193  if (snoop & P(SNOOP, HIT)) {
194  data_mem.rmt_hit++;
195  }
196  else if (snoop & P(SNOOP, HITM)) {
197  data_mem.rmt_hitm++;
198  data_mem.tot_hitm++;
199  }
200  }
201 
202  // ---------------------------------------------------
203  // llc miss
204  // ---------------------------------------------------
// Each of the four counters is 0 or 1 for a single sample, and rmt_hit and
// rmt_hitm are mutually exclusive, so llc_miss ranges from 0 to 3.
205  u64 llc_miss = data_mem.lcl_dram + data_mem.rmt_dram +
206  data_mem.rmt_hit + data_mem.rmt_hitm ;
207  if (llc_miss > 0) {
208  value.i = llc_miss;
209  datacentric_record_metric(metric.memllc_miss, node, datacentric_node, value);
210  }
211 
212  // ---------------------------------------------------
213  // load miss
214  // ---------------------------------------------------
215  if ((lvl & P(LVL, MISS))) {
// reset to 1: 'value' may still hold llc_miss from the block above
216  value.i = 1;
217  datacentric_record_metric(metric.memload_miss, node, datacentric_node, value);
218  }
219 }
220 
/*
 * Records the metrics for one sampled memory store, decoded from the
 * mem_lvl field of 'data_src':
 *   - metric.memstore         : always incremented by 1
 *   - metric.memstore_l1_hit  : incremented by 1 when both HIT and L1 are set
 *   - metric.memstore_l1_miss : incremented by 1 when both MISS and L1 are set
 *
 * NOTE(review): listing line 222 (function name and first two parameters)
 * is absent from this extract; the body uses 'node' and 'datacentric_node',
 * which presumably are those two parameters -- confirm against the original.
 */
221 static void
223  perf_mem_data_src_t *data_src)
224 {
// Per-call scratch counters; st_uncache, st_l1hit and st_l1miss are
// incremented below but never read inside this function.
225  struct perf_data_src_mem_lvl_s data_mem;
226 
227  memset(&data_mem, 0, sizeof(struct perf_data_src_mem_lvl_s));
228 
// 'value' stays at 1 for every metric recorded in this function
229  cct_metric_data_t value = (cct_metric_data_t){.i = 1};
230  datacentric_record_metric(metric.memstore, node, datacentric_node, value);
231 
232  u64 lvl = data_src->mem_lvl;
233 
234  if (lvl & P(LVL, HIT)) {
235  if (lvl & P(LVL, UNC)) data_mem.st_uncache++;
236  if (lvl & P(LVL, L1 )) {
237  data_mem.st_l1hit++;
238  datacentric_record_metric(metric.memstore_l1_hit, node, datacentric_node,
239  value);
240  }
241  }
// The outer 'if' has no braces; its body is the single inner 'if' statement.
242  if (lvl & P(LVL, MISS))
243  if (lvl & P(LVL, L1)) {
244  data_mem.st_l1miss++;
245  datacentric_record_metric(metric.memstore_l1_miss, node, datacentric_node,
246  value);
247  }
248 }
249 
/*
 * Assigns display names to the derived memory metrics held in the
 * file-scope 'metric' structure (store, store L1 hit/miss, load miss,
 * LLC miss).
 *
 * NOTE(review): several listing lines are absent from this extract: 251
 * (the function name line), 256-257 (the whole "Memory load metric"
 * section) and the line before each hpcrun_set_metric_info call (262, 265,
 * 268, 274, 280). Those lines presumably allocate the metric ids, e.g. via
 * hpcrun_new_metric() -- confirm against the original file.
 */
250 static void
252 {
253  // ------------------------------------------
254  // Memory load metric
255  // ------------------------------------------
258 
259  // ------------------------------------------
260  // Memory store metric
261  // ------------------------------------------
263  hpcrun_set_metric_info(metric.memstore, "MEM-Store");
264 
266  hpcrun_set_metric_info(metric.memstore_l1_hit, "MEM-Store-L1hit");
267 
269  hpcrun_set_metric_info(metric.memstore_l1_miss, "MEM-Store-L1miss");
270 
271  // ------------------------------------------
272  // Memory load miss metric
273  // ------------------------------------------
275  hpcrun_set_metric_info(metric.memload_miss, "MEM-Load-miss");
276 
277  // ------------------------------------------
278  // Memory llc load metric
279  // ------------------------------------------
281  hpcrun_set_metric_info(metric.memllc_miss, "MEM-LLC-miss");
282 }
283 
284 
285 // called when a sample occurs
// Decodes the data_src word of the sample and dispatches to the load and/or
// store recorder. Does nothing when data_src is 0.
//
// NOTE(review): listing line 287 (function name and the 'mmap_data'
// parameter) is absent from this extract.
286 void
288  cct_node_t *datacentric_node,
289  cct_node_t *sample_node)
290 {
291  if (mmap_data->data_src == 0) {
292  return ;
293  }
294 
295  // ---------------------------------------------------------
296  // data source information exists:
297  // - add metrics about load and store of the memory
298  // ---------------------------------------------------------
299 
// Casting a scalar to a union type is a GNU C extension, not ISO C.
300  perf_mem_data_src_t data_src = (perf_mem_data_src_t)mmap_data->data_src;
301 
// The two tests are independent: a sample with both LOAD and STORE bits set
// is recorded by both functions.
//
// NOTE(review): the calls pass (datacentric_node, sample_node), whereas the
// recorder bodies name their node arguments 'node' and 'datacentric_node'.
// The order looks swapped relative to those names, but
// datacentric_record_metric applies the same increment to both nodes, so the
// recorded values do not depend on it -- confirm the intent.
302  if (data_src.mem_op & P(OP, LOAD)) {
303  datacentric_record_load_mem( datacentric_node, sample_node, &data_src );
304  }
305  if (data_src.mem_op & P(OP, STORE)) {
306  datacentric_record_store_mem( datacentric_node, sample_node, &data_src );
307  }
308 }
309 
310 
311 // called to create events by the main datacentric plugin
// Walks the pmu_events table and, for every entry whose 'cpu' field matches
// the value returned by get_cpuid(), builds a perf_event_attr, creates a
// metric for it and registers the event through the 'store_event_and_info'
// method of 'self'.
//
// Returns num_pmu, the number of table entries that matched the CPU type and
// whose attribute lookup (pfmu_getEventAttribute) succeeded.
//
// NOTE(review): listing line 313 (function name and the 'self' / 'event'
// parameters) is absent from this extract.
312 int
314  struct event_threshold_s *period)
315 {
316  int size = sizeof(pmu_events)/sizeof(struct pmu_config_s);
// PERF_SAMPLE_DATA_SRC and PERF_SAMPLE_WEIGHT are only requested when the
// code is compiled against kernel headers >= 3.10.
317  u64 sample_type = PERF_SAMPLE_CALLCHAIN
318  | PERF_SAMPLE_PERIOD | PERF_SAMPLE_TIME
319  | PERF_SAMPLE_IP | PERF_SAMPLE_ADDR
320  | PERF_SAMPLE_CPU | PERF_SAMPLE_TID
321 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
322  | PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_WEIGHT
323 #endif
324  ;
325 
326  int num_pmu = 0;
327  cpu_type_t cpu_type = get_cpuid();
328 
329  for(int i=0; i<size ; i++) {
330 
// skip events defined for a different CPU type
331  if (pmu_events[i].cpu != cpu_type)
332  continue;
333 
334  struct perf_event_attr event_attr;
335  memset(&event_attr, 0, sizeof(event_attr));
336 
// a failed lookup is reported and the event skipped; it is not fatal
337  if (pfmu_getEventAttribute(pmu_events[i].event, &event_attr) < 0) {
338  EMSG("Cannot initialize event %s", pmu_events[i].event);
339  continue;
340  }
341 
342  //set_default_perf_event_attr(event_attr, period);
343  bool is_period = period->threshold_type == PERIOD;
344  perf_util_attr_init(pmu_events[i].event, &event_attr, is_period, period->threshold_num, sample_type);
345  perf_skid_set_max_precise_ip(&event_attr);
346 
347  num_pmu++;
348 
349  // ------------------------------------------
350  // create metric data centric
351  // ------------------------------------------
// NOTE(review): this local 'metric' (an int metric id) shadows the
// file-scope 'static struct perf_mem_metric metric' for the rest of the
// loop body.
352  int metric = hpcrun_new_metric();
// NOTE(review): listing lines 353 and 355 are absent from this extract; the
// line below is the middle of a call whose name and remaining arguments are
// not visible -- confirm against the original file.
354  metric, pmu_events[i].event,
356 
357  // ------------------------------------------
358  // Register the event to the global list
359  // ------------------------------------------
// NOTE(review): listing line 360, which declares and presumably allocates
// 'einfo', is absent from this extract.
361  einfo->metric_custom = event;
362  memcpy(&einfo->attr, &event_attr, sizeof(struct perf_event_attr));
363 
364  METHOD_CALL(self, store_event_and_info,
365  event_attr.config, /* event id */
366  1, /* threshold */
367  metric, /* metric id */
368  einfo /* info pointer */ );
369 
370  }
// NOTE(review): listing line 372, the body of this 'if', is absent from
// this extract (presumably a call to create_metric_addons()). As printed,
// the 'if' would govern the 'return' below -- confirm against the original.
371  if (num_pmu > 0)
373 
374  return num_pmu;
375 }
#define OP(x)
static void create_metric_addons()
Definition: pmu_x86.c:251
metric_aux_info_t * perf_event_info
static struct perf_mem_metric metric
Definition: pmu_x86.c:114
int memstore_l1_miss
Definition: pmu_x86.c:104
int perf_skid_set_max_precise_ip(struct perf_event_attr *attr)
Definition: perf_skid.c:176
struct perf_event_attr attr
Definition: perf-util.h:138
cct_node_t * node
Definition: cct.c:128
struct event_custom_s * metric_custom
Definition: perf-util.h:139
static void cct_metric_data_increment(int metric_id, cct_node_t *x, cct_metric_data_t incr)
Definition: cct2metrics.h:86
cpu_type_t
Definition: cpuid.h:62
struct pmu_config_s pmu_events[]
Definition: pmu_x86.h:63
metric_desc_t * hpcrun_set_metric_info_and_period(int metric_id, const char *name, MetricFlags_ValFmt_t valFmt, size_t period, metric_desc_properties_t prop)
Definition: metrics.c:411
uint64_t num_samples
Definition: hpcfmt.h:349
static void datacentric_record_metric(int metric_id, cct_node_t *cct_node, cct_node_t *cct_datacentric, cct_metric_data_t value)
Definition: pmu_x86.c:124
#define EMSG
Definition: messages.h:70
#define P(a, b)
Definition: pmu_x86.c:120
cpu_type_t get_cpuid()
Definition: cpuid.c:52
int memload_miss
Definition: pmu_x86.c:100
int memstore_l1_hit
Definition: pmu_x86.c:103
__u32 u32
enum threshold_e threshold_type
Definition: perf-util.h:75
core_profile_trace_data_t core_profile_trace_data
Definition: thread_data.h:168
void * hpcrun_malloc(size_t size)
Definition: mem.c:275
static void datacentric_record_load_mem(cct_node_t *node, cct_node_t *datacentric_node, perf_mem_data_src_t *data_src)
Definition: pmu_x86.c:136
int perf_util_attr_init(const char *event_name, struct perf_event_attr *attr, bool usePeriod, u64 threshold, u64 sampletype)
Definition: perf-util.c:403
__u64 u64
int datacentric_hw_register(sample_source_t *self, event_custom_t *event, struct event_threshold_s *period)
Definition: pmu_x86.c:313
void datacentric_hw_handler(perf_mmap_data_t *mmap_data, cct_node_t *datacentric_node, cct_node_t *sample_node)
Definition: pmu_x86.c:287
#define METHOD_CALL(obj, meth,...)
Definition: simple_oo.h:87
static __thread u32 cpu
Definition: cct.c:96
hpcrun_metricVal_t cct_metric_data_t
Definition: metrics.h:73
int hpcrun_new_metric(void)
Definition: metrics.c:333
metric_desc_t * hpcrun_set_metric_info(int metric_id, const char *name)
Definition: metrics.c:423
union perf_mem_data_src perf_mem_data_src_t
Definition: pmu_x86.c:66
static long period
Definition: itimer.c:194
thread_data_t *(* hpcrun_get_thread_data)(void)
Definition: thread_data.c:168
int pfmu_getEventAttribute(const char *eventname, struct perf_event_attr *event_attr)
#define metric_property_none
Definition: hpcrun-fmt.h:202
static void datacentric_record_store_mem(cct_node_t *node, cct_node_t *datacentric_node, perf_mem_data_src_t *data_src)
Definition: pmu_x86.c:222