HPCToolkit
pthread-blame.c
Go to the documentation of this file.
1 // -*-Mode: C++;-*- // technically C99
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // $HeadURL: $
6 // $Id$
7 //
8 // --------------------------------------------------------------------------
9 // Part of HPCToolkit (hpctoolkit.org)
10 //
11 // Information about sources of support for research and development of
12 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
13 // --------------------------------------------------------------------------
14 //
15 // Copyright ((c)) 2002-2019, Rice University
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions are
20 // met:
21 //
22 // * Redistributions of source code must retain the above copyright
23 // notice, this list of conditions and the following disclaimer.
24 //
25 // * Redistributions in binary form must reproduce the above copyright
26 // notice, this list of conditions and the following disclaimer in the
27 // documentation and/or other materials provided with the distribution.
28 //
29 // * Neither the name of Rice University (RICE) nor the names of its
30 // contributors may be used to endorse or promote products derived from
31 // this software without specific prior written permission.
32 //
33 // This software is provided by RICE and contributors "as is" and any
34 // express or implied warranties, including, but not limited to, the
35 // implied warranties of merchantability and fitness for a particular
36 // purpose are disclaimed. In no event shall RICE or contributors be
37 // liable for any direct, indirect, incidental, special, exemplary, or
38 // consequential damages (including, but not limited to, procurement of
39 // substitute goods or services; loss of use, data, or profits; or
40 // business interruption) however caused and on any theory of liability,
41 // whether in contract, strict liability, or tort (including negligence
42 // or otherwise) arising in any way out of the use of this software, even
43 // if advised of the possibility of such damage.
44 //
45 // ******************************************************* EndRiceCopyright *
46 
47 //
48 // directed blame shifting for locks, critical sections, ...
49 //
50 
51 /******************************************************************************
52  * system includes
53  *****************************************************************************/
54 
55 #include <ucontext.h>
56 #include <pthread.h>
57 #include <string.h>
58 #include <dlfcn.h>
59 #include <stdbool.h>
60 
61 
62 
63 /******************************************************************************
64  * local includes
65  *****************************************************************************/
66 
67 #include "simple_oo.h"
68 #include "sample_source_obj.h"
69 #include "common.h"
70 #include "pthread-blame.h"
73 
74 #include <hpcrun/cct2metrics.h>
75 #include <hpcrun/metrics.h>
76 
77 #include <hpcrun/hpctoolkit.h>
78 #include <hpcrun/safe-sampling.h>
79 #include <hpcrun/sample_event.h>
80 #include <hpcrun/thread_data.h>
81 #include <hpcrun/cct/cct.h>
82 #include <messages/messages.h>
83 
84 
85 // *****************************************************************************
86 // macros
87 // *****************************************************************************
88 
89 #define SKIP_ONE_FRAME 1
90 
91 
92 
93 // *****************************************************************************
94 // type definitions
95 // *****************************************************************************
96 
// What the calling thread is doing with respect to directed blame.
// NOTE(review): the enumerator lines were absent here; the three names are
// the ones the rest of this file uses, but their relative order is assumed.
// Confirm against the upstream source.
typedef enum {
  Running,   // no blame target; state assigned when directed blaming ends
  Spinning,  // state assigned when a spin-wait blame shift starts
  Blocked,   // state assigned when a blocked-wait blame shift starts
} state_t;
102 
103 
104 typedef struct {
105  uint64_t target;
107 } blame_t;
108 
109 
110 
111 // *****************************************************************************
112 // static local variables
113 // *****************************************************************************
114 
115 static int blame_metric_id = -1;
116 static int blockwait_metric_id = -1;
117 static int spinwait_metric_id = -1;
118 
120 
121 static bool lockwait_enabled = false;
122 
124 
125 static bool metric_id_set = false;
126 
// One entry of a debugging trace.
// NOTE(review): nothing visible in this file reads or writes these records;
// the field meanings below are taken from the names only.
struct dbg_t {
  struct timeval tv;  // time stamp of the entry
  char l[4];          // "add" or "get"
  uint32_t amt;       // presumably the amount of blame -- confirm
  uint64_t obj;       // presumably the object the blame refers to -- confirm
};
typedef struct dbg_t dbg_t;
133 
134 typedef struct dbg_tr_t {
135  unsigned n_elts;
136  dbg_t trace[3000];
137 } dbg_tr_t;
138 
139 
140 
141 // *****************************************************************************
142 // thread local variables
143 // *****************************************************************************
144 static __thread blame_t pthread_blame = {0, Running};
145 
146 
147 
148 /***************************************************************************
149  * private operations
150  ***************************************************************************/
151 
152 static inline
153 uint64_t
155 {
156  return pthread_blame.target;
157 }
158 
159 
160 static inline
161 char*
163 {
164  if (s == Running) return "Running";
165  if (s == Spinning) return "Spinning";
166  if (s == Blocked) return "Blocked";
167  return "????";
168 }
169 
170 
171 static inline
172 int
174 {
175  return (metric_id_set) ? blame_metric_id : -1;
176 }
177 
178 /*--------------------------------------------------------------------------
179  | transfer directed blame as appropriate for a sample
180  --------------------------------------------------------------------------*/
181 
182 static inline
183 void
184 add_blame(uint64_t obj, uint32_t value)
185 {
186  if (! pthread_blame_table) {
187  EMSG("Attempted to add pthread blame before initialization");
188  return;
189  }
190  blame_map_add_blame(pthread_blame_table, obj, value);
191 }
192 
193 
194 static inline
195 uint64_t
196 get_blame(uint64_t obj)
197 {
198  if (! pthread_blame_table) {
199  EMSG("Attempted to fetch pthread blame before initialization");
200  return 0;
201  }
202  return blame_map_get_blame(pthread_blame_table, obj);
203 }
204 
205 
206 static void
207 process_directed_blame_for_sample(void* arg, int metric_id, cct_node_t* node, int metric_incr)
208 {
209  TMSG(LOCKWAIT, "Processing directed blame");
210  metric_desc_t* metric_desc = hpcrun_id2metric(metric_id);
211 
212 #ifdef LOCKWAIT_FIX
213  // Only blame shift idleness for time and cycle metrics.
214  if ( ! (metric_desc->properties.time | metric_desc->properties.cycles) )
215  return;
216 #endif // LOCKWAIT_FIX
217 
218  uint32_t metric_value = (uint32_t) (metric_desc->period * metric_incr);
219 
220  uint64_t obj_to_blame = get_blame_target();
221  if(obj_to_blame) {
222  TMSG(LOCKWAIT, "about to add %d to blame object %d", metric_incr, obj_to_blame);
223  add_blame(obj_to_blame, metric_value);
224  // update appropriate wait metric as well
226  TMSG(LOCKWAIT, "about to add %d to %s-waiting in node %d",
227  metric_incr, state2str(pthread_blame.state),
230  hpcrun_metric_std_inc(wait_metric,
231  metrics,
232  (cct_metric_data_t) {.i = metric_incr});
233  }
234 }
235 
236 
237 
238 // ******************************************************************************
239 // public interface to local variables
240 // ******************************************************************************
241 
242 bool
244 {
245  return lockwait_enabled;
246 }
247 
248 //
249 // public blame manipulation functions
250 //
251 void
253 {
254  TMSG(LOCKWAIT, "Start directed blaming using blame structure %x, for obj %d",
255  &pthread_blame, (uintptr_t) obj);
256  pthread_blame = (blame_t) {.target = (uint64_t)(uintptr_t)obj,
257  .state = Blocked};
258 }
259 
260 void
262 {
263  TMSG(LOCKWAIT, "Start directed blaming using blame structure %x, for obj %d",
264  &pthread_blame, (uintptr_t) obj);
265  pthread_blame = (blame_t) {.target = (uint64_t)(uintptr_t)obj,
266  .state = Spinning};
267 }
268 
269 void
271 {
272  pthread_blame = (blame_t) {.target = 0, .state = Running};
273  TMSG(LOCKWAIT, "End directed blaming for blame structure %x",
274  &pthread_blame);
275 }
276 
277 void
279 {
280  uint64_t blame = get_blame((uint64_t) (uintptr_t) obj);
281  TMSG(LOCKWAIT, "Blame obj %d accepting %d units of blame", obj, blame);
282  if (blame && hpctoolkit_sampling_is_active()) {
283  ucontext_t uc;
284  getcontext(&uc);
287  (hpcrun_metricVal_t) {.i=blame},
288  SKIP_ONE_FRAME, 1, NULL);
290  }
291 }
292 
293 /*--------------------------------------------------------------------------
294  | sample source methods
295  --------------------------------------------------------------------------*/
296 
297 static void
299 {
300  self->state = INIT;
301 }
302 
303 
// Sample-source thread_init method: no per-thread setup is performed.
static void
METHOD_FN(thread_init)
{
  // intentionally empty
}
308 
309 
// Sample-source thread_init_action method: nothing to do.
static void
METHOD_FN(thread_init_action)
{
  // intentionally empty
}
314 
315 
316 static void
317 METHOD_FN(start)
318 {
319  lockwait_enabled = true;
320  TMSG(LOCKWAIT, "pthread blame ss STARTED, blame table = %x", pthread_blame_table);
321 }
322 
323 
// Sample-source thread_fini_action method: nothing to do.
static void
METHOD_FN(thread_fini_action)
{
  // intentionally empty
}
328 
329 
// Sample-source method with an empty body.
// NOTE(review): the declarator line was absent here. It is restored as
// `stop` because a `start` method is defined above with no matching `stop`
// anywhere else in this file. Confirm the name against ss_obj.h.
static void
METHOD_FN(stop)
{
}
334 
335 static void
336 METHOD_FN(shutdown)
337 {
338  self->state = UNINIT;
339  lockwait_enabled = false;
340 }
341 
342 
343 static bool
344 METHOD_FN(supports_event,const char *ev_str)
345 {
346  return (strstr(ev_str, PTHREAD_EVENT_NAME) != NULL);
347 }
348 
349 
350 static void
351 METHOD_FN(process_event_list, int lush_metrics)
352 {
354  bs_entry.arg = NULL;
355  bs_entry.next = NULL;
356 
357  blame_shift_register(&bs_entry);
358 
368  metric_id_set = true;
369 
370  // create & initialize blame table (once per process)
371  if (! pthread_blame_table) pthread_blame_table = blame_map_new();
372 }
373 
374 
375 static void
376 METHOD_FN(gen_event_set,int lush_metrics)
377 {
378 }
379 
380 
381 static void
382 METHOD_FN(display_events)
383 {
384  printf("===========================================================================\n");
385  printf("Available directed blame shifting preset events\n");
386  printf("===========================================================================\n");
387  printf("Name\t\tDescription\n");
388  printf("---------------------------------------------------------------------------\n");
389  printf("%s\tShift the blame for waiting for a lock to the lock holder.\n"
390  "\t\tOnly suitable for threaded programs.\n",
392  printf("\n");
393 }
394 
395 
396 /*--------------------------------------------------------------------------
397  | sample source object
398  --------------------------------------------------------------------------*/
399 
400 #include <stdio.h>
401 
402 #define ss_name directed_blame
403 #define ss_cls SS_SOFTWARE
404 #define ss_sort_order 90
405 
406 #include "ss_obj.h"
state_t
Definition: pthread-blame.c:97
static __thread blame_t pthread_blame
metric_set_t * hpcrun_reify_metric_set(cct_node_id_t cct_id)
Definition: cct2metrics.c:115
void pthread_directed_blame_shift_spin_start(void *obj)
static void hpcrun_safe_exit(void)
sample_val_t hpcrun_sample_callpath(void *context, int metricId, hpcrun_metricVal_t metricIncr, int skipInner, int isSync, sampling_info_t *data)
Definition: sample_event.c:160
static int get_blame_metric_id(void)
#define PTHREAD_BLOCKWAIT_METRIC
Definition: pthread-blame.h:57
struct bs_fn_entry_t * next
Definition: blame-shift.h:9
static int blockwait_metric_id
uint32_t amt
cct_node_t * node
Definition: cct.c:128
#define PTHREAD_EVENT_NAME
Definition: pthread-blame.h:54
static bs_fn_entry_t bs_entry
static bool lockwait_enabled
Definition: blame-shift.h:8
static int blame_metric_id
Definition: blame-map.c:109
static uint64_t get_blame_target(void)
metric_desc_t * hpcrun_set_metric_info_and_period(int metric_id, const char *name, MetricFlags_ValFmt_t valFmt, size_t period, metric_desc_properties_t prop)
Definition: metrics.c:411
void pthread_directed_blame_accept(void *obj)
void blame_shift_register(bs_fn_entry_t *entry)
Definition: blame-shift.c:8
void pthread_directed_blame_shift_end(void)
static char * state2str(state_t s)
uint64_t blame_map_get_blame(blame_entry_t table[], uint64_t obj)
Definition: blame-map.c:233
void * arg
Definition: blame-shift.h:11
int trace
Definition: Trace.cpp:57
#define EMSG
Definition: messages.h:70
bool pthread_blame_lockwait_enabled(void)
void blame_map_add_blame(blame_entry_t table[], uint64_t obj, uint32_t metric_value)
Definition: blame-map.c:177
int lush_metrics
Definition: main.c:188
uint64_t target
struct dbg_t dbg_t
uint64_t period
Definition: hpcrun-fmt.h:374
bs_fn_t fn
Definition: blame-shift.h:10
#define SKIP_ONE_FRAME
Definition: pthread-blame.c:89
static void process_directed_blame_for_sample(void *arg, int metric_id, cct_node_t *node, int metric_incr)
struct dbg_tr_t dbg_tr_t
int32_t hpcrun_cct_persistent_id(cct_node_t *x)
Definition: cct.c:363
#define PTHREAD_SPINWAIT_METRIC
Definition: pthread-blame.h:58
static bool metric_id_set
void pthread_directed_blame_shift_blocked_start(void *obj)
#define TMSG(f,...)
Definition: messages.h:93
uint64_t obj
#define NULL
Definition: ElfHelper.cpp:85
int metrics[MAX_EVENTS][MAX_METRICS]
Definition: generic.c:147
unsigned char uc
Definition: amd-xop.c:3
Definition: cct.c:96
int hpctoolkit_sampling_is_active(void)
Definition: start-stop.c:100
int hpcrun_new_metric(void)
Definition: metrics.c:333
#define PTHREAD_BLAME_METRIC
Definition: pthread-blame.h:56
blame_entry_t * blame_map_new(void)
Definition: blame-map.c:158
metric_desc_properties_t properties
Definition: hpcrun-fmt.h:376
state_t state
static uint64_t get_blame(uint64_t obj)
unsigned n_elts
void hpcrun_metric_std_inc(int metric_id, metric_set_t *set, hpcrun_metricVal_t incr)
Definition: metrics.c:534
static int spinwait_metric_id
static void METHOD_FN(init)
static int hpcrun_safe_enter(void)
static blame_entry_t * pthread_blame_table
static void add_blame(uint64_t obj, uint32_t value)
metric_desc_t * hpcrun_id2metric(int metric_id)
Definition: metrics.c:251
#define metric_property_none
Definition: hpcrun-fmt.h:202