58 #include <hpcrun/unresolved.h> 60 #include "ompt-callstack.h" 61 #include "ompt-interface.h" 62 #include "ompt-state-placeholders.h" 63 #include "ompt-defer.h" 64 #include "ompt-region.h" 65 #include "ompt-task-map.h" 67 #if defined(HOST_CPU_PPC) 68 #include "ppc64-gnu-omp.h" 69 #elif defined(HOST_CPU_x86) || defined(HOST_CPU_x86_64) 70 #include "x86-gnu-omp.h" 72 #error "invalid architecture type" 83 #define elide_debug_dump(t,i,o,r) if (ompt_callstack_debug) stack_dump(t,i,o,r) 85 #define elide_debug_dump(t,i,o,r) 112 EMSG(
"-----%s start", tag);
113 for (
frame_t* x = inner; x <= outer; ++x) {
118 const char* lm_name = (lm) ? lm->
name :
"(null)";
120 EMSG(
"ip = %p (%p), load module = %s", ip, x->ip_norm.lm_ip, lm_name);
122 EMSG(
"-----%s end", tag);
123 EMSG(
"<0x%lx>\n", region_id);
134 uint64_t uaddr = (uint64_t) addr;
135 uint64_t ulower = (uint64_t) lower;
136 uint64_t uupper = (uint64_t) upper;
138 return ((ulower <= uaddr) & (uaddr <= uupper));
146 return hpcrun_ompt_get_state(&wait_id);
179 case ompt_state_wait_barrier:
180 case ompt_state_wait_barrier_implicit:
181 case ompt_state_wait_barrier_explicit:
185 case ompt_state_idle:
196 frame_t *bt_outer_at_entry = *bt_outer;
202 ompt_frame_t *frame0 = hpcrun_ompt_get_task_frame(i);
204 TD_GET(omp_task_context) = 0;
216 goto clip_base_frames;
219 while ((frame0->reenter_runtime_frame == 0) && (frame0->exit_runtime_frame == 0)) {
222 frame0 = hpcrun_ompt_get_task_frame(++i);
226 goto clip_base_frames;
230 if (frame0->exit_runtime_frame &&
231 (((uint64_t) frame0->exit_runtime_frame) < ((uint64_t) (*bt_inner)->cursor.sp))) {
235 frame0 = hpcrun_ompt_get_task_frame(++i);
240 goto clip_base_frames;
243 if (frame0->reenter_runtime_frame) {
247 for (it = *bt_inner; it <= *bt_outer; it++) {
248 if ((uint64_t)(it->
cursor.
sp) > (uint64_t)frame0->reenter_runtime_frame) {
260 goto clip_base_frames;
268 ompt_frame_t *frame1;
270 frame0 = hpcrun_ompt_get_task_frame(i);
274 ompt_task_id_t
tid = hpcrun_ompt_get_task_id(i);
275 cct_node_t *omp_task_context = task_map_lookup(tid);
277 void *low_sp = (*bt_inner)->cursor.sp;
278 void *high_sp = (*bt_outer)->cursor.sp;
294 for (; it <= *bt_outer; it++) {
295 if((uint64_t)(it->
cursor.
sp) > (uint64_t)(frame0->exit_runtime_frame)) {
302 if (exit0_flag && omp_task_context) {
303 TD_GET(omp_task_context) = omp_task_context;
304 *bt_outer = exit0 - 1;
308 frame1 = hpcrun_ompt_get_task_frame(++i);
314 for (; it <= *bt_outer; it++) {
315 if((uint64_t)(it->
cursor.
sp) > (uint64_t)(frame1->reenter_runtime_frame)) {
322 if (exit0 && reenter1) {
329 memmove(*bt_inner+(reenter1-exit0+1), *bt_inner,
330 (exit0 - *bt_inner)*
sizeof(
frame_t));
331 *bt_inner = *bt_inner + (reenter1 - exit0 + 1);
334 memmove(*bt_inner+(reenter1-exit0), *bt_inner,
335 (exit0 - *bt_inner)*
sizeof(
frame_t));
336 *bt_inner = *bt_inner + (reenter1 - exit0);
338 exit0 = reenter1 =
NULL;
339 }
else if (exit0 && !reenter1) {
342 *bt_outer = exit0 - 1;
347 if (*bt_outer != bt_outer_at_entry) {
352 bt->
trace_pc = (*bt_inner)->cursor.pc_unnorm;
359 int master =
TD_GET(master);
361 set_frame(*bt_outer, &ompt_placeholders.ompt_idle);
362 *bt_inner = *bt_outer;
365 bt->
trace_pc = (*bt_inner)->cursor.pc_unnorm;
370 uint64_t idle_frame = (uint64_t) hpcrun_ompt_get_idle_frame();
374 for (it = *bt_inner; it <= *bt_outer; it++) {
375 if ((uint64_t)(it->
cursor.
sp) >= idle_frame) {
386 elide_debug_dump(
"ELIDED INNERMOST FRAMES", *bt_inner, *bt_outer, region_id);
395 return (td->outer_region_id == region_id && td->outer_region_context) ?
396 td->outer_region_context :
403 td->outer_region_id = region_id;
404 td->outer_region_context = result;
415 if (IS_UNRESOLVED_ROOT(addr)) {
418 }
else if (IS_PARTIAL_ROOT(addr)) {
436 if (result)
return result;
438 cct_node_t *t0_path = hpcrun_region_lookup(region_id);
452 ompt_context_type_t ctype,
461 TMSG(DEFER_CTXT,
"unwind the callstack for region 0x%lx", region_id);
463 if (node && adjust_callsite) {
470 uintptr_t master_outlined_fn_return_addr;
474 if (ctype == ompt_context_begin) {
475 void *ip = hpcrun_denormalize_ip(&(n->
ip_norm));
476 uint64_t offset = offset_to_pc_after_next_call(ip);
477 master_outlined_fn_return_addr = lm_ip + offset;
479 uint64_t offset = length_of_call_instruction();
480 master_outlined_fn_return_addr = lm_ip - offset;
486 (n_parent, &(
ADDR2(lm_id, master_outlined_fn_return_addr)));
499 ++levels_to_skip, adjust_callsite);
512 int master =
TD_GET(master);
514 if (need_defer_cntxt()) {
518 uint64_t region_id =
TD_GET(region_id);
533 if (omp_task_context) {
538 if((is_partial_resolve((
cct_node_t *)omp_task_context) > 0)) {
550 uint64_t region_id =
TD_GET(region_id);
565 &(
ADDR2(UNRESOLVED, region_id)));
566 if (prefix) cct_cursor =
prefix;
580 ompt_finalizer.
next = 0;
static ompt_state_t check_state()
cct_node_t * region_root(cct_node_t *_node)
void cct_backtrace_finalize_register(cct_backtrace_finalize_entry_t *e)
static int ompt_callstack_debug
static int interval_contains(void *lower, void *upper, void *addr)
sample_val_t hpcrun_sample_callpath(void *context, int metricId, hpcrun_metricVal_t metricIncr, int skipInner, int isSync, sampling_info_t *data)
void cct_cursor_finalize_register(cct_cursor_finalize_fn fn)
cct_node_t * ompt_cct_cursor_finalize(cct_bundle_t *cct, backtrace_info_t *bt, cct_node_t *cct_cursor)
ip_normalized_t the_function
static int ompt_eager_context
cct_node_t * ompt_parallel_begin_context(ompt_parallel_id_t region_id, int levels_to_skip, int adjust_callsite)
static void stack_dump(char *tag, frame_t *inner, frame_t *outer, uint64_t region_id)
cct_node_t * ompt_region_context(uint64_t region_id, ompt_context_type_t ctype, int levels_to_skip, int adjust_callsite)
#define hpcrun_get_thread_epoch()
cct_node_t * hpcrun_cct_parent(cct_node_t *x)
cct_node_t * hpcrun_cct_insert_addr(cct_node_t *node, cct_addr_t *frm)
static void ompt_elide_runtime_frame(backtrace_info_t *bt, uint64_t region_id, int isSync)
void ompt_callstack_register_handlers(void)
int hpcrun_trace_isactive()
static cct_backtrace_finalize_entry_t ompt_finalizer
load_module_t * hpcrun_loadmap_findById(uint16_t id)
#define elide_debug_dump(t, i, o, r)
hpcrun_unw_cursor_t cursor
static cct_node_t * lookup_region_id(uint64_t region_id)
static void ompt_backtrace_finalize(backtrace_info_t *bt, int isSync)
cct_backtrace_finalize_fn fn
static cct_node_t * memoized_context_get(thread_data_t *td, uint64_t region_id)
cct_node_t * hpcrun_cct_insert_path_return_leaf(cct_node_t *path, cct_node_t *root)
static void set_frame(frame_t *f, ompt_placeholder_t *ph)
static void collapse_callstack(backtrace_info_t *bt, ompt_placeholder_t *placeholder)
<!-- ********************************************************************--> n<!-- HPCToolkit Experiment DTD --> n<!-- Version 2.1 --> n<!-- ********************************************************************--> n<!ELEMENT HPCToolkitExperiment(Header,(SecCallPathProfile|SecFlatProfile) *)> n<!ATTLIST HPCToolkitExperiment\n version CDATA #REQUIRED > n n<!-- ******************************************************************--> n n<!-- Info/NV:flexible name-value pairs:(n) ame;(t) ype;(v) alue --> n<!ELEMENT Info(NV *)> n<!ATTLIST Info\n n CDATA #IMPLIED > n<!ELEMENT NV EMPTY > n<!ATTLIST NV\n n CDATA #REQUIRED\n t CDATA #IMPLIED\n v CDATA #REQUIRED > n n<!-- ******************************************************************--> n<!-- Header --> n<!-- ******************************************************************--> n<!ELEMENT Header(Info *)> n<!ATTLIST Header\n n CDATA #REQUIRED > n n<!-- ******************************************************************--> n<!-- Section Header --> n<!-- ******************************************************************--> n<!ELEMENT SecHeader(MetricTable?, MetricDBTable?, TraceDBTable?, LoadModuleTable?, FileTable?, ProcedureTable?, Info *)> n n<!-- MetricTable:--> n<!ELEMENT MetricTable(Metric) * > n n<!-- Metric:(i) d;(n) ame --> n<!--(v) alue-type:transient type of values --> n<!--(t) ype:persistent type of metric --> n<!-- fmt:format;show;--> n<!ELEMENT Metric(MetricFormula *, Info?)> n<!ATTLIST Metric\n i CDATA #REQUIRED\n n CDATA #REQUIRED\n es CDATA #IMPLIED\n em CDATA #IMPLIED\n ep CDATA #IMPLIED\n v(raw|final|derived-incr|derived) \"raw\\ t (inclusive|exclusive|nil) \nil\\ partner CDATA #IMPLIED\ fmt CDATA #IMPLIED\ show (1|0) \1\\ show-percent (1|0) \1> n n<!-- MetricFormula represents derived metrics: (t)ype; (frm): formula --> n<!ELEMENT MetricFormula (Info?)> n<!ATTLIST MetricFormula\ t (combine|finalize) \finalize\\ i CDATA #IMPLIED\ frm CDATA #REQUIRED> n n<!-- Metric data, used in sections: (n)ame [from Metric]; (v)alue --> n<!ELEMENT M EMPTY> n<!ATTLIST M\ n CDATA #REQUIRED\ v CDATA #REQUIRED> n n<!-- MetricDBTable: --> n<!ELEMENT MetricDBTable (MetricDB)*> n n<!-- MetricDB: (i)d; (n)ame --> n<!-- (t)ype: persistent type of metric --> n<!-- db-glob: file glob describing files in metric db --> n<!-- db-id: id within metric db --> n<!-- db-num-metrics: number of metrics in db --> n<!-- db-header-sz: size (in bytes) of a db file header --> n<!ELEMENT MetricDB EMPTY> n<!ATTLIST MetricDB\ i CDATA #REQUIRED\ n CDATA #REQUIRED\ t (inclusive|exclusive|nil) \nil\\ partner CDATA #IMPLIED\ db-glob CDATA #IMPLIED\ db-id CDATA #IMPLIED\ db-num-metrics CDATA #IMPLIED\ db-header-sz CDATA #IMPLIED> n n<!-- TraceDBTable: --> n<!ELEMENT TraceDBTable (TraceDB)> n n<!-- TraceDB: (i)d --> n<!-- db-min-time: min beginning time stamp (global) --> n<!-- db-max-time: max ending time stamp (global) --> n<!ELEMENT TraceDB EMPTY> n<!ATTLIST TraceDB\ i CDATA #REQUIRED\ db-glob CDATA #IMPLIED\ db-min-time CDATA #IMPLIED\ db-max-time CDATA #IMPLIED\ db-header-sz CDATA #IMPLIED> n n<!-- LoadModuleTable assigns a short name to a load module --> n<!ELEMENT LoadModuleTable (LoadModule)*> n n<!ELEMENT LoadModule (Info?)> n<!ATTLIST LoadModule\ i CDATA #REQUIRED\ n CDATA #REQUIRED> n n<!-- FileTable assigns a short name to a file --> n<!ELEMENT FileTable (File)*> n n<!ELEMENT File (Info?)> n<!ATTLIST File\ i CDATA #REQUIRED\ n CDATA #REQUIRED> n n<!-- ProcedureTable assigns a short name to a procedure --> n<!ELEMENT ProcedureTable (Procedure)*> n n<!-- Info/NV: flexible name-value pairs: (n)ame; (t)ype; (v)alue --> n<!-- f: family of the procedure (fake, root, ...)--> n<!ELEMENT Procedure (Info?)> n<!ATTLIST Procedure\ i CDATA #REQUIRED\ n CDATA #REQUIRED\ f CDATA #IMPLIED> n n<!-- ****************************************************************** --> n<!-- Section: Call path profile --> n<!-- ****************************************************************** --> n<!ELEMENT SecCallPathProfile (SecHeader, SecCallPathProfileData)> n<!ATTLIST SecCallPathProfile\ i CDATA #REQUIRED\ n CDATA #REQUIRED> n n<!ELEMENT SecCallPathProfileData (PF|M)*> n<!-- Procedure frame --> n<!-- (i)d: unique identifier for cross referencing --> n<!-- (s)tatic scope id --> n<!-- (n)ame: a string or an id in ProcedureTable --> n<!-- (lm) load module: a string or an id in LoadModuleTable --> n<!-- (f)ile name: a string or an id in LoadModuleTable --> n<!-- (l)ine range: \beg-end\ (inclusive range) --> n<!-- (a)lien: whether frame is alien to enclosing P --> n<!-- (str)uct: hpcstruct node id --> n<!-- (t)ype: hpcrun node type: memory access, variable declaration, ... --> n<!-- (v)ma-range-set: \{[beg-end), [beg-end)...}\ --> n<!ELEMENT PF (PF|Pr|L|C|S|M)*> n<!ATTLIST PF\ i CDATA #IMPLIED\ s CDATA #IMPLIED\ n CDATA #REQUIRED\ lm CDATA #IMPLIED\ f CDATA #IMPLIED\ l CDATA #IMPLIED\ str CDATA #IMPLIED\ v CDATA #IMPLIED> n<!-- Procedure (static): GOAL: replace with 'P' --> n<!ELEMENT Pr (Pr|L|C|S|M)*> n<!ATTLIST Pr\ i CDATA #IMPLIED\ s CDATA #IMPLIED\ n CDATA #REQUIRED\ lm CDATA #IMPLIED\ f CDATA #IMPLIED\ l CDATA #IMPLIED\ a (1|0) \0\\ str CDATA #IMPLIED\ v CDATA #IMPLIED> n<!-- Callsite (a special StatementRange) --> n<!ELEMENT C (PF|M)*> n<!ATTLIST C\ i CDATA #IMPLIED\ s CDATA #IMPLIED\ l CDATA #IMPLIED\ str CDATA #IMPLIED\ v CDATA #IMPLIED> n n<!-- ****************************************************************** --> n<!-- Section: Flat profile --> n<!-- ****************************************************************** --> n<!ELEMENT SecFlatProfile (SecHeader, SecFlatProfileData)> n<!ATTLIST SecFlatProfile\ i CDATA #REQUIRED\ n CDATA #REQUIRED> n n<!ELEMENT SecFlatProfileData (LM|M)*> n<!-- Load module: (i)d; (n)ame; (v)ma-range-set --> n<!ELEMENT LM (F|P|M)*> n<!ATTLIST LM\ i CDATA #IMPLIED\ n CDATA #REQUIRED\ v CDATA #IMPLIED> n<!-- File --> n<!ELEMENT F (P|L|S|M)*> n<!ATTLIST F\ i CDATA #IMPLIED\ n CDATA #REQUIRED> n<!-- Procedure (Note 1) --> n<!ELEMENT P (P|A|L|S|C|M)*> n<!ATTLIST P\ i CDATA #IMPLIED\ n CDATA #REQUIRED\ l CDATA #IMPLIED\ str CDATA #IMPLIED\ v CDATA #IMPLIED> n<!-- Alien (Note 1) --> n<!ELEMENT A (A|L|S|C|M)*> n<!ATTLIST A\ i CDATA #IMPLIED\ f CDATA #IMPLIED\ n CDATA #IMPLIED\ l CDATA #IMPLIED\ str CDATA #IMPLIED\ v CDATA #IMPLIED> n<!-- Loop (Note 1,2) --> n<!ELEMENT L (A|Pr|L|S|C|M)*> n<!ATTLIST L\ i CDATA #IMPLIED\ s CDATA #IMPLIED\ l CDATA #IMPLIED\ f CDATA #IMPLIED\ str CDATA #IMPLIED\ v CDATA #IMPLIED> n<!-- Statement (Note 2) --> n<!-- (it): trace record identifier --> n<!ELEMENT S (S|M)*> n<!ATTLIST S\ i CDATA #IMPLIED\ it CDATA #IMPLIED\ s CDATA #IMPLIED\ l CDATA #IMPLIED\ str CDATA #IMPLIED\ v CDATA #IMPLIED> n<!-- Note 1: Contained Cs may not contain PFs --> n<!-- Note 2: The 's' attribute is not used for flat profiles --> n
int hpcrun_unw_get_ip_unnorm_reg(hpcrun_unw_cursor_t *c, void **reg_value)
cct_node_t * hpcrun_cct_find_addr(cct_node_t *cct, cct_addr_t *addr)
static void memoized_context_set(thread_data_t *td, uint64_t region_id, cct_node_t *result)
struct cct_backtrace_finalize_entry_s * next
thread_data_t *(* hpcrun_get_thread_data)(void)
cct_addr_t * hpcrun_cct_addr(cct_node_t *node)