Linux Perf
intel-pt.c
1 /*
2  * intel_pt.c: Intel Processor Trace support
3  * Copyright (c) 2013-2015, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12  * more details.
13  *
14  */
15 
16 #include <inttypes.h>
17 #include <stdio.h>
18 #include <stdbool.h>
19 #include <errno.h>
20 #include <linux/kernel.h>
21 #include <linux/types.h>
22 
23 #include "../perf.h"
24 #include "session.h"
25 #include "machine.h"
26 #include "memswap.h"
27 #include "sort.h"
28 #include "tool.h"
29 #include "event.h"
30 #include "evlist.h"
31 #include "evsel.h"
32 #include "map.h"
33 #include "color.h"
34 #include "util.h"
35 #include "thread.h"
36 #include "thread-stack.h"
37 #include "symbol.h"
38 #include "callchain.h"
39 #include "dso.h"
40 #include "debug.h"
41 #include "auxtrace.h"
42 #include "tsc.h"
43 #include "intel-pt.h"
44 #include "config.h"
45 
46 #include "intel-pt-decoder/intel-pt-log.h"
47 #include "intel-pt-decoder/intel-pt-decoder.h"
48 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
49 #include "intel-pt-decoder/intel-pt-pkt-decoder.h"
50 
51 #define MAX_TIMESTAMP (~0ULL)
52 
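/*
 * Per-session decoder state: one struct intel_pt is created per perf.data
 * file.  It owns the auxtrace queues (one per CPU or per thread), the
 * timestamp-ordering heap, the TSC conversion parameters, and the IDs of
 * the events synthesized from the trace.
 */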
53 struct intel_pt {
54  struct auxtrace auxtrace;
55  struct auxtrace_queues queues;
56  struct auxtrace_heap heap;
57  u32 auxtrace_type;
58  struct perf_session *session;
59  struct machine *machine;
60  struct perf_evsel *switch_evsel;
61  struct thread *unknown_thread;
62  bool timeless_decoding;
63  bool sampling_mode;
64  bool snapshot_mode;
65  bool per_cpu_mmaps;
66  bool have_tsc;
67  bool data_queued;
68  bool est_tsc;
69  bool sync_switch;
70  bool mispred_all;
71  int have_sched_switch;
72  u32 pmu_type;
73  u64 kernel_start;
74  u64 switch_ip;
75  u64 ptss_ip;
76 
77  struct perf_tsc_conversion tc;
78  bool cap_user_time_zero;
79 
80  struct itrace_synth_opts synth_opts;
81 
82  bool sample_instructions;
83  u64 instructions_sample_type;
84  u64 instructions_id;
85 
86  bool sample_branches;
87  u32 branches_filter;
88  u64 branches_sample_type;
89  u64 branches_id;
90 
91  bool sample_transactions;
92  u64 transactions_sample_type;
93  u64 transactions_id;
94 
95  bool sample_ptwrites;
96  u64 ptwrites_sample_type;
97  u64 ptwrites_id;
98 
99  bool sample_pwr_events;
100  u64 pwr_events_sample_type;
101  u64 mwait_id;
102  u64 pwre_id;
103  u64 exstop_id;
104  u64 pwrx_id;
105  u64 cbr_id;
106 
107  u64 tsc_bit;
108  u64 mtc_bit;
109  u64 mtc_freq_bits;
110  u32 tsc_ctc_ratio_n;
111  u32 tsc_ctc_ratio_d;
112  u64 cyc_bit;
113  u64 noretcomp_bit;
114  unsigned max_non_turbo_ratio;
115  unsigned cbr2khz;
116 
117  unsigned long num_events;
118 
119  char *filter;
120  struct addr_filters filts;
121 };
122 
123 enum switch_state {
124  INTEL_PT_SS_NOT_TRACING,
125  INTEL_PT_SS_UNKNOWN,
126  INTEL_PT_SS_TRACING,
127  INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
128  INTEL_PT_SS_EXPECTING_SWITCH_IP,
129 };
130 
131 struct intel_pt_queue {
132  struct intel_pt *pt;
133  unsigned int queue_nr;
134  struct auxtrace_buffer *buffer;
135  struct auxtrace_buffer *old_buffer;
136  void *decoder;
137  const struct intel_pt_state *state;
138  struct ip_callchain *chain;
139  struct branch_stack *last_branch;
140  struct branch_stack *last_branch_rb;
141  size_t last_branch_pos;
142  union perf_event *event_buf;
143  bool on_heap;
144  bool stop;
145  bool step_through_buffers;
146  bool use_buffer_data;
147  bool sync_switch;
148  pid_t pid, tid;
149  int cpu;
150  int switch_state;
151  pid_t next_tid;
152  struct thread *thread;
153  bool exclude_kernel;
154  bool have_sample;
155  u64 time;
156  u64 timestamp;
157  u32 flags;
158  u16 insn_len;
159  u64 last_insn_cnt;
160  char insn[INTEL_PT_INSN_BUF_SZ];
161 };
162 
163 static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
164  unsigned char *buf, size_t len)
165 {
166  struct intel_pt_pkt packet;
167  size_t pos = 0;
168  int ret, pkt_len, i;
169  char desc[INTEL_PT_PKT_DESC_MAX];
170  const char *color = PERF_COLOR_BLUE;
171 
172  color_fprintf(stdout, color,
173  ". ... Intel Processor Trace data: size %zu bytes\n",
174  len);
175 
176  while (len) {
177  ret = intel_pt_get_packet(buf, len, &packet);
178  if (ret > 0)
179  pkt_len = ret;
180  else
181  pkt_len = 1;
182  printf(".");
183  color_fprintf(stdout, color, " %08zx: ", pos);
184  for (i = 0; i < pkt_len; i++)
185  color_fprintf(stdout, color, " %02x", buf[i]);
186  for (; i < 16; i++)
187  color_fprintf(stdout, color, " ");
188  if (ret > 0) {
189  ret = intel_pt_pkt_desc(&packet, desc,
190  INTEL_PT_PKT_DESC_MAX);
191  if (ret > 0)
192  color_fprintf(stdout, color, " %s\n", desc);
193  } else {
194  color_fprintf(stdout, color, " Bad packet!\n");
195  }
196  pos += pkt_len;
197  buf += pkt_len;
198  len -= pkt_len;
199  }
200 }
201 
202 static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
203  size_t len)
204 {
205  printf(".\n");
206  intel_pt_dump(pt, buf, len);
207 }
208 
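/*
 * In snapshot or sampling mode, successive AUX buffers can contain
 * overlapping trace data.  Find where buffer 'b' starts new trace
 * relative to buffer 'a' and trim 'b' accordingly via b->use_data and
 * b->use_size.
 */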
209 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
210  struct auxtrace_buffer *b)
211 {
212  bool consecutive = false;
213  void *start;
214 
215  start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
216  pt->have_tsc, &consecutive);
217  if (!start)
218  return -EINVAL;
219  b->use_size = b->data + b->size - start;
220  b->use_data = start;
221  if (b->use_size && consecutive)
222  b->consecutive = true;
223  return 0;
224 }
225 
226 /* This function assumes data is processed sequentially only */
227 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
228 {
229  struct intel_pt_queue *ptq = data;
230  struct auxtrace_buffer *buffer = ptq->buffer;
231  struct auxtrace_buffer *old_buffer = ptq->old_buffer;
232  struct auxtrace_queue *queue;
233  bool might_overlap;
234 
235  if (ptq->stop) {
236  b->len = 0;
237  return 0;
238  }
239 
240  queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
241 
242  buffer = auxtrace_buffer__next(queue, buffer);
243  if (!buffer) {
244  if (old_buffer)
245  auxtrace_buffer__drop_data(old_buffer);
246  b->len = 0;
247  return 0;
248  }
249 
250  ptq->buffer = buffer;
251 
252  if (!buffer->data) {
253  int fd = perf_data__fd(ptq->pt->session->data);
254 
255  buffer->data = auxtrace_buffer__get_data(buffer, fd);
256  if (!buffer->data)
257  return -ENOMEM;
258  }
259 
260  might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
261  if (might_overlap && !buffer->consecutive && old_buffer &&
262  intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
263  return -ENOMEM;
264 
265  if (buffer->use_data) {
266  b->len = buffer->use_size;
267  b->buf = buffer->use_data;
268  } else {
269  b->len = buffer->size;
270  b->buf = buffer->data;
271  }
272  b->ref_timestamp = buffer->reference;
273 
274  if (!old_buffer || (might_overlap && !buffer->consecutive)) {
275  b->consecutive = false;
276  b->trace_nr = buffer->buffer_nr + 1;
277  } else {
278  b->consecutive = true;
279  }
280 
281  if (ptq->step_through_buffers)
282  ptq->stop = true;
283 
284  if (b->len) {
285  if (old_buffer)
286  auxtrace_buffer__drop_data(old_buffer);
287  ptq->old_buffer = buffer;
288  } else {
289  auxtrace_buffer__drop_data(buffer);
290  return intel_pt_get_trace(b, data);
291  }
292 
293  return 0;
294 }
295 
296 struct intel_pt_cache_entry {
297  struct auxtrace_cache_entry entry;
298  u64 insn_cnt;
299  u64 byte_cnt;
300  enum intel_pt_insn_op op;
301  enum intel_pt_insn_branch branch;
302  int length;
303  int32_t rel;
304  char insn[INTEL_PT_INSN_BUF_SZ];
305 };
306 
307 static int intel_pt_config_div(const char *var, const char *value, void *data)
308 {
309  int *d = data;
310  long val;
311 
312  if (!strcmp(var, "intel-pt.cache-divisor")) {
313  val = strtol(value, NULL, 0);
314  if (val > 0 && val <= INT_MAX)
315  *d = val;
316  }
317 
318  return 0;
319 }
320 
321 static int intel_pt_cache_divisor(void)
322 {
323  static int d;
324 
325  if (d)
326  return d;
327 
328  perf_config(intel_pt_config_div, &d);
329 
330  if (!d)
331  d = 64;
332 
333  return d;
334 }
335 
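/*
 * Size the per-dso instruction cache: the number of hash bits is derived
 * from the dso size divided by the configured divisor, clamped to the
 * range 2^10 to 2^21 entries.
 */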
336 static unsigned int intel_pt_cache_size(struct dso *dso,
337  struct machine *machine)
338 {
339  off_t size;
340 
341  size = dso__data_size(dso, machine);
342  size /= intel_pt_cache_divisor();
343  if (size < 1000)
344  return 10;
345  if (size > (1 << 21))
346  return 21;
347  return 32 - __builtin_clz(size);
348 }
349 
350 static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
351  struct machine *machine)
352 {
353  struct auxtrace_cache *c;
354  unsigned int bits;
355 
356  if (dso->auxtrace_cache)
357  return dso->auxtrace_cache;
358 
359  bits = intel_pt_cache_size(dso, machine);
360 
361  /* Ignoring cache creation failure */
362  c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
363 
364  dso->auxtrace_cache = c;
365 
366  return c;
367 }
368 
369 static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
370  u64 offset, u64 insn_cnt, u64 byte_cnt,
371  struct intel_pt_insn *intel_pt_insn)
372 {
373  struct auxtrace_cache *c = intel_pt_cache(dso, machine);
374  struct intel_pt_cache_entry *e;
375  int err;
376 
377  if (!c)
378  return -ENOMEM;
379 
380  e = auxtrace_cache__alloc_entry(c);
381  if (!e)
382  return -ENOMEM;
383 
384  e->insn_cnt = insn_cnt;
385  e->byte_cnt = byte_cnt;
386  e->op = intel_pt_insn->op;
387  e->branch = intel_pt_insn->branch;
388  e->length = intel_pt_insn->length;
389  e->rel = intel_pt_insn->rel;
390  memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
391 
392  err = auxtrace_cache__add(c, offset, &e->entry);
393  if (err)
394  auxtrace_cache__free_entry(c, e);
395 
396  return err;
397 }
398 
399 static struct intel_pt_cache_entry *
400 intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
401 {
402  struct auxtrace_cache *c = intel_pt_cache(dso, machine);
403 
404  if (!c)
405  return NULL;
406 
407  return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
408 }
409 
410 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
411  uint64_t *insn_cnt_ptr, uint64_t *ip,
412  uint64_t to_ip, uint64_t max_insn_cnt,
413  void *data)
414 {
415  struct intel_pt_queue *ptq = data;
416  struct machine *machine = ptq->pt->machine;
417  struct thread *thread;
418  struct addr_location al;
419  unsigned char buf[INTEL_PT_INSN_BUF_SZ];
420  ssize_t len;
421  int x86_64;
422  u8 cpumode;
423  u64 offset, start_offset, start_ip;
424  u64 insn_cnt = 0;
425  bool one_map = true;
426 
427  intel_pt_insn->length = 0;
428 
429  if (to_ip && *ip == to_ip)
430  goto out_no_cache;
431 
432  if (*ip >= ptq->pt->kernel_start)
433  cpumode = PERF_RECORD_MISC_KERNEL;
434  else
435  cpumode = PERF_RECORD_MISC_USER;
436 
437  thread = ptq->thread;
438  if (!thread) {
439  if (cpumode != PERF_RECORD_MISC_KERNEL)
440  return -EINVAL;
441  thread = ptq->pt->unknown_thread;
442  }
443 
444  while (1) {
445  if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
446  return -EINVAL;
447 
448  if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
449  dso__data_status_seen(al.map->dso,
450  DSO_DATA_STATUS_SEEN_ITRACE))
451  return -ENOENT;
452 
453  offset = al.map->map_ip(al.map, *ip);
454 
455  if (!to_ip && one_map) {
456  struct intel_pt_cache_entry *e;
457 
458  e = intel_pt_cache_lookup(al.map->dso, machine, offset);
459  if (e &&
460  (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
461  *insn_cnt_ptr = e->insn_cnt;
462  *ip += e->byte_cnt;
463  intel_pt_insn->op = e->op;
464  intel_pt_insn->branch = e->branch;
465  intel_pt_insn->length = e->length;
466  intel_pt_insn->rel = e->rel;
467  memcpy(intel_pt_insn->buf, e->insn,
468  INTEL_PT_INSN_BUF_SZ);
469  intel_pt_log_insn_no_data(intel_pt_insn, *ip);
470  return 0;
471  }
472  }
473 
474  start_offset = offset;
475  start_ip = *ip;
476 
477  /* Load maps to ensure dso->is_64_bit has been updated */
478  map__load(al.map);
479 
480  x86_64 = al.map->dso->is_64_bit;
481 
482  while (1) {
483  len = dso__data_read_offset(al.map->dso, machine,
484  offset, buf,
485  INTEL_PT_INSN_BUF_SZ);
486  if (len <= 0)
487  return -EINVAL;
488 
489  if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
490  return -EINVAL;
491 
492  intel_pt_log_insn(intel_pt_insn, *ip);
493 
494  insn_cnt += 1;
495 
496  if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
497  goto out;
498 
499  if (max_insn_cnt && insn_cnt >= max_insn_cnt)
500  goto out_no_cache;
501 
502  *ip += intel_pt_insn->length;
503 
504  if (to_ip && *ip == to_ip)
505  goto out_no_cache;
506 
507  if (*ip >= al.map->end)
508  break;
509 
510  offset += intel_pt_insn->length;
511  }
512  one_map = false;
513  }
514 out:
515  *insn_cnt_ptr = insn_cnt;
516 
517  if (!one_map)
518  goto out_no_cache;
519 
520  /*
521  * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
522  * entries.
523  */
524  if (to_ip) {
525  struct intel_pt_cache_entry *e;
526 
527  e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
528  if (e)
529  return 0;
530  }
531 
532  /* Ignore cache errors */
533  intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
534  *ip - start_ip, intel_pt_insn);
535 
536  return 0;
537 
538 out_no_cache:
539  *insn_cnt_ptr = insn_cnt;
540  return 0;
541 }
542 
543 static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
544  uint64_t offset, const char *filename)
545 {
546  struct addr_filter *filt;
547  bool have_filter = false;
548  bool hit_tracestop = false;
549  bool hit_filter = false;
550 
551  list_for_each_entry(filt, &pt->filts.head, list) {
552  if (filt->start)
553  have_filter = true;
554 
555  if ((filename && !filt->filename) ||
556  (!filename && filt->filename) ||
557  (filename && strcmp(filename, filt->filename)))
558  continue;
559 
560  if (!(offset >= filt->addr && offset < filt->addr + filt->size))
561  continue;
562 
563  intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
564  ip, offset, filename ? filename : "[kernel]",
565  filt->start ? "filter" : "stop",
566  filt->addr, filt->size);
567 
568  if (filt->start)
569  hit_filter = true;
570  else
571  hit_tracestop = true;
572  }
573 
574  if (!hit_tracestop && !hit_filter)
575  intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
576  ip, offset, filename ? filename : "[kernel]");
577 
578  return hit_tracestop || (have_filter && !hit_filter);
579 }
580 
581 static int __intel_pt_pgd_ip(uint64_t ip, void *data)
582 {
583  struct intel_pt_queue *ptq = data;
584  struct thread *thread;
585  struct addr_location al;
586  u8 cpumode;
587  u64 offset;
588 
589  if (ip >= ptq->pt->kernel_start)
590  return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
591 
592  cpumode = PERF_RECORD_MISC_USER;
593 
594  thread = ptq->thread;
595  if (!thread)
596  return -EINVAL;
597 
598  if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
599  return -EINVAL;
600 
601  offset = al.map->map_ip(al.map, ip);
602 
603  return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
604  al.map->dso->long_name);
605 }
606 
607 static bool intel_pt_pgd_ip(uint64_t ip, void *data)
608 {
609  return __intel_pt_pgd_ip(ip, data) > 0;
610 }
611 
612 static bool intel_pt_get_config(struct intel_pt *pt,
613  struct perf_event_attr *attr, u64 *config)
614 {
615  if (attr->type == pt->pmu_type) {
616  if (config)
617  *config = attr->config;
618  return true;
619  }
620 
621  return false;
622 }
623 
624 static bool intel_pt_exclude_kernel(struct intel_pt *pt)
625 {
626  struct perf_evsel *evsel;
627 
628  evlist__for_each_entry(pt->session->evlist, evsel) {
629  if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
630  !evsel->attr.exclude_kernel)
631  return false;
632  }
633  return true;
634 }
635 
636 static bool intel_pt_return_compression(struct intel_pt *pt)
637 {
638  struct perf_evsel *evsel;
639  u64 config;
640 
641  if (!pt->noretcomp_bit)
642  return true;
643 
644  evlist__for_each_entry(pt->session->evlist, evsel) {
645  if (intel_pt_get_config(pt, &evsel->attr, &config) &&
646  (config & pt->noretcomp_bit))
647  return false;
648  }
649  return true;
650 }
651 
652 static bool intel_pt_branch_enable(struct intel_pt *pt)
653 {
654  struct perf_evsel *evsel;
655  u64 config;
656 
657  evlist__for_each_entry(pt->session->evlist, evsel) {
658  if (intel_pt_get_config(pt, &evsel->attr, &config) &&
659  (config & 1) && !(config & 0x2000))
660  return false;
661  }
662  return true;
663 }
664 
665 static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
666 {
667  struct perf_evsel *evsel;
668  unsigned int shift;
669  u64 config;
670 
671  if (!pt->mtc_freq_bits)
672  return 0;
673 
674  for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
675  config >>= 1;
676 
677  evlist__for_each_entry(pt->session->evlist, evsel) {
678  if (intel_pt_get_config(pt, &evsel->attr, &config))
679  return (config & pt->mtc_freq_bits) >> shift;
680  }
681  return 0;
682 }
683 
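/*
 * "Timeless" decoding means the trace must be processed without
 * timestamps, either because TSC packets were not recorded or because
 * they cannot be correlated to perf event time (no cap_user_time_zero).
 */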
684 static bool intel_pt_timeless_decoding(struct intel_pt *pt)
685 {
686  struct perf_evsel *evsel;
687  bool timeless_decoding = true;
688  u64 config;
689 
690  if (!pt->tsc_bit || !pt->cap_user_time_zero)
691  return true;
692 
693  evlist__for_each_entry(pt->session->evlist, evsel) {
694  if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
695  return true;
696  if (intel_pt_get_config(pt, &evsel->attr, &config)) {
697  if (config & pt->tsc_bit)
698  timeless_decoding = false;
699  else
700  return true;
701  }
702  }
703  return timeless_decoding;
704 }
705 
706 static bool intel_pt_tracing_kernel(struct intel_pt *pt)
707 {
708  struct perf_evsel *evsel;
709 
710  evlist__for_each_entry(pt->session->evlist, evsel) {
711  if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
712  !evsel->attr.exclude_kernel)
713  return true;
714  }
715  return false;
716 }
717 
718 static bool intel_pt_have_tsc(struct intel_pt *pt)
719 {
720  struct perf_evsel *evsel;
721  bool have_tsc = false;
722  u64 config;
723 
724  if (!pt->tsc_bit)
725  return false;
726 
727  evlist__for_each_entry(pt->session->evlist, evsel) {
728  if (intel_pt_get_config(pt, &evsel->attr, &config)) {
729  if (config & pt->tsc_bit)
730  have_tsc = true;
731  else
732  return false;
733  }
734  }
735  return have_tsc;
736 }
737 
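/*
 * Convert nanoseconds to TSC ticks by inverting the kernel's
 * time = (tsc * time_mult) >> time_shift relation.  The quotient and
 * remainder are shifted separately to avoid overflowing 64 bits.
 */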
738 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
739 {
740  u64 quot, rem;
741 
742  quot = ns / pt->tc.time_mult;
743  rem = ns % pt->tc.time_mult;
744  return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
745  pt->tc.time_mult;
746 }
747 
748 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
749  unsigned int queue_nr)
750 {
751  struct intel_pt_params params = { .get_trace = 0, };
752  struct perf_env *env = pt->machine->env;
753  struct intel_pt_queue *ptq;
754 
755  ptq = zalloc(sizeof(struct intel_pt_queue));
756  if (!ptq)
757  return NULL;
758 
759  if (pt->synth_opts.callchain) {
760  size_t sz = sizeof(struct ip_callchain);
761 
762  sz += pt->synth_opts.callchain_sz * sizeof(u64);
763  ptq->chain = zalloc(sz);
764  if (!ptq->chain)
765  goto out_free;
766  }
767 
768  if (pt->synth_opts.last_branch) {
769  size_t sz = sizeof(struct branch_stack);
770 
771  sz += pt->synth_opts.last_branch_sz *
772  sizeof(struct branch_entry);
773  ptq->last_branch = zalloc(sz);
774  if (!ptq->last_branch)
775  goto out_free;
776  ptq->last_branch_rb = zalloc(sz);
777  if (!ptq->last_branch_rb)
778  goto out_free;
779  }
780 
781  ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
782  if (!ptq->event_buf)
783  goto out_free;
784 
785  ptq->pt = pt;
786  ptq->queue_nr = queue_nr;
787  ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
788  ptq->pid = -1;
789  ptq->tid = -1;
790  ptq->cpu = -1;
791  ptq->next_tid = -1;
792 
793  params.get_trace = intel_pt_get_trace;
794  params.walk_insn = intel_pt_walk_next_insn;
795  params.data = ptq;
796  params.return_compression = intel_pt_return_compression(pt);
797  params.branch_enable = intel_pt_branch_enable(pt);
798  params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
799  params.mtc_period = intel_pt_mtc_period(pt);
800  params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
801  params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
802 
803  if (pt->filts.cnt > 0)
804  params.pgd_ip = intel_pt_pgd_ip;
805 
806  if (pt->synth_opts.instructions) {
807  if (pt->synth_opts.period) {
808  switch (pt->synth_opts.period_type) {
809  case PERF_ITRACE_PERIOD_INSTRUCTIONS:
810  params.period_type =
811  INTEL_PT_PERIOD_INSTRUCTIONS;
812  params.period = pt->synth_opts.period;
813  break;
814  case PERF_ITRACE_PERIOD_TICKS:
815  params.period_type = INTEL_PT_PERIOD_TICKS;
816  params.period = pt->synth_opts.period;
817  break;
818  case PERF_ITRACE_PERIOD_NANOSECS:
819  params.period_type = INTEL_PT_PERIOD_TICKS;
820  params.period = intel_pt_ns_to_ticks(pt,
821  pt->synth_opts.period);
822  break;
823  default:
824  break;
825  }
826  }
827 
828  if (!params.period) {
829  params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
830  params.period = 1;
831  }
832  }
833 
834  if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
835  params.flags |= INTEL_PT_FUP_WITH_NLIP;
836 
837  ptq->decoder = intel_pt_decoder_new(&params);
838  if (!ptq->decoder)
839  goto out_free;
840 
841  return ptq;
842 
843 out_free:
844  zfree(&ptq->event_buf);
845  zfree(&ptq->last_branch);
846  zfree(&ptq->last_branch_rb);
847  zfree(&ptq->chain);
848  free(ptq);
849  return NULL;
850 }
851 
852 static void intel_pt_free_queue(void *priv)
853 {
854  struct intel_pt_queue *ptq = priv;
855 
856  if (!ptq)
857  return;
858  thread__zput(ptq->thread);
859  intel_pt_decoder_free(ptq->decoder);
860  zfree(&ptq->event_buf);
861  zfree(&ptq->last_branch);
862  zfree(&ptq->last_branch_rb);
863  zfree(&ptq->chain);
864  free(ptq);
865 }
866 
867 static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
868  struct auxtrace_queue *queue)
869 {
870  struct intel_pt_queue *ptq = queue->priv;
871 
872  if (queue->tid == -1 || pt->have_sched_switch) {
873  ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
874  thread__zput(ptq->thread);
875  }
876 
877  if (!ptq->thread && ptq->tid != -1)
878  ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
879 
880  if (ptq->thread) {
881  ptq->pid = ptq->thread->pid_;
882  if (queue->cpu == -1)
883  ptq->cpu = ptq->thread->cpu;
884  }
885 }
886 
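/*
 * Translate the decoder's state flags into perf branch flags (call,
 * async, interrupt, TX abort, trace begin/end) for the sample about to
 * be synthesized.
 */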
887 static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
888 {
889  if (ptq->state->flags & INTEL_PT_ABORT_TX) {
890  ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
891  } else if (ptq->state->flags & INTEL_PT_ASYNC) {
892  if (ptq->state->to_ip)
893  ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
894  PERF_IP_FLAG_ASYNC |
895  PERF_IP_FLAG_INTERRUPT;
896  else
897  ptq->flags = PERF_IP_FLAG_BRANCH |
898  PERF_IP_FLAG_TRACE_END;
899  ptq->insn_len = 0;
900  } else {
901  if (ptq->state->from_ip)
902  ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
903  else
904  ptq->flags = PERF_IP_FLAG_BRANCH |
905  PERF_IP_FLAG_TRACE_BEGIN;
906  if (ptq->state->flags & INTEL_PT_IN_TX)
907  ptq->flags |= PERF_IP_FLAG_IN_TX;
908  ptq->insn_len = ptq->state->insn_len;
909  memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
910  }
911 }
912 
913 static int intel_pt_setup_queue(struct intel_pt *pt,
914  struct auxtrace_queue *queue,
915  unsigned int queue_nr)
916 {
917  struct intel_pt_queue *ptq = queue->priv;
918 
919  if (list_empty(&queue->head))
920  return 0;
921 
922  if (!ptq) {
923  ptq = intel_pt_alloc_queue(pt, queue_nr);
924  if (!ptq)
925  return -ENOMEM;
926  queue->priv = ptq;
927 
928  if (queue->cpu != -1)
929  ptq->cpu = queue->cpu;
930  ptq->tid = queue->tid;
931 
932  if (pt->sampling_mode && !pt->snapshot_mode &&
933  pt->timeless_decoding)
934  ptq->step_through_buffers = true;
935 
936  ptq->sync_switch = pt->sync_switch;
937  }
938 
939  if (!ptq->on_heap &&
940  (!ptq->sync_switch ||
941  ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
942  const struct intel_pt_state *state;
943  int ret;
944 
945  if (pt->timeless_decoding)
946  return 0;
947 
948  intel_pt_log("queue %u getting timestamp\n", queue_nr);
949  intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
950  queue_nr, ptq->cpu, ptq->pid, ptq->tid);
951  while (1) {
952  state = intel_pt_decode(ptq->decoder);
953  if (state->err) {
954  if (state->err == INTEL_PT_ERR_NODATA) {
955  intel_pt_log("queue %u has no timestamp\n",
956  queue_nr);
957  return 0;
958  }
959  continue;
960  }
961  if (state->timestamp)
962  break;
963  }
964 
965  ptq->timestamp = state->timestamp;
966  intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
967  queue_nr, ptq->timestamp);
968  ptq->state = state;
969  ptq->have_sample = true;
970  intel_pt_sample_flags(ptq);
971  ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
972  if (ret)
973  return ret;
974  ptq->on_heap = true;
975  }
976 
977  return 0;
978 }
979 
980 static int intel_pt_setup_queues(struct intel_pt *pt)
981 {
982  unsigned int i;
983  int ret;
984 
985  for (i = 0; i < pt->queues.nr_queues; i++) {
986  ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
987  if (ret)
988  return ret;
989  }
990  return 0;
991 }
992 
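/*
 * The last-branch ring buffer is kept with its newest entry at
 * last_branch_pos, so copying it out in order takes at most two
 * memcpys: from last_branch_pos to the end, then the wrapped part.
 */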
993 static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
994 {
995  struct branch_stack *bs_src = ptq->last_branch_rb;
996  struct branch_stack *bs_dst = ptq->last_branch;
997  size_t nr = 0;
998 
999  bs_dst->nr = bs_src->nr;
1000 
1001  if (!bs_src->nr)
1002  return;
1003 
1004  nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
1005  memcpy(&bs_dst->entries[0],
1006  &bs_src->entries[ptq->last_branch_pos],
1007  sizeof(struct branch_entry) * nr);
1008 
1009  if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
1010  memcpy(&bs_dst->entries[nr],
1011  &bs_src->entries[0],
1012  sizeof(struct branch_entry) * ptq->last_branch_pos);
1013  }
1014 }
1015 
1016 static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
1017 {
1018  ptq->last_branch_pos = 0;
1019  ptq->last_branch_rb->nr = 0;
1020 }
1021 
1022 static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
1023 {
1024  const struct intel_pt_state *state = ptq->state;
1025  struct branch_stack *bs = ptq->last_branch_rb;
1026  struct branch_entry *be;
1027 
1028  if (!ptq->last_branch_pos)
1029  ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
1030 
1031  ptq->last_branch_pos -= 1;
1032 
1033  be = &bs->entries[ptq->last_branch_pos];
1034  be->from = state->from_ip;
1035  be->to = state->to_ip;
1036  be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
1037  be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
1038  /* No support for mispredict */
1039  be->flags.mispred = ptq->pt->mispred_all;
1040 
1041  if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
1042  bs->nr += 1;
1043 }
1044 
1045 static inline bool intel_pt_skip_event(struct intel_pt *pt)
1046 {
1047  return pt->synth_opts.initial_skip &&
1048  pt->num_events++ < pt->synth_opts.initial_skip;
1049 }
1050 
1051 static void intel_pt_prep_b_sample(struct intel_pt *pt,
1052  struct intel_pt_queue *ptq,
1053  union perf_event *event,
1054  struct perf_sample *sample)
1055 {
1056  event->sample.header.type = PERF_RECORD_SAMPLE;
1057  event->sample.header.misc = PERF_RECORD_MISC_USER;
1058  event->sample.header.size = sizeof(struct perf_event_header);
1059 
1060  if (!pt->timeless_decoding)
1061  sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1062 
1063  sample->cpumode = PERF_RECORD_MISC_USER;
1064  sample->ip = ptq->state->from_ip;
1065  sample->pid = ptq->pid;
1066  sample->tid = ptq->tid;
1067  sample->addr = ptq->state->to_ip;
1068  sample->period = 1;
1069  sample->cpu = ptq->cpu;
1070  sample->flags = ptq->flags;
1071  sample->insn_len = ptq->insn_len;
1072  memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1073 }
1074 
1075 static int intel_pt_inject_event(union perf_event *event,
1076  struct perf_sample *sample, u64 type)
1077 {
1078  event->header.size = perf_event__sample_event_size(sample, type, 0);
1079  return perf_event__synthesize_sample(event, type, 0, sample);
1080 }
1081 
1082 static inline int intel_pt_opt_inject(struct intel_pt *pt,
1083  union perf_event *event,
1084  struct perf_sample *sample, u64 type)
1085 {
1086  if (!pt->synth_opts.inject)
1087  return 0;
1088 
1089  return intel_pt_inject_event(event, sample, type);
1090 }
1091 
1092 static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
1093  union perf_event *event,
1094  struct perf_sample *sample, u64 type)
1095 {
1096  int ret;
1097 
1098  ret = intel_pt_opt_inject(pt, event, sample, type);
1099  if (ret)
1100  return ret;
1101 
1102  ret = perf_session__deliver_synth_event(pt->session, event, sample);
1103  if (ret)
1104  pr_err("Intel PT: failed to deliver event, error %d\n", ret);
1105 
1106  return ret;
1107 }
1108 
1109 static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
1110 {
1111  struct intel_pt *pt = ptq->pt;
1112  union perf_event *event = ptq->event_buf;
1113  struct perf_sample sample = { .ip = 0, };
1114  struct dummy_branch_stack {
1115  u64 nr;
1116  struct branch_entry entries;
1117  } dummy_bs;
1118 
1119  if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
1120  return 0;
1121 
1122  if (intel_pt_skip_event(pt))
1123  return 0;
1124 
1125  intel_pt_prep_b_sample(pt, ptq, event, &sample);
1126 
1127  sample.id = ptq->pt->branches_id;
1128  sample.stream_id = ptq->pt->branches_id;
1129 
1130  /*
1131  * perf report cannot handle events without a branch stack when using
1132  * SORT_MODE__BRANCH so make a dummy one.
1133  */
1134  if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
1135  dummy_bs = (struct dummy_branch_stack){
1136  .nr = 1,
1137  .entries = {
1138  .from = sample.ip,
1139  .to = sample.addr,
1140  },
1141  };
1142  sample.branch_stack = (struct branch_stack *)&dummy_bs;
1143  }
1144 
1145  return intel_pt_deliver_synth_b_event(pt, event, &sample,
1146  pt->branches_sample_type);
1147 }
1148 
1149 static void intel_pt_prep_sample(struct intel_pt *pt,
1150  struct intel_pt_queue *ptq,
1151  union perf_event *event,
1152  struct perf_sample *sample)
1153 {
1154  intel_pt_prep_b_sample(pt, ptq, event, sample);
1155 
1156  if (pt->synth_opts.callchain) {
1157  thread_stack__sample(ptq->thread, ptq->chain,
1158  pt->synth_opts.callchain_sz, sample->ip);
1159  sample->callchain = ptq->chain;
1160  }
1161 
1162  if (pt->synth_opts.last_branch) {
1163  intel_pt_copy_last_branch_rb(ptq);
1164  sample->branch_stack = ptq->last_branch;
1165  }
1166 }
1167 
1168 static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
1169  struct intel_pt_queue *ptq,
1170  union perf_event *event,
1171  struct perf_sample *sample,
1172  u64 type)
1173 {
1174  int ret;
1175 
1176  ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
1177 
1178  if (pt->synth_opts.last_branch)
1179  intel_pt_reset_last_branch_rb(ptq);
1180 
1181  return ret;
1182 }
1183 
1184 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
1185 {
1186  struct intel_pt *pt = ptq->pt;
1187  union perf_event *event = ptq->event_buf;
1188  struct perf_sample sample = { .ip = 0, };
1189 
1190  if (intel_pt_skip_event(pt))
1191  return 0;
1192 
1193  intel_pt_prep_sample(pt, ptq, event, &sample);
1194 
1195  sample.id = ptq->pt->instructions_id;
1196  sample.stream_id = ptq->pt->instructions_id;
1197  sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1198 
1199  ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
1200 
1201  return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1202  pt->instructions_sample_type);
1203 }
1204 
1205 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1206 {
1207  struct intel_pt *pt = ptq->pt;
1208  union perf_event *event = ptq->event_buf;
1209  struct perf_sample sample = { .ip = 0, };
1210 
1211  if (intel_pt_skip_event(pt))
1212  return 0;
1213 
1214  intel_pt_prep_sample(pt, ptq, event, &sample);
1215 
1216  sample.id = ptq->pt->transactions_id;
1217  sample.stream_id = ptq->pt->transactions_id;
1218 
1219  return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1220  pt->transactions_sample_type);
1221 }
1222 
1223 static void intel_pt_prep_p_sample(struct intel_pt *pt,
1224  struct intel_pt_queue *ptq,
1225  union perf_event *event,
1226  struct perf_sample *sample)
1227 {
1228  intel_pt_prep_sample(pt, ptq, event, sample);
1229 
1230  /*
1231  * Zero IP is used to mean "trace start" but that is not the case for
1232  * power or PTWRITE events with no IP, so clear the flags.
1233  */
1234  if (!sample->ip)
1235  sample->flags = 0;
1236 }
1237 
1238 static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
1239 {
1240  struct intel_pt *pt = ptq->pt;
1241  union perf_event *event = ptq->event_buf;
1242  struct perf_sample sample = { .ip = 0, };
1243  struct perf_synth_intel_ptwrite raw;
1244 
1245  if (intel_pt_skip_event(pt))
1246  return 0;
1247 
1248  intel_pt_prep_p_sample(pt, ptq, event, &sample);
1249 
1250  sample.id = ptq->pt->ptwrites_id;
1251  sample.stream_id = ptq->pt->ptwrites_id;
1252 
1253  raw.flags = 0;
1254  raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
1255  raw.payload = cpu_to_le64(ptq->state->ptw_payload);
1256 
1257  sample.raw_size = perf_synth__raw_size(raw);
1258  sample.raw_data = perf_synth__raw_data(&raw);
1259 
1260  return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1261  pt->ptwrites_sample_type);
1262 }
1263 
1264 static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
1265 {
1266  struct intel_pt *pt = ptq->pt;
1267  union perf_event *event = ptq->event_buf;
1268  struct perf_sample sample = { .ip = 0, };
1269  struct perf_synth_intel_cbr raw;
1270  u32 flags;
1271 
1272  if (intel_pt_skip_event(pt))
1273  return 0;
1274 
1275  intel_pt_prep_p_sample(pt, ptq, event, &sample);
1276 
1277  sample.id = ptq->pt->cbr_id;
1278  sample.stream_id = ptq->pt->cbr_id;
1279 
1280  flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
1281  raw.flags = cpu_to_le32(flags);
1282  raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
1283  raw.reserved3 = 0;
1284 
1285  sample.raw_size = perf_synth__raw_size(raw);
1286  sample.raw_data = perf_synth__raw_data(&raw);
1287 
1288  return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1289  pt->pwr_events_sample_type);
1290 }
1291 
1292 static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
1293 {
1294  struct intel_pt *pt = ptq->pt;
1295  union perf_event *event = ptq->event_buf;
1296  struct perf_sample sample = { .ip = 0, };
1297  struct perf_synth_intel_mwait raw;
1298 
1299  if (intel_pt_skip_event(pt))
1300  return 0;
1301 
1302  intel_pt_prep_p_sample(pt, ptq, event, &sample);
1303 
1304  sample.id = ptq->pt->mwait_id;
1305  sample.stream_id = ptq->pt->mwait_id;
1306 
1307  raw.reserved = 0;
1308  raw.payload = cpu_to_le64(ptq->state->mwait_payload);
1309 
1310  sample.raw_size = perf_synth__raw_size(raw);
1311  sample.raw_data = perf_synth__raw_data(&raw);
1312 
1313  return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1314  pt->pwr_events_sample_type);
1315 }
1316 
1317 static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
1318 {
1319  struct intel_pt *pt = ptq->pt;
1320  union perf_event *event = ptq->event_buf;
1321  struct perf_sample sample = { .ip = 0, };
1322  struct perf_synth_intel_pwre raw;
1323 
1324  if (intel_pt_skip_event(pt))
1325  return 0;
1326 
1327  intel_pt_prep_p_sample(pt, ptq, event, &sample);
1328 
1329  sample.id = ptq->pt->pwre_id;
1330  sample.stream_id = ptq->pt->pwre_id;
1331 
1332  raw.reserved = 0;
1333  raw.payload = cpu_to_le64(ptq->state->pwre_payload);
1334 
1335  sample.raw_size = perf_synth__raw_size(raw);
1336  sample.raw_data = perf_synth__raw_data(&raw);
1337 
1338  return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1339  pt->pwr_events_sample_type);
1340 }
1341 
1342 static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
1343 {
1344  struct intel_pt *pt = ptq->pt;
1345  union perf_event *event = ptq->event_buf;
1346  struct perf_sample sample = { .ip = 0, };
1347  struct perf_synth_intel_exstop raw;
1348 
1349  if (intel_pt_skip_event(pt))
1350  return 0;
1351 
1352  intel_pt_prep_p_sample(pt, ptq, event, &sample);
1353 
1354  sample.id = ptq->pt->exstop_id;
1355  sample.stream_id = ptq->pt->exstop_id;
1356 
1357  raw.flags = 0;
1358  raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
1359 
1360  sample.raw_size = perf_synth__raw_size(raw);
1361  sample.raw_data = perf_synth__raw_data(&raw);
1362 
1363  return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1364  pt->pwr_events_sample_type);
1365 }
1366 
1367 static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
1368 {
1369  struct intel_pt *pt = ptq->pt;
1370  union perf_event *event = ptq->event_buf;
1371  struct perf_sample sample = { .ip = 0, };
1372  struct perf_synth_intel_pwrx raw;
1373 
1374  if (intel_pt_skip_event(pt))
1375  return 0;
1376 
1377  intel_pt_prep_p_sample(pt, ptq, event, &sample);
1378 
1379  sample.id = ptq->pt->pwrx_id;
1380  sample.stream_id = ptq->pt->pwrx_id;
1381 
1382  raw.reserved = 0;
1383  raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
1384 
1385  sample.raw_size = perf_synth__raw_size(raw);
1386  sample.raw_data = perf_synth__raw_data(&raw);
1387 
1388  return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1389  pt->pwr_events_sample_type);
1390 }
1391 
1392 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1393  pid_t pid, pid_t tid, u64 ip)
1394 {
1395  union perf_event event;
1396  char msg[MAX_AUXTRACE_ERROR_MSG];
1397  int err;
1398 
1399  intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1400 
1401  auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1402  code, cpu, pid, tid, ip, msg);
1403 
1404  err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1405  if (err)
1406  pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1407  err);
1408 
1409  return err;
1410 }
1411 
1412 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1413 {
1414  struct auxtrace_queue *queue;
1415  pid_t tid = ptq->next_tid;
1416  int err;
1417 
1418  if (tid == -1)
1419  return 0;
1420 
1421  intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1422 
1423  err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1424 
1425  queue = &pt->queues.queue_array[ptq->queue_nr];
1426  intel_pt_set_pid_tid_cpu(pt, queue);
1427 
1428  ptq->next_tid = -1;
1429 
1430  return err;
1431 }
1432 
1433 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1434 {
1435  struct intel_pt *pt = ptq->pt;
1436 
1437  return ip == pt->switch_ip &&
1438  (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1439  !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1440  PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1441 }
1442 
1443 #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
1444  INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \
1445  INTEL_PT_CBR_CHG)
1446 
1447 static int intel_pt_sample(struct intel_pt_queue *ptq)
1448 {
1449  const struct intel_pt_state *state = ptq->state;
1450  struct intel_pt *pt = ptq->pt;
1451  int err;
1452 
1453  if (!ptq->have_sample)
1454  return 0;
1455 
1456  ptq->have_sample = false;
1457 
1458  if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
1459  if (state->type & INTEL_PT_CBR_CHG) {
1460  err = intel_pt_synth_cbr_sample(ptq);
1461  if (err)
1462  return err;
1463  }
1464  if (state->type & INTEL_PT_MWAIT_OP) {
1465  err = intel_pt_synth_mwait_sample(ptq);
1466  if (err)
1467  return err;
1468  }
1469  if (state->type & INTEL_PT_PWR_ENTRY) {
1470  err = intel_pt_synth_pwre_sample(ptq);
1471  if (err)
1472  return err;
1473  }
1474  if (state->type & INTEL_PT_EX_STOP) {
1475  err = intel_pt_synth_exstop_sample(ptq);
1476  if (err)
1477  return err;
1478  }
1479  if (state->type & INTEL_PT_PWR_EXIT) {
1480  err = intel_pt_synth_pwrx_sample(ptq);
1481  if (err)
1482  return err;
1483  }
1484  }
1485 
1486  if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
1487  err = intel_pt_synth_instruction_sample(ptq);
1488  if (err)
1489  return err;
1490  }
1491 
1492  if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
1493  err = intel_pt_synth_transaction_sample(ptq);
1494  if (err)
1495  return err;
1496  }
1497 
1498  if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
1499  err = intel_pt_synth_ptwrite_sample(ptq);
1500  if (err)
1501  return err;
1502  }
1503 
1504  if (!(state->type & INTEL_PT_BRANCH))
1505  return 0;
1506 
1507  if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
1508  thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
1509  state->to_ip, ptq->insn_len,
1510  state->trace_nr);
1511  else
1512  thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
1513 
1514  if (pt->sample_branches) {
1515  err = intel_pt_synth_branch_sample(ptq);
1516  if (err)
1517  return err;
1518  }
1519 
1520  if (pt->synth_opts.last_branch)
1521  intel_pt_update_last_branch_rb(ptq);
1522 
1523  if (!ptq->sync_switch)
1524  return 0;
1525 
1526  if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1527  switch (ptq->switch_state) {
1528  case INTEL_PT_SS_NOT_TRACING:
1529  case INTEL_PT_SS_UNKNOWN:
1530  case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1531  err = intel_pt_next_tid(pt, ptq);
1532  if (err)
1533  return err;
1534  ptq->switch_state = INTEL_PT_SS_TRACING;
1535  break;
1536  default:
1537  ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
1538  return 1;
1539  }
1540  } else if (!state->to_ip) {
1541  ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1542  } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
1543  ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1544  } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1545  state->to_ip == pt->ptss_ip &&
1546  (ptq->flags & PERF_IP_FLAG_CALL)) {
1547  ptq->switch_state = INTEL_PT_SS_TRACING;
1548  }
1549 
1550  return 0;
1551 }
1552 
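/*
 * Find the address of the kernel's global __switch_to symbol, which
 * marks a context switch in the trace, and also the ip of the
 * sched-switch tracepoint or callback used to pair trace data with
 * switch events.
 */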
1553 static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
1554 {
1555  struct machine *machine = pt->machine;
1556  struct map *map;
1557  struct symbol *sym, *start;
1558  u64 ip, switch_ip = 0;
1559  const char *ptss;
1560 
1561  if (ptss_ip)
1562  *ptss_ip = 0;
1563 
1564  map = machine__kernel_map(machine);
1565  if (!map)
1566  return 0;
1567 
1568  if (map__load(map))
1569  return 0;
1570 
1571  start = dso__first_symbol(map->dso);
1572 
1573  for (sym = start; sym; sym = dso__next_symbol(sym)) {
1574  if (sym->binding == STB_GLOBAL &&
1575  !strcmp(sym->name, "__switch_to")) {
1576  ip = map->unmap_ip(map, sym->start);
1577  if (ip >= map->start && ip < map->end) {
1578  switch_ip = ip;
1579  break;
1580  }
1581  }
1582  }
1583 
1584  if (!switch_ip || !ptss_ip)
1585  return 0;
1586 
1587  if (pt->have_sched_switch == 1)
1588  ptss = "perf_trace_sched_switch";
1589  else
1590  ptss = "__perf_event_task_sched_out";
1591 
1592  for (sym = start; sym; sym = dso__next_symbol(sym)) {
1593  if (!strcmp(sym->name, ptss)) {
1594  ip = map->unmap_ip(map, sym->start);
1595  if (ip >= map->start && ip < map->end) {
1596  *ptss_ip = ip;
1597  break;
1598  }
1599  }
1600  }
1601 
1602  return switch_ip;
1603 }
1604 
1605 static void intel_pt_enable_sync_switch(struct intel_pt *pt)
1606 {
1607  unsigned int i;
1608 
1609  pt->sync_switch = true;
1610 
1611  for (i = 0; i < pt->queues.nr_queues; i++) {
1612  struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1613  struct intel_pt_queue *ptq = queue->priv;
1614 
1615  if (ptq)
1616  ptq->sync_switch = true;
1617  }
1618 }
1619 
1620 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1621 {
1622  const struct intel_pt_state *state = ptq->state;
1623  struct intel_pt *pt = ptq->pt;
1624  int err;
1625 
1626  if (!pt->kernel_start) {
1627  pt->kernel_start = machine__kernel_start(pt->machine);
1628  if (pt->per_cpu_mmaps &&
1629  (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
1630  !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
1631  !pt->sampling_mode) {
1632  pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
1633  if (pt->switch_ip) {
1634  intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
1635  pt->switch_ip, pt->ptss_ip);
1636  intel_pt_enable_sync_switch(pt);
1637  }
1638  }
1639  }
1640 
1641  intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1642  ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1643  while (1) {
1644  err = intel_pt_sample(ptq);
1645  if (err)
1646  return err;
1647 
1648  state = intel_pt_decode(ptq->decoder);
1649  if (state->err) {
1650  if (state->err == INTEL_PT_ERR_NODATA)
1651  return 1;
1652  if (ptq->sync_switch &&
1653  state->from_ip >= pt->kernel_start) {
1654  ptq->sync_switch = false;
1655  intel_pt_next_tid(pt, ptq);
1656  }
1657  if (pt->synth_opts.errors) {
1658  err = intel_pt_synth_error(pt, state->err,
1659  ptq->cpu, ptq->pid,
1660  ptq->tid,
1661  state->from_ip);
1662  if (err)
1663  return err;
1664  }
1665  continue;
1666  }
1667 
1668  ptq->state = state;
1669  ptq->have_sample = true;
1670  intel_pt_sample_flags(ptq);
1671 
1672  /* Use estimated TSC upon return to user space */
1673  if (pt->est_tsc &&
1674  (state->from_ip >= pt->kernel_start || !state->from_ip) &&
1675  state->to_ip && state->to_ip < pt->kernel_start) {
1676  intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1677  state->timestamp, state->est_timestamp);
1678  ptq->timestamp = state->est_timestamp;
1679  /* Use estimated TSC in unknown switch state */
1680  } else if (ptq->sync_switch &&
1681  ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1682  intel_pt_is_switch_ip(ptq, state->to_ip) &&
1683  ptq->next_tid == -1) {
1684  intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1685  state->timestamp, state->est_timestamp);
1686  ptq->timestamp = state->est_timestamp;
1687  } else if (state->timestamp > ptq->timestamp) {
1688  ptq->timestamp = state->timestamp;
1689  }
1690 
1691  if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1692  *timestamp = ptq->timestamp;
1693  return 0;
1694  }
1695  }
1696  return 0;
1697 }
1698 
1699 static inline int intel_pt_update_queues(struct intel_pt *pt)
1700 {
1701  if (pt->queues.new_data) {
1702  pt->queues.new_data = false;
1703  return intel_pt_setup_queues(pt);
1704  }
1705  return 0;
1706 }
1707 
1708 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1709 {
1710  unsigned int queue_nr;
1711  u64 ts;
1712  int ret;
1713 
1714  while (1) {
1715  struct auxtrace_queue *queue;
1716  struct intel_pt_queue *ptq;
1717 
1718  if (!pt->heap.heap_cnt)
1719  return 0;
1720 
1721  if (pt->heap.heap_array[0].ordinal >= timestamp)
1722  return 0;
1723 
1724  queue_nr = pt->heap.heap_array[0].queue_nr;
1725  queue = &pt->queues.queue_array[queue_nr];
1726  ptq = queue->priv;
1727 
1728  intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1729  queue_nr, pt->heap.heap_array[0].ordinal,
1730  timestamp);
1731 
1732  auxtrace_heap__pop(&pt->heap);
1733 
1734  if (pt->heap.heap_cnt) {
1735  ts = pt->heap.heap_array[0].ordinal + 1;
1736  if (ts > timestamp)
1737  ts = timestamp;
1738  } else {
1739  ts = timestamp;
1740  }
1741 
1742  intel_pt_set_pid_tid_cpu(pt, queue);
1743 
1744  ret = intel_pt_run_decoder(ptq, &ts);
1745 
1746  if (ret < 0) {
1747  auxtrace_heap__add(&pt->heap, queue_nr, ts);
1748  return ret;
1749  }
1750 
1751  if (!ret) {
1752  ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
1753  if (ret < 0)
1754  return ret;
1755  } else {
1756  ptq->on_heap = false;
1757  }
1758  }
1759 
1760  return 0;
1761 }
1762 
1763 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
1764  u64 time_)
1765 {
1766  struct auxtrace_queues *queues = &pt->queues;
1767  unsigned int i;
1768  u64 ts = 0;
1769 
1770  for (i = 0; i < queues->nr_queues; i++) {
1771  struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1772  struct intel_pt_queue *ptq = queue->priv;
1773 
1774  if (ptq && (tid == -1 || ptq->tid == tid)) {
1775  ptq->time = time_;
1776  intel_pt_set_pid_tid_cpu(pt, queue);
1777  intel_pt_run_decoder(ptq, &ts);
1778  }
1779  }
1780  return 0;
1781 }
1782 
1783 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
1784 {
1785  return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1786  sample->pid, sample->tid, 0);
1787 }
1788 
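/*
 * Map a CPU number to its queue.  Queues are usually allocated in CPU
 * order, so start the search at queue index 'cpu' and scan from there.
 */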
1789 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
1790 {
1791  unsigned i, j;
1792 
1793  if (cpu < 0 || !pt->queues.nr_queues)
1794  return NULL;
1795 
1796  if ((unsigned)cpu >= pt->queues.nr_queues)
1797  i = pt->queues.nr_queues - 1;
1798  else
1799  i = cpu;
1800 
1801  if (pt->queues.queue_array[i].cpu == cpu)
1802  return pt->queues.queue_array[i].priv;
1803 
1804  for (j = 0; i > 0; j++) {
1805  if (pt->queues.queue_array[--i].cpu == cpu)
1806  return pt->queues.queue_array[i].priv;
1807  }
1808 
1809  for (; j < pt->queues.nr_queues; j++) {
1810  if (pt->queues.queue_array[j].cpu == cpu)
1811  return pt->queues.queue_array[j].priv;
1812  }
1813 
1814  return NULL;
1815 }
1816 
1817 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
1818  u64 timestamp)
1819 {
1820  struct intel_pt_queue *ptq;
1821  int err;
1822 
1823  if (!pt->sync_switch)
1824  return 1;
1825 
1826  ptq = intel_pt_cpu_to_ptq(pt, cpu);
1827  if (!ptq || !ptq->sync_switch)
1828  return 1;
1829 
1830  switch (ptq->switch_state) {
1831  case INTEL_PT_SS_NOT_TRACING:
1832  ptq->next_tid = -1;
1833  break;
1834  case INTEL_PT_SS_UNKNOWN:
1835  case INTEL_PT_SS_TRACING:
1836  ptq->next_tid = tid;
1837  ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
1838  return 0;
1839  case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
1840  if (!ptq->on_heap) {
1841  ptq->timestamp = perf_time_to_tsc(timestamp,
1842  &pt->tc);
1843  err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
1844  ptq->timestamp);
1845  if (err)
1846  return err;
1847  ptq->on_heap = true;
1848  }
1849  ptq->switch_state = INTEL_PT_SS_TRACING;
1850  break;
1851  case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1852  ptq->next_tid = tid;
1853  intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1854  break;
1855  default:
1856  break;
1857  }
1858 
1859  return 1;
1860 }
1861 
1862 static int intel_pt_process_switch(struct intel_pt *pt,
1863  struct perf_sample *sample)
1864 {
1865  struct perf_evsel *evsel;
1866  pid_t tid;
1867  int cpu, ret;
1868 
1869  evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
1870  if (evsel != pt->switch_evsel)
1871  return 0;
1872 
1873  tid = perf_evsel__intval(evsel, sample, "next_pid");
1874  cpu = sample->cpu;
1875 
1876  intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1877  cpu, tid, sample->time, perf_time_to_tsc(sample->time,
1878  &pt->tc));
1879 
1880  ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1881  if (ret <= 0)
1882  return ret;
1883 
1884  return machine__set_current_tid(pt->machine, cpu, -1, tid);
1885 }
1886 
1887 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
1888  struct perf_sample *sample)
1889 {
1890  bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
1891  pid_t pid, tid;
1892  int cpu, ret;
1893 
1894  cpu = sample->cpu;
1895 
1896  if (pt->have_sched_switch == 3) {
1897  if (!out)
1898  return 0;
1899  if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
1900  pr_err("Expecting CPU-wide context switch event\n");
1901  return -EINVAL;
1902  }
1903  pid = event->context_switch.next_prev_pid;
1904  tid = event->context_switch.next_prev_tid;
1905  } else {
1906  if (out)
1907  return 0;
1908  pid = sample->pid;
1909  tid = sample->tid;
1910  }
1911 
1912  if (tid == -1) {
1913  pr_err("context_switch event has no tid\n");
1914  return -EINVAL;
1915  }
1916 
1917  intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1918  cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
1919  &pt->tc));
1920 
1921  ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1922  if (ret <= 0)
1923  return ret;
1924 
1925  return machine__set_current_tid(pt->machine, cpu, pid, tid);
1926 }
1927 
1928 static int intel_pt_process_itrace_start(struct intel_pt *pt,
1929  union perf_event *event,
1930  struct perf_sample *sample)
1931 {
1932  if (!pt->per_cpu_mmaps)
1933  return 0;
1934 
1935  intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1936  sample->cpu, event->itrace_start.pid,
1937  event->itrace_start.tid, sample->time,
1938  perf_time_to_tsc(sample->time, &pt->tc));
1939 
1940  return machine__set_current_tid(pt->machine, sample->cpu,
1941  event->itrace_start.pid,
1942  event->itrace_start.tid);
1943 }
1944 
1945 static int intel_pt_process_event(struct perf_session *session,
1946  union perf_event *event,
1947  struct perf_sample *sample,
1948  struct perf_tool *tool)
1949 {
1950  struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1951  auxtrace);
1952  u64 timestamp;
1953  int err = 0;
1954 
1955  if (dump_trace)
1956  return 0;
1957 
1958  if (!tool->ordered_events) {
1959  pr_err("Intel Processor Trace requires ordered events\n");
1960  return -EINVAL;
1961  }
1962 
1963  if (sample->time && sample->time != (u64)-1)
1964  timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1965  else
1966  timestamp = 0;
1967 
1968  if (timestamp || pt->timeless_decoding) {
1969  err = intel_pt_update_queues(pt);
1970  if (err)
1971  return err;
1972  }
1973 
1974  if (pt->timeless_decoding) {
1975  if (event->header.type == PERF_RECORD_EXIT) {
1976  err = intel_pt_process_timeless_queues(pt,
1977  event->fork.tid,
1978  sample->time);
1979  }
1980  } else if (timestamp) {
1981  err = intel_pt_process_queues(pt, timestamp);
1982  }
1983  if (err)
1984  return err;
1985 
1986  if (event->header.type == PERF_RECORD_AUX &&
1987  (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
1988  pt->synth_opts.errors) {
1989  err = intel_pt_lost(pt, sample);
1990  if (err)
1991  return err;
1992  }
1993 
1994  if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
1995  err = intel_pt_process_switch(pt, sample);
1996  else if (event->header.type == PERF_RECORD_ITRACE_START)
1997  err = intel_pt_process_itrace_start(pt, event, sample);
1998  else if (event->header.type == PERF_RECORD_SWITCH ||
1999  event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
2000  err = intel_pt_context_switch(pt, event, sample);
2001 
2002  intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
2003  perf_event__name(event->header.type), event->header.type,
2004  sample->cpu, sample->time, timestamp);
2005 
2006  return err;
2007 }
2008 
2009 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
2010 {
2011  struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2012  auxtrace);
2013  int ret;
2014 
2015  if (dump_trace)
2016  return 0;
2017 
2018  if (!tool->ordered_events)
2019  return -EINVAL;
2020 
2021  ret = intel_pt_update_queues(pt);
2022  if (ret < 0)
2023  return ret;
2024 
2025  if (pt->timeless_decoding)
2026  return intel_pt_process_timeless_queues(pt, -1,
2027  MAX_TIMESTAMP - 1);
2028 
2029  return intel_pt_process_queues(pt, MAX_TIMESTAMP);
2030 }
2031 
2032 static void intel_pt_free_events(struct perf_session *session)
2033 {
2034  struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2035  auxtrace);
2036  struct auxtrace_queues *queues = &pt->queues;
2037  unsigned int i;
2038 
2039  for (i = 0; i < queues->nr_queues; i++) {
2040  intel_pt_free_queue(queues->queue_array[i].priv);
2041  queues->queue_array[i].priv = NULL;
2042  }
2043  intel_pt_log_disable();
2044  auxtrace_queues__free(queues);
2045 }
2046 
2047 static void intel_pt_free(struct perf_session *session)
2048 {
2049  struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2050  auxtrace);
2051 
2052  auxtrace_heap__free(&pt->heap);
2053  intel_pt_free_events(session);
2054  session->auxtrace = NULL;
2055  thread__put(pt->unknown_thread);
2056  addr_filters__exit(&pt->filts);
2057  zfree(&pt->filter);
2058  free(pt);
2059 }
2060 
2061 static int intel_pt_process_auxtrace_event(struct perf_session *session,
2062  union perf_event *event,
2063  struct perf_tool *tool __maybe_unused)
2064 {
2065  struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2066  auxtrace);
2067 
2068  if (!pt->data_queued) {
2069  struct auxtrace_buffer *buffer;
2070  off_t data_offset;
2071  int fd = perf_data__fd(session->data);
2072  int err;
2073 
2074  if (perf_data__is_pipe(session->data)) {
2075  data_offset = 0;
2076  } else {
2077  data_offset = lseek(fd, 0, SEEK_CUR);
2078  if (data_offset == -1)
2079  return -errno;
2080  }
2081 
2082  err = auxtrace_queues__add_event(&pt->queues, session, event,
2083  data_offset, &buffer);
2084  if (err)
2085  return err;
2086 
2087  /* Dump here now we have copied a piped trace out of the pipe */
2088  if (dump_trace) {
2089  if (auxtrace_buffer__get_data(buffer, fd)) {
2090  intel_pt_dump_event(pt, buffer->data,
2091  buffer->size);
2092  auxtrace_buffer__put_data(buffer);
2093  }
2094  }
2095  }
2096 
2097  return 0;
2098 }
2099 
2100 struct intel_pt_synth {
2101  struct perf_tool dummy_tool;
2102  struct perf_session *session;
2103 };
2104 
2105 static int intel_pt_event_synth(struct perf_tool *tool,
2106  union perf_event *event,
2107  struct perf_sample *sample __maybe_unused,
2108  struct machine *machine __maybe_unused)
2109 {
2110  struct intel_pt_synth *intel_pt_synth =
2111  container_of(tool, struct intel_pt_synth, dummy_tool);
2112 
2113  return perf_session__deliver_synth_event(intel_pt_synth->session, event,
2114  NULL);
2115 }
2116 
2117 static int intel_pt_synth_event(struct perf_session *session, const char *name,
2118  struct perf_event_attr *attr, u64 id)
2119 {
2120  struct intel_pt_synth intel_pt_synth;
2121  int err;
2122 
2123  pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2124  name, id, (u64)attr->sample_type);
2125 
2126  memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
2127  intel_pt_synth.session = session;
2128 
2129  err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
2130  &id, intel_pt_event_synth);
2131  if (err)
2132  pr_err("%s: failed to synthesize '%s' event type\n",
2133  __func__, name);
2134 
2135  return err;
2136 }
2137 
2138 static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id,
2139  const char *name)
2140 {
2141  struct perf_evsel *evsel;
2142 
2143  evlist__for_each_entry(evlist, evsel) {
2144  if (evsel->id && evsel->id[0] == id) {
2145  if (evsel->name)
2146  zfree(&evsel->name);
2147  evsel->name = strdup(name);
2148  break;
2149  }
2150  }
2151 }
2152 
2153 static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt,
2154  struct perf_evlist *evlist)
2155 {
2156  struct perf_evsel *evsel;
2157 
2158  evlist__for_each_entry(evlist, evsel) {
2159  if (evsel->attr.type == pt->pmu_type && evsel->ids)
2160  return evsel;
2161  }
2162 
2163  return NULL;
2164 }
2165 
2166 static int intel_pt_synth_events(struct intel_pt *pt,
2167  struct perf_session *session)
2168 {
2169  struct perf_evlist *evlist = session->evlist;
2170  struct perf_evsel *evsel = intel_pt_evsel(pt, evlist);
2171  struct perf_event_attr attr;
2172  u64 id;
2173  int err;
2174 
2175  if (!evsel) {
2176  pr_debug("There are no selected events with Intel Processor Trace data\n");
2177  return 0;
2178  }
2179 
2180  memset(&attr, 0, sizeof(struct perf_event_attr));
2181  attr.size = sizeof(struct perf_event_attr);
2182  attr.type = PERF_TYPE_HARDWARE;
2183  attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
2184  attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
2185  PERF_SAMPLE_PERIOD;
2186  if (pt->timeless_decoding)
2187  attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
2188  else
2189  attr.sample_type |= PERF_SAMPLE_TIME;
2190  if (!pt->per_cpu_mmaps)
2191  attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
2192  attr.exclude_user = evsel->attr.exclude_user;
2193  attr.exclude_kernel = evsel->attr.exclude_kernel;
2194  attr.exclude_hv = evsel->attr.exclude_hv;
2195  attr.exclude_host = evsel->attr.exclude_host;
2196  attr.exclude_guest = evsel->attr.exclude_guest;
2197  attr.sample_id_all = evsel->attr.sample_id_all;
2198  attr.read_format = evsel->attr.read_format;
2199 
2200  id = evsel->id[0] + 1000000000;
2201  if (!id)
2202  id = 1;
2203 
2204  if (pt->synth_opts.branches) {
2205  attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
2206  attr.sample_period = 1;
2207  attr.sample_type |= PERF_SAMPLE_ADDR;
2208  err = intel_pt_synth_event(session, "branches", &attr, id);
2209  if (err)
2210  return err;
2211  pt->sample_branches = true;
2212  pt->branches_sample_type = attr.sample_type;
2213  pt->branches_id = id;
2214  id += 1;
2215  attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
2216  }
2217 
2218  if (pt->synth_opts.callchain)
2219  attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
2220  if (pt->synth_opts.last_branch)
2221  attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
2222 
2223  if (pt->synth_opts.instructions) {
2224  attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2225  if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
2226  attr.sample_period =
2227  intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
2228  else
2229  attr.sample_period = pt->synth_opts.period;
2230  err = intel_pt_synth_event(session, "instructions", &attr, id);
2231  if (err)
2232  return err;
2233  pt->sample_instructions = true;
2234  pt->instructions_sample_type = attr.sample_type;
2235  pt->instructions_id = id;
2236  id += 1;
2237  }
2238 
2239  attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
2240  attr.sample_period = 1;
2241 
2242  if (pt->synth_opts.transactions) {
2243  attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2244  err = intel_pt_synth_event(session, "transactions", &attr, id);
2245  if (err)
2246  return err;
2247  pt->sample_transactions = true;
2248  pt->transactions_sample_type = attr.sample_type;
2249  pt->transactions_id = id;
2250  intel_pt_set_event_name(evlist, id, "transactions");
2251  id += 1;
2252  }
2253 
2254  attr.type = PERF_TYPE_SYNTH;
2255  attr.sample_type |= PERF_SAMPLE_RAW;
2256 
2257  if (pt->synth_opts.ptwrites) {
2258  attr.config = PERF_SYNTH_INTEL_PTWRITE;
2259  err = intel_pt_synth_event(session, "ptwrite", &attr, id);
2260  if (err)
2261  return err;
2262  pt->sample_ptwrites = true;
2263  pt->ptwrites_sample_type = attr.sample_type;
2264  pt->ptwrites_id = id;
2265  intel_pt_set_event_name(evlist, id, "ptwrite");
2266  id += 1;
2267  }
2268 
2269  if (pt->synth_opts.pwr_events) {
2270  pt->sample_pwr_events = true;
2271  pt->pwr_events_sample_type = attr.sample_type;
2272 
2273  attr.config = PERF_SYNTH_INTEL_CBR;
2274  err = intel_pt_synth_event(session, "cbr", &attr, id);
2275  if (err)
2276  return err;
2277  pt->cbr_id = id;
2278  intel_pt_set_event_name(evlist, id, "cbr");
2279  id += 1;
2280  }
2281 
2282  if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
2283  attr.config = PERF_SYNTH_INTEL_MWAIT;
2284  err = intel_pt_synth_event(session, "mwait", &attr, id);
2285  if (err)
2286  return err;
2287  pt->mwait_id = id;
2288  intel_pt_set_event_name(evlist, id, "mwait");
2289  id += 1;
2290 
2291  attr.config = PERF_SYNTH_INTEL_PWRE;
2292  err = intel_pt_synth_event(session, "pwre", &attr, id);
2293  if (err)
2294  return err;
2295  pt->pwre_id = id;
2296  intel_pt_set_event_name(evlist, id, "pwre");
2297  id += 1;
2298 
2299  attr.config = PERF_SYNTH_INTEL_EXSTOP;
2300  err = intel_pt_synth_event(session, "exstop", &attr, id);
2301  if (err)
2302  return err;
2303  pt->exstop_id = id;
2304  intel_pt_set_event_name(evlist, id, "exstop");
2305  id += 1;
2306 
2307  attr.config = PERF_SYNTH_INTEL_PWRX;
2308  err = intel_pt_synth_event(session, "pwrx", &attr, id);
2309  if (err)
2310  return err;
2311  pt->pwrx_id = id;
2312  intel_pt_set_event_name(evlist, id, "pwrx");
2313  id += 1;
2314  }
2315 
2316  return 0;
2317 }
2318 
2319 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
2320 {
2321  struct perf_evsel *evsel;
2322 
2323  evlist__for_each_entry_reverse(evlist, evsel) {
2324  const char *name = perf_evsel__name(evsel);
2325 
2326  if (!strcmp(name, "sched:sched_switch"))
2327  return evsel;
2328  }
2329 
2330  return NULL;
2331 }
2332 
2333 static bool intel_pt_find_switch(struct perf_evlist *evlist)
2334 {
2335  struct perf_evsel *evsel;
2336 
2337  evlist__for_each_entry(evlist, evsel) {
2338  if (evsel->attr.context_switch)
2339  return true;
2340  }
2341 
2342  return false;
2343 }
2344 
2345 static int intel_pt_perf_config(const char *var, const char *value, void *data)
2346 {
2347  struct intel_pt *pt = data;
2348 
2349  if (!strcmp(var, "intel-pt.mispred-all"))
2350  pt->mispred_all = perf_config_bool(var, value);
2351 
2352  return 0;
2353 }
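 /*
  * "mispred-all" is read from perf's config file, so marking every
  * synthesized branch as mispredicted can be requested with, e.g.:
  *
  *	$ cat ~/.perfconfig
  *	[intel-pt]
  *		mispred-all = on
  */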
2354 
2355 static const char * const intel_pt_info_fmts[] = {
2356  [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
2357  [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
2358  [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n",
2359  [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
2360  [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
2361  [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
2362  [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n",
2363  [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n",
2364  [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
2365  [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
2366  [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n",
2367  [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
2368  [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
2369  [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
2370  [INTEL_PT_MAX_NONTURBO_RATIO] = " Max non-turbo ratio %"PRIu64"\n",
2371  [INTEL_PT_FILTER_STR_LEN] = " Filter string len. %"PRIu64"\n",
2372 };
2373 
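 /*
  * intel_pt_print_info() below pairs each auxtrace_info priv[] field
  * with its format string above; output is produced only when perf is
  * dumping the trace (dump_trace, i.e. the -D option).
  */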
2374 static void intel_pt_print_info(u64 *arr, int start, int finish)
2375 {
2376  int i;
2377 
2378  if (!dump_trace)
2379  return;
2380 
2381  for (i = start; i <= finish; i++)
2382  fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
2383 }
2384 
2385 static void intel_pt_print_info_str(const char *name, const char *str)
2386 {
2387  if (!dump_trace)
2388  return;
2389 
2390  fprintf(stdout, " %-20s%s\n", name, str ? str : "");
2391 }
2392 
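 /*
  * Fields were appended to the auxtrace_info event over time, so a
  * perf.data file recorded by an older perf may carry a shorter priv[]
  * array; check the event is big enough before reading priv[pos].
  */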
2393 static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
2394 {
2395  return auxtrace_info->header.size >=
2396  sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
2397 }
2398 
2399 int intel_pt_process_auxtrace_info(union perf_event *event,
2400  struct perf_session *session)
2401 {
2402  struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
2403  size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
2404  struct intel_pt *pt;
2405  void *info_end;
2406  u64 *info;
2407  int err;
2408 
2409  if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
2410  min_sz)
2411  return -EINVAL;
2412 
2413  pt = zalloc(sizeof(struct intel_pt));
2414  if (!pt)
2415  return -ENOMEM;
2416 
2417  addr_filters__init(&pt->filts);
2418 
2419  err = perf_config(intel_pt_perf_config, pt);
2420  if (err)
2421  goto err_free;
2422 
2423  err = auxtrace_queues__init(&pt->queues);
2424  if (err)
2425  goto err_free;
2426 
2427  intel_pt_log_set_name(INTEL_PT_PMU_NAME);
2428 
2429  pt->session = session;
2430  pt->machine = &session->machines.host; /* No kvm support */
2431  pt->auxtrace_type = auxtrace_info->type;
2432  pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
2433  pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
2434  pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
2435  pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
2436  pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
2437  pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
2438  pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
2439  pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
2440  pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
2441  pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
2442  intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
2443  INTEL_PT_PER_CPU_MMAPS);
2444 
2445  if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
2446  pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
2447  pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
2448  pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
2449  pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
2450  pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
2451  intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
2452  INTEL_PT_CYC_BIT);
2453  }
2454 
2455  if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
2456  pt->max_non_turbo_ratio =
2457  auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
2458  intel_pt_print_info(&auxtrace_info->priv[0],
2459  INTEL_PT_MAX_NONTURBO_RATIO,
2460  INTEL_PT_MAX_NONTURBO_RATIO);
2461  }
2462 
2463  info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
2464  info_end = (void *)info + auxtrace_info->header.size;
2465 
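 /*
  * Layout after the fixed priv[] fields: priv[INTEL_PT_FILTER_STR_LEN]
  * holds the filter string length, and the string itself follows,
  * NUL-padded to a multiple of 8 bytes so it can be byte-swapped as
  * u64 words on opposite-endian hosts.
  */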
2466  if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
2467  size_t len;
2468 
2469  len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
2470  intel_pt_print_info(&auxtrace_info->priv[0],
2471  INTEL_PT_FILTER_STR_LEN,
2472  INTEL_PT_FILTER_STR_LEN);
2473  if (len) {
2474  const char *filter = (const char *)info;
2475 
2476  len = roundup(len + 1, 8);
2477  info += len >> 3;
2478  if ((void *)info > info_end) {
2479  pr_err("%s: bad filter string length\n", __func__);
2480  err = -EINVAL;
2481  goto err_free_queues;
2482  }
2483  pt->filter = memdup(filter, len);
2484  if (!pt->filter) {
2485  err = -ENOMEM;
2486  goto err_free_queues;
2487  }
2488  if (session->header.needs_swap)
2489  mem_bswap_64(pt->filter, len);
2490  if (pt->filter[len - 1]) {
2491  pr_err("%s: filter string not null terminated\n", __func__);
2492  err = -EINVAL;
2493  goto err_free_queues;
2494  }
2495  err = addr_filters__parse_bare_filter(&pt->filts,
2496  filter);
2497  if (err)
2498  goto err_free_queues;
2499  }
2500  intel_pt_print_info_str("Filter string", pt->filter);
2501  }
2502 
2503  pt->timeless_decoding = intel_pt_timeless_decoding(pt);
2504  pt->have_tsc = intel_pt_have_tsc(pt);
2505  pt->sampling_mode = false;
2506  pt->est_tsc = !pt->timeless_decoding;
2507 
2508  pt->unknown_thread = thread__new(999999999, 999999999);
2509  if (!pt->unknown_thread) {
2510  err = -ENOMEM;
2511  goto err_free_queues;
2512  }
2513 
2514  /*
2515  * Since this thread will not be kept in any rbtree nor in a
2516  * list, initialize its list node so that at thread__put() the
2517  * current thread lifetime assumption is kept and we don't segfault
2518  * at list_del_init().
2519  */
2520  INIT_LIST_HEAD(&pt->unknown_thread->node);
2521 
2522  err = thread__set_comm(pt->unknown_thread, "unknown", 0);
2523  if (err)
2524  goto err_delete_thread;
2525  if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
2526  err = -ENOMEM;
2527  goto err_delete_thread;
2528  }
2529 
2530  pt->auxtrace.process_event = intel_pt_process_event;
2531  pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
2532  pt->auxtrace.flush_events = intel_pt_flush;
2533  pt->auxtrace.free_events = intel_pt_free_events;
2534  pt->auxtrace.free = intel_pt_free;
2535  session->auxtrace = &pt->auxtrace;
2536 
2537  if (dump_trace)
2538  return 0;
2539 
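 /*
  * have_sched_switch selects how context switches are recognized:
  * 1 means the sched:sched_switch tracepoint was recorded, 2 means
  * PERF_RECORD_SWITCH events are used, which requires an event with
  * attr.context_switch set.
  */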
2540  if (pt->have_sched_switch == 1) {
2541  pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
2542  if (!pt->switch_evsel) {
2543  pr_err("%s: missing sched_switch event\n", __func__);
2544  err = -EINVAL;
2545  goto err_delete_thread;
2546  }
2547  } else if (pt->have_sched_switch == 2 &&
2548  !intel_pt_find_switch(session->evlist)) {
2549  pr_err("%s: missing context_switch attribute flag\n", __func__);
2550  err = -EINVAL;
2551  goto err_delete_thread;
2552  }
2553 
2554  if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
2555  pt->synth_opts = *session->itrace_synth_opts;
2556  } else {
2557  itrace_synth_opts__set_default(&pt->synth_opts);
2558  if (use_browser != -1) {
2559  pt->synth_opts.branches = false;
2560  pt->synth_opts.callchain = true;
2561  }
2562  if (session->itrace_synth_opts)
2563  pt->synth_opts.thread_stack =
2564  session->itrace_synth_opts->thread_stack;
2565  }
2566 
2567  if (pt->synth_opts.log)
2568  intel_pt_log_enable();
2569 
2570  /* Maximum non-turbo ratio is TSC freq / 100 MHz */
2571  if (pt->tc.time_mult) {
2572  u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
2573 
2574  if (!pt->max_non_turbo_ratio)
2575  pt->max_non_turbo_ratio =
2576  (tsc_freq + 50000000) / 100000000;
2577  intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
2578  intel_pt_log("Maximum non-turbo ratio %u\n",
2579  pt->max_non_turbo_ratio);
2580  pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
2581  }
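 /*
  * Example: a 2.9 GHz TSC gives (2900000000 + 50000000) / 100000000 = 29,
  * i.e. rounding to the nearest multiple of 100 MHz, and then
  * cbr2khz = 2900000000 / 29 / 1000 = 100000 kHz per core-bus-ratio step.
  */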
2582 
2583  if (pt->synth_opts.calls)
2584  pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
2585  PERF_IP_FLAG_TRACE_END;
2586  if (pt->synth_opts.returns)
2587  pt->branches_filter |= PERF_IP_FLAG_RETURN |
2588  PERF_IP_FLAG_TRACE_BEGIN;
2589 
2590  if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
2591  symbol_conf.use_callchain = true;
2592  if (callchain_register_param(&callchain_param) < 0) {
2593  symbol_conf.use_callchain = false;
2594  pt->synth_opts.callchain = false;
2595  }
2596  }
2597 
2598  err = intel_pt_synth_events(pt, session);
2599  if (err)
2600  goto err_delete_thread;
2601 
2602  err = auxtrace_queues__process_index(&pt->queues, session);
2603  if (err)
2604  goto err_delete_thread;
2605 
2606  if (pt->queues.populated)
2607  pt->data_queued = true;
2608 
2609  if (pt->timeless_decoding)
2610  pr_debug2("Intel PT decoding without timestamps\n");
2611 
2612  return 0;
2613 
2614 err_delete_thread:
2615  thread__zput(pt->unknown_thread);
2616 err_free_queues:
2617  intel_pt_log_disable();
2618  auxtrace_queues__free(&pt->queues);
2619  session->auxtrace = NULL;
2620 err_free:
2621  addr_filters__exit(&pt->filts);
2622  zfree(&pt->filter);
2623  free(pt);
2624  return err;
2625 }