48 #include <subcmd/parse-options.h> 70 #include <linux/time64.h> 71 #include <api/fs/fs.h> 75 #include <sys/prctl.h> 79 #include <sys/types.h> 84 #include <sys/resource.h> 89 #define DEFAULT_SEPARATOR " " 90 #define CNTR_NOT_SUPPORTED "<not supported>" 91 #define CNTR_NOT_COUNTED "<not counted>" 92 #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" 121 "topdown-total-slots",
122 "topdown-slots-retired",
123 "topdown-recovery-bubbles",
124 "topdown-fetch-bubbles",
125 "topdown-slots-issued",
197 #define STAT_RECORD perf_stat.record 208 return !strcmp(evsel->
name,
"duration_time");
214 r->tv_sec = a->tv_sec - b->tv_sec;
215 if (a->tv_nsec < b->tv_nsec) {
219 r->tv_nsec = a->tv_nsec - b->tv_nsec ;
230 for (i = 0; i < stat_config.
stats_num; i++)
236 struct perf_event_attr *
attr = &evsel->
attr;
239 if (stat_config.
scale) {
240 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
241 PERF_FORMAT_TOTAL_TIME_RUNNING;
250 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
258 attr->sample_period = 0;
270 attr->sample_type = PERF_SAMPLE_IDENTIFIER;
285 attr->enable_on_exec = 1;
312 pr_err(
"failed to write perf data, error: %m\n");
327 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 328 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 330 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 362 for (thread = 0; thread <
nthreads; thread++) {
363 for (cpu = 0; cpu <
ncpus; cpu++) {
384 pr_err(
"failed to write stat event\n");
390 fprintf(stat_config.
output,
391 "%s: %d: %" PRIu64
" %" PRIu64
" %" PRIu64
"\n",
419 struct timespec ts, rs;
423 clock_gettime(CLOCK_MONOTONIC, &ts);
428 pr_err(
"failed to write stat round event\n");
469 void *ucontext __maybe_unused)
482 pr_err(
"Couldn't synthesize attrs.\n");
496 pr_err(
"Couldn't synthesize thread map.\n");
503 pr_err(
"Couldn't synthesize thread map.\n");
510 pr_err(
"Couldn't synthesize config.\n");
517 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 526 int fd =
FD(counter, cpu, thread);
529 cpu, thread, fd) < 0)
559 pr_debug(
"Weak group for %s/%d failed\n",
569 if (c2->
leader == leader) {
583 int timeout = stat_config.
timeout;
585 unsigned long long t0, t1;
590 const bool forks = (argc > 0);
595 ts.tv_sec = interval / USEC_PER_MSEC;
596 ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
597 }
else if (timeout) {
598 ts.tv_sec = timeout / USEC_PER_MSEC;
599 ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
608 perror(
"failed to prepare workload");
622 if ((errno == EINVAL || errno == EBADF) &&
623 counter->
leader != counter &&
633 if (errno == EINVAL || errno == ENOSYS ||
634 errno == ENOENT || errno == EOPNOTSUPP ||
637 ui__warning(
"%s event is not supported by the kernel.\n",
641 if ((counter->
leader != counter) ||
663 errno, msg,
sizeof(msg));
673 l = strlen(counter->
unit);
683 pr_err(
"failed to set filter \"%s\" on event %s with %d (%s)\n",
685 str_error_r(errno, msg,
sizeof(msg)));
690 pr_err(
"failed to set config \"%s\" on event %s with %d (%s)\n",
692 str_error_r(errno, msg,
sizeof(msg)));
718 clock_gettime(CLOCK_MONOTONIC, &
ref_time);
724 if (interval || timeout) {
725 while (!waitpid(
child_pid, &status, WNOHANG)) {
726 nanosleep(&ts, NULL);
738 pr_err(
"Workload failed: %s\n", emsg);
742 if (WIFSIGNALED(status))
743 psignal(WTERMSIG(status), argv[0]);
747 nanosleep(&ts, NULL);
776 return WEXITSTATUS(status);
808 fprintf(stat_config.
output,
"%s%" PRIu64
"%s%.2f",
812 ena ? 100.0 * run / ena : 100.0);
813 }
else if (run != ena) {
814 fprintf(stat_config.
output,
" (%.2f%%)", 100.0 * run / ena);
825 fprintf(stat_config.
output,
" ( +-%6.2f%% )", pct);
843 fprintf(stat_config.
output,
"S%d-C%*d%s%*d%s",
853 fprintf(stat_config.
output,
"S%*d%s%*d%s",
862 fprintf(stat_config.
output,
"CPU%*d%s",
867 fprintf(stat_config.
output,
"%*s-%*d%s",
890 #define METRIC_LEN 35 905 fprintf(os->
fh,
" ");
906 fprintf(os->
fh,
" ");
910 const char *unit,
double val)
919 if (unit == NULL || fmt == NULL) {
927 n = fprintf(out,
" # ");
931 n += fprintf(out, fmt, val);
932 fprintf(out,
" %-*s",
METRIC_LEN - n - 1, unit);
944 for (i = 0; i < os->
nfields; i++)
949 const char *color __maybe_unused,
950 const char *
fmt,
const char *unit,
double val)
954 char buf[64], *vals, *ends;
956 if (unit == NULL || fmt == NULL) {
960 snprintf(buf,
sizeof(buf), fmt, val);
961 ends = vals =
ltrim(buf);
962 while (
isdigit(*ends) || *ends ==
'.')
970 #define METRIC_ONLY_LEN 20 978 if (strstr(unit,
"/sec") ||
979 strstr(unit,
"hz") ||
980 strstr(unit,
"Hz") ||
981 strstr(unit,
"CPUs utilized"))
989 if (!strncmp(unit,
"of all", 6)) {
998 const char *unit,
double val)
1012 n = fprintf(out, fmt, val);
1015 if (mlen < strlen(unit))
1016 mlen = strlen(unit) + 1;
1017 fprintf(out,
"%*s", mlen - n,
"");
1022 const char *unit,
double val)
1026 char buf[64], *vals, *ends;
1032 snprintf(buf,
sizeof buf, fmt, val);
1033 ends = vals =
ltrim(buf);
1034 while (
isdigit(*ends) || *ends ==
'.')
1037 fprintf(out,
"%s%s", vals,
csv_sep);
1045 const char *
fmt __maybe_unused,
1046 const char *unit,
double val __maybe_unused)
1063 double msecs = avg / NSEC_PER_MSEC;
1064 const char *fmt_v, *fmt_n;
1072 scnprintf(name,
sizeof(name),
"%s%s",
1075 fprintf(output, fmt_v, msecs,
csv_sep);
1082 fprintf(output, fmt_n, name);
1113 double sc = evsel->
scale;
1117 fmt = floor(sc) != sc ?
"%.2f%s" :
"%.0f%s";
1120 fmt = floor(sc) != sc ?
"%'18.2f%s" :
"%'18.0f%s";
1122 fmt = floor(sc) != sc ?
"%18.2f%s" :
"%18.0f%s";
1127 fprintf(output, fmt, avg,
csv_sep);
1130 fprintf(output,
"%-*s%s",
1151 if (pos->
attr.type == PERF_TYPE_SOFTWARE)
1153 if (pmu_type == PERF_TYPE_SOFTWARE) {
1154 pmu_type = pos->
attr.type;
1157 if (pmu_type != pos->
attr.type)
1165 char *prefix, u64 run, u64 ena,
double noise,
1171 .prefix = prefix ?
prefix :
"",
1189 static int aggr_fields[] = {
1204 if (run == 0 || ena == 0 || counter->
counts->
scaled == -1) {
1206 pm(&os, NULL,
"",
"", 0);
1211 fprintf(stat_config.
output,
"%*s%s",
1222 fprintf(stat_config.
output,
"%-*s%s",
1226 fprintf(stat_config.
output,
"%*s",
1231 fprintf(stat_config.
output,
"%s%s",
1235 pm(&os, NULL, NULL,
"", 0);
1239 pm(&os, NULL, NULL,
"", 0);
1275 for (s = 0; s < aggr_map->
nr; s++) {
1276 id = aggr_map->
map[s];
1302 config = strchr(counter->
name,
'/');
1304 if (asprintf(&new_name,
1305 "%s%s", counter->
pmu_name, config) > 0) {
1307 counter->
name = new_name;
1310 if (asprintf(&new_name,
1313 counter->
name = new_name;
1327 alias = list_prepare_entry(counter, &(evsel_list->
entries),
node);
1328 list_for_each_entry_continue (alias, &evsel_list->
entries,
node) {
1332 strcmp(alias->
unit, counter->
unit) ||
1336 cb(alias, data,
false);
1347 cb(counter, data,
true);
1380 if (counts->
ena == 0 || counts->
run == 0 ||
1410 for (s = 0; s < aggr_map->
nr; s++) {
1413 fprintf(output,
"%s", prefix);
1415 ad.
id =
id = aggr_map->
map[s];
1434 fprintf(output,
"%s", prefix);
1436 uval = val * counter->
scale;
1437 printout(
id, nr, counter, uval, prefix, run, ena, 1.0,
1440 fputc(
'\n', output);
1443 fputc(
'\n', output);
1466 for (thread = 0; thread <
nthreads; thread++) {
1469 for (cpu = 0; cpu <
ncpus; cpu++) {
1511 perror(
"cannot sort aggr thread");
1515 for (thread = 0; thread < sorted_threads; thread++) {
1517 fprintf(output,
"%s", prefix);
1519 id = buf[thread].
id;
1520 if (stat_config.
stats)
1522 prefix, buf[thread].
run, buf[thread].
ena, 1.0,
1523 &stat_config.
stats[
id]);
1525 printout(
id, 0, buf[thread].counter, buf[thread].uval,
1526 prefix, buf[thread].run, buf[thread].ena, 1.0,
1528 fputc(
'\n', output);
1539 bool first __maybe_unused)
1563 fprintf(output,
"%s", prefix);
1569 fprintf(output,
"\n");
1573 bool first __maybe_unused)
1603 fprintf(output,
"%s", prefix);
1605 uval = val * counter->
scale;
1606 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
1609 fputc(
'\n', output);
1621 nrcpus = evsel_list->
cpus->
nr;
1622 for (cpu = 0; cpu < nrcpus; cpu++) {
1626 fputs(prefix, stat_config.
output);
1638 uval = val * counter->
scale;
1639 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
1642 fputc(
'\n', stat_config.
output);
1671 fprintf(stat_config.
output,
"%s", prefix);
1674 fprintf(stat_config.
output,
"%*s",
1675 aggr_header_lens[stat_config.
aggr_mode],
"");
1678 fputs(
"time,", stat_config.
output);
1679 fputs(aggr_header_csv[stat_config.
aggr_mode],
1699 fputc(
'\n', stat_config.
output);
1705 static int num_print_interval;
1707 sprintf(prefix,
"%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec,
csv_sep);
1709 if (num_print_interval == 0 && !
csv_output) {
1712 fprintf(output,
"# time socket cpus");
1714 fprintf(output,
" counts %*s events\n",
unit_width,
"unit");
1717 fprintf(output,
"# time core cpus");
1719 fprintf(output,
" counts %*s events\n",
unit_width,
"unit");
1722 fprintf(output,
"# time CPU");
1724 fprintf(output,
" counts %*s events\n",
unit_width,
"unit");
1727 fprintf(output,
"# time comm-pid");
1729 fprintf(output,
" counts %*s events\n",
unit_width,
"unit");
1733 fprintf(output,
"# time");
1735 fprintf(output,
" counts %*s events\n",
unit_width,
"unit");
1743 if (++num_print_interval == 25)
1744 num_print_interval = 0;
1755 fprintf(output,
"\n");
1756 fprintf(output,
" Performance counter stats for ");
1758 fprintf(output,
"\'system wide");
1760 fprintf(output,
"\'CPU(s) %s", target.
cpu_list);
1762 fprintf(output,
"\'%s", argv ? argv[0] :
"pipe");
1763 for (i = 1; argv && (i <
argc); i++)
1764 fprintf(output,
" %s", argv[i]);
1765 }
else if (target.
pid)
1766 fprintf(output,
"process id \'%s", target.
pid);
1768 fprintf(output,
"thread id \'%s", target.
tid);
1770 fprintf(output,
"\'");
1772 fprintf(output,
" (%d runs)",
run_count);
1773 fprintf(output,
":\n\n");
1782 return lround(ceil(-log10(num)));
1788 int idx, indent = 0;
1790 scnprintf(tmp, 64,
" %17.*f", precision, avg);
1791 while (tmp[indent] ==
' ')
1794 fprintf(output,
"%*s# Table of individual measurements:\n", indent,
"");
1798 int h, n = 1 + abs((
int) (100.0 * (run - avg)/run) / 5);
1800 fprintf(output,
" %17.*f (%+.*f) ",
1801 precision, run, precision, run - avg);
1803 for (h = 0; h < n; h++)
1804 fprintf(output,
"#");
1806 fprintf(output,
"\n");
1809 fprintf(output,
"\n%*s# Final result:\n", indent,
"");
1814 return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
1824 fprintf(output,
"\n");
1827 fprintf(output,
" %17.9f seconds time elapsed", avg);
1833 fprintf(output,
"\n\n");
1834 fprintf(output,
" %17.9f seconds user\n", ru_utime);
1835 fprintf(output,
" %17.9f seconds sys\n", ru_stime);
1848 fprintf(output,
" %17.*f +- %.*f seconds time elapsed",
1849 precision, avg, precision, sd);
1853 fprintf(output,
"\n\n");
1856 sysctl__read_int(
"kernel/nmi_watchdog", &n) >= 0 &&
1859 "Some events weren't counted. Try disabling the NMI watchdog:\n" 1860 " echo 0 > /proc/sys/kernel/nmi_watchdog\n" 1862 " echo 1 > /proc/sys/kernel/nmi_watchdog\n");
1866 "The events in group usually have to be from " 1867 "the same PMU. Try reorganizing the group.\n");
1874 char buf[64], *prefix = NULL;
1886 static int num_print_iv;
1888 if (num_print_iv == 0 && !interval)
1890 if (num_print_iv++ == 25)
1893 fprintf(stat_config.
output,
"%s", prefix);
1915 fputc(
'\n', stat_config.
output);
1936 fflush(stat_config.
output);
1967 sigaddset(&
set, SIGCHLD);
1968 sigprocmask(SIG_BLOCK, &
set, &oset);
1973 sigprocmask(SIG_SETMASK, &oset, NULL);
1978 signal(signr, SIG_DFL);
1979 kill(getpid(), signr);
1983 const char *s __maybe_unused,
int unset)
1990 const char *s __maybe_unused,
int unset)
1999 int unset __maybe_unused)
2006 "hardware transaction statistics"),
2007 OPT_CALLBACK(
'e',
"event", &evsel_list,
"event",
2008 "event selector. use 'perf list' to list available events",
2010 OPT_CALLBACK(0,
"filter", &evsel_list,
"filter",
2013 "child tasks do not inherit counters"),
2014 OPT_STRING(
'p',
"pid", &target.
pid,
"pid",
2015 "stat events on existing process id"),
2016 OPT_STRING(
't',
"tid", &target.
tid,
"tid",
2017 "stat events on existing thread id"),
2019 "system-wide collection from all CPUs"),
2020 OPT_BOOLEAN(
'g',
"group", &
group,
2021 "put the counters into a counter group"),
2022 OPT_BOOLEAN(
'c',
"scale", &stat_config.
scale,
"scale/normalize counters"),
2023 OPT_INCR(
'v',
"verbose", &
verbose,
2024 "be more verbose (show counter open errors, etc)"),
2026 "repeat command and print average + stddev (max: 100, forever: 0)"),
2028 "display details about each run (only with -r option)"),
2029 OPT_BOOLEAN(
'n',
"null", &
null_run,
2030 "null run - dont start any counters"),
2032 "detailed run - start a lot of events"),
2033 OPT_BOOLEAN(
'S',
"sync", &
sync_run,
2034 "call sync() before starting a run"),
2035 OPT_CALLBACK_NOOPT(
'B',
"big-num", NULL, NULL,
2036 "print large numbers with thousands\' separators",
2038 OPT_STRING(
'C',
"cpu", &target.
cpu_list,
"cpu",
2039 "list of cpus to monitor in system-wide"),
2040 OPT_SET_UINT(
'A',
"no-aggr", &stat_config.
aggr_mode,
2041 "disable CPU count aggregation",
AGGR_NONE),
2042 OPT_BOOLEAN(0,
"no-merge", &
no_merge,
"Do not merge identical named events"),
2043 OPT_STRING(
'x',
"field-separator", &
csv_sep,
"separator",
2044 "print counts with custom separator"),
2045 OPT_CALLBACK(
'G',
"cgroup", &evsel_list,
"name",
2047 OPT_STRING(
'o',
"output", &
output_name,
"file",
"output file name"),
2048 OPT_BOOLEAN(0,
"append", &
append_file,
"append to the output file"),
2050 "log output to fd, instead of stderr"),
2051 OPT_STRING(0,
"pre", &
pre_cmd,
"command",
2052 "command to run prior to the measured command"),
2053 OPT_STRING(0,
"post", &
post_cmd,
"command",
2054 "command to run after to the measured command"),
2055 OPT_UINTEGER(
'I',
"interval-print", &stat_config.
interval,
2056 "print counts at regular interval in ms " 2057 "(overhead is possible for values <= 100ms)"),
2058 OPT_INTEGER(0,
"interval-count", &stat_config.
times,
2059 "print counts for fixed number of times"),
2060 OPT_UINTEGER(0,
"timeout", &stat_config.
timeout,
2061 "stop workload and print counts after a timeout period in ms (>= 10ms)"),
2062 OPT_SET_UINT(0,
"per-socket", &stat_config.
aggr_mode,
2063 "aggregate counts per processor socket",
AGGR_SOCKET),
2064 OPT_SET_UINT(0,
"per-core", &stat_config.
aggr_mode,
2065 "aggregate counts per physical processor core",
AGGR_CORE),
2066 OPT_SET_UINT(0,
"per-thread", &stat_config.
aggr_mode,
2069 "ms to wait before starting measurement after program start"),
2070 OPT_CALLBACK_NOOPT(0,
"metric-only", &
metric_only, NULL,
2073 "measure topdown level 1 statistics"),
2074 OPT_BOOLEAN(0,
"smi-cost", &
smi_cost,
2075 "measure SMI cost"),
2076 OPT_CALLBACK(
'M',
"metrics", &evsel_list,
"metric/metric group list",
2077 "monitor specified metrics or metric groups (separated by ,)",
2096 for (i = 0; i < map->
nr; i++) {
2097 if (map->
map[i] > max)
2113 cpu = map->
map[idx];
2115 if (cpus_aggr_map->
map[cpu] == -1)
2116 cpus_aggr_map->
map[cpu] = get_id(map, idx);
2118 return cpus_aggr_map->
map[cpu];
2138 perror(
"cannot build socket map");
2145 perror(
"cannot build core map");
2165 return cpus_aggr_map ? 0 : -ENOMEM;
2173 cpus_aggr_map = NULL;
2183 cpu = map->
map[idx];
2213 core = (socket_id << 16) | (env->
cpu[
cpu].
core_id & 0xffff);
2248 perror(
"cannot build socket map");
2255 perror(
"cannot build core map");
2278 for (i = 0; attr[i]; i++) {
2280 len += strlen(attr[i]) + 1;
2281 attr[i - off] = attr[i];
2285 attr[i - off] = NULL;
2287 *str =
malloc(len + 1 + 2);
2297 for (i = 0; attr[i]; i++) {
2327 struct perf_event_attr default_attrs0[] = {
2329 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
2330 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
2331 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
2332 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
2334 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
2336 struct perf_event_attr frontend_attrs[] = {
2337 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
2339 struct perf_event_attr backend_attrs[] = {
2340 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
2342 struct perf_event_attr default_attrs1[] = {
2343 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
2344 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
2345 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
2352 struct perf_event_attr detailed_attrs[] = {
2354 { .type = PERF_TYPE_HW_CACHE,
2356 PERF_COUNT_HW_CACHE_L1D << 0 |
2357 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2358 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2360 { .type = PERF_TYPE_HW_CACHE,
2362 PERF_COUNT_HW_CACHE_L1D << 0 |
2363 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2364 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2366 { .type = PERF_TYPE_HW_CACHE,
2368 PERF_COUNT_HW_CACHE_LL << 0 |
2369 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2370 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2372 { .type = PERF_TYPE_HW_CACHE,
2374 PERF_COUNT_HW_CACHE_LL << 0 |
2375 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2376 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2382 struct perf_event_attr very_detailed_attrs[] = {
2384 { .type = PERF_TYPE_HW_CACHE,
2386 PERF_COUNT_HW_CACHE_L1I << 0 |
2387 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2388 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2390 { .type = PERF_TYPE_HW_CACHE,
2392 PERF_COUNT_HW_CACHE_L1I << 0 |
2393 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2394 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2396 { .type = PERF_TYPE_HW_CACHE,
2398 PERF_COUNT_HW_CACHE_DTLB << 0 |
2399 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2400 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2402 { .type = PERF_TYPE_HW_CACHE,
2404 PERF_COUNT_HW_CACHE_DTLB << 0 |
2405 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2406 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2408 { .type = PERF_TYPE_HW_CACHE,
2410 PERF_COUNT_HW_CACHE_ITLB << 0 |
2411 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2412 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2414 { .type = PERF_TYPE_HW_CACHE,
2416 PERF_COUNT_HW_CACHE_ITLB << 0 |
2417 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2418 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2425 struct perf_event_attr very_very_detailed_attrs[] = {
2427 { .type = PERF_TYPE_HW_CACHE,
2429 PERF_COUNT_HW_CACHE_L1D << 0 |
2430 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
2431 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2433 { .type = PERF_TYPE_HW_CACHE,
2435 PERF_COUNT_HW_CACHE_L1D << 0 |
2436 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
2437 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2456 fprintf(stderr,
"Cannot set up transaction events\n");
2466 fprintf(stderr,
"freeze_on_smi is not supported.\n");
2472 fprintf(stderr,
"Failed to set freeze_on_smi.\n");
2484 fprintf(stderr,
"To measure SMI cost, it needs " 2485 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
2489 fprintf(stderr,
"Cannot set up SMI cost events\n");
2501 pr_err(
"top down event configuration requires --per-core mode\n");
2506 pr_err(
"top down event configuration requires system-wide mode (-a)\n");
2514 pr_err(
"Out of memory\n");
2523 "Cannot set up top down events %s: %d\n",
2529 fprintf(stderr,
"System does not support topdown\n");
2537 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
2543 frontend_attrs) < 0)
2579 "perf stat record [<options>]",
2601 argc = parse_options(argc, argv, stat_options, stat_record_usage,
2602 PARSE_OPT_STOP_AT_NON_OPTION);
2608 pr_err(
"Cannot use -r option with perf stat record.\n");
2613 if (session == NULL) {
2614 pr_err(
"Perf session creation failed.\n");
2632 struct timespec tsh, *ts = NULL;
2663 pr_warning(
"warning: processing task data, aggregation mode not set\n");
2703 pr_warning(
"Extra thread map event, ignoring.\n");
2723 pr_warning(
"Extra cpu map event, ignoring.\n");
2765 "perf stat report [<options>]",
2785 const struct option options[] = {
2786 OPT_STRING(
'i',
"input", &
input_name,
"file",
"input file name"),
2787 OPT_SET_UINT(0,
"per-socket", &perf_stat.
aggr_mode,
2788 "aggregate counts per processor socket",
AGGR_SOCKET),
2789 OPT_SET_UINT(0,
"per-core", &perf_stat.
aggr_mode,
2790 "aggregate counts per physical processor core",
AGGR_CORE),
2791 OPT_SET_UINT(
'A',
"no-aggr", &perf_stat.
aggr_mode,
2792 "disable CPU count aggregation",
AGGR_NONE),
2798 argc = parse_options(argc, argv, options, stat_report_usage, 0);
2801 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
2811 if (session == NULL)
2815 stat_config.
output = stderr;
2816 evsel_list = session->
evlist;
2857 const char *
const stat_usage[] = {
2858 "perf stat [<options>] [<command>]",
2861 int status = -EINVAL, run_idx;
2865 const char *
const stat_subcommands[] = {
"record",
"report" };
2867 setlocale(LC_ALL,
"");
2870 if (evsel_list == NULL)
2874 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
2875 (
const char **) stat_usage,
2876 PARSE_OPT_STOP_AT_NON_OPTION);
2887 if (argc && !strncmp(argv[0],
"rec", 3)) {
2891 }
else if (argc && !strncmp(argv[0],
"rep", 3))
2895 timeout = stat_config.
timeout;
2904 fprintf(stderr,
"cannot use both --output and --log-fd\n");
2905 parse_options_usage(stat_usage, stat_options,
"o", 1);
2906 parse_options_usage(NULL, stat_options,
"log-fd", 0);
2911 fprintf(stderr,
"--metric-only is not supported with --per-thread\n");
2916 fprintf(stderr,
"--metric-only is not supported with -r\n");
2921 fprintf(stderr,
"--table is only supported with -r\n");
2922 parse_options_usage(stat_usage, stat_options,
"r", 1);
2923 parse_options_usage(NULL, stat_options,
"table", 0);
2928 fprintf(stderr,
"argument to --log-fd must be a > 0\n");
2929 parse_options_usage(stat_usage, stat_options,
"log-fd", 0);
2939 perror(
"failed to create output file");
2942 clock_gettime(CLOCK_REALTIME, &tm);
2943 fprintf(output,
"# started on %s\n", ctime(&tm.tv_sec));
2948 perror(
"Failed opening logfd");
2961 fprintf(stderr,
"-B option not supported with -x\n");
2962 parse_options_usage(stat_usage, stat_options,
"B", 1);
2963 parse_options_usage(NULL, stat_options,
"x", 1);
2980 pr_err(
"Run count must be a positive number\n");
2981 parse_options_usage(stat_usage, stat_options,
"r", 1);
2991 pr_err(
"failed to setup -r option");
2999 fprintf(stderr,
"The --per-thread option is only " 3000 "available when monitoring via -p -t -a " 3001 "options or only --per-thread.\n");
3002 parse_options_usage(NULL, stat_options,
"p", 1);
3003 parse_options_usage(NULL, stat_options,
"t", 1);
3015 fprintf(stderr,
"both cgroup and no-aggregation " 3016 "modes only available in system-wide mode\n");
3018 parse_options_usage(stat_usage, stat_options,
"G", 1);
3019 parse_options_usage(NULL, stat_options,
"A", 1);
3020 parse_options_usage(NULL, stat_options,
"a", 1);
3034 pr_err(
"Problems finding threads of monitor\n");
3035 parse_options_usage(stat_usage, stat_options,
"p", 1);
3036 parse_options_usage(NULL, stat_options,
"t", 1);
3038 perror(
"failed to parse CPUs map");
3039 parse_options_usage(stat_usage, stat_options,
"C", 1);
3040 parse_options_usage(NULL, stat_options,
"a", 1);
3059 if (stat_config.
times && interval)
3061 else if (stat_config.
times && !interval) {
3062 pr_err(
"interval-count option should be used together with " 3063 "interval-print.\n");
3064 parse_options_usage(stat_usage, stat_options,
"interval-count", 0);
3065 parse_options_usage(stat_usage, stat_options,
"I", 1);
3069 if (timeout && timeout < 100) {
3071 pr_err(
"timeout must be >= 10ms.\n");
3072 parse_options_usage(stat_usage, stat_options,
"timeout", 0);
3076 "The overhead percentage could be high in some cases. " 3077 "Please proceed with caution.\n");
3079 if (timeout && interval) {
3080 pr_err(
"timeout option is not supported with interval-print.\n");
3081 parse_options_usage(stat_usage, stat_options,
"timeout", 0);
3082 parse_options_usage(stat_usage, stat_options,
"I", 1);
3107 if (run_count != 1 &&
verbose > 0)
3108 fprintf(output,
"[ perf stat: executing run #%d ... ]\n",
3112 if (
forever && status != -1) {
3118 if (!
forever && status != -1 && !interval)
3137 &perf_stat.session->machines.host);
3139 pr_warning(
"Couldn't synthesize the kernel mmap record, harmless, " 3140 "older tools may produce warnings about this file\n.");
3145 pr_err(
"failed to write stat round event\n");
3148 if (!perf_stat.data.is_pipe) {
3149 perf_stat.session->header.data_size += perf_stat.bytes_written;
void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads)
void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, struct rblist *metric_events, struct runtime_stat *st)
static void aggr_update_shadow(void)
static int perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, struct perf_counts_values *count)
int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target, const char *argv[], bool pipe_output, void(*exec_error)(int signo, siginfo_t *info, void *ucontext))
int color_fprintf(FILE *fp, const char *color, const char *fmt,...)
static const char * post_cmd
struct perf_evlist::@110 workload
double avg_stats(struct stats *stats)
struct perf_evlist * evlist
void perf_evlist__set_leader(struct perf_evlist *evlist)
static bool transaction_run
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, void *ucontext __maybe_unused)
int thread_map__remove(struct thread_map *threads, int idx)
int parse_filter(const struct option *opt, const char *str, int unset __maybe_unused)
static const char * csv_sep
static void print_metric_csv(void *ctx, const char *color __maybe_unused, const char *fmt, const char *unit, double val)
union perf_evsel_config_term::@112 val
static const char * pre_cmd
static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, struct cpu_map **sockp)
static const char * aggr_header_csv[]
const char ** cmdline_argv
static int cpu_map__id_to_socket(int id)
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
static int get_precision(double num)
void perf_evlist__free_stats(struct perf_evlist *evlist)
int ui__error(const char *format,...)
static int cpu_map__nr(const struct cpu_map *map)
int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine)
static int print_mixed_hw_group_error
static int enable_metric_only(const struct option *opt __maybe_unused, const char *s __maybe_unused, int unset)
static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
static int cpu_map__id_to_cpu(int id)
enum target_errno target__validate(struct target *target)
static int run_perf_stat(int argc, const char **argv, int run_idx)
static int perf_stat__get_core(struct cpu_map *map, int cpu)
static int write_stat_round_event(u64 tm, u64 type)
void perf_evlist__enable(struct perf_evlist *evlist)
static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
static int store_counter_ids(struct perf_evsel *counter)
static int xyarray__max_x(struct xyarray *xy)
static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
void runtime_stat__exit(struct runtime_stat *st)
int ui__warning(const char *format,...)
static int cmp_val(const void *a, const void *b)
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
static void print_metric_headers(const char *prefix, bool no_indent)
static int __run_perf_stat(int argc, const char **argv, int run_idx)
static bool target__has_task(struct target *target)
static int aggr_header_lens[]
static void print_noise(struct perf_evsel *evsel, double avg)
static bool is_duration_time(struct perf_evsel *evsel)
static int process_stat_config_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused)
static struct perf_stat_config stat_config
static bool valid_only_metric(const char *unit)
struct thread_map * threads
struct perf_data_file file
static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
#define DEFAULT_SEPARATOR
struct cpu_map * cpu_map__new_data(struct cpu_map_data *data)
static int perf_stat__get_socket(struct cpu_map *map, int cpu)
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
void perf_evlist__delete(struct perf_evlist *evlist)
static int stat__set_big_num(const struct option *opt __maybe_unused, const char *s __maybe_unused, int unset)
print_metric_t print_metric
static int cpu_map__get_max(struct cpu_map *map)
static struct rblist metric_events
static void print_metric_std(void *ctx, const char *color, const char *fmt, const char *unit, double val)
struct stat_config_event stat_config
int perf_evlist__start_workload(struct perf_evlist *evlist)
struct thread_map * threads
void thread_map__read_comms(struct thread_map *threads)
int perf_event__synthesize_cpu_map(struct perf_tool *tool, struct cpu_map *map, perf_event__handler_t process, struct machine *machine)
static void print_metric_only(void *ctx, const char *color, const char *fmt, const char *unit, double val)
struct perf_session * session
double stddev_stats(struct stats *stats)
struct runtime_stat rt_stat
static bool target__none(struct target *target)
#define WRITE_STAT_ROUND_EVENT(time, interval)
static void printout(int id, int nr, struct perf_evsel *counter, double uval, char *prefix, u64 run, u64 ena, double noise, struct runtime_stat *st)
static void new_line_metric(void *ctx __maybe_unused)
static void runtime_stat_delete(struct perf_stat_config *config)
static unsigned long long rdclock(void)
static const char * output_name
int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine)
static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
void perf_session__delete(struct perf_session *session)
struct perf_evsel * evsel
struct stats walltime_nsecs_stats
static void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b)
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
static u64 * walltime_run
static void init_stats(struct stats *stats)
__weak bool arch_topdown_check_group(bool *warn)
static int process_synthesized_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine __maybe_unused)
int parse_cgroups(const struct option *opt, const char *str, int unset __maybe_unused)
int parse_events(struct perf_evlist *evlist, const char *str, struct parse_events_error *err)
static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
static void collect_all_aliases(struct perf_evsel *counter, void(*cb)(struct perf_evsel *counter, void *data, bool first), void *data)
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
bool perf_evsel__fallback(struct perf_evsel *evsel, int err, char *msg, size_t msgsize)
static int perf_stat_init_aggr_mode(void)
static struct perf_aggr_thread_value * sort_aggr_thread(struct perf_evsel *counter, int nthreads, int ncpus, int *ret)
static int process_stat_round_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_session *session)
static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, struct cpu_map **corep)
static bool is_mixed_hw_group(struct perf_evsel *counter)
int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
static int first_shadow_cpu(struct perf_evsel *evsel, int id)
static int read_counter(struct perf_evsel *counter)
void cpu_map__put(struct cpu_map *map)
static int __store_counter_ids(struct perf_evsel *counter)
static int print_free_counters_hint
int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine)
static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
struct perf_evlist * evlist
#define pr_debug(fmt,...)
static void print_footer(void)
void update_stats(struct stats *stats, u64 val)
static bool target__has_cpu(struct target *target)
static struct cpu_map * aggr_map
static bool force_metric_only
static void new_line_csv(void *ctx)
static void print_interval(char *prefix, struct timespec *ts)
static unsigned int unit_width
#define evlist__for_each_entry(evlist, evsel)
static struct perf_evlist * evsel_list
static void print_metric_header(void *ctx, const char *color __maybe_unused, const char *fmt __maybe_unused, const char *unit, double val __maybe_unused)
int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
static int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
static void uniquify_event_name(struct perf_evsel *counter)
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
struct cpu_topology_map * cpu
#define perf_evlist__add_default_attrs(evlist, array)
#define FREEZE_ON_SMI_PATH
static bool cpu_map__empty(const struct cpu_map *map)
static struct perf_evsel * perf_evsel__reset_weak_group(struct perf_evsel *evsel)
static unsigned int nthreads
static void init_features(struct perf_session *session)
static void print_noise_pct(double total, double avg)
static aggr_get_id_t aggr_get_id
int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, int fd)
static const char *const stat_report_usage[]
static struct cpu_map * perf_evsel__cpus(struct perf_evsel *evsel)
struct perf_evsel * counter
struct cpu_map_event cpu_map
void perf_evlist__reset_stats(struct perf_evlist *evlist)
static void perf_stat__exit_aggr_mode(void)
struct thread_map * thread_map__new_event(struct thread_map_event *event)
static int str(yyscan_t scanner, int token)
static int parse_metric_groups(const struct option *opt, const char *str, int unset __maybe_unused)
static int process_cpu_map_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused)
double rel_stddev_stats(double stddev, double avg)
bool pmu_have_event(const char *pname, const char *name)
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
static char * thread_map__comm(struct thread_map *map, int thread)
static unsigned int initial_delay
static int pmu_type(const char *name, __u32 *type)
static int nsec_counter(struct perf_evsel *evsel)
void perf_evsel__close(struct perf_evsel *evsel)
static const char * topdown_attrs[]
static struct rusage ru_data
struct perf_counts * counts
static void counter_aggr_cb(struct perf_evsel *counter, void *data, bool first __maybe_unused)
static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
static volatile pid_t child_pid
static int add_default_attributes(void)
int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, int err, char *msg, size_t size)
struct stats res_stats[3]
static void process_interval(void)
static int perf_data__fd(struct perf_data *data)
static const char * smi_cost_attrs
int perf_evsel__read_counter(struct perf_evsel *evsel, int cpu, int thread)
struct thread_map * threads
static void disable_counters(void)
static volatile int signr
int(* aggr_get_id_t)(struct cpu_map *m, int cpu)
static bool target__has_per_thread(struct target *target)
static void perf_stat__reset_stats(void)
static void do_new_line_std(struct outstate *os)
static void setup_system_wide(int forks)
static void print_running(u64 run, u64 ena)
const char * perf_evsel__name(struct perf_evsel *evsel)
static double timeval2double(struct timeval *t)
static struct cpu_map * cpus_aggr_map
int perf_stat_process_counter(struct perf_stat_config *config, struct perf_evsel *counter)
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
static int perf_stat_synthesize_config(bool is_pipe)
int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, struct perf_evsel **err_evsel, struct perf_evsel_config_term **err_term)
static struct timespec ref_time
static int set_maps(struct perf_stat *st)
void perf_stat__init_shadow_stats(void)
static int thread_map__nr(struct thread_map *threads)
static int perf_stat__get_core_file(struct cpu_map *map, int idx)
struct perf_event_header header
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, int cpu, struct runtime_stat *st)
static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
static struct @9 output[OUTPUT_TYPE_MAX]
static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
struct runtime_stat * stats
static void print_aggr(char *prefix)
struct perf_session * perf_session__new(struct perf_data *data, bool repipe, struct perf_tool *tool)
int parse_events_option(const struct option *opt, const char *str, int unset __maybe_unused)
void perf_evlist__disable(struct perf_evlist *evlist)
void runtime_stat__init(struct runtime_stat *st)
int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
int perf_session__process_events(struct perf_session *session)
static void print_table(FILE *output, int precision, double avg)
static void counter_cb(struct perf_evsel *counter, void *data, bool first __maybe_unused)
#define perf_evsel__match(evsel, t, c)
struct perf_evsel * leader
int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_session *session)
static void enable_counters(void)
static volatile int workload_exec_errno
static bool perf_evsel__should_store_id(struct perf_evsel *counter)
static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, const char *fmt, const char *unit, double val)
static int __cmd_report(int argc, const char **argv)
int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 evtime, u64 type, perf_event__handler_t process, struct machine *machine)
void(* print_metric_t)(void *ctx, const char *color, const char *unit, const char *fmt, double val)
static unsigned int ncpus
void parse_events__shrink_config_terms(void)
void perf_stat__reset_shadow_stats(void)
ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size)
struct perf_header header
static int create_perf_stat_counter(struct perf_evsel *evsel)
int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, int(*f)(struct cpu_map *map, int cpu, void *data), void *data)
void perf_evlist__close(struct perf_evlist *evlist)
static void read_counters(void)
static void print_counter(struct perf_evsel *counter, char *prefix)
static void print_header(int argc, const char **argv)
int cmd_stat(int argc, const char **argv)
static pid_t thread_map__pid(struct thread_map *map, int thread)
int metricgroup__parse_groups(const struct option *opt, const char *str, struct rblist *metric_events)
static void new_line_std(void *ctx)
static int __cmd_record(int argc, const char **argv)
struct cpu_map * cpu_map__empty_new(int nr)
static bool perf_evsel__is_group_leader(const struct perf_evsel *evsel)
static bool interval_count
static bool collect_data(struct perf_evsel *counter, void(*cb)(struct perf_evsel *counter, void *data, bool first), void *data)
static const struct option stat_options[]
struct thread_map_event thread_map
static int perf_evsel__nr_cpus(struct perf_evsel *evsel)
static struct perf_counts_values * perf_counts(struct perf_counts *counts, int cpu, int thread)
static bool walltime_run_table
struct perf_stat_evsel * stats
#define pr_warning(fmt,...)
static int xyarray__max_y(struct xyarray *xy)
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
static int topdown_filter_events(const char **attr, char **str, bool use_group)
static void aggr_cb(struct perf_evsel *counter, void *data, bool first)
static const char * transaction_attrs
#define CNTR_NOT_SUPPORTED
static const char *const stat_record_usage[]
static void skip_signal(int signo)
static void print_counters(struct timespec *ts, int argc, const char **argv)
int cpu_map__get_socket(struct cpu_map *map, int idx, void *data __maybe_unused)
static const char * transaction_limited_attrs
struct perf_evlist * perf_evlist__new(void)
struct perf_event_attr attr
static void print_no_aggr_metric(char *prefix)
static int process_thread_map_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused)
void perf_event__read_stat_config(struct perf_stat_config *config, struct stat_config_event *event)
static const char * fixunit(char *buf, struct perf_evsel *evsel, const char *unit)
void static void * zalloc(size_t size)
__weak void arch_topdown_group_warn(void)
int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct thread_map *threads, perf_event__handler_t process, struct machine *machine)
static void sig_atexit(void)