Linux Perf
futex-wake-parallel.c
Go to the documentation of this file.
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2015 Davidlohr Bueso.
4  *
5  * Block a bunch of threads and let parallel waker threads wakeup an
6  * equal amount of them. The program output reflects the avg latency
7  * for each individual thread to service its share of work. Ultimately
8  * it can be used to measure futex_wake() changes.
9  */
10 #include "bench.h"
11 #include <linux/compiler.h>
12 #include "../util/debug.h"
13 
14 #ifndef HAVE_PTHREAD_BARRIER
15 int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
16 {
17  pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
18  return 0;
19 }
20 #else /* HAVE_PTHREAD_BARRIER */
21 /* For the CLR_() macros */
22 #include <string.h>
23 #include <pthread.h>
24 
25 #include <signal.h>
26 #include "../util/stat.h"
27 #include <subcmd/parse-options.h>
28 #include <linux/kernel.h>
29 #include <linux/time64.h>
30 #include <errno.h>
31 #include "futex.h"
32 #include "cpumap.h"
33 
34 #include <err.h>
35 #include <stdlib.h>
36 #include <sys/time.h>
37 
/* Per-waker bookkeeping, filled in by waking_workerfn(). */
struct thread_data {
	/* handle of the waker thread */
	pthread_t	worker;
	/* value returned by this waker's futex_wake() call */
	unsigned int	nwoken;
	/* elapsed time measured around the futex_wake() call */
	struct timeval	runtime;
};
43 
44 static unsigned int nwakes = 1;
45 
46 /* all threads will block on the same futex -- hash bucket chaos ;) */
47 static u_int32_t futex = 0;
48 
49 static pthread_t *blocked_worker;
50 static bool done = false, silent = false, fshared = false;
51 static unsigned int nblocked_threads = 0, nwaking_threads = 0;
52 static pthread_mutex_t thread_lock;
53 static pthread_cond_t thread_parent, thread_worker;
54 static pthread_barrier_t barrier;
55 static struct stats waketime_stats, wakeup_stats;
56 static unsigned int threads_starting;
57 static int futex_flag = 0;
58 
59 static const struct option options[] = {
60  OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
61  OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
62  OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
63  OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
64  OPT_END()
65 };
66 
/* NULL-terminated usage strings handed to parse_options()/usage_with_options(). */
static const char * const bench_futex_wake_parallel_usage[] = {
	"perf bench futex wake-parallel <options>",
	NULL,
};
71 
72 static void *waking_workerfn(void *arg)
73 {
74  struct thread_data *waker = (struct thread_data *) arg;
75  struct timeval start, end;
76 
77  pthread_barrier_wait(&barrier);
78 
79  gettimeofday(&start, NULL);
80 
81  waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
82  if (waker->nwoken != nwakes)
83  warnx("couldn't wakeup all tasks (%d/%d)",
84  waker->nwoken, nwakes);
85 
86  gettimeofday(&end, NULL);
87  timersub(&end, &start, &waker->runtime);
88 
89  pthread_exit(NULL);
90  return NULL;
91 }
92 
93 static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
94 {
95  unsigned int i;
96 
97  pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
98 
99  pthread_barrier_init(&barrier, NULL, nwaking_threads + 1);
100 
101  /* create and block all threads */
102  for (i = 0; i < nwaking_threads; i++) {
103  /*
104  * Thread creation order will impact per-thread latency
105  * as it will affect the order to acquire the hb spinlock.
106  * For now let the scheduler decide.
107  */
108  if (pthread_create(&td[i].worker, &thread_attr,
109  waking_workerfn, (void *)&td[i]))
110  err(EXIT_FAILURE, "pthread_create");
111  }
112 
113  pthread_barrier_wait(&barrier);
114 
115  for (i = 0; i < nwaking_threads; i++)
116  if (pthread_join(td[i].worker, NULL))
117  err(EXIT_FAILURE, "pthread_join");
118 
119  pthread_barrier_destroy(&barrier);
120 }
121 
122 static void *blocked_workerfn(void *arg __maybe_unused)
123 {
124  pthread_mutex_lock(&thread_lock);
126  if (!threads_starting)
127  pthread_cond_signal(&thread_parent);
128  pthread_cond_wait(&thread_worker, &thread_lock);
129  pthread_mutex_unlock(&thread_lock);
130 
131  while (1) { /* handle spurious wakeups */
132  if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
133  break;
134  }
135 
136  pthread_exit(NULL);
137  return NULL;
138 }
139 
140 static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
141  struct cpu_map *cpu)
142 {
143  cpu_set_t cpuset;
144  unsigned int i;
145 
146  threads_starting = nblocked_threads;
147 
148  /* create and block all threads */
149  for (i = 0; i < nblocked_threads; i++) {
150  CPU_ZERO(&cpuset);
151  CPU_SET(cpu->map[i % cpu->nr], &cpuset);
152 
153  if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
154  err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
155 
156  if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
157  err(EXIT_FAILURE, "pthread_create");
158  }
159 }
160 
161 static void print_run(struct thread_data *waking_worker, unsigned int run_num)
162 {
163  unsigned int i, wakeup_avg;
164  double waketime_avg, waketime_stddev;
165  struct stats __waketime_stats, __wakeup_stats;
166 
167  init_stats(&__wakeup_stats);
168  init_stats(&__waketime_stats);
169 
170  for (i = 0; i < nwaking_threads; i++) {
171  update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
172  update_stats(&__wakeup_stats, waking_worker[i].nwoken);
173  }
174 
175  waketime_avg = avg_stats(&__waketime_stats);
176  waketime_stddev = stddev_stats(&__waketime_stats);
177  wakeup_avg = avg_stats(&__wakeup_stats);
178 
179  printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
180  "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
181  nblocked_threads, waketime_avg / USEC_PER_MSEC,
182  rel_stddev_stats(waketime_stddev, waketime_avg));
183 }
184 
185 static void print_summary(void)
186 {
187  unsigned int wakeup_avg;
188  double waketime_avg, waketime_stddev;
189 
190  waketime_avg = avg_stats(&waketime_stats);
191  waketime_stddev = stddev_stats(&waketime_stats);
192  wakeup_avg = avg_stats(&wakeup_stats);
193 
194  printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
195  wakeup_avg,
196  nblocked_threads,
197  waketime_avg / USEC_PER_MSEC,
198  rel_stddev_stats(waketime_stddev, waketime_avg));
199 }
200 
201 
202 static void do_run_stats(struct thread_data *waking_worker)
203 {
204  unsigned int i;
205 
206  for (i = 0; i < nwaking_threads; i++) {
207  update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
208  update_stats(&wakeup_stats, waking_worker[i].nwoken);
209  }
210 
211 }
212 
213 static void toggle_done(int sig __maybe_unused,
214  siginfo_t *info __maybe_unused,
215  void *uc __maybe_unused)
216 {
217  done = true;
218 }
219 
220 int bench_futex_wake_parallel(int argc, const char **argv)
221 {
222  int ret = 0;
223  unsigned int i, j;
224  struct sigaction act;
225  pthread_attr_t thread_attr;
226  struct thread_data *waking_worker;
227  struct cpu_map *cpu;
228 
229  argc = parse_options(argc, argv, options,
230  bench_futex_wake_parallel_usage, 0);
231  if (argc) {
232  usage_with_options(bench_futex_wake_parallel_usage, options);
233  exit(EXIT_FAILURE);
234  }
235 
236  sigfillset(&act.sa_mask);
237  act.sa_sigaction = toggle_done;
238  sigaction(SIGINT, &act, NULL);
239 
240  cpu = cpu_map__new(NULL);
241  if (!cpu)
242  err(EXIT_FAILURE, "calloc");
243 
244  if (!nblocked_threads)
245  nblocked_threads = cpu->nr;
246 
247  /* some sanity checks */
248  if (nwaking_threads > nblocked_threads || !nwaking_threads)
249  nwaking_threads = nblocked_threads;
250 
251  if (nblocked_threads % nwaking_threads)
252  errx(EXIT_FAILURE, "Must be perfectly divisible");
253  /*
254  * Each thread will wakeup nwakes tasks in
255  * a single futex_wait call.
256  */
257  nwakes = nblocked_threads/nwaking_threads;
258 
259  blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
260  if (!blocked_worker)
261  err(EXIT_FAILURE, "calloc");
262 
263  if (!fshared)
265 
266  printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
267  "futex %p), %d threads waking up %d at a time.\n\n",
268  getpid(), nblocked_threads, fshared ? "shared":"private",
269  &futex, nwaking_threads, nwakes);
270 
271  init_stats(&wakeup_stats);
272  init_stats(&waketime_stats);
273 
274  pthread_attr_init(&thread_attr);
275  pthread_mutex_init(&thread_lock, NULL);
276  pthread_cond_init(&thread_parent, NULL);
277  pthread_cond_init(&thread_worker, NULL);
278 
279  for (j = 0; j < bench_repeat && !done; j++) {
280  waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
281  if (!waking_worker)
282  err(EXIT_FAILURE, "calloc");
283 
284  /* create, launch & block all threads */
285  block_threads(blocked_worker, thread_attr, cpu);
286 
287  /* make sure all threads are already blocked */
288  pthread_mutex_lock(&thread_lock);
289  while (threads_starting)
290  pthread_cond_wait(&thread_parent, &thread_lock);
291  pthread_cond_broadcast(&thread_worker);
292  pthread_mutex_unlock(&thread_lock);
293 
294  usleep(100000);
295 
296  /* Ok, all threads are patiently blocked, start waking folks up */
297  wakeup_threads(waking_worker, thread_attr);
298 
299  for (i = 0; i < nblocked_threads; i++) {
300  ret = pthread_join(blocked_worker[i], NULL);
301  if (ret)
302  err(EXIT_FAILURE, "pthread_join");
303  }
304 
305  do_run_stats(waking_worker);
306  if (!silent)
307  print_run(waking_worker, j);
308 
309  free(waking_worker);
310  }
311 
312  /* cleanup & report results */
313  pthread_cond_destroy(&thread_parent);
314  pthread_cond_destroy(&thread_worker);
315  pthread_mutex_destroy(&thread_lock);
316  pthread_attr_destroy(&thread_attr);
317 
318  print_summary();
319 
320  free(blocked_worker);
321  return ret;
322 }
323 #endif /* HAVE_PTHREAD_BARRIER */
int nr
Definition: cpumap.h:14
static bool silent
Definition: futex-hash.c:35
double avg_stats(struct stats *stats)
Definition: stat.c:26
int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
static int futex_flag
Definition: futex-hash.c:36
static pthread_mutex_t thread_lock
Definition: futex-hash.c:39
Definition: genelf.c:61
int int err
Definition: 5sec.c:44
Definition: cpumap.h:12
double stddev_stats(struct stats *stats)
Definition: stat.c:47
#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags)
Definition: futex.h:36
#define pr_err(fmt,...)
Definition: json.h:21
static void init_stats(struct stats *stats)
Definition: stat.h:103
void update_stats(struct stats *stats, u64 val)
Definition: stat.c:10
static bool fshared
Definition: futex-hash.c:35
static int pthread_attr_setaffinity_np(pthread_attr_t *attr __maybe_unused, size_t cpusetsize __maybe_unused, cpu_set_t *cpuset __maybe_unused)
Definition: futex.h:93
double rel_stddev_stats(double stddev, double avg)
Definition: stat.c:60
static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct cpu_map *cpu)
Definition: futex-requeue.c:86
int map[]
Definition: cpumap.h:15
static pthread_cond_t thread_parent
Definition: futex-hash.c:42
static unsigned int nwakes
Definition: futex-wake.c:38
static void print_summary(void)
Definition: futex-hash.c:108
int FUTEX_PRIVATE_FLAG
Definition: Util.py:13
static void toggle_done(int sig __maybe_unused, siginfo_t *info __maybe_unused, void *uc __maybe_unused)
Definition: futex-hash.c:98
Definition: stat.h:10
static int futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags)
Definition: futex.h:44
static unsigned int threads_starting
Definition: futex-hash.c:40
pthread_t * worker
Definition: futex-wake.c:40
void free(void *)
struct cpu_map * cpu_map__new(const char *cpu_list)
Definition: cpumap.c:125
unsigned int bench_repeat
static int futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
Definition: futex.h:54
static pthread_cond_t thread_worker
Definition: futex-hash.c:42
static bool done
Definition: futex-hash.c:35
struct timeval start end runtime
Definition: futex-hash.c:38