Linux Perf
insn.c
Go to the documentation of this file.
1 /*
2  * x86 instruction analysis
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright (C) IBM Corporation, 2002, 2004, 2009
19  */
20 
21 #ifdef __KERNEL__
22 #include <linux/string.h>
23 #else
24 #include <string.h>
25 #endif
26 #include "inat.h"
27 #include "insn.h"
28 
/*
 * Byte-stream accessors used by the decoder.
 *
 * All of them operate on insn->next_byte (read cursor) and
 * insn->end_kaddr (one past the last readable byte).  Every macro
 * argument is parenthesized so that expressions such as "&local" or
 * "a ? b : c" can be passed safely.
 */

/* True when sizeof(t) bytes starting n bytes past the cursor are in range */
#define validate_next(t, insn, n)	\
	((insn)->next_byte + sizeof(t) + (n) <= (insn)->end_kaddr)

/*
 * Unchecked read of a t at the cursor, advancing the cursor by sizeof(t).
 * The value is copied with memcpy() because the cursor carries no
 * alignment guarantee for t.
 */
#define __get_next(t, insn)	\
	({ t r; memcpy(&r, (insn)->next_byte, sizeof(t)); \
	   (insn)->next_byte += sizeof(t); r; })

/* Unchecked read of a t located n bytes past the cursor; cursor unchanged */
#define __peek_nbyte_next(t, insn, n)	\
	({ t r; memcpy(&r, (insn)->next_byte + (n), sizeof(t)); r; })

/*
 * Checked variants: jump to the caller's local "err_out" label when the
 * requested bytes would run past end_kaddr.
 */
#define get_next(t, insn)	\
	({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })

#define peek_nbyte_next(t, insn, n)	\
	({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })

#define peek_next(t, insn)	peek_nbyte_next(t, insn, 0)
46 
53 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
54 {
55  /*
56  * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
57  * even if the input buffer is long enough to hold them.
58  */
59  if (buf_len > MAX_INSN_SIZE)
60  buf_len = MAX_INSN_SIZE;
61 
62  memset(insn, 0, sizeof(*insn));
63  insn->kaddr = kaddr;
64  insn->end_kaddr = kaddr + buf_len;
65  insn->next_byte = kaddr;
66  insn->x86_64 = x86_64 ? 1 : 0;
67  insn->opnd_bytes = 4;
68  if (x86_64)
69  insn->addr_bytes = 8;
70  else
71  insn->addr_bytes = 4;
72 }
73 
83 {
84  struct insn_field *prefixes = &insn->prefixes;
86  insn_byte_t b, lb;
87  int i, nb;
88 
89  if (prefixes->got)
90  return;
91 
92  nb = 0;
93  lb = 0;
94  b = peek_next(insn_byte_t, insn);
95  attr = inat_get_opcode_attribute(b);
96  while (inat_is_legacy_prefix(attr)) {
97  /* Skip if same prefix */
98  for (i = 0; i < nb; i++)
99  if (prefixes->bytes[i] == b)
100  goto found;
101  if (nb == 4)
102  /* Invalid instruction */
103  break;
104  prefixes->bytes[nb++] = b;
105  if (inat_is_address_size_prefix(attr)) {
106  /* address size switches 2/4 or 4/8 */
107  if (insn->x86_64)
108  insn->addr_bytes ^= 12;
109  else
110  insn->addr_bytes ^= 6;
111  } else if (inat_is_operand_size_prefix(attr)) {
112  /* oprand size switches 2/4 */
113  insn->opnd_bytes ^= 6;
114  }
115 found:
116  prefixes->nbytes++;
117  insn->next_byte++;
118  lb = b;
119  b = peek_next(insn_byte_t, insn);
120  attr = inat_get_opcode_attribute(b);
121  }
122  /* Set the last prefix */
123  if (lb && lb != insn->prefixes.bytes[3]) {
124  if (unlikely(insn->prefixes.bytes[3])) {
125  /* Swap the last prefix */
126  b = insn->prefixes.bytes[3];
127  for (i = 0; i < nb; i++)
128  if (prefixes->bytes[i] == lb)
129  prefixes->bytes[i] = b;
130  }
131  insn->prefixes.bytes[3] = lb;
132  }
133 
134  /* Decode REX prefix */
135  if (insn->x86_64) {
136  b = peek_next(insn_byte_t, insn);
137  attr = inat_get_opcode_attribute(b);
138  if (inat_is_rex_prefix(attr)) {
139  insn->rex_prefix.value = b;
140  insn->rex_prefix.nbytes = 1;
141  insn->next_byte++;
142  if (X86_REX_W(b))
143  /* REX.W overrides opnd_size */
144  insn->opnd_bytes = 8;
145  }
146  }
147  insn->rex_prefix.got = 1;
148 
149  /* Decode VEX prefix */
150  b = peek_next(insn_byte_t, insn);
151  attr = inat_get_opcode_attribute(b);
152  if (inat_is_vex_prefix(attr)) {
153  insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
154  if (!insn->x86_64) {
155  /*
156  * In 32-bits mode, if the [7:6] bits (mod bits of
157  * ModRM) on the second byte are not 11b, it is
158  * LDS or LES or BOUND.
159  */
160  if (X86_MODRM_MOD(b2) != 3)
161  goto vex_end;
162  }
163  insn->vex_prefix.bytes[0] = b;
164  insn->vex_prefix.bytes[1] = b2;
165  if (inat_is_evex_prefix(attr)) {
166  b2 = peek_nbyte_next(insn_byte_t, insn, 2);
167  insn->vex_prefix.bytes[2] = b2;
168  b2 = peek_nbyte_next(insn_byte_t, insn, 3);
169  insn->vex_prefix.bytes[3] = b2;
170  insn->vex_prefix.nbytes = 4;
171  insn->next_byte += 4;
172  if (insn->x86_64 && X86_VEX_W(b2))
173  /* VEX.W overrides opnd_size */
174  insn->opnd_bytes = 8;
175  } else if (inat_is_vex3_prefix(attr)) {
176  b2 = peek_nbyte_next(insn_byte_t, insn, 2);
177  insn->vex_prefix.bytes[2] = b2;
178  insn->vex_prefix.nbytes = 3;
179  insn->next_byte += 3;
180  if (insn->x86_64 && X86_VEX_W(b2))
181  /* VEX.W overrides opnd_size */
182  insn->opnd_bytes = 8;
183  } else {
184  /*
185  * For VEX2, fake VEX3-like byte#2.
186  * Makes it easier to decode vex.W, vex.vvvv,
187  * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
188  */
189  insn->vex_prefix.bytes[2] = b2 & 0x7f;
190  insn->vex_prefix.nbytes = 2;
191  insn->next_byte += 2;
192  }
193  }
194 vex_end:
195  insn->vex_prefix.got = 1;
196 
197  prefixes->got = 1;
198 
199 err_out:
200  return;
201 }
202 
213 void insn_get_opcode(struct insn *insn)
214 {
215  struct insn_field *opcode = &insn->opcode;
216  insn_byte_t op;
217  int pfx_id;
218  if (opcode->got)
219  return;
220  if (!insn->prefixes.got)
221  insn_get_prefixes(insn);
222 
223  /* Get first opcode */
224  op = get_next(insn_byte_t, insn);
225  opcode->bytes[0] = op;
226  opcode->nbytes = 1;
227 
228  /* Check if there is VEX prefix or not */
229  if (insn_is_avx(insn)) {
230  insn_byte_t m, p;
231  m = insn_vex_m_bits(insn);
232  p = insn_vex_p_bits(insn);
233  insn->attr = inat_get_avx_attribute(op, m, p);
234  if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
235  (!inat_accept_vex(insn->attr) &&
236  !inat_is_group(insn->attr)))
237  insn->attr = 0; /* This instruction is bad */
238  goto end; /* VEX has only 1 byte for opcode */
239  }
240 
241  insn->attr = inat_get_opcode_attribute(op);
242  while (inat_is_escape(insn->attr)) {
243  /* Get escaped opcode */
244  op = get_next(insn_byte_t, insn);
245  opcode->bytes[opcode->nbytes++] = op;
246  pfx_id = insn_last_prefix_id(insn);
247  insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
248  }
249  if (inat_must_vex(insn->attr))
250  insn->attr = 0; /* This instruction is bad */
251 end:
252  opcode->got = 1;
253 
254 err_out:
255  return;
256 }
257 
266 void insn_get_modrm(struct insn *insn)
267 {
268  struct insn_field *modrm = &insn->modrm;
269  insn_byte_t pfx_id, mod;
270  if (modrm->got)
271  return;
272  if (!insn->opcode.got)
273  insn_get_opcode(insn);
274 
275  if (inat_has_modrm(insn->attr)) {
276  mod = get_next(insn_byte_t, insn);
277  modrm->value = mod;
278  modrm->nbytes = 1;
279  if (inat_is_group(insn->attr)) {
280  pfx_id = insn_last_prefix_id(insn);
281  insn->attr = inat_get_group_attribute(mod, pfx_id,
282  insn->attr);
283  if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
284  insn->attr = 0; /* This is bad */
285  }
286  }
287 
288  if (insn->x86_64 && inat_is_force64(insn->attr))
289  insn->opnd_bytes = 8;
290  modrm->got = 1;
291 
292 err_out:
293  return;
294 }
295 
296 
305 {
306  struct insn_field *modrm = &insn->modrm;
307 
308  if (!insn->x86_64)
309  return 0;
310  if (!modrm->got)
311  insn_get_modrm(insn);
312  /*
313  * For rip-relative instructions, the mod field (top 2 bits)
314  * is zero and the r/m field (bottom 3 bits) is 0x5.
315  */
316  return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
317 }
318 
326 void insn_get_sib(struct insn *insn)
327 {
328  insn_byte_t modrm;
329 
330  if (insn->sib.got)
331  return;
332  if (!insn->modrm.got)
333  insn_get_modrm(insn);
334  if (insn->modrm.nbytes) {
335  modrm = (insn_byte_t)insn->modrm.value;
336  if (insn->addr_bytes != 2 &&
337  X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
338  insn->sib.value = get_next(insn_byte_t, insn);
339  insn->sib.nbytes = 1;
340  }
341  }
342  insn->sib.got = 1;
343 
344 err_out:
345  return;
346 }
347 
348 
358 {
359  insn_byte_t mod, rm, base;
360 
361  if (insn->displacement.got)
362  return;
363  if (!insn->sib.got)
364  insn_get_sib(insn);
365  if (insn->modrm.nbytes) {
366  /*
367  * Interpreting the modrm byte:
368  * mod = 00 - no displacement fields (exceptions below)
369  * mod = 01 - 1-byte displacement field
370  * mod = 10 - displacement field is 4 bytes, or 2 bytes if
371  * address size = 2 (0x67 prefix in 32-bit mode)
372  * mod = 11 - no memory operand
373  *
374  * If address size = 2...
375  * mod = 00, r/m = 110 - displacement field is 2 bytes
376  *
377  * If address size != 2...
378  * mod != 11, r/m = 100 - SIB byte exists
379  * mod = 00, SIB base = 101 - displacement field is 4 bytes
380  * mod = 00, r/m = 101 - rip-relative addressing, displacement
381  * field is 4 bytes
382  */
383  mod = X86_MODRM_MOD(insn->modrm.value);
384  rm = X86_MODRM_RM(insn->modrm.value);
385  base = X86_SIB_BASE(insn->sib.value);
386  if (mod == 3)
387  goto out;
388  if (mod == 1) {
389  insn->displacement.value = get_next(signed char, insn);
390  insn->displacement.nbytes = 1;
391  } else if (insn->addr_bytes == 2) {
392  if ((mod == 0 && rm == 6) || mod == 2) {
393  insn->displacement.value =
394  get_next(short, insn);
395  insn->displacement.nbytes = 2;
396  }
397  } else {
398  if ((mod == 0 && rm == 5) || mod == 2 ||
399  (mod == 0 && base == 5)) {
400  insn->displacement.value = get_next(int, insn);
401  insn->displacement.nbytes = 4;
402  }
403  }
404  }
405 out:
406  insn->displacement.got = 1;
407 
408 err_out:
409  return;
410 }
411 
412 /* Decode moffset16/32/64. Return 0 if failed */
413 static int __get_moffset(struct insn *insn)
414 {
415  switch (insn->addr_bytes) {
416  case 2:
417  insn->moffset1.value = get_next(short, insn);
418  insn->moffset1.nbytes = 2;
419  break;
420  case 4:
421  insn->moffset1.value = get_next(int, insn);
422  insn->moffset1.nbytes = 4;
423  break;
424  case 8:
425  insn->moffset1.value = get_next(int, insn);
426  insn->moffset1.nbytes = 4;
427  insn->moffset2.value = get_next(int, insn);
428  insn->moffset2.nbytes = 4;
429  break;
430  default: /* opnd_bytes must be modified manually */
431  goto err_out;
432  }
433  insn->moffset1.got = insn->moffset2.got = 1;
434 
435  return 1;
436 
437 err_out:
438  return 0;
439 }
440 
441 /* Decode imm v32(Iz). Return 0 if failed */
442 static int __get_immv32(struct insn *insn)
443 {
444  switch (insn->opnd_bytes) {
445  case 2:
446  insn->immediate.value = get_next(short, insn);
447  insn->immediate.nbytes = 2;
448  break;
449  case 4:
450  case 8:
451  insn->immediate.value = get_next(int, insn);
452  insn->immediate.nbytes = 4;
453  break;
454  default: /* opnd_bytes must be modified manually */
455  goto err_out;
456  }
457 
458  return 1;
459 
460 err_out:
461  return 0;
462 }
463 
464 /* Decode imm v64(Iv/Ov), Return 0 if failed */
465 static int __get_immv(struct insn *insn)
466 {
467  switch (insn->opnd_bytes) {
468  case 2:
469  insn->immediate1.value = get_next(short, insn);
470  insn->immediate1.nbytes = 2;
471  break;
472  case 4:
473  insn->immediate1.value = get_next(int, insn);
474  insn->immediate1.nbytes = 4;
475  break;
476  case 8:
477  insn->immediate1.value = get_next(int, insn);
478  insn->immediate1.nbytes = 4;
479  insn->immediate2.value = get_next(int, insn);
480  insn->immediate2.nbytes = 4;
481  break;
482  default: /* opnd_bytes must be modified manually */
483  goto err_out;
484  }
485  insn->immediate1.got = insn->immediate2.got = 1;
486 
487  return 1;
488 err_out:
489  return 0;
490 }
491 
492 /* Decode ptr16:16/32(Ap) */
493 static int __get_immptr(struct insn *insn)
494 {
495  switch (insn->opnd_bytes) {
496  case 2:
497  insn->immediate1.value = get_next(short, insn);
498  insn->immediate1.nbytes = 2;
499  break;
500  case 4:
501  insn->immediate1.value = get_next(int, insn);
502  insn->immediate1.nbytes = 4;
503  break;
504  case 8:
505  /* ptr16:64 is not exist (no segment) */
506  return 0;
507  default: /* opnd_bytes must be modified manually */
508  goto err_out;
509  }
510  insn->immediate2.value = get_next(unsigned short, insn);
511  insn->immediate2.nbytes = 2;
512  insn->immediate1.got = insn->immediate2.got = 1;
513 
514  return 1;
515 err_out:
516  return 0;
517 }
518 
529 {
530  if (insn->immediate.got)
531  return;
532  if (!insn->displacement.got)
533  insn_get_displacement(insn);
534 
535  if (inat_has_moffset(insn->attr)) {
536  if (!__get_moffset(insn))
537  goto err_out;
538  goto done;
539  }
540 
541  if (!inat_has_immediate(insn->attr))
542  /* no immediates */
543  goto done;
544 
545  switch (inat_immediate_size(insn->attr)) {
546  case INAT_IMM_BYTE:
547  insn->immediate.value = get_next(signed char, insn);
548  insn->immediate.nbytes = 1;
549  break;
550  case INAT_IMM_WORD:
551  insn->immediate.value = get_next(short, insn);
552  insn->immediate.nbytes = 2;
553  break;
554  case INAT_IMM_DWORD:
555  insn->immediate.value = get_next(int, insn);
556  insn->immediate.nbytes = 4;
557  break;
558  case INAT_IMM_QWORD:
559  insn->immediate1.value = get_next(int, insn);
560  insn->immediate1.nbytes = 4;
561  insn->immediate2.value = get_next(int, insn);
562  insn->immediate2.nbytes = 4;
563  break;
564  case INAT_IMM_PTR:
565  if (!__get_immptr(insn))
566  goto err_out;
567  break;
568  case INAT_IMM_VWORD32:
569  if (!__get_immv32(insn))
570  goto err_out;
571  break;
572  case INAT_IMM_VWORD:
573  if (!__get_immv(insn))
574  goto err_out;
575  break;
576  default:
577  /* Here, insn must have an immediate, but failed */
578  goto err_out;
579  }
580  if (inat_has_second_immediate(insn->attr)) {
581  insn->immediate2.value = get_next(signed char, insn);
582  insn->immediate2.nbytes = 1;
583  }
584 done:
585  insn->immediate.got = 1;
586 
587 err_out:
588  return;
589 }
590 
598 void insn_get_length(struct insn *insn)
599 {
600  if (insn->length)
601  return;
602  if (!insn->immediate.got)
603  insn_get_immediate(insn);
604  insn->length = (unsigned char)((unsigned long)insn->next_byte
605  - (unsigned long)insn->kaddr);
606 }
unsigned int insn_attr_t
Definition: inat_types.h:25
void insn_get_modrm(struct insn *insn)
Definition: insn.c:266
Definition: insn.h:36
static int inat_accept_vex(insn_attr_t attr)
Definition: inat.h:230
struct insn_field vex_prefix
Definition: insn.h:42
#define peek_next(t, insn)
Definition: insn.c:45
#define INAT_IMM_BYTE
Definition: inat.h:57
#define INAT_IMM_VWORD
Definition: inat.h:63
#define X86_VEX_W(vex)
Definition: insn.h:88
const insn_byte_t * kaddr
Definition: insn.h:67
static int insn_last_prefix_id(struct insn *insn)
Definition: insn.h:170
static int inat_has_immediate(insn_attr_t attr)
Definition: inat.h:195
insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, insn_attr_t grp_attr)
Definition: inat.c:59
const insn_byte_t * end_kaddr
Definition: insn.h:68
static int inat_is_operand_size_prefix(insn_attr_t attr)
Definition: inat.h:135
void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
Definition: insn.c:53
static int inat_is_vex3_prefix(insn_attr_t attr)
Definition: inat.h:165
static insn_byte_t insn_vex_p_bits(struct insn *insn)
Definition: insn.h:161
#define X86_REX_W(rex)
Definition: insn.h:82
static int inat_is_legacy_prefix(insn_attr_t attr)
Definition: inat.h:124
insn_attr_t attr
Definition: insn.h:61
static int inat_is_evex_prefix(insn_attr_t attr)
Definition: inat.h:160
insn_value_t value
Definition: insn.h:28
struct insn_field modrm
Definition: insn.h:48
x86 movsq based memset() in arch/x86/lib/memset_64.S") MEMSET_FN(memset_erms
insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
Definition: inat.c:27
static int inat_is_group(insn_attr_t attr)
Definition: inat.h:180
static int __get_immv(struct insn *insn)
Definition: insn.c:465
#define INAT_IMM_VWORD32
Definition: inat.h:62
static int inat_has_second_immediate(insn_attr_t attr)
Definition: inat.h:215
static int inat_is_rex_prefix(insn_attr_t attr)
Definition: inat.h:140
#define peek_nbyte_next(t, insn, n)
Definition: insn.c:42
unsigned char insn_byte_t
Definition: inat_types.h:26
#define INAT_IMM_PTR
Definition: inat.h:61
if(!yyg->yy_init)
static int inat_is_escape(insn_attr_t attr)
Definition: inat.h:170
#define X86_MODRM_RM(modrm)
Definition: insn.h:76
struct insn_field opcode
Definition: insn.h:43
static int inat_must_evex(insn_attr_t attr)
Definition: inat.h:240
void insn_get_length(struct insn *insn)
Definition: insn.c:598
struct insn_field rex_prefix
Definition: insn.h:41
#define INAT_IMM_DWORD
Definition: inat.h:59
static int __get_immv32(struct insn *insn)
Definition: insn.c:442
struct insn_field immediate
Definition: insn.h:52
struct insn_field displacement
Definition: insn.h:50
static int __get_moffset(struct insn *insn)
Definition: insn.c:413
#define INAT_IMM_WORD
Definition: inat.h:58
static int inat_has_modrm(insn_attr_t attr)
Definition: inat.h:205
void insn_get_sib(struct insn *insn)
Definition: insn.c:326
unsigned char got
Definition: insn.h:32
insn_byte_t bytes[4]
Definition: insn.h:29
insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, insn_attr_t esc_attr)
Definition: inat.c:40
unsigned char length
Definition: insn.h:64
int insn_rip_relative(struct insn *insn)
Definition: insn.c:304
struct insn_field prefixes
Definition: insn.h:37
void insn_get_immediate(struct insn *insn)
Definition: insn.c:528
insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, insn_byte_t vex_p)
Definition: inat.c:79
static int __get_immptr(struct insn *insn)
Definition: insn.c:493
const insn_byte_t * next_byte
Definition: insn.h:69
struct insn_field moffset1
Definition: insn.h:53
static int inat_is_force64(insn_attr_t attr)
Definition: inat.h:210
#define X86_SIB_BASE(sib)
Definition: insn.h:80
void insn_get_displacement(struct insn *insn)
Definition: insn.c:357
#define MAX_INSN_SIZE
Definition: insn.h:72
struct insn_field moffset2
Definition: insn.h:57
static int inat_has_moffset(insn_attr_t attr)
Definition: inat.h:220
#define INAT_IMM_QWORD
Definition: inat.h:60
static int inat_is_address_size_prefix(insn_attr_t attr)
Definition: inat.h:130
#define X86_MODRM_MOD(modrm)
Definition: insn.h:74
unsigned char x86_64
Definition: insn.h:65
static insn_byte_t insn_vex_m_bits(struct insn *insn)
Definition: insn.h:151
static int insn_is_avx(struct insn *insn)
Definition: insn.h:130
Definition: attr.py:1
static int inat_is_vex_prefix(insn_attr_t attr)
Definition: inat.h:153
static int insn_is_evex(struct insn *insn)
Definition: insn.h:137
unsigned char opnd_bytes
Definition: insn.h:62
static int inat_must_vex(insn_attr_t attr)
Definition: inat.h:235
unsigned char nbytes
Definition: insn.h:33
unsigned char addr_bytes
Definition: insn.h:63
#define get_next(t, insn)
Definition: insn.c:39
static bool done
Definition: futex-hash.c:35
struct insn_field immediate1
Definition: insn.h:54
void insn_get_opcode(struct insn *insn)
Definition: insn.c:213
static int inat_immediate_size(insn_attr_t attr)
Definition: inat.h:200
void insn_get_prefixes(struct insn *insn)
Definition: insn.c:82
struct insn_field sib
Definition: insn.h:49
struct insn_field immediate2
Definition: insn.h:58