HPCToolkit
Seg.cpp
Go to the documentation of this file.
1 // -*-Mode: C++;-*-
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // $HeadURL$
6 // $Id$
7 //
8 // --------------------------------------------------------------------------
9 // Part of HPCToolkit (hpctoolkit.org)
10 //
11 // Information about sources of support for research and development of
12 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
13 // --------------------------------------------------------------------------
14 //
15 // Copyright ((c)) 2002-2019, Rice University
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions are
20 // met:
21 //
22 // * Redistributions of source code must retain the above copyright
23 // notice, this list of conditions and the following disclaimer.
24 //
25 // * Redistributions in binary form must reproduce the above copyright
26 // notice, this list of conditions and the following disclaimer in the
27 // documentation and/or other materials provided with the distribution.
28 //
29 // * Neither the name of Rice University (RICE) nor the names of its
30 // contributors may be used to endorse or promote products derived from
31 // this software without specific prior written permission.
32 //
33 // This software is provided by RICE and contributors "as is" and any
34 // express or implied warranties, including, but not limited to, the
35 // implied warranties of merchantability and fitness for a particular
36 // purpose are disclaimed. In no event shall RICE or contributors be
37 // liable for any direct, indirect, incidental, special, exemplary, or
38 // consequential damages (including, but not limited to, procurement of
39 // substitute goods or services; loss of use, data, or profits; or
40 // business interruption) however caused and on any theory of liability,
41 // whether in contract, strict liability, or tort (including negligence
42 // or otherwise) arising in any way out of the use of this software, even
43 // if advised of the possibility of such damage.
44 //
45 // ******************************************************* EndRiceCopyright *
46 
47 //***************************************************************************
48 //
49 // File:
50 // $HeadURL$
51 //
52 // Purpose:
53 // [The purpose of this file]
54 //
55 // Description:
56 // [The set of functions, macros, etc. defined in the file]
57 //
58 //***************************************************************************
59 
60 //************************* System Include Files ****************************
61 
62 #include <iostream>
63 using std::cerr;
64 using std::endl;
65 using std::hex;
66 using std::dec;
67 
68 #include <sstream>
69 
70 #include <string>
71 using std::string;
72 
73 #include <map>
74 
75 #include <algorithm>
76 
77 #include <cstring>
78 
79 //*************************** User Include Files ****************************
80 
81 #include <include/gcc-attr.h>
82 #include <include/gnu_bfd.h>
83 
84 #include "LM.hpp"
85 #include "Seg.hpp"
86 #include "Proc.hpp"
87 #include "Insn.hpp"
88 
89 #include "Dbg-LM.hpp"
90 #include "Dbg-Proc.hpp"
91 
92 #include "BinUtils.hpp"
93 
94 #include <lib/isa/ISA.hpp>
95 
97 
98 //*************************** Forward Declarations **************************
99 
100 //***************************************************************************
101 
102 //***************************************************************************
103 // Seg
104 //***************************************************************************
105 
106 BinUtil::Seg::Seg(BinUtil::LM* lm, const string& name, Type type,
107  VMA beg, VMA end, VMA size)
108  : m_lm(lm), m_name(name), m_type(type),
109  m_begVMA(beg), m_endVMA(end), m_size(size)
110 {
111 }
112 
113 
115 {
116  m_lm = NULL;
117 }
118 
119 
120 string
121 BinUtil::Seg::toString(int flags, const char* pre) const
122 {
123  std::ostringstream os;
124  dump(os, flags, pre);
125  return os.str();
126 }
127 
128 
129 void
130 BinUtil::Seg::dump(std::ostream& o, GCC_ATTR_UNUSED int flags,
131  const char* pre) const
132 {
133  string p(pre);
134  o << std::showbase;
135  o << p << "------------------- Section Dump ------------------\n";
136  o << p << " Name: `" << name() << "'\n";
137  o << p << " Type: `";
138  switch (type()) {
139  case TypeBSS: o << "BSS'\n"; break;
140  case TypeText: o << "Text'\n"; break;
141  case TypeData: o << "Data'\n"; break;
142  default: DIAG_Die("Unknown segment type");
143  }
144  o << p << " VMA: [" << hex << begVMA() << ", " << endVMA() << dec << ")\n";
145  o << p << " Size(b): " << size() << "\n";
146 }
147 
148 
149 void
151 {
152  dump(std::cerr);
153 }
154 
155 //***************************************************************************
156 // TextSeg
157 //***************************************************************************
158 
159 BinUtil::TextSeg::TextSeg(BinUtil::LM* lm, const string& name,
160  VMA beg, VMA end, uint64_t size)
161  : Seg(lm, name, Seg::TypeText, beg, end, size),
162  m_contents(NULL), m_contentsRaw(NULL)
163 {
164  uint rflg = m_lm->readFlags();
165  if (rflg & LM::ReadFlg_fProc) {
166  ctor_initProcs();
167  }
168  if (rflg & LM::ReadFlg_fInsn) {
169  ctor_readSegment();
170  ctor_disassembleProcs();
171  }
172 }
173 
174 
176 {
177  // Clear procedures
178  for (ProcVec::iterator it = m_procs.begin(); it != m_procs.end(); ++it) {
179  delete *it; // Proc*
180  }
181 
182  // BFD info
183  delete[] m_contentsRaw;
184  m_contentsRaw = NULL;
185  m_contents = NULL;
186 }
187 
188 
189 void
190 BinUtil::TextSeg::dump(std::ostream& o, int flags, const char* pre) const
191 {
192  string pfx(pre);
193  string pfx1 = pfx + " ";
194 
195  Seg::dump(o, flags, pre);
196  o << pfx << " Procedures (" << numProcs() << ")\n";
197  for (ProcVec::const_iterator it = m_procs.begin();
198  it != m_procs.end(); ++it) {
199  Proc* x = *it;
200  x->dump(o, flags, pfx1.c_str());
201  }
202 }
203 
204 
205 //***************************************************************************
206 
207 
208 void
210 {
211  Dbg::LM* dbgInfo = m_lm->getDebugInfo();
212 
213  // Any procedure with a parent has a <Proc*, parentVMA> entry
214  std::map<Proc*, VMA> parentMap;
215 
216  // ------------------------------------------------------------
217  // Each text section finds and creates its own routines.
218  // Traverse the symbol table (which is sorted by VMA) searching
219  // for function symbols in our section. Create a Proc for
220  // each one found.
221  //
222  // Note that symbols can appear multiple times (e.g. a weak symbol
223  // 'sbrk' along with a gloabl symbol '__sbrk'), but we should not
224  // have multiple procedures.
225  // ------------------------------------------------------------
226 
227  bfd* abfd = m_lm->abfd();
228  asymbol** symtab = m_lm->bfdSymTab(); // sorted
229  uint symtabSz = m_lm->bfdSymTabSz();
230 
231  // FIXME:PERF: exploit sortedness of 'symtab' to start iteration
232  for (uint i = 0; i < symtabSz; i++) {
233  asymbol* sym = symtab[i];
234  if (isIn(bfd_asymbol_value(sym)) && Proc::isProcBFDSym(sym)) {
235  // NOTE: initially we have [begVMA, endVMA) where endVMA is the
236  // *end* of the last insn. This is changed after decoding below.
237  VMA begVMA = bfd_asymbol_value(sym);
238  VMA endVMA = 0;
239 
240  Proc::Type procType;
241  if (sym->flags & BSF_LOCAL) {
242  procType = Proc::Local;
243  }
244  else if (sym->flags & BSF_WEAK) {
245  procType = Proc::Weak;
246  }
247  else if (sym->flags & BSF_GLOBAL) {
248  procType = Proc::Global;
249  }
250  else {
251  procType = Proc::Unknown;
252  }
253 
254  Proc* proc = m_lm->findProc(begVMA);
255  if (proc) {
256  DIAG_Assert(proc->begVMA() == begVMA, "TextSeg::ctor_initProcs: Procedure beginning at 0x" << hex << begVMA << " overlaps with:\n" << proc->toString());
257  if (procType == Proc::Global) {
258  // 'global' types take precedence
259  proc->type(procType);
260  }
261  continue;
262  }
263 
264  // Create a procedure based on best information we have. We
265  // always prefer explicit debug information over that inferred
266  // from the symbol table.
267  string procNm;
268  string symNm = bfd_asymbol_name(sym);
269 
270  Dbg::LM::iterator it = dbgInfo->find(begVMA);
271  Dbg::Proc* dbg = (it != dbgInfo->end()) ? it->second : NULL;
272 
273  if (!dbg) {
274  procNm = findProcName(abfd, sym);
275  string pnm = BinUtil::canonicalizeProcName(procNm);
276 
277  Dbg::LM::iterator1 it1 = dbgInfo->find1(pnm);
278  dbg = (it1 != dbgInfo->end1()) ? it1->second : NULL;
279  }
280  if (!dbg) {
281  Dbg::LM::iterator1 it1 = dbgInfo->find1(symNm);
282  dbg = (it1 != dbgInfo->end1()) ? it1->second : NULL;
283  }
284 
285  // Finding the end VMA (end of last insn). The computation is
286  // as follows because sometimes the debug information is
287  // *wrong*. (Intel 9 has generated significant over-estimates).
288  //
289  // N.B. exploits the fact that the symbol table is sorted by vma
290  VMA endVMA_approx = findProcEnd(i);
291 
292  if (dbg) {
293  if (!dbg->name.empty()) {
294  procNm = dbg->name;
295  }
296  else if (!symNm.empty()) {
297  // sometimes a procedure name is in the symbol table even
298  // though it is not in the dwarf section. this case occurs
299  // when gcc outlines routines from OpenMP parallel sections.
300  procNm = symNm;
301  }
302 
303 #if 1
304  // Remove capability below... the DWARF sizes can be wrong!!
305  endVMA = endVMA_approx;
306 #else
307  endVMA = std::min(dbg->endVMA, endVMA_approx);
308  if (endVMA != endVMA_approx) {
309  int64_t diff = endVMA - endVMA_approx;
310  DIAG_DevMsg(0, procNm << ": inconsistent end VMA: " << diff << " [" << std::showbase << std::hex << begVMA << "-" << endVMA << "/" << endVMA_approx << std::dec << "]");
311  }
312 #endif
313  }
314  if (!dbg || endVMA == 0) {
315  endVMA = endVMA_approx;
316  }
317  uint size = endVMA - begVMA;
318 
319  if (size == 0) {
320  continue;
321  }
322 
323  // We now have a valid procedure. Initilize with [begVMA, endVMA),
324  // but note this is changed after disassembly.
325  proc = new Proc(this, procNm, symNm, procType, begVMA, endVMA, size);
326  m_procs.push_back(proc);
327  m_lm->insertProc(VMAInterval(begVMA, endVMA), proc);
328 
329  // Add symbolic info
330  if (dbg) {
331  proc->filename(dbg->filenm);
332  proc->begLine(dbg->begLine);
333  if (dbg->parent) {
334  parentMap.insert(std::make_pair(proc, dbg->parent->begVMA));
335  }
336  }
337  }
338  // Xu: treat the OBJ symbol as the dummy proc symbol
339  else if(Proc::isDummyProcBFDSym(sym)) {
340  VMA begVMA = bfd_asymbol_value(sym);
341  VMA endVMA = begVMA + 1;
342 
343  Proc::Type procType = Proc::Data;
344  string symNm = bfd_asymbol_name(sym);
345  string procNm = symNm;
346  Proc* proc = m_lm->findProc(begVMA);
347  proc = new Proc(this, procNm, symNm, procType, begVMA, endVMA, 1);
348  m_procs.push_back(proc);
349  m_lm->insertProc(VMAInterval(begVMA, endVMA), proc);
350  }
351  }
352 
353  // ------------------------------------------------------------
354  // If a text section does not have any function symbols, consider
355  // the whole section a quasi procedure
356  // ------------------------------------------------------------
357  if (numProcs() == 0) {
358  // [begVMA, endVMA)
359  Proc* proc = new Proc(this, name(), name(), Proc::Quasi,
360  begVMA(), endVMA(), size());
361  m_procs.push_back(proc);
362  m_lm->insertProc(VMAInterval(begVMA(), endVMA()), proc);
363  }
364 
365 
366  // ------------------------------------------------------------
367  // Embed parent information
368  // ------------------------------------------------------------
369  for (std::map<Proc*, VMA>::iterator it = parentMap.begin();
370  it != parentMap.end(); ++it) {
371  Proc* child = it->first;
372  VMA parentVMA = it->second;
373  Proc* parent = m_lm->findProc(parentVMA);
374  DIAG_AssertWarn(parent, "Could not find parent within this section:\n"
375  << child->toString());
376  if (parent == child) {
377  DIAG_WMsg(0, "Procedure has itself as parent!\n" << child->toString());
378  continue; // skip
379  }
380  child->parent(parent);
381  }
382 }
383 
384 
385 // Read in the section data (usually raw instructions).
386 void
388 {
389  // - Obtain a new buffer, and align the pointer to a 16-byte
390  // boundary.
391  // - We also add a 16 byte buffer at the beginning of the contents.
392  // This is because some of the GNU decoders (e.g. Sparc) want to
393  // examine both an instruction and its predecessor at the same
394  // time. Since we do not want to tell them about text section
395  // sizes -- the ISA classes are independent of these details -- we
396  // add this padding to prevent array access errors when decoding
397  // the first instruction.
398 
399  // FIXME: Does "new" provide a way of returning an aligned pointer?
400  m_contentsRaw = new char[size()+16+16];
401  memset(m_contentsRaw, 0, 16+16); // zero the padding
402  char* contentsTmp = m_contentsRaw + 16; // add the padding
403  m_contents = (char *)( ((uintptr_t)contentsTmp + 15) & ~15 ); // align
404 
405  bfd* abfd = m_lm->abfd();
406  asection* bfdSeg = bfd_get_section_by_name(abfd, name().c_str());
407  int ret = bfd_get_section_contents(abfd, bfdSeg, m_contents, 0, size());
408  if (!ret) {
409  delete[] m_contentsRaw;
410  m_contentsRaw = m_contents = NULL;
411  DIAG_EMsg("Error reading section: " << bfd_errmsg(bfd_get_error()));
412  return;
413  }
414 }
415 
416 
417 // Disassemble the instructions in each procedure
418 void
420 {
421  // ------------------------------------------------------------
422  // Disassemble the instructions in each procedure.
423  // ------------------------------------------------------------
424  VMA sectionBase = begVMA();
425 
426  for (ProcVec::iterator it = m_procs.begin(); it != m_procs.end(); ++it) {
427  Proc* p = *it;
428  if (p->isDummyProc())
429  continue;
430 
431  VMA procBeg = p->begVMA();
432  VMA procEnd = p->endVMA();
433  ushort insnSz = 0;
434  VMA lastInsnVMA = procBeg; // vma of last valid instruction in the proc
435 
436  // Iterate over each vma at which an instruction might begin
437  for (VMA vma = procBeg; vma < procEnd; ) {
438  MachInsn *mi = &(m_contents[vma - sectionBase]);
439  insnSz = LM::isa->getInsnSize(mi);
440  if (insnSz == 0) {
441  // This is not a recognized instruction (cf. data on CISC ISAs).
442  ++vma; // Increment the VMA, and try to decode again.
443  continue;
444  }
445 
446  int num_ops = LM::isa->getInsnNumOps(mi);
447  if (num_ops == 0) {
448  // This instruction contains data. No need to decode.
449  vma += insnSz;
450  continue;
451  }
452 
453  // We have a valid instruction at this vma!
454  lastInsnVMA = vma;
455  for (ushort opIndex = 0; opIndex < num_ops; opIndex++) {
456  Insn *newInsn = makeInsn(m_lm->abfd(), mi, vma, opIndex, insnSz);
457  m_lm->insertInsn(vma, opIndex, newInsn);
458  }
459  vma += insnSz;
460  }
461  // 'insnSz' is now the size of the last instruction or 0
462 
463  // Now we can update the procedure's end address and size since we
464  // know where the last instruction begins. The procedure's
465  // original end address was guessed to be the begin address of the
466  // following procedure while determining all procedures above.
467  p->endVMA(lastInsnVMA);
468  p->size(p->endVMA() - p->begVMA() + insnSz);
469  }
470 }
471 
472 
473 // Returns the name of the procedure referenced by 'procSym' using
474 // debugging information, if possible; otherwise returns the symbol
475 // name.
476 string
477 BinUtil::TextSeg::findProcName(bfd* abfd, asymbol* procSym) const
478 {
479  string procName;
480 
481  // cf. LM::findSrcCodeInfo()
482  asection* bfdSeg = bfd_get_section_by_name(abfd, name().c_str());
483 
484  bfd_boolean bfd_fnd = false;
485  const char* bfd_func = NULL;
486 
487  if (bfdSeg) {
488  bfd_vma secBase = bfd_section_vma(abfd, bfdSeg);
489  bfd_vma symVal = bfd_asymbol_value(procSym);
490 
491  const char* file = NULL;
492  uint line = 0;
493  bfd_fnd = bfd_find_nearest_line(abfd, bfdSeg, m_lm->bfdSymTab(),
494  symVal - secBase, &file, &bfd_func, &line);
495  }
496 
497  if (bfd_fnd && bfd_func && bfd_func[0] != '\0') {
498  procName = bfd_func;
499  }
500  else {
501  procName = bfd_asymbol_name(procSym);
502  }
503 
504  return procName;
505 }
506 
507 
508 // Approximate the end VMA of the function given by funcSymIndex.
509 // This is normally the address of the next function symbol in this
510 // section. However, if this is the last function in the section,
511 // then it is the address of the end of the section. One can safely
512 // assume this returns an over-estimate of the end VMA.
513 VMA
514 BinUtil::TextSeg::findProcEnd(int funcSymIndex) const
515 {
516  // Since the symbol table we get is sorted by VMA, we can stop
517  // the search as soon as we've gone beyond the VMA of this section.
518  asymbol** symtab = m_lm->bfdSymTab();
519  uint symtabSz = m_lm->bfdSymTabSz();
520 
521  VMA ret = endVMA();
522  for (uint next = funcSymIndex + 1; next < symtabSz; ++next) {
523  asymbol* sym = symtab[next];
524  if (!isIn(bfd_asymbol_value(sym))) {
525  break;
526  }
527  if (Proc::isProcBFDSym(sym)) {
528  ret = bfd_asymbol_value(sym);
529  break;
530  }
531  }
532  return ret;
533 }
534 
535 
536 // Returns a new instruction of the appropriate type. Promises not to
537 // return NULL.
539 BinUtil::TextSeg::makeInsn(bfd* abfd, MachInsn* mi, VMA vma, ushort opIndex,
540  ushort sz) const
541 {
542  // Assume that there is only one instruction type per
543  // architecture (unlike i860 for example).
544  Insn *newInsn = NULL;
545  switch (bfd_get_arch(abfd)) {
546  case bfd_arch_mips:
547  case bfd_arch_alpha:
548  case bfd_arch_powerpc:
549  case bfd_arch_sparc:
550  newInsn = new RISCInsn(mi, vma);
551  break;
552  case bfd_arch_i386:
553 #ifdef bfd_mach_k1om
554  case bfd_arch_k1om:
555 #endif
556  newInsn = new CISCInsn(mi, vma, sz);
557  break;
558  case bfd_arch_ia64:
559  newInsn = new VLIWInsn(mi, vma, opIndex);
560  break;
561  default:
562  DIAG_Die("TextSeg::makeInsn encountered unknown instruction type!");
563  }
564  return newInsn;
565 }
566 
567 
568 //***************************************************************************
iterator1 find1(const key_type1 &x)
Definition: Dbg-LM.hpp:216
string canonicalizeProcName(const std::string &name, ProcNameMgr *procNameMgr)
Definition: BinUtils.cpp:69
bfd_vma VMA
Definition: ISATypes.hpp:79
VMA begVMA() const
Definition: Proc.hpp:148
#define DIAG_EMsg(...)
Definition: diagnostics.h:251
virtual ~Seg()
Definition: Seg.cpp:114
iterator1 end1()
Definition: Dbg-LM.hpp:187
virtual void dump(std::ostream &o=std::cerr, int flags=LM::DUMP_Short, const char *pre="") const
Definition: Proc.cpp:132
SrcFile::ln begLine() const
Definition: Proc.hpp:186
uint size() const
Definition: Proc.hpp:161
void ctor_initProcs()
Definition: Seg.cpp:209
iterator find(const key_type &x)
Definition: Dbg-LM.hpp:170
virtual void dump(std::ostream &o=std::cerr, int flags=LM::DUMP_Short, const char *pre="") const
Definition: Seg.cpp:190
My_t::iterator iterator
Definition: Dbg-LM.hpp:109
bool isDummyProc() const
Definition: Proc.hpp:304
static bool isDummyProcBFDSym(asymbol *sym)
Definition: Proc.hpp:296
virtual ushort getInsnNumOps(MachInsn *mi)=0
unsigned short int ushort
Definition: uint.h:120
unsigned int uint
Definition: uint.h:124
std::string findProcName(bfd *abfd, asymbol *procSym) const
Definition: Seg.cpp:477
Type type() const
Definition: Proc.hpp:136
void ddump() const
Definition: Seg.cpp:150
std::string toString(int flags=LM::DUMP_Short, const char *pre="") const
Definition: Seg.cpp:121
Insn * makeInsn(bfd *abfd, MachInsn *mi, VMA vma, ushort opIndex, ushort sz) const
Definition: Seg.cpp:539
std::string name
Definition: Dbg-Proc.hpp:123
virtual ushort getInsnSize(MachInsn *mi)=0
#define DIAG_DevMsg(level,...)
Definition: diagnostics.h:246
My1_t::iterator iterator1
Definition: Dbg-LM.hpp:122
iterator end()
Definition: Dbg-LM.hpp:141
std::string toString(int flags=LM::DUMP_Short) const
Definition: Proc.cpp:123
VMA findProcEnd(int funcSymIndex) const
Definition: Seg.cpp:514
VMA endVMA() const
Definition: Proc.hpp:152
virtual void dump(std::ostream &o=std::cerr, int flags=LM::DUMP_Short, const char *pre="") const
Definition: Seg.cpp:130
void MachInsn
Definition: ISATypes.hpp:87
#define NULL
Definition: ElfHelper.cpp:85
SrcFile::ln begLine
Definition: Dbg-Proc.hpp:124
Proc * parent() const
Definition: Proc.hpp:194
void ctor_readSegment()
Definition: Seg.cpp:387
static bool isProcBFDSym(asymbol *sym)
Definition: Proc.hpp:283
LM::InsnMap::const_iterator it
Definition: Proc.hpp:421
void ctor_disassembleProcs()
Definition: Seg.cpp:419
virtual ~TextSeg()
Definition: Seg.cpp:175
std::string filenm
Definition: Dbg-Proc.hpp:123
#define DIAG_Die(...)
Definition: diagnostics.h:267
#define GCC_ATTR_UNUSED
Definition: gcc-attr.h:80
static MachInsn * mi
Definition: x86ISAXed.cpp:91
static ISA * isa
Definition: LM.hpp:493
const std::string & filename() const
Definition: Proc.hpp:178