HPCToolkit
LM.cpp
Go to the documentation of this file.
1 // -*-Mode: C++;-*-
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // $HeadURL$
6 // $Id$
7 //
8 // --------------------------------------------------------------------------
9 // Part of HPCToolkit (hpctoolkit.org)
10 //
11 // Information about sources of support for research and development of
12 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
13 // --------------------------------------------------------------------------
14 //
15 // Copyright ((c)) 2002-2019, Rice University
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions are
20 // met:
21 //
22 // * Redistributions of source code must retain the above copyright
23 // notice, this list of conditions and the following disclaimer.
24 //
25 // * Redistributions in binary form must reproduce the above copyright
26 // notice, this list of conditions and the following disclaimer in the
27 // documentation and/or other materials provided with the distribution.
28 //
29 // * Neither the name of Rice University (RICE) nor the names of its
30 // contributors may be used to endorse or promote products derived from
31 // this software without specific prior written permission.
32 //
33 // This software is provided by RICE and contributors "as is" and any
34 // express or implied warranties, including, but not limited to, the
35 // implied warranties of merchantability and fitness for a particular
36 // purpose are disclaimed. In no event shall RICE or contributors be
37 // liable for any direct, indirect, incidental, special, exemplary, or
38 // consequential damages (including, but not limited to, procurement of
39 // substitute goods or services; loss of use, data, or profits; or
40 // business interruption) however caused and on any theory of liability,
41 // whether in contract, strict liability, or tort (including negligence
42 // or otherwise) arising in any way out of the use of this software, even
43 // if advised of the possibility of such damage.
44 //
45 // ******************************************************* EndRiceCopyright *
46 
47 //***************************************************************************
48 //
49 // File:
50 // $HeadURL$
51 //
52 // Purpose:
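53 // Implementation of BinUtil::LM (a load module opened and read through BFD) and BinUtil::Exe (a load module that must be an executable).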
54 //
55 // Description:
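56 // BinUtil::NoReturns (addresses of functions known not to return), the BinUtil::LM and BinUtil::Exe member functions, and file-local helpers for dumping BFD symbols.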
57 //
58 //***************************************************************************
59 
60 //************************* System Include Files ****************************
61 
62 #include <typeinfo>
63 
64 #include <string>
65 using std::string;
66 
67 #include <sstream>
68 
69 #include <iostream>
70 #include <iomanip>
71 using std::cerr;
72 using std::endl;
73 
74 #include <cstring>
75 
76 //*************************** User Include Files ****************************
77 
78 #include <include/hpctoolkit-config.h>
79 #include <include/gcc-attr.h>
80 #include <include/uint.h>
81 
82 #include <lib/isa/EmptyISA.hpp>
83 
85 #include <lib/support/Logic.hpp>
87 
88 #include <include/linux_info.h>
89 
90 #include "LM.hpp"
91 #include "Seg.hpp"
92 #include "Insn.hpp"
93 #include "Proc.hpp"
95 #include "Dbg-LM.hpp"
96 
97 //***************************************************************************
98 // macros
99 //***************************************************************************
100 
101 #define DBG_BLD_PROC_MAP 0
102 
103 #define NORETURNS_DISABLE 0
104 #define NORETURNS_DEBUG 0
105 #define NORETURNS_RESULT_NOISY 0
106 #define NORETURNS_LOOKUP_NOISY 0
107 #define NORETURNS_LOOKUP_LOCAL_NOISY 0
108 
109 
110 //***************************************************************************
111 // private data
112 //***************************************************************************
113 
114 static const char *noreturn_table[] = {
115  // include machine-generated file containing names of functions
116  // that don't return
117 #include "names.cpp"
118 };
119 
120 
121 //***************************************************************************
122 // type declarations
123 //***************************************************************************
124 
125 namespace BinUtil {
126 
128 public:
129  bool operator()(const char *s1, const char *s2) const {
130  int result = strcmp(s1,s2);
131  return result < 0 ? true : false;
132  }
133 };
134 
135 
136 class name_set : public std::set<const char*, cstring_compare> {
137 public:
138  name_set() {
139  unsigned int i;
140  for(i = 0; i < sizeof(noreturn_table)/sizeof(char*); i++)
141  insert(noreturn_table[i]);
142  }
143 
144  void
146  for(name_set::iterator i = this->begin();
147  i != this->end(); i++) {
148  std::cout << *i << std::endl;
149  }
150  }
151 };
152 
153 
155 
156 
157 class NoReturns : private std::set<VMA> {
158 public:
159 
161  };
162 
163  void
164  addSynSymEntries(asymbol* syms, long symcount) {
165 #if NORETURNS_RESULT_NOISY
166  printf("-------- dyn sym noreturn functions ------ \n");
167 #endif
168  //-------------------------------------------------------------------
169  // iterate through the functions referenced through the load module's
170  // PLT to identify which of them are known to not return. for each
171  // PLT entry for a function that doesn't return, record the address
172  // of its PLT trampoline so that we can identify calls to functions
173  // that don't return when building a control flow graph.
174  //
175  // Note: the binutils interface to symbols in the PLT processes
176  // the symbols and converts each symbol with name "symname" to
177  // have name "symname@plt". Here, we strip off the suffix
178  // that begins with the @ before comparing a PLT symbol name
179  // with names of functions known to not return.
180  //-------------------------------------------------------------------
181  asymbol *symbol = syms;
182  for (long i = 0; i < symcount; i++, symbol++) {
183  //-----------------------------------------------------
184  // create a copy of the symbol name that we can modify
185  //-----------------------------------------------------
186  char *name = strdup(bfd_asymbol_name(symbol));
187 
188  char *index = strchr(name,'@'); // look for an @ in the symbol
189  if (index) { // if an @ was found
190  *index = 0; // remove the suffix @...
191  unsigned long addr = bfd_asymbol_value(symbol);
192  //---------------------------------------------------------
193  // look for the symbol name in the set of functions known to
194  // not return. if it is found, note the address of the PLT
195  // entry as the address of a callee that won't return.
196  //---------------------------------------------------------
197 #if NORETURNS_LOOKUP_NOISY
198  std::cout << "looking up " << name << " @ " << std::hex << "0x" << addr << std::endl;
199 #endif
200  addIfNoReturn(name, addr);
201  }
202  free(name);
203  }
204  };
205 
206 
207  void
208  addSymEntries(asymbol** syms, long symcount) {
209 #if NORETURNS_RESULT_NOISY
210  printf("-------- sym noreturn functions ------ \n");
211 #endif
212  //-------------------------------------------------------------------
213  // iterate through functions statically linked in the load module
214  // to identify which of them are known to not return. for each
215  // function that doesn't return, record its address so that we can
216  // identify calls to functions that don't return when building a
217  // control flow graph.
218  //-------------------------------------------------------------------
219  asymbol **symbol = syms;
220  if (symbol && symcount > 0) {
221  for (long i = 0; symbol[i]; i++) {
222  const char *name = bfd_asymbol_name(symbol[i]);
223  if (symbol[i]->flags & (BSF_FUNCTION)) {
224  unsigned long addr = bfd_asymbol_value(symbol[i]);
225 #if NORETURNS_LOOKUP_NOISY
226  std::cout << "looking up " << name << " @ " << std::hex << "0x"
227  << addr << std::endl;
228 #endif
229  if (addr) {
230  addIfNoReturn(name, addr);
231  }
232  } else if (symbol[i]->flags == BSF_LOCAL) {
233  // look for plt_call and long branch trampolines found in
234  // Power binaries
235 #if NORETURNS_LOOKUP_LOCAL_NOISY
236  unsigned long laddr = bfd_asymbol_value(symbol[i]);
237  std::cout << "looking up local symbol " << name << std::hex << "0x"
238  << laddr << std::endl;
239 #endif
240  if (strstr(name, "long_branch_r2off")) {
241  unsigned long addr = bfd_asymbol_value(symbol[i]);
242  if (addr) {
243  char *dname = strdup(name);
244  char *index = strchr(dname,'+'); // look for an @ in the symbol
245  if (index) { // if an @ was found
246  *index = 0; // remove the suffix @...
247  }
248  // on power architectures, need to eliminate
249  // *.long_branch_r2off. prefix
250  char *name = dname;
251  char *name_suffix = strrchr(dname,'.');
252  if (name_suffix) {
253  name = name_suffix + 1;
254  }
255 #if NORETURNS_LOOKUP_NOISY
256  std::cout << "looking up " << name << " @ " << std::hex << "0x"
257  << addr << std::endl;
258 #endif
259  addIfNoReturn(name, addr);
260  free(dname);
261  }
262  } else if (strstr(name, "plt_call")) {
263  unsigned long addr = bfd_asymbol_value(symbol[i]);
264  if (addr) {
265  char *dname = strdup(name);
266  char *index = strchr(dname,'@'); // look for an @ in the symbol
267  if (index) { // if an @ was found
268  *index = 0; // remove the suffix @...
269  }
270  // on power architectures, need to eliminate *.plt_call. prefix
271  char *name = dname;
272  char *name_suffix = strrchr(dname,'.');
273  if (name_suffix) {
274  name = name_suffix + 1;
275  }
276 #if NORETURNS_LOOKUP_NOISY
277  std::cout << "looking up " << name << " @ " << std::hex << "0x"
278  << addr << std::endl;
279 #endif
280  addIfNoReturn(name, addr);
281  free(dname);
282  }
283  }
284  }
285  }
286  }
287  };
288 
290  };
291 
292  bool
294  return NORETURNS_DISABLE ? false : this->find(addr) != this->end();
295  };
296 
297 
298  void
299  addIfNoReturn(const char *name, uint64_t addr) {
300  if (noreturn_fn_names.find(name) != noreturn_fn_names.end()) {
301  this->insert(addr);
302 #if NORETURNS_RESULT_NOISY
303  std::cout << name << " @ " << std::hex << "0x" << addr << std::endl;
304 #endif
305  }
306  };
307 
308 
309  void
310  dump() {
311  std::cout << "noreturns (addresses of functions that don't return)"
312  << std::endl;
313  for(std::set<VMA>::iterator i = this->begin(); i != this->end(); i++) {
314  std::cout << std::hex << "0x" << *i << std::endl;
315  }
316  };
317 };
318 
319 }
320 
321 
322 //*************************** Forward Declarations **************************
323 
324 
325 static void
326 dumpSymFlag(std::ostream& o, asymbol* sym, int flag, const char* txt, bool& hasPrinted);
327 
328 
329 //***************************************************************************
330 
331 
332 //***************************************************************************
333 // LM
334 //***************************************************************************
335 
336 // current ISA (see comments in header)
338 
339 
340 BinUtil::LM::LM(bool useBinutils)
341  : m_type(TypeNULL), m_readFlags(ReadFlg_NULL),
342  m_txtBeg(0), m_txtEnd(0), m_begVMA(0),
343  m_textBegReloc(0), m_unrelocDelta(0),
344  m_bfd(NULL), m_bfdSymTab(NULL),
345  m_bfdDynSymTab(NULL), m_bfdSynthTab(NULL),
346  m_bfdSymTabSort(NULL), m_bfdSymTabSz(0), m_bfdDynSymTabSz(0),
347  m_bfdSymTabSortSz(0), m_bfdSynthTabSz(0), m_noreturns(0),
348  m_realpathMgr(RealPathMgr::singleton()), m_useBinutils(useBinutils),
349  m_simpleSymbols(0)
350 {
351 }
352 
353 
355 {
356  for (SegMap::iterator it = m_segMap.begin(); it != m_segMap.end(); ++it) {
357  delete it->second; // Seg*
358  }
359  m_segMap.clear();
360 
361  for (InsnMap::iterator it = m_insnMap.begin(); it != m_insnMap.end(); ++it) {
362  delete it->second; // Insn*
363  }
364  m_insnMap.clear();
365 
366  // BFD info
367  if (m_bfd) {
368  bfd_close(m_bfd);
369  m_bfd = NULL;
370  }
371 
372  delete[] m_bfdSymTab;
373  m_bfdSymTab = NULL;
374 
377 
378  delete[] m_bfdSymTabSort;
380 
381  m_bfdSymTabSz = 0;
382  m_bfdSymTabSortSz = 0;
383  m_bfdSynthTabSz = 0;
384 
385  // reset isa
386  delete isa;
387  isa = NULL;
388 
389  delete m_noreturns;
390  m_noreturns = NULL;
391 }
392 
393 
394 void
395 BinUtil::LM::open(const char* filenm)
396 {
397  DIAG_Assert(Logic::implies(!m_name.empty(), m_name.c_str() == filenm),
398  "Cannot open a different file!");
399 
400  if (simpleSymbolsFactories.find(filenm)) {
401  m_name = filenm;
402  return;
403  }
404 
405  // -------------------------------------------------------
406  // 1. Initialize bfd and open the object file.
407  // -------------------------------------------------------
408 
409  // Determine file existence.
410  bfd_init();
411  m_bfd = bfd_openr(filenm, "default");
412  if (!m_bfd) {
413  BINUTIL_Throw("'" << filenm << "': " << bfd_errmsg(bfd_get_error()));
414  }
415 
416  // bfd_object: may contain data, symbols, relocations and debug info
417  // bfd_archive: contains other BFDs and an optional index
418  // bfd_core: contains the result of an executable core dump
419  if (!bfd_check_format(m_bfd, bfd_object)) {
420  BINUTIL_Throw("'" << filenm << "': not an object or executable");
421  }
422 
423  m_name = filenm;
425 
426  // -------------------------------------------------------
427  // 2. Collect data from BFD
428  // -------------------------------------------------------
429 
430  // Set flags. FIXME: both executable and dynamic flags can be set
431  // on some architectures (e.g. alpha).
432  flagword flags = bfd_get_file_flags(m_bfd);
433  if (flags & EXEC_P) { // BFD is directly executable
434  m_type = TypeExe;
435  }
436  else if (flags & DYNAMIC) { // BFD is a dynamic object
437  m_type = TypeDSO;
438  }
439  else {
440  m_type = TypeNULL;
441  }
442 
443  m_txtBeg = bfd_get_start_address(m_bfd); // entry point
444  m_begVMA = m_txtBeg;
445 
446  // -------------------------------------------------------
447  // 3. Configure ISA.
448  // -------------------------------------------------------
449 
450  // We no longer use binutils to crack instructions on any platform,
451  // so EmptyISA is a stub until we remove binutils entirely.
452 
453  if (! isa) {
454  isa = new EmptyISA;
455  }
456 }
457 
458 
459 void
460 BinUtil::LM::read(const std::set<std::string> &directorySet, LM::ReadFlg readflg)
461 {
462  // Internal sanity check.
463  DIAG_Assert(!m_name.empty(), "Must call LM::Open first");
464 
465  m_readFlags = (ReadFlg)(readflg | LM::ReadFlg_fSeg); // enforce ReadFlg rules
466 
468  if (sf){
469  m_simpleSymbols = sf->create();
470  if (! m_simpleSymbols->parse(directorySet, m_name.c_str())) {
471  // Warning: we cannot parse the load module.
472  // this is not a problem, so we can safely ignore the case
473  }
474  return;
475  }
476 
478  readSegs();
480 }
481 
482 
483 // relocate: Internally, all operations are performed on non-relocated
484 // VMAs. All routines operating on VMAs should call unrelocate(),
485 // which will do the right thing.
486 void
488 {
489  DIAG_Assert(m_txtBeg != 0, "LM::Relocate not supported!");
490  m_textBegReloc = textBegReloc;
491 
492  if (m_textBegReloc == 0) {
493  m_unrelocDelta = 0;
494  }
495  else {
496  //m_unrelocDelta = -(m_textBegReloc - m_txtBeg); // FMZ
498  }
499 }
500 
501 
502 MachInsn*
504 {
505  MachInsn* minsn = NULL;
506  size = 0;
507  Insn* insn = findInsn(vma, 0);
508  if (insn) {
509  size = insn->size();
510  minsn = insn->bits();
511  }
512  return minsn;
513 }
514 
515 
516 bool
518  string& func,
519  string& file, SrcFile::ln& line) /*const*/
520 {
521  bool STATUS = false;
522  func = file = "";
523  line = 0;
524 
525  if (m_simpleSymbols) {
526  STATUS = m_simpleSymbols->findEnclosingFunction(vma, func);
527  return STATUS;
528  }
529 
530  if (!m_bfdSymTab) {
531  return STATUS;
532  }
533 
534  VMA unrelocVMA = unrelocate(vma);
535  VMA opVMA = isa->convertVMAToOpVMA(unrelocVMA, opIndex);
536 
537  // Find the Seg where this vma lives.
538  asection* bfdSeg = NULL;
539  VMA base = 0;
540 
541  Seg* seg = findSeg(opVMA);
542  if (seg) {
543  bfdSeg = bfd_get_section_by_name(m_bfd, seg->name().c_str());
544  base = bfd_section_vma(m_bfd, bfdSeg);
545  }
546 
547  if (!bfdSeg) {
548  return STATUS;
549  }
550 
551  // Obtain the source line information.
552  const char *bfd_func = NULL, *bfd_file = NULL;
553  uint bfd_line = 0;
554  bfd_boolean fnd =
555  bfd_find_nearest_line(m_bfd, bfdSeg, m_bfdSymTab,
556  opVMA - base, &bfd_file, &bfd_func, &bfd_line);
557  if (fnd) {
558  STATUS = (bfd_file && bfd_func && SrcFile::isValid(bfd_line));
559 
560  if (bfd_func) {
561  func = bfd_func;
562  }
563  if (bfd_file) {
564  file = bfd_file;
565  m_realpathMgr.realpath(file);
566  }
567  line = (SrcFile::ln)bfd_line;
568  }
569 
570  return STATUS;
571 }
572 
573 
574 bool
575 BinUtil::LM::findSrcCodeInfo(VMA begVMA, ushort bOpIndex,
576  VMA endVMA, ushort eOpIndex,
577  string& func, string& file,
578  SrcFile::ln& begLine, SrcFile::ln& endLine,
579  unsigned flags) /*const*/
580 {
581  bool STATUS = false;
582  func = file = "";
583  begLine = endLine = 0;
584 
585  // Enforce condition that 'begVMA' <= 'endVMA'. (No need to unrelocate!)
586  VMA begOpVMA = isa->convertVMAToOpVMA(begVMA, bOpIndex);
587  VMA endOpVMA = isa->convertVMAToOpVMA(endVMA, eOpIndex);
588  if (! (begOpVMA <= endOpVMA) ) {
589  VMA tmpVMA = begVMA; // swap 'begVMA' with 'endVMA'
590  begVMA = endVMA;
591  endVMA = tmpVMA;
592  ushort tmpOpIdx = bOpIndex; // swap 'bOpIndex' with 'eOpIndex'
593  bOpIndex = eOpIndex;
594  eOpIndex = tmpOpIdx;
595  }
596 
597  // Attempt to find source file info
598  string func1, func2, file1, file2;
599  bool call1 = findSrcCodeInfo(begVMA, bOpIndex, func1, file1, begLine);
600  bool call2 = findSrcCodeInfo(endVMA, eOpIndex, func2, file2, endLine);
601  STATUS = (call1 && call2);
602 
603  // Error checking and processing: 'func'
604  if (!func1.empty() && !func2.empty()) {
605  func = func1; // prefer the first call
606  if (func1 != func2) {
607  STATUS = false; // we are accross two different functions
608  }
609  }
610  else if (!func1.empty() && func2.empty()) {
611  func = func1;
612  }
613  else if (func1.empty() && !func2.empty()) {
614  func = func2;
615  } // else (func1.empty && func2.empty()): use default values
616 
617  // Error checking and processing: 'file'
618  if (!file1.empty() && !file2.empty()) {
619  file = file1; // prefer the first call
620  if (file1 != file2) {
621  STATUS = false; // we are accross two different files
622  endLine = begLine; // 'endLine' makes no sense since we return 'file1'
623  }
624  }
625  else if (!file1.empty() && file2.empty()) {
626  file = file1;
627  }
628  else if (file1.empty() && !file2.empty()) {
629  file = file2;
630  } // else (file1.empty && file2.empty()): use default values
631 
632  // Error checking and processing: 'begLine' and 'endLine'
633  if (SrcFile::isValid(begLine) && !SrcFile::isValid(endLine)) {
634  endLine = begLine;
635  }
636  else if (SrcFile::isValid(endLine) && !SrcFile::isValid(begLine)) {
637  begLine = endLine;
638  }
639  else if (flags
640  && begLine > endLine) { // perhaps due to insn. reordering...
641  SrcFile::ln tmp = begLine; // but unlikely given the way this is
642  begLine = endLine; // typically called
643  endLine = tmp;
644  }
645 
646  return STATUS;
647 }
648 
649 
650 bool
652  SrcFile::ln &line) const
653 {
654  bool isfound = false;
655  line = 0;
656 
657  VMA vma_ur = unrelocate(vma);
658  VMA opVMA = isa->convertVMAToOpVMA(vma_ur, opIndex);
659 
660  VMAInterval ival(opVMA, opVMA + 1); // [opVMA, opVMA + 1)
661 
663  if (it != m_procMap.end()) {
664  Proc* proc = it->second;
665  line = proc->begLine();
666  isfound = true;
667  }
668  DIAG_MsgIf(DBG_BLD_PROC_MAP, "LM::findProcSrcCodeInfo "
669  << ival.toString() << " = " << line);
670 
671  return isfound;
672 }
673 
674 
675 void
677 {
678  VMA curr_begVMA;
679  VMA curr_endVMA;
680  VMA sml_begVMA = 0;
681  VMA lg_endVMA = 0;
682  for (SegMap::iterator it = m_segMap.begin(); it != m_segMap.end(); ++it) {
683  Seg* seg = it->second;
684  if (seg->type() == Seg::TypeText) {
685  if (!(sml_begVMA || lg_endVMA)) {
686  sml_begVMA = seg->begVMA();
687  lg_endVMA = seg->endVMA();
688  }
689  else {
690  curr_begVMA = seg->begVMA();
691  curr_endVMA = seg->endVMA();
692  if (curr_begVMA < sml_begVMA)
693  sml_begVMA = curr_begVMA;
694  if (curr_endVMA > lg_endVMA)
695  lg_endVMA = curr_endVMA;
696  }
697  }
698  }
699 
700  *begVMA = sml_begVMA;
701  *endVMA = lg_endVMA;
702 }
703 
704 
705 string
706 BinUtil::LM::toString(int flags, const char* pre) const
707 {
708  std::ostringstream os;
709  dump(os, flags, pre);
710  return os.str();
711 }
712 
713 
714 void
715 BinUtil::LM::dump(std::ostream& o, int flags, const char* pre) const
716 {
717  string p(pre);
718  string p1 = p;
719  string p2 = p1 + " ";
720 
721  o << p << "==================== Load Module Dump ====================\n";
722 
723  o << p1 << "BFD version: ";
724 #ifdef BFD_VERSION
725  o << BFD_VERSION << endl;
726 #else
727  o << "-unknown-" << endl;
728 #endif
729 
730  o << std::showbase;
731  o << p1 << "Load Module Information:\n";
732  dumpModuleInfo(o, p2.c_str());
733 
734  o << p1 << "Load Module Contents:\n";
735  dumpme(o, p2.c_str());
736 
737  if (flags & DUMP_Flg_SymTab) {
738  o << p2 << "Symbol Table (" << m_bfdSymTabSz << "):\n";
739  dumpSymTab(o, p2.c_str());
740  }
741 
742  o << p2 << "Sections (" << numSegs() << "):\n";
743  for (SegMap::const_iterator it = segs().begin(); it != segs().end(); ++it) {
744  Seg* seg = it->second;
745  seg->dump(o, flags, p2.c_str());
746  }
747 }
748 
749 
750 void
751 BinUtil::LM::ddump(int code) const
752 {
753  dump(std::cerr, code);
754 }
755 
756 
757 void
758 BinUtil::LM::dumpme(std::ostream& GCC_ATTR_UNUSED o,
759  const char* GCC_ATTR_UNUSED pre) const
760 {
761 }
762 
763 
764 
765 void
766 BinUtil::LM::dumpProcMap(std::ostream& os, unsigned flag,
767  const char* GCC_ATTR_UNUSED pre) const
768 {
769  for (ProcMap::const_iterator it = m_procMap.begin();
770  it != m_procMap.end(); ++it) {
771  os << it->first.toString() << " --> " << std::hex << "Ox" << it->second
772  << std::dec << endl;
773  if (flag != 0) {
774  os << it->second->toString();
775  }
776  }
777 }
778 
779 
780 void
781 BinUtil::LM::ddumpProcMap(unsigned flag) const
782 {
783  dumpProcMap(std::cerr, flag);
784 }
785 
786 //***************************************************************************
787 
788 int
789 BinUtil::LM::cmpBFDSymByVMA(const void* s1, const void* s2)
790 {
791  asymbol *a = (asymbol *)s1;
792  asymbol *b = (asymbol *)s2;
793 
794  // Primary sort key: Symbol's VMA (ascending).
795  if (bfd_asymbol_value(a) < bfd_asymbol_value(b)) {
796  return -1;
797  }
798  else if (bfd_asymbol_value(a) > bfd_asymbol_value(b)) {
799  return 1;
800  }
801  else {
802  return 0;
803  }
804 }
805 
806 
807 void
809 {
810  // -------------------------------------------------------
811  // Read the normal symbol table
812  // -------------------------------------------------------
813  long bytesNeeded = bfd_get_symtab_upper_bound(m_bfd);
814 
815  if (bytesNeeded > 0) {
816  m_bfdSymTab = new asymbol*[bytesNeeded / sizeof(asymbol*)];
817  m_bfdSymTabSz = bfd_canonicalize_symtab(m_bfd, m_bfdSymTab);
818 
819  if (m_bfdSymTabSz == 0) {
820  delete[] m_bfdSymTab;
821  m_bfdSymTab = NULL;
822  DIAG_Msg(2, "'" << name() << "': No regular symbols found.");
823  }
824  }
825 
826  // -------------------------------------------------------
827  // Read the dynamic symbol table
828  // -------------------------------------------------------
829  {
830  bytesNeeded = bfd_get_dynamic_symtab_upper_bound(m_bfd);
831 
832  if (bytesNeeded > 0) {
833  m_bfdDynSymTab = new asymbol*[bytesNeeded / sizeof(asymbol*)];
834  m_bfdDynSymTabSz = bfd_canonicalize_dynamic_symtab(m_bfd, m_bfdDynSymTab);
835  }
836 
837  if (m_bfdDynSymTabSz == 0) {
838  DIAG_Msg(2, "'" << name() << "': No dynamic symbols found.");
839  }
840  }
841 
842  // -------------------------------------------------------
843  // Append the synthetic symbol table to our copy for sorting.
844  // On many platforms this is empty, but it helps on powerpc.
845  //
846  // Note: the synthetic table is an array of asymbol structs,
847  // not an array of pointers, and not null-terminated.
848  // Note: the sorted table may be larger than the original table,
849  // and size is the size of the sorted table (regular + synthetic).
850  // -------------------------------------------------------
851  m_bfdSynthTabSz = bfd_get_synthetic_symtab(m_bfd, m_bfdSymTabSz, m_bfdSymTab,
853  &m_bfdSynthTab);
854  if (m_bfdSynthTabSz < 0) {
855  m_bfdSynthTabSz = 0;
856  }
857 
858  if (m_bfdSynthTabSz == 0) {
859  DIAG_Msg(2, "'" << name() << "': No synthetic symbols found.");
860  }
861 
862  m_bfdSymTabSort = new asymbol*[m_bfdSymTabSz + m_bfdSynthTabSz + 1];
863  memcpy(m_bfdSymTabSort, m_bfdSymTab, m_bfdSymTabSz * sizeof(asymbol *));
864  for (int i = 0; i < m_bfdSynthTabSz; i++) {
866  }
869 
870  // -------------------------------------------------------
871  // Sort symbol table by VMA.
872  // -------------------------------------------------------
873  QuickSort QSort;
874  QSort.Create((void **)(m_bfdSymTabSort), LM::cmpBFDSymByVMA);
875  QSort.Sort(0, m_bfdSymTabSortSz - 1);
876 }
877 
878 
879 void
881 {
882  // Create sections.
883  // Pass symbol table and debug summary information for each section
884  // into that section as it is created.
886 
887  // Process each section in the object file.
888  for (asection* sec = m_bfd->sections; (sec); sec = sec->next) {
889 
890  // 1. Determine initial section attributes
891  string segnm(bfd_section_name(m_bfd, sec));
892  bfd_vma segBeg = bfd_section_vma(m_bfd, sec);
893  uint64_t segSz = bfd_section_size(m_bfd, sec) / bfd_octets_per_byte(m_bfd);
894  bfd_vma segEnd = segBeg + segSz;
895 
896  // 2. Create section
897  Seg* seg = NULL;
898  if (sec->flags & SEC_CODE) {
899  seg = new TextSeg(this, segnm, segBeg, segEnd, segSz);
900  }
901  else {
902  seg = new Seg(this, segnm, Seg::TypeData, segBeg, segEnd, segSz);
903  }
904  bool ins = insertSeg(VMAInterval(segBeg, segEnd), seg);
905  if (!ins) {
906  DIAG_WMsg(3, "Overlapping segment: " << segnm << ": "
907  << std::hex << segBeg << " " << segEnd << std::dec);
908  delete seg;
909  }
910  }
911 
912  m_dbgInfo.clear();
913 }
914 
915 void
917 {
918  m_noreturns = new NoReturns();
919 
920  // gather information about functions in this load module
922 
923  // gather information about functions accessed through the PLT
925 }
926 
927 
928 bool
930 {
931  return m_noreturns->isNoReturn(addr);
932 }
933 
934 
935 void
936 BinUtil::LM::dumpModuleInfo(std::ostream& o, const char* pre) const
937 {
938  string p(pre);
939 
940  o << p << "Name: `" << name() << "'\n";
941 
942  o << p << "Format: `" << bfd_get_target(m_bfd) << "'" << endl;
943  // bfd_get_flavour
944 
945  o << p << "Type: `";
946  switch (type()) {
947  case TypeNULL:
948  o << "Unknown load module type'\n";
949  break;
950  case TypeExe:
951  o << "Executable (fully linked except for possible DSOs)'\n";
952  break;
953  case TypeDSO:
954  o << "Dynamically Shared Library'\n";
955  break;
956  default:
957  DIAG_Die("Invalid load module type: " << type());
958  }
959 
960  o << p << "Load VMA: " << std::hex << m_begVMA << std::dec << "\n";
961 
962  o << p << "Text(beg,end): "
963  << std::hex << textBeg() << ", " << textEnd() << std::dec << "\n";
964 
965  o << p << "Endianness: `"
966  << ( (bfd_big_endian(m_bfd)) ? "Big'\n" : "Little'\n" );
967 
968  o << p << "Architecture: `";
969  switch (bfd_get_arch(m_bfd)) {
970  case bfd_arch_alpha: o << "Alpha'\n"; break;
971  case bfd_arch_mips: o << "MIPS'\n"; break;
972  case bfd_arch_powerpc: o << "POWER'\n"; break;
973  case bfd_arch_sparc: o << "SPARC'\n"; break;
974  case bfd_arch_i386: o << "x86'\n"; break;
975  case bfd_arch_ia64: o << "IA-64'\n"; break;
976 #ifdef bfd_mach_k1om
977  case bfd_arch_k1om: o << "K1OM'\n"; break;
978 #endif
979  default: DIAG_Die("Unknown bfd arch: " << bfd_get_arch(m_bfd));
980  }
981 
982  o << p << "Architectural implementation: `";
983  switch (bfd_get_arch(m_bfd)) {
984  case bfd_arch_alpha:
985  switch (bfd_get_mach(m_bfd)) {
986  case bfd_mach_alpha_ev4: o << "EV4'\n"; break;
987  case bfd_mach_alpha_ev5: o << "EV5'\n"; break;
988  case bfd_mach_alpha_ev6: o << "EV6'\n"; break;
989  default: o << "-unknown Alpha-'\n"; break;
990  }
991  break;
992  case bfd_arch_mips:
993  switch (bfd_get_mach(m_bfd)) {
994  case bfd_mach_mips3000: o << "R3000'\n"; break;
995  case bfd_mach_mips4000: o << "R4000'\n"; break;
996  case bfd_mach_mips6000: o << "R6000'\n"; break;
997  case bfd_mach_mips8000: o << "R8000'\n"; break;
998  case bfd_mach_mips10000: o << "R10000'\n"; break;
999  case bfd_mach_mips12000: o << "R12000'\n"; break;
1000  default: o << "-unknown MIPS-'\n";
1001  }
1002  break;
1003  case bfd_arch_powerpc:
1004  switch (bfd_get_mach(m_bfd)) {
1005  case bfd_mach_ppc: o << "PPC'\n"; break;
1006  case bfd_mach_ppc64: o << "PPC-64'\n"; break;
1007  default: o << "-unknown POWER-'\n";
1008  }
1009  break;
1010  case bfd_arch_sparc:
1011  switch (bfd_get_mach(m_bfd)) {
1012  case bfd_mach_sparc_sparclet: o << "let'\n"; break;
1013  case bfd_mach_sparc_sparclite: o << "lite'\n"; break;
1014  case bfd_mach_sparc_sparclite_le: o << "lite_le'\n"; break;
1015  case bfd_mach_sparc_v8plus: o << "v8plus'\n"; break;
1016  case bfd_mach_sparc_v8plusa: o << "v8plusa'\n"; break;
1017  case bfd_mach_sparc_v8plusb: o << "v8plusb'\n"; break;
1018  case bfd_mach_sparc_v9: o << "v9'\n"; break;
1019  case bfd_mach_sparc_v9a: o << "v9a'\n"; break;
1020  case bfd_mach_sparc_v9b: o << "v9b'\n"; break;
1021  default: o << "-unknown Sparc-'\n";
1022  }
1023  break;
1024  case bfd_arch_i386:
1025  switch (bfd_get_mach(m_bfd)) {
1026  case bfd_mach_i386_i386: o << "x86'\n"; break;
1027  case bfd_mach_i386_i8086: o << "x86 (8086)'\n"; break;
1028  case bfd_mach_x86_64: o << "x86_64'\n"; break;
1029  default: o << "-unknown x86-'\n";
1030  }
1031  break;
1032  case bfd_arch_ia64:
1033  o << "IA-64'\n";
1034  break;
1035 #ifdef bfd_mach_k1om
1036  case bfd_arch_k1om:
1037  o << "K1OM'\n";
1038  break;
1039 #endif
1040  default:
1041  DIAG_Die("Unknown bfd arch: " << bfd_get_arch(m_bfd));
1042  }
1043 
1044  o << p << "Bits per byte: " << bfd_arch_bits_per_byte(m_bfd) << endl;
1045  o << p << "Bits per address: " << bfd_arch_bits_per_address(m_bfd) << endl;
1046  o << p << "Bits per word: " << m_bfd->arch_info->bits_per_word << endl;
1047 }
1048 
1049 
1050 static void
1051 dumpASymbol(std::ostream& o, asymbol* sym, string p1)
1052 {
1053  // value, name, section name
1054  o << p1 << std::hex << std::setw(16)
1055  << (bfd_vma)bfd_asymbol_value(sym) << ": " << std::setw(0) << std::dec
1056  << bfd_asymbol_name(sym)
1057  << " [sec: " << sym->section->name << "] ";
1058 
1059  // flags
1060  o << "[flg: " << std::hex << sym->flags << std::dec << " ";
1061  bool hasPrintedFlag = false;
1062  dumpSymFlag(o, sym, BSF_LOCAL, "LCL", hasPrintedFlag);
1063  dumpSymFlag(o, sym, BSF_GLOBAL, "GBL", hasPrintedFlag);
1064  dumpSymFlag(o, sym, BSF_FUNCTION, "FUNC", hasPrintedFlag);
1065  dumpSymFlag(o, sym, BSF_WEAK, "WEAK", hasPrintedFlag);
1066  dumpSymFlag(o, sym, BSF_SECTION_SYM, "SEC", hasPrintedFlag);
1067  dumpSymFlag(o, sym, BSF_FILE, "FILE", hasPrintedFlag);
1068  dumpSymFlag(o, sym, BSF_DYNAMIC, "DYN", hasPrintedFlag);
1069  dumpSymFlag(o, sym, BSF_OBJECT, "OBJ", hasPrintedFlag);
1070  dumpSymFlag(o, sym, BSF_THREAD_LOCAL, "THR_LCL", hasPrintedFlag);
1071  o << "]";
1072 
1073  if (BinUtil::Proc::isProcBFDSym(sym)) {
1074  o << " *proc*";
1075  }
1076 
1077  o << endl;
1078 }
1079 
1080 
1081 void
1082 BinUtil::LM::dumpSymTab(std::ostream& o, const char* pre) const
1083 {
1084  string p(pre);
1085  string p1 = p + " ";
1086 
1087  o << p << "--------------- Symbol Table Dump (Unsorted) --------------\n";
1088 
1089  if (m_bfdSymTab) {
1090  for (uint i = 0; m_bfdSymTab[i] != NULL; i++) {
1091  dumpASymbol(o, m_bfdSymTab[i], p1);
1092  }
1093  }
1094 
1095  o << p << "--------------- Symbol Table Dump (Synthetic) -------------\n";
1096 
1097  if (m_bfdSynthTabSz) {
1098  for (int i = 0; i < m_bfdSynthTabSz; i++) {
1099  dumpASymbol(o, &m_bfdSynthTab[i], p1);
1100  }
1101  }
1102 
1103  o << p << "-----------------------------------------------------------\n";
1104 }
1105 
1106 
1107 
1108 static void
1109 dumpSymFlag(std::ostream& o,
1110  asymbol* sym, int flag, const char* txt, bool& hasPrinted)
1111 {
1112  if ((sym->flags & flag)) {
1113  if (hasPrinted) {
1114  o << ",";
1115  }
1116  o << txt;
1117  hasPrinted = true; \
1118  }
1119 }
1120 
1121 
1122 //***************************************************************************
1123 // Exe
1124 //***************************************************************************
1125 
1127  : m_startVMA(0)
1128 {
1129 }
1130 
1131 
1133 {
1134 }
1135 
1136 
1137 void
1138 BinUtil::Exe::open(const char* filenm)
1139 {
1140  LM::open(filenm);
1141  if (type() != LM::TypeExe) {
1142  BINUTIL_Throw("'" << filenm << "' is not an executable.");
1143  }
1144 
1145  m_startVMA = bfd_get_start_address(abfd());
1146 }
1147 
1148 
1149 void
1150 BinUtil::Exe::dump(std::ostream& o, int flags, const char* pre) const
1151 {
1152  LM::dump(o, flags, pre);
1153 }
1154 
1155 
1156 void
1157 BinUtil::Exe::dumpme(std::ostream& o, const char* pre) const
1158 {
1159  o << pre << "Program start address: " << std::hex << getStartVMA()
1160  << std::dec << endl;
1161 }
1162 
1163 //***************************************************************************
virtual ~LM()
Definition: LM.cpp:354
std::string toString(int flags=DUMP_Short, const char *pre="") const
Definition: LM.cpp:706
My_t::iterator iterator
virtual VMA convertVMAToOpVMA(VMA vma, ushort GCC_ATTR_UNUSED opIndex) const
Definition: ISA.hpp:471
bool findProcSrcCodeInfo(VMA vma, ushort opIndex, SrcFile::ln &line) const
Definition: LM.cpp:651
unsigned int ln
Definition: SrcFile.hpp:66
void MONITOR_EXT_WRAP_NAME() free(void *ptr)
friend class TextSeg
Definition: LM.hpp:534
VMA m_textBegReloc
Definition: LM.hpp:549
bfd_vma VMA
Definition: ISATypes.hpp:79
static char * tmp
Definition: tokenize.c:63
static void dumpASymbol(std::ostream &o, asymbol *sym, string p1)
Definition: LM.cpp:1051
int find(char s1[], char s2[])
Definition: CStrUtil.cpp:177
NoReturns * m_noreturns
Definition: LM.hpp:580
virtual ~Exe()
Definition: LM.cpp:1132
void textBegEndVMA(VMA *begVMA, VMA *endVMA)
Definition: LM.cpp:676
void ddumpProcMap(unsigned flag) const
Definition: LM.cpp:781
long m_bfdSymTabSz
Definition: LM.hpp:575
VMA endVMA() const
Definition: Seg.hpp:126
Type type() const
Definition: Seg.hpp:114
SrcFile::ln begLine() const
Definition: Proc.hpp:186
bool isValid(SrcFile::ln line)
Definition: SrcFile.hpp:70
void Sort(const int minEntryIndex, const int maxEntryIndex)
Definition: QuickSort.cpp:133
void relocate(VMA textBegReloc)
Definition: LM.cpp:487
virtual void read(const std::set< std::string > &directorySet, ReadFlg readflg)
Definition: LM.cpp:460
void addSymEntries(asymbol **syms, long symcount)
Definition: LM.cpp:208
Type m_type
Definition: LM.hpp:543
void ddump(int code=DUMP_Long_decode) const
Definition: LM.cpp:751
ProcMap m_procMap
Definition: LM.hpp:559
bool insertSeg(VMAInterval ival, Seg *seg)
Definition: LM.hpp:255
bool functionNeverReturns(VMA addr)
Definition: LM.cpp:929
void addSynSymEntries(asymbol *syms, long symcount)
Definition: LM.cpp:164
VMA textBeg() const
Definition: LM.hpp:185
BinUtil::Dbg::LM m_dbgInfo
Definition: LM.hpp:563
Type type() const
Definition: LM.hpp:175
MachInsn * findMachInsn(VMA vma, ushort &size) const
Definition: LM.cpp:503
name_set noreturn_fn_names
Definition: LM.cpp:154
virtual void open(const char *filenm)
Definition: LM.cpp:1138
VMA m_startVMA
Definition: LM.hpp:639
#define DIAG_MsgIf(ifexpr,...)
Definition: diagnostics.h:236
const std::string & name() const
Definition: Seg.hpp:110
asymbol ** m_bfdDynSymTab
Definition: LM.hpp:572
bool operator()(const char *s1, const char *s2) const
Definition: LM.cpp:129
const std::string & name() const
Definition: LM.hpp:170
unsigned short int ushort
Definition: uint.h:120
bool realpath(std::string &pathNm) const
virtual MachInsn * bits() const
Definition: Insn.hpp:116
unsigned int uint
Definition: uint.h:124
#define NORETURNS_DISABLE
Definition: LM.cpp:103
static void dumpSymFlag(std::ostream &o, asymbol *sym, int flag, const char *txt, bool &hasPrinted)
Definition: LM.cpp:1109
virtual bool parse(const std::set< std::string > &directorySet, const char *pathname)=0
VMA textEnd() const
Definition: LM.hpp:189
VMASigned m_unrelocDelta
Definition: LM.hpp:550
RealPathMgr & m_realpathMgr
Definition: LM.hpp:582
virtual ushort size() const =0
long m_bfdSynthTabSz
Definition: LM.hpp:578
virtual void dumpProcMap(std::ostream &o=std::cerr, unsigned flag=0, const char *pre="") const
Definition: LM.cpp:766
static const char * noreturn_table[]
Definition: LM.cpp:114
asymbol ** m_bfdSymTabSort
Definition: LM.hpp:574
Definition: ISA.hpp:106
uint numSegs() const
Definition: LM.hpp:264
ReadFlg m_readFlags
Definition: LM.hpp:544
asymbol * m_bfdSynthTab
Definition: LM.hpp:573
SimpleSymbolsFactories simpleSymbolsFactories
bfd * m_bfd
Definition: LM.hpp:570
VMA begVMA() const
Definition: Seg.hpp:122
long m_bfdSymTabSortSz
Definition: LM.hpp:577
SimpleSymbolsFactory * find(const char *pathname)
void addIfNoReturn(const char *name, uint64_t addr)
Definition: LM.cpp:299
void dumpModuleInfo(std::ostream &o=std::cerr, const char *pre="") const
Definition: LM.cpp:936
My_t::const_iterator const_iterator
#define DIAG_Msg(level,...)
Definition: diagnostics.h:241
virtual void dumpme(std::ostream &o=std::cerr, const char *pre="") const
asymbol ** m_bfdSymTab
Definition: LM.hpp:571
void readSegs()
Definition: LM.cpp:880
SegMap & segs()
Definition: LM.hpp:236
static int cmpBFDSymByVMA(const void *s1, const void *s2)
Definition: LM.cpp:789
virtual void dump(std::ostream &o=std::cerr, int flags=LM::DUMP_Short, const char *pre="") const
Definition: Seg.cpp:130
SegMap m_segMap
Definition: LM.hpp:558
void Create(void **UserArrayPtr, const EntryCompareFunctPtr _CompareFunct)
Definition: QuickSort.cpp:76
virtual void dump(std::ostream &o=std::cerr, int flags=DUMP_Short, const char *pre="") const
Definition: LM.cpp:1150
void MachInsn
Definition: ISATypes.hpp:87
void dumpSymTab(std::ostream &o=std::cerr, const char *pre="") const
Definition: LM.cpp:1082
VMA unrelocate(VMA relocVMA) const
Definition: LM.hpp:520
void read(bfd *abfd, asymbol **bfdSymTab)
Definition: Dbg-LM.cpp:108
Seg * findSeg(VMA vma) const
Definition: LM.hpp:244
virtual void dump(std::ostream &o=std::cerr, int flags=DUMP_Short, const char *pre="") const
Definition: LM.cpp:715
VMA m_begVMA
Definition: LM.hpp:547
#define NULL
Definition: ElfHelper.cpp:85
long m_bfdDynSymTabSz
Definition: LM.hpp:576
bool isNoReturn(VMA addr)
Definition: LM.cpp:293
static bool isProcBFDSym(asymbol *sym)
Definition: Proc.hpp:283
std::string toString() const
Definition: VMAInterval.cpp:93
void dump()
Definition: LM.cpp:310
#define BINUTIL_Throw(streamArgs)
Definition: LM.hpp:649
Insn * findInsn(VMA vma, ushort opIndex) const
Definition: LM.hpp:335
iterator find(const key_type &toFind)
std::string m_name
Definition: LM.hpp:541
#define DBG_BLD_PROC_MAP
Definition: LM.cpp:101
void readSymbolTables()
Definition: LM.cpp:808
bfd * abfd() const
Definition: LM.hpp:432
VMA getStartVMA() const
Definition: LM.hpp:619
bool findSrcCodeInfo(VMA vma, ushort opIndex, std::string &func, std::string &file, SrcFile::ln &line)
VMA m_txtBeg
Definition: LM.hpp:546
bool findEnclosingFunction(uint64_t vma, std::string &fnName)
#define DIAG_Die(...)
Definition: diagnostics.h:267
cct_addr_t * addr
Definition: cct.c:130
#define GCC_ATTR_UNUSED
Definition: gcc-attr.h:80
bool implies(bool p, bool q)
Definition: Logic.hpp:114
void dumpnames()
Definition: LM.cpp:145
static ISA * isa
Definition: LM.hpp:493
void computeNoReturns()
Definition: LM.cpp:916
virtual void dumpme(std::ostream &o=std::cerr, const char *pre="") const
Definition: LM.cpp:1157
LM(bool useBinutils=false)
Definition: LM.cpp:340
SimpleSymbols * m_simpleSymbols
Definition: LM.hpp:585
virtual void open(const char *filenm)
Definition: LM.cpp:395
InsnMap m_insnMap
Definition: LM.hpp:560
virtual SimpleSymbols * create()=0