bfd_support.cpp

Go to the documentation of this file.
00001 
00011 #include "bfd_support.h"
00012 
00013 #include "op_bfd.h"
00014 #include "op_fileio.h"
00015 #include "op_config.h"
00016 #include "string_manip.h"
00017 #include "file_manip.h"
00018 #include "cverb.h"
00019 #include "locate_images.h"
00020 
00021 #include <cstdlib>
00022 #include <cstring>
00023 #include <cassert>
00024 #include <iostream>
00025 #include <fstream>
00026 #include <sstream>
00027 #include <string>
00028 #include <cstring>
00029 #include <cstdlib>
00030 
00031 using namespace std;
00032 
00033 extern verbose vbfd;
00034 
00035 namespace {
00036 
00037 
00038 void check_format(string const & file, bfd ** ibfd)
00039 {
00040     if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
00041         cverb << vbfd << "BFD format failure for " << file << endl;
00042         bfd_close(*ibfd);
00043         *ibfd = NULL;
00044     }
00045 }
00046 
00047 
00048 bool separate_debug_file_exists(string & name, unsigned long const crc, 
00049                                 extra_images const & extra)
00050 {
00051     unsigned long file_crc = 0;
00052     // The size of 2 * 1024 elements for the buffer is arbitrary.
00053     char buffer[2 * 1024];
00054 
00055     image_error img_ok;
00056     string const image_path = extra.find_image_path(name, img_ok, true);
00057 
00058     if (img_ok != image_ok)
00059         return false;
00060 
00061     name = image_path;
00062 
00063     ifstream file(image_path.c_str());
00064     if (!file)
00065         return false;
00066 
00067     cverb << vbfd << "found " << name;
00068     while (file) {
00069         file.read(buffer, sizeof(buffer));
00070         file_crc = calc_crc32(file_crc, 
00071                       reinterpret_cast<unsigned char *>(&buffer[0]),
00072                       file.gcount());
00073     }
00074     cverb << vbfd << " with crc32 = " << hex << file_crc << endl;
00075     return crc == file_crc;
00076 }
00077 
00078 
00079 bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
00080 {
00081     asection * sect;
00082 
00083     cverb << vbfd << "fetching .gnu_debuglink section" << endl;
00084     sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
00085     
00086     if (sect == NULL)
00087         return false;
00088     
00089     bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);  
00090     char contents[debuglink_size];
00091     cverb << vbfd
00092           << ".gnu_debuglink section has size " << debuglink_size << endl;
00093     
00094     if (!bfd_get_section_contents(ibfd, sect, 
00095                  reinterpret_cast<unsigned char *>(contents), 
00096                  static_cast<file_ptr>(0), debuglink_size)) {
00097         bfd_perror("bfd_get_section_contents:get_debug:");
00098         exit(2);
00099     }
00100     
00101     /* CRC value is stored after the filename, aligned up to 4 bytes. */
00102     size_t filename_len = strlen(contents);
00103     size_t crc_offset = filename_len + 1;
00104     crc_offset = (crc_offset + 3) & ~3;
00105     
00106     crc32 = bfd_get_32(ibfd, 
00107                    reinterpret_cast<bfd_byte *>(contents + crc_offset));
00108     filename = string(contents, filename_len);
00109     cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
00110     return true;
00111 }
00112 
00113 
00126 bool objc_match(string const & sym, string const & method)
00127 {
00128     if (method.length() < 3)
00129         return false;
00130 
00131     string mangled;
00132 
00133     if (is_prefix(method, "-[")) {
00134         mangled += "_i_";
00135     } else if (is_prefix(method, "+[")) {
00136         mangled += "_c_";
00137     } else {
00138         return false;
00139     }
00140 
00141     string::const_iterator it = method.begin() + 2;
00142     string::const_iterator const end = method.end();
00143 
00144     bool found_paren = false;
00145 
00146     for (; it != end; ++it) {
00147         switch (*it) {
00148         case ' ':
00149             mangled += '_';
00150             if (!found_paren)
00151                 mangled += '_';
00152             break;
00153         case ':':
00154             mangled += '_';
00155             break;
00156         case ')':
00157         case ']':
00158             break;
00159         case '(':
00160             found_paren = true;
00161             mangled += '_';
00162             break;
00163         default:
00164             mangled += *it; 
00165         }
00166     }
00167 
00168     return sym == mangled;
00169 }
00170 
00171 
00172 /*
00173  * With a binary image where some objects are missing debug
00174  * info, we can end up attributing to a completely different
00175  * function (#484660): bfd_nearest_line() will happily move from one
00176  * symbol to the nearest one it can find with debug information.
00177  * To mitigate this problem, we check that the symbol name
00178  * matches the returned function name.
00179  *
00180  * However, this check fails in some cases it shouldn't:
00181  * Objective C, and C++ static inline functions (as discussed in
00182  * GCC bugzilla #11774). So, we have a looser check that
00183  * accepts merely a substring, plus some magic for Objective C.
00184  *
00185  * If even the loose check fails, then we give up.
00186  */
00187 bool is_correct_function(string const & function, string const & name)
00188 {
00189     if (name == function)
00190         return true;
00191 
00192     if (objc_match(name, function))
00193         return true;
00194 
00195     // warn the user if we had to use the loose check
00196     if (name.find(function) != string::npos) {
00197         static bool warned = false;
00198         if (!warned) {
00199             cerr << "warning: some functions compiled without "
00200                  << "debug information may have incorrect source "
00201                  << "line attributions" << endl;
00202                 warned = true;
00203         }
00204         cverb << vbfd << "is_correct_function(" << function << ", "
00205               << name << ") fuzzy match." << endl;
00206         return true;
00207     }
00208 
00209     return false;
00210 }
00211 
00212 
00213 /*
00214  * binutils 2.12 and below have a small bug where functions without a
00215  * debug entry at the prologue start do not give a useful line number
00216  * from bfd_find_nearest_line(). This can happen with certain gcc
00217  * versions such as 2.95.
00218  *
00219  * We work around this problem by scanning forward for a vma with valid
00220  * linenr info, if we can't get a valid line number.  Problem uncovered
00221  * by Norbert Kaufmann. The work-around decreases, on the tincas
00222  * application, the number of failure to retrieve linenr info from 835
00223  * to 173. Most of the remaining are c++ inline functions mainly from
00224  * the STL library. Fix #529622
00225  */
00226 void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
00227           string const & name, bfd_vma pc,
00228                   char const ** filename, unsigned int * line)
00229 {
00230     char const * cfilename;
00231     char const * function;
00232     unsigned int linenr;
00233 
00234     // FIXME: looking at debug info for all gcc version shows than
00235     // the same problems can -perhaps- occur for epilog code: find a
00236     // samples files with samples in epilog and try opreport -l -g
00237     // on it, check it also with opannotate.
00238 
00239     // first restrict the search on a sensible range of vma, 16 is
00240     // an intuitive value based on epilog code look
00241     size_t max_search = 16;
00242     size_t section_size = bfd_section_size(abfd, section);
00243     if (pc + max_search > section_size)
00244         max_search = section_size - pc;
00245 
00246     for (size_t i = 1; i < max_search; ++i) {
00247         bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
00248                          &cfilename, &function,
00249                          &linenr);
00250 
00251         if (ret && cfilename && function && linenr != 0
00252             && is_correct_function(function, name)) {
00253             *filename = cfilename;
00254             *line = linenr;
00255             return;
00256         }
00257     }
00258 }
00259 
00260 
00261 } // namespace anon
00262 
00263 
00264 bfd * open_bfd(string const & file)
00265 {
00266     /* bfd keeps its own reference to the filename char *,
00267      * so it must have a lifetime longer than the ibfd */
00268     bfd * ibfd = bfd_openr(file.c_str(), NULL);
00269     if (!ibfd) {
00270         cverb << vbfd << "bfd_openr failed for " << file << endl;
00271         return NULL;
00272     }
00273 
00274     check_format(file, &ibfd);
00275 
00276     return ibfd;
00277 }
00278 
00279 
00280 bfd * fdopen_bfd(string const & file, int fd)
00281 {
00282     /* bfd keeps its own reference to the filename char *,
00283      * so it must have a lifetime longer than the ibfd */
00284     bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
00285     if (!ibfd) {
00286         cverb << vbfd << "bfd_openr failed for " << file << endl;
00287         return NULL;
00288     }
00289 
00290     check_format(file, &ibfd);
00291 
00292     return ibfd;
00293 }
00294 
00295 
00296 bool find_separate_debug_file(bfd * ibfd, string const & filepath_in, 
00297                               string & debug_filename, extra_images const & extra)
00298 {
00299     string filepath(filepath_in);
00300     string basename;
00301     unsigned long crc32;
00302     
00303     if (!get_debug_link_info(ibfd, basename, crc32))
00304         return false;
00305 
00306     // Work out the image file's directory prefix
00307     string filedir = op_dirname(filepath);
00308     // Make sure it starts with /
00309     if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/')
00310         filedir += '/';
00311 
00312     string first_try(filedir + ".debug/" + basename);
00313     string second_try(DEBUGDIR + filedir + basename);
00314     string third_try(filedir + basename);
00315 
00316     cverb << vbfd << "looking for debugging file " << basename 
00317           << " with crc32 = " << hex << crc32 << endl;
00318 
00319     if (separate_debug_file_exists(first_try, crc32, extra)) 
00320         debug_filename = first_try; 
00321     else if (separate_debug_file_exists(second_try, crc32, extra))
00322         debug_filename = second_try;
00323     else if (separate_debug_file_exists(third_try, crc32, extra))
00324         debug_filename = third_try;
00325     else
00326         return false;
00327     
00328     return true;
00329 }
00330 
00331 
00332 bool interesting_symbol(asymbol * sym)
00333 {
00334     // #717720 some binutils are miscompiled by gcc 2.95, one of the
00335     // typical symptom can be catched here.
00336     if (!sym->section) {
00337         ostringstream os;
00338         os << "Your version of binutils seems to have a bug.\n"
00339            << "Read http://oprofile.sf.net/faq/#binutilsbug\n";
00340         throw op_runtime_error(os.str());
00341     }
00342 
00343     if (!(sym->section->flags & SEC_CODE))
00344         return false;
00345 
00346     // returning true for fix up in op_bfd_symbol()
00347     if (!sym->name || sym->name[0] == '\0')
00348         return true;
00349     /* ARM assembler internal mapping symbols aren't interesting */
00350     if ((strcmp("$a", sym->name) == 0) ||
00351         (strcmp("$t", sym->name) == 0) ||
00352         (strcmp("$d", sym->name) == 0))
00353         return false;
00354 
00355     // C++ exception stuff
00356     if (sym->name[0] == '.' && sym->name[1] == 'L')
00357         return false;
00358 
00359     /* This case cannot be moved to boring_symbol(),
00360      * because that's only used for duplicate VMAs,
00361      * and sometimes this symbol appears at an address
00362      * different from all other symbols.
00363      */
00364     if (!strcmp("gcc2_compiled.", sym->name))
00365         return false;
00366 
00367     /* Commit ab45a0cc5d1cf522c1aef8f22ed512a9aae0dc1c removed a check for
00368      * the SEC_LOAD bit.  See the commit message for details why this
00369      * was removed.
00370      */
00371 
00372         if (sym->flags & BSF_SECTION_SYM)
00373                 return false;
00374 
00375     return true;
00376 }
00377 
00378 
00379 bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
00380 {
00381     if (first.name() == "Letext")
00382         return true;
00383     else if (second.name() == "Letext")
00384         return false;
00385 
00386     if (first.name().substr(0, 2) == "??")
00387         return true;
00388     else if (second.name().substr(0, 2) == "??")
00389         return false;
00390 
00391     if (first.hidden() && !second.hidden())
00392         return true;
00393     else if (!first.hidden() && second.hidden())
00394         return false;
00395 
00396     if (first.name()[0] == '_' && second.name()[0] != '_')
00397         return true;
00398     else if (first.name()[0] != '_' && second.name()[0] == '_')
00399         return false;
00400 
00401     if (first.weak() && !second.weak())
00402         return true;
00403     else if (!first.weak() && second.weak())
00404         return false;
00405 
00406     return false;
00407 }
00408 
00409 
00410 bool bfd_info::has_debug_info() const
00411 {
00412     if (!valid())
00413         return false;
00414 
00415     for (asection const * sect = abfd->sections; sect; sect = sect->next) {
00416         if (sect->flags & SEC_DEBUGGING)
00417             return true;
00418     }
00419 
00420     return false;
00421 }
00422 
00423 
00424 bfd_info::~bfd_info()
00425 {
00426     free(synth_syms);
00427     close();
00428 }
00429 
00430 
00431 void bfd_info::close()
00432 {
00433     if (abfd)
00434         bfd_close(abfd);
00435 }
00436 
00437 #if SYNTHESIZE_SYMBOLS
00438 
00453 void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
00454 {
00455     unsigned int img_sect_cnt = 0;
00456     bfd_vma vma_adj;
00457     bfd * image_bfd = image_bfd_info->abfd;
00458     multimap<string, bfd_section *> image_sections;
00459 
00460     for (bfd_section * sect = image_bfd->sections;
00461          sect && img_sect_cnt < image_bfd->section_count;
00462          sect = sect->next) {
00463         // A comment section marks the end of the needed sections
00464         if (strstr(sect->name, ".comment") == sect->name)
00465             break;
00466         image_sections.insert(pair<string, bfd_section *>(sect->name, sect));
00467         img_sect_cnt++;
00468     }
00469 
00470     asymbol * sym = dbg_syms[0];
00471     string prev_sect_name = "";
00472     bfd_section * matched_section = NULL;
00473     vma_adj = image_bfd->start_address - abfd->start_address;
00474     for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) {
00475         bool section_switch;
00476 
00477         if (strcmp(prev_sect_name.c_str(), sym->section->name)) {
00478             section_switch = true;
00479             prev_sect_name = sym->section->name;
00480         } else {
00481             section_switch = false;
00482         }
00483         if (sym->section->owner && sym->section->owner == abfd) {
00484             if (section_switch ) {
00485                 matched_section = NULL;
00486                 multimap<string, bfd_section *>::iterator it;
00487                 pair<multimap<string, bfd_section *>::iterator,
00488                      multimap<string, bfd_section *>::iterator> range;
00489 
00490                 range = image_sections.equal_range(sym->section->name);
00491                 for (it = range.first; it != range.second; it++) {
00492                     if ((*it).second->vma == sym->section->vma + vma_adj) {
00493                         matched_section = (*it).second;
00494                         if (vma_adj)
00495                             section_vma_maps[(*it).second->vma] = sym->section->vma;
00496                         break;
00497                     }
00498                 }
00499             }
00500             if (matched_section) {
00501                 sym->section = matched_section;
00502                 sym->the_bfd = image_bfd;
00503             }
00504         }
00505     }
00506 }
00507 
00508 bool bfd_info::get_synth_symbols()
00509 {
00510     extern const bfd_target bfd_elf64_powerpc_vec;
00511     extern const bfd_target bfd_elf64_powerpcle_vec;
00512     bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
00513         || (abfd->xvec == &bfd_elf64_powerpcle_vec);
00514 
00515     if (!is_elf64_powerpc_target)
00516         return false;
00517 
00518     void * buf;
00519     uint tmp;
00520     long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
00521     if (nr_mini_syms < 1)
00522         return false;
00523 
00524     asymbol ** mini_syms = (asymbol **)buf;
00525     buf = NULL;
00526     bfd * synth_bfd;
00527 
00528     /* For ppc64, a debuginfo file by itself does not hold enough symbol
00529      * information for us to properly attribute samples to symbols.  If
00530      * the image file's bfd has no symbols (as in a super-stripped library),
00531      * then we need to do the extra processing in translate_debuginfo_syms.
00532      */
00533     if (image_bfd_info && image_bfd_info->nr_syms == 0) {
00534         translate_debuginfo_syms(mini_syms, nr_mini_syms);
00535         synth_bfd = image_bfd_info->abfd;
00536     } else
00537         synth_bfd = abfd;
00538     
00539     long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd,
00540                                                   nr_mini_syms,
00541                                                   mini_syms, 0,
00542                                                   NULL, &synth_syms);
00543 
00544     if (nr_synth_syms < 0) {
00545         free(mini_syms);
00546         return false;
00547     }
00548 
00549     /* If we called translate_debuginfo_syms() above, then we had to map
00550      * the debuginfo symbols' sections to the sections of the runtime binary.
00551      * We had to twist ourselves in this knot due to the peculiar requirements
00552      * of bfd_get_synthetic_symtab().  While doing this mapping, we cached
00553      * the original section VMAs because we need those original values in
00554      * order to properly match up sample offsets with debug data.  So now that
00555      * we're done with bfd_get_synthetic_symtab, we can restore these section
00556      * VMAs.
00557      */
00558     if (section_vma_maps.size()) {
00559         unsigned int sect_count = 0;
00560         for (bfd_section * sect = synth_bfd->sections;
00561              sect && sect_count < synth_bfd->section_count;
00562              sect = sect->next) {
00563             sect->vma = section_vma_maps[sect->vma];
00564             sect_count++;
00565         }
00566     }
00567 
00568 
00569     cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
00570     cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;
00571 
00572     nr_syms = nr_mini_syms + nr_synth_syms;
00573     syms.reset(new asymbol *[nr_syms + 1]);
00574 
00575     for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
00576         syms[i] = mini_syms[i];
00577 
00578 
00579     for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
00580         syms[nr_mini_syms + i] = synth_syms + i;
00581     
00582 
00583     free(mini_syms);
00584 
00585     // bfd_canonicalize_symtab does this, so shall we
00586     syms[nr_syms] = NULL;
00587 
00588     return true;
00589 }
00590 #else
00591 bool bfd_info::get_synth_symbols()
00592 {
00593     return false;
00594 }
00595 #endif /* SYNTHESIZE_SYMBOLS */
00596 
00597 
00598 void bfd_info::get_symbols()
00599 {
00600     if (!abfd)
00601         return;
00602 
00603     cverb << vbfd << "bfd_info::get_symbols() for "
00604           << bfd_get_filename(abfd) << endl;
00605 
00606     if (get_synth_symbols())
00607         return;
00608 
00609     if (bfd_get_file_flags(abfd) & HAS_SYMS)
00610         nr_syms = bfd_get_symtab_upper_bound(abfd);
00611 
00612     cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec
00613           << nr_syms << hex << endl;
00614 
00615     nr_syms /= sizeof(asymbol *);
00616 
00617     if (nr_syms < 1) {
00618         if (!image_bfd_info)
00619             return;
00620         syms.reset();
00621         cverb << vbfd << "Debuginfo has debug data only" << endl;
00622     } else {
00623         syms.reset(new asymbol *[nr_syms]);
00624         nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
00625         cverb << vbfd << "bfd_canonicalize_symtab: " << dec
00626               << nr_syms << hex << endl;
00627     }
00628 }
00629 
00630 
00631 linenr_info const
00632 find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
00633                   bfd_vma offset, bool anon_obj)
00634 {
00635     char const * function = "";
00636     char const * cfilename = "";
00637     unsigned int linenr = 0;
00638     linenr_info info;
00639     bfd * abfd;
00640     asymbol ** syms;
00641     asection * section = NULL;
00642     asymbol * empty_syms[1];
00643     bfd_vma pc;
00644     bool ret;
00645 
00646     if (!b.valid())
00647         goto fail;
00648 
00649     // take care about artificial symbol
00650     if (!sym.symbol())
00651         goto fail;
00652 
00653     abfd = b.abfd;
00654     syms = b.syms.get();
00655     if (!syms) {
00656         // If this bfd_info object has no syms, that implies that we're
00657         // using a debuginfo bfd_info object that has only debug data.
00658         // This also implies that the passed sym is from the runtime binary,
00659         // and thus it's section is also from the runtime binary.  And
00660         // since section VMA can be different for a runtime binary (prelinked)
00661         // and its associated debuginfo, we need to obtain the debuginfo
00662         // section to pass to the libbfd functions.
00663         asection * sect_candidate;
00664         bfd_vma vma_adj = b.get_image_bfd_info()->abfd->start_address - abfd->start_address;
00665         if (vma_adj == 0)
00666             section = sym.symbol()->section;
00667         for (sect_candidate = abfd->sections;
00668              (sect_candidate != NULL) && (section == NULL);
00669              sect_candidate = sect_candidate->next) {
00670             if (sect_candidate->vma + vma_adj == sym.symbol()->section->vma) {
00671                 section = sect_candidate;
00672             }
00673         }
00674         if (section == NULL) {
00675             cerr << "ERROR: Unable to find section for symbol " << sym.symbol()->name << endl;
00676             goto fail;
00677         }
00678         syms = empty_syms;
00679         syms[0] = NULL;
00680 
00681     } else {
00682         section = sym.symbol()->section;
00683     }
00684     if (anon_obj)
00685         pc = offset - sym.symbol()->section->vma;
00686     else
00687         pc = (sym.value() + offset) - sym.filepos();
00688 
00689     if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
00690         goto fail;
00691 
00692     if (pc >= bfd_section_size(abfd, section))
00693         goto fail;
00694 
00695     ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
00696                                      &function, &linenr);
00697 
00698     if (!ret || !cfilename || !function)
00699         goto fail;
00700 
00701     /*
00702      * is_correct_function does not handle the case of static inlines,
00703      * but if the linenr is non-zero in the inline case, it is the correct
00704      * line number.
00705      */
00706     if (linenr == 0 && !is_correct_function(function, sym.name()))
00707         goto fail;
00708 
00709     if (linenr == 0) {
00710         fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
00711                      &linenr);
00712     }
00713 
00714     info.found = true;
00715     info.filename = cfilename;
00716     info.line = linenr;
00717     return info;
00718 
00719 fail:
00720     info.found = false;
00721     // some stl lacks string::clear()
00722     info.filename.erase(info.filename.begin(), info.filename.end());
00723     info.line = 0;
00724     return info;
00725 }

Generated on 8 Nov 2012 for Oprofile by  doxygen 1.6.1