parse_filename.cpp

Go to the documentation of this file.
00001 
00011 #include <stdexcept>
00012 #include <vector>
00013 #include <string>
00014 #include <iostream>
00015 #include <sys/stat.h>
00016 
00017 #include "parse_filename.h"
00018 #include "file_manip.h"
00019 #include "string_manip.h"
00020 #include "locate_images.h"
00021 
00022 using namespace std;
00023 
00024 namespace {
00025 
00026 // PP:3.19 event_name.count.unitmask.tgid.tid.cpu
00027 parsed_filename parse_event_spec(string const & event_spec)
00028 {
00029     typedef vector<string> parts_type;
00030     typedef parts_type::size_type size_type;
00031 
00032     size_type const nr_parts = 6;
00033 
00034     parts_type parts = separate_token(event_spec, '.');
00035 
00036     if (parts.size() != nr_parts) {
00037         throw invalid_argument("parse_event_spec(): bad event specification: " + event_spec);
00038     }
00039 
00040     for (size_type i = 0; i < nr_parts ; ++i) {
00041         if (parts[i].empty()) {
00042             throw invalid_argument("parse_event_spec(): bad event specification: " + event_spec);
00043         }
00044     }
00045 
00046     parsed_filename result;
00047 
00048     size_type i = 0;
00049     result.event = parts[i++];
00050     result.count = parts[i++];
00051     result.unitmask = parts[i++];
00052     result.tgid = parts[i++];
00053     result.tid = parts[i++];
00054     result.cpu = parts[i++];
00055 
00056     return result;
00057 }
00058 
00059 
// Drop every leading component of path that precedes the first "{root}",
// "{kern}" or "{anon}" component.  The marker component itself is kept.
// When no such component is present the whole path is erased, leaving
// path empty.
void remove_base_dir(vector<string> & path)
{
    vector<string>::iterator marker = path.begin();

    while (marker != path.end()) {
        string const & component = *marker;
        bool const is_base_marker = component == "{root}"
            || component == "{kern}"
            || component == "{anon}";
        if (is_base_marker)
            break;
        ++marker;
    }

    path.erase(path.begin(), marker);
}
00075 
00076 
00080 string const parse_anon(string const & str, string const & str2)
00081 {
00082     string name = str2;
00083     // Get rid of "{anon:
00084     name.erase(0, 6);
00085     // Catch the case where we end up with an empty string.  This should
00086     // never happen, except where things have gone awfully bad with profile
00087     // data collection, resulting in one or more bogus sample files.
00088     if(0 == name.size())
00089         throw invalid_argument("parse_anon() invalid name: " + str2 + "\n"
00090             + "This error indicates your sample data is suspect. It is "
00091             + "recommended you do a --reset and collect new profile data.");
00092     // Get rid of the trailing '}'
00093     name.erase(name.size() - 1, 1);
00094     vector<string> parts = separate_token(str, '.');
00095     if (parts.size() != 3)
00096         throw invalid_argument("parse_anon() invalid name: " + str);
00097 
00098     string ret = name +" (tgid:";
00099     ret += parts[0] + " range:" + parts[1] + "-" + parts[2] + ")";
00100     return ret;
00101 }
00102 
00103 
00104 }  // anonymous namespace
00105 
00106 
00107 /*
00108  *  valid filename are variations on:
00109  *
00110  * {kern}/name/event_spec
00111  * {root}/path/to/bin/{dep}/{root}/path/to/bin/event_spec
00112  * {root}/path/to/bin/{dep}/{anon:anon}/pid.start.end/event_spec
00113  * {root}/path/to/bin/{dep}/{anon:[vdso]}/pid.start.end/event_spec
00114  * {root}/path/to/bin/{dep}/{kern}/name/event_spec
00115  * {root}/path/to/bin/{dep}/{root}/path/to/bin/{cg}/{root}/path/to/bin/event_spec
00116 
00117  *
00118  * where /name/ denote a unique path component
00119  */
00120 parsed_filename parse_filename(string const & filename,
00121                    extra_images const & extra_found_images)
00122 {
00123     struct stat st;
00124 
00125     string::size_type pos = filename.find_last_of('/');
00126     if (pos == string::npos) {
00127         throw invalid_argument("parse_filename() invalid filename: " +
00128                        filename);
00129     }
00130     string event_spec = filename.substr(pos + 1);
00131     string filename_spec = filename.substr(0, pos);
00132 
00133     parsed_filename result = parse_event_spec(event_spec);
00134 
00135     result.filename = filename;
00136 
00137     vector<string> path = separate_token(filename_spec, '/');
00138 
00139     remove_base_dir(path);
00140 
00141     // pp_interface PP:3.19 to PP:3.23 path must start either with {root}
00142     // or {kern} and we must found at least 2 component, remove_base_dir()
00143     // return an empty path if {root} or {kern} are not found
00144     if (path.size() < 2) {
00145         throw invalid_argument("parse_filename() invalid filename: " +
00146                        filename);
00147     }
00148 
00149     size_t i;
00150     for (i = 1 ; i < path.size() ; ++i) {
00151         if (path[i] == "{dep}")
00152             break;
00153 
00154         result.image += "/" + path[i];
00155     }
00156 
00157     if (i == path.size()) {
00158         throw invalid_argument("parse_filename() invalid filename: " +
00159                        filename);
00160     }
00161 
00162     // skip "{dep}"
00163     ++i;
00164 
00165     // PP:3.19 {dep}/ must be followed by {kern}/, {root}/ or {anon}/
00166     if (path[i] != "{kern}" && path[i] != "{root}" &&
00167         path[i].find("{anon", 0) != 0) {
00168         throw invalid_argument("parse_filename() invalid filename: " +
00169                        filename);
00170     }
00171 
00172     bool anon = path[i].find("{anon:", 0) == 0;
00173 
00174     // skip "{root}", "{kern}" or "{anon:.*}"
00175     ++i;
00176 
00177     for (; i < path.size(); ++i) {
00178         if (path[i] == "{cg}")
00179             break;
00180 
00181         if (anon) {
00182             pos = filename_spec.rfind('.');
00183             pos = filename_spec.rfind('.', pos-1);
00184             if (pos == string::npos) {
00185                 throw invalid_argument("parse_filename() pid.addr.addr name expected: " +
00186                                filename_spec);
00187             }
00188             string jitdump = filename_spec.substr(0, pos) + ".jo";
00189             // if a jitdump file exists, we point to this file
00190             if (!stat(jitdump.c_str(), &st)) {
00191                 // later code assumes an optional prefix path
00192                 // is stripped from the lib_image.
00193                 result.lib_image =
00194                     extra_found_images.strip_path_prefix(jitdump);
00195                 result.jit_dumpfile_exists = true;
00196             } else {
00197                 result.lib_image =  parse_anon(path[i], path[i - 1]);
00198             }
00199             i++;
00200             break;
00201         } else {
00202             result.lib_image += "/" + path[i];
00203         }
00204     }
00205 
00206     if (i == path.size())
00207         return result;
00208 
00209     // skip "{cg}"
00210     ++i;
00211     if (i == path.size() ||
00212         (path[i] != "{kern}" && path[i] != "{root}" &&
00213          path[i].find("{anon", 0) != 0)) {
00214         throw invalid_argument("parse_filename() invalid filename: "
00215                                + filename);
00216     }
00217 
00218     // skip "{root}", "{kern}" or "{anon}"
00219     anon = (path[i].find("{anon", 0) == 0);
00220     ++i;
00221 
00222     if (anon) {
00223         result.cg_image = parse_anon(path[i], path[i - 1]);
00224         i++;
00225     } else {
00226         for (; i < path.size(); ++i)
00227             result.cg_image += "/" + path[i];
00228     }
00229 
00230     return result;
00231 }
00232 
00233 bool parsed_filename::profile_spec_equal(parsed_filename const & parsed)
00234 {
00235     return  event == parsed.event &&
00236         count == parsed.count &&
00237         unitmask == parsed.unitmask &&
00238         tgid == parsed.tgid &&
00239         tid == parsed.tid &&
00240         cpu == parsed.cpu;
00241 }
00242 
00243 ostream & operator<<(ostream & out, parsed_filename const & data)
00244 {
00245     out << data.filename << endl;
00246     out << data.image << " " << data.lib_image << " "
00247         << data.event << " " << data.count << " "
00248         << data.unitmask << " " << data.tgid << " "
00249         << data.tid << " " << data.cpu << endl;
00250 
00251     return out;
00252 }