profile_spec.cpp

Go to the documentation of this file.
00001 
00011 #include <algorithm>
00012 #include <set>
00013 #include <sstream>
00014 #include <iterator>
00015 #include <iostream>
00016 #include <dirent.h>
00017 
00018 #include "file_manip.h"
00019 #include "op_config.h"
00020 #include "profile_spec.h"
00021 #include "string_manip.h"
00022 #include "glob_filter.h"
00023 #include "locate_images.h"
00024 #include "op_exception.h"
00025 #include "op_header.h"
00026 #include "op_fileio.h"
00027 
00028 using namespace std;
00029 
00030 namespace {
00031 
00032 // PP:3.7, full path, or relative path. If we can't find it,
00033 // we should maintain the original to maintain the wordexp etc.
00034 string const fixup_image_spec(string const & str, extra_images const & extra)
00035 {
00036     // On error find_image_path() return str, so if an occur we will
00037     // use the provided image_name not the fixed one.
00038     image_error error;
00039     return extra.find_image_path(str, error, true);
00040 }
00041 
00042 void fixup_image_spec(vector<string> & images, extra_images const & extra)
00043 {
00044     vector<string>::iterator it = images.begin();
00045     vector<string>::iterator const end = images.end();
00046 
00047     for (; it != end; ++it)
00048         *it = fixup_image_spec(*it, extra);
00049 }
00050 
00051 }  // anon namespace
00052 
00053 
00054 profile_spec::profile_spec()
00055     :
00056     extra_found_images()
00057 {
00058     parse_table["archive"] = &profile_spec::parse_archive_path;
00059     parse_table["session"] = &profile_spec::parse_session;
00060     parse_table["session-exclude"] =
00061         &profile_spec::parse_session_exclude;
00062     parse_table["image"] = &profile_spec::parse_image;
00063     parse_table["image-exclude"] = &profile_spec::parse_image_exclude;
00064     parse_table["lib-image"] = &profile_spec::parse_lib_image;
00065     parse_table["event"] = &profile_spec::parse_event;
00066     parse_table["count"] = &profile_spec::parse_count;
00067     parse_table["unit-mask"] = &profile_spec::parse_unitmask;
00068     parse_table["tid"] = &profile_spec::parse_tid;
00069     parse_table["tgid"] = &profile_spec::parse_tgid;
00070     parse_table["cpu"] = &profile_spec::parse_cpu;
00071 }
00072 
00073 
00074 void profile_spec::parse(string const & tag_value)
00075 {
00076     string value;
00077     action_t action = get_handler(tag_value, value);
00078     if (!action) {
00079         throw invalid_argument("profile_spec::parse(): not "
00080                        "a valid tag \"" + tag_value + "\"");
00081     }
00082 
00083     (this->*action)(value);
00084 }
00085 
00086 
00087 bool profile_spec::is_valid_tag(string const & tag_value)
00088 {
00089     string value;
00090     return get_handler(tag_value, value);
00091 }
00092 
00093 
00094 void profile_spec::set_image_or_lib_name(string const & str)
00095 {
00096     /* FIXME: what does spec say about this being allowed to be
00097      * a comma list or not ? */
00098     image_or_lib_image.push_back(fixup_image_spec(str, extra_found_images));
00099 }
00100 
00101 
00102 void profile_spec::parse_archive_path(string const & str)
00103 {
00104     archive_path = op_realpath(str);
00105 }
00106 
00107 
00108 string profile_spec::get_archive_path() const
00109 {
00110     return archive_path;
00111 }
00112 
00113 
00114 void profile_spec::parse_session(string const & str)
00115 {
00116     session = separate_token(str, ',');
00117 }
00118 
00119 
00120 void profile_spec::parse_session_exclude(string const & str)
00121 {
00122     session_exclude = separate_token(str, ',');
00123 }
00124 
00125 
00126 void profile_spec::parse_image(string const & str)
00127 {
00128     image = separate_token(str, ',');
00129     fixup_image_spec(image, extra_found_images);
00130 }
00131 
00132 
00133 void profile_spec::parse_image_exclude(string const & str)
00134 {
00135     image_exclude = separate_token(str, ',');
00136     fixup_image_spec(image_exclude, extra_found_images);
00137 }
00138 
00139 
00140 void profile_spec::parse_lib_image(string const & str)
00141 {
00142     lib_image = separate_token(str, ',');
00143     fixup_image_spec(lib_image, extra_found_images);
00144 }
00145 
00146 
00147 void profile_spec::parse_event(string const & str)
00148 {
00149     event.set(str);
00150 }
00151 
00152 
00153 void profile_spec::parse_count(string const & str)
00154 {
00155     count.set(str);
00156 }
00157 
00158 
00159 void profile_spec::parse_unitmask(string const & str)
00160 {
00161     unitmask.set(str);
00162 }
00163 
00164 
00165 void profile_spec::parse_tid(string const & str)
00166 {
00167     tid.set(str);
00168 }
00169 
00170 
00171 void profile_spec::parse_tgid(string const & str)
00172 {
00173     tgid.set(str);
00174 }
00175 
00176 
00177 void profile_spec::parse_cpu(string const & str)
00178 {
00179     cpu.set(str);
00180 }
00181 
00182 
00183 profile_spec::action_t
00184 profile_spec::get_handler(string const & tag_value, string & value)
00185 {
00186     string::size_type pos = tag_value.find_first_of(':');
00187     if (pos == string::npos)
00188         return 0;
00189 
00190     string tag(tag_value.substr(0, pos));
00191     value = tag_value.substr(pos + 1);
00192 
00193     parse_table_t::const_iterator it = parse_table.find(tag);
00194     if (it == parse_table.end())
00195         return 0;
00196 
00197     return it->second;
00198 }
00199 
00200 
00201 namespace {
00202 
00205 template<typename T>
00206 bool comma_match(comma_list<T> const & cl, generic_spec<T> const & value)
00207 {
00208     // if the profile spec is "all" we match the sample file
00209     if (!cl.is_set())
00210         return true;
00211     
00212     // an "all" sample file should never match specified profile
00213     // spec values
00214     if (!value.is_set())
00215         return false;
00216 
00217     // now match each profile spec value against the sample file
00218     return cl.match(value.value());
00219 }
00220 
00221 }
00222 
00223 
00224 bool profile_spec::match(filename_spec const & spec) const
00225 {
00226     bool matched_by_image_or_lib_image = false;
00227 
00228     // We need the true image name not the one based on the sample
00229     // filename for the benefit of module which have /oprofile in their
00230     // sample filename. This allow to specify profile spec based on the
00231     // real name of the image, e.g. 'binary:*oprofile.ko'
00232     string simage = fixup_image_spec(spec.image, extra_found_images);
00233     string slib_image = fixup_image_spec(spec.lib_image,
00234                          extra_found_images);
00235 
00236     // PP:3.19
00237     if (!image_or_lib_image.empty()) {
00238         glob_filter filter(image_or_lib_image, image_exclude);
00239         if (filter.match(simage) || filter.match(slib_image))
00240             matched_by_image_or_lib_image = true;
00241     }
00242 
00243     if (!matched_by_image_or_lib_image) {
00244         // PP:3.7 3.8
00245         if (!image.empty()) {
00246             glob_filter filter(image, image_exclude);
00247             if (!filter.match(simage))
00248                 return false;
00249         } else if (!image_or_lib_image.empty()) {
00250             // image.empty() means match all except if user
00251             // specified image_or_lib_image
00252             return false;
00253         }
00254 
00255         // PP:3.9 3.10
00256         if (!lib_image.empty()) {
00257             glob_filter filter(lib_image, image_exclude);
00258             if (!filter.match(slib_image))
00259                 return false;
00260         } else if (image.empty() && !image_or_lib_image.empty()) {
00261             // lib_image empty means match all except if user
00262             // specified image_or_lib_image *or* we already
00263             // matched this spec through image
00264             return false;
00265         }
00266     }
00267 
00268     if (!matched_by_image_or_lib_image) {
00269         // if we don't match by image_or_lib_image we must try to
00270         // exclude from spec, exclusion from image_or_lib_image has
00271         // been handled above
00272         vector<string> empty;
00273         glob_filter filter(empty, image_exclude);
00274         if (!filter.match(simage))
00275             return false;
00276         if (!spec.lib_image.empty() && !filter.match(slib_image))
00277             return false;
00278     }
00279 
00280     if (!event.match(spec.event))
00281         return false;
00282 
00283     if (!count.match(spec.count))
00284         return false;
00285 
00286     if (!unitmask.match(spec.unitmask))
00287         return false;
00288 
00289     if (!comma_match(cpu, spec.cpu))
00290         return false;
00291 
00292     if (!comma_match(tid, spec.tid))
00293         return false;
00294 
00295     if (!comma_match(tgid, spec.tgid))
00296         return false;
00297 
00298     return true;
00299 }
00300 
00301 
00302 profile_spec profile_spec::create(list<string> const & args,
00303                                   vector<string> const & image_path,
00304                   string const & root_path)
00305 {
00306     profile_spec spec;
00307     set<string> tag_seen;
00308     vector<string> temp_image_or_lib;
00309 
00310     list<string>::const_iterator it = args.begin();
00311     list<string>::const_iterator end = args.end();
00312 
00313     for (; it != end; ++it) {
00314         if (spec.is_valid_tag(*it)) {
00315             if (tag_seen.find(*it) != tag_seen.end()) {
00316                 throw op_runtime_error("tag specified "
00317                        "more than once: " + *it);
00318             }
00319             tag_seen.insert(*it);
00320             spec.parse(*it);
00321         } else {
00322             string const file = op_realpath(*it);
00323             temp_image_or_lib.push_back(file);
00324         }
00325     }
00326 
00327     // PP:3.5 no session given means use the current session.
00328     if (spec.session.empty())
00329         spec.session.push_back("current");
00330 
00331     bool ok = true;
00332     vector<string>::const_iterator ip_it = image_path.begin();
00333     for ( ; ip_it != image_path.end(); ++ip_it) {
00334         if (!is_directory(spec.get_archive_path() + "/" + *ip_it)) {
00335             cerr << spec.get_archive_path() + "/" + *ip_it << " isn't a valid directory\n";
00336             ok = false;
00337         }
00338     }
00339     if (!ok)
00340         throw op_runtime_error("invalid --image-path= options");
00341 
00342     spec.extra_found_images.populate(image_path, spec.get_archive_path(),
00343                      root_path);
00344     vector<string>::const_iterator im = temp_image_or_lib.begin();
00345     vector<string>::const_iterator last = temp_image_or_lib.end();
00346     for (; im != last; ++im)
00347         spec.set_image_or_lib_name(*im);
00348 
00349     return spec;
00350 }
00351 
00352 namespace {
00353 
/// Compute the list of sessions to scan.
///
/// Starts from session, or from the single entry "current" when session
/// is empty, and removes every entry equal to one of session_exclude.
/// All occurrences are removed, so a session named twice in session does
/// not survive its own exclusion.
///
/// @param session          requested sessions, possibly empty
/// @param session_exclude  sessions to drop; compared by exact string
/// @return the remaining sessions in their original order; may be empty
std::vector<std::string> filter_session(std::vector<std::string> const & session,
                  std::vector<std::string> const & session_exclude)
{
    std::vector<std::string> result(session);

    if (result.empty())
        result.push_back("current");

    std::vector<std::string>::const_iterator excl = session_exclude.begin();
    for (; excl != session_exclude.end(); ++excl) {
        // FIXME: would we use fnmatch on each item, are we allowed
        // to --session=current* ?
        result.erase(std::remove(result.begin(), result.end(), *excl),
                     result.end());
    }

    return result;
}
00374 
00375 static bool invalid_sample_file;
00376 bool valid_candidate(string const & base_dir, string const & filename,
00377                      profile_spec const & spec, bool exclude_dependent,
00378                      bool exclude_cg)
00379 {
00380     if (exclude_cg && filename.find("{cg}") != string::npos)
00381         return false;
00382 
00383     // strip out non sample files
00384     string const & sub = filename.substr(base_dir.size(), string::npos);
00385     if (!is_prefix(sub, "/{root}/") && !is_prefix(sub, "/{kern}/"))
00386         return false;
00387 
00388     /* When overflows occur in the oprofile kernel driver's sample
00389      * buffers (caused by too high of a sampling rate), it's possible
00390      * for samples to be mis-attributed.  A common scenario is that,
00391      * while profiling process 'abc' running binary 'xzy', the task
00392      * switch for 'abc' gets dropped somehow.  Then, samples are taken
00393      * for the 'xyz' binary.  In the attempt to attribute the samples to
00394      * the associated binary, the oprofile kernel code examines the
00395      * the memory mappings for the last process for which it recorded
00396      * a task switch.  When profiling at a very high rate, the oprofile
00397      * daemon is often the process that is mistakenly examined.  Then the
00398      * sample from binary 'xyz' is matched to some file that's open in
00399      * oprofiled's memory space.  Because oprofiled has many sample files
00400      * open at any given time, there's a good chance the sample's VMA is
00401      * contained within one of those sample files.  So, once finding this
00402      * bogus match, the oprofile kernel records a cookie switch for the
00403      * sample file.  This scenario is made even more likely if a high
00404      * sampling rate (e.g., profiling on several events) is paired with
00405      * callgraph data collection.
00406      *
00407      * When the daemon processes this sample data from the kernel, it
00408      * creates a sample file for the sample file, resulting in something
00409      * of the form:
00410      *    <session-dir>/[blah]<session-dir>/[blah]
00411      *
00412      * When the sample data is post-processed, the sample file is parsed to
00413      * try to determine the name of the binary, but it gets horribly confused.
00414      * At best, the post-processing tool will spit out some warning messages,
00415      * such as:
00416      * warning:
00417      * /lib64/libdl-2.9.so/CYCLES.10000.0.all.all.all/{dep}/{root}/var/lib/oprofile/samples/current/{root}/lib64/libdl-2.9.so/{dep}/{root}/lib64/libdl-2.9.so/PM_RUN_CYC_GRP12.10000.0.all.all.all
00418      * could not be found.
00419      *
00420      * At worst, the parsing may result in an "invalid argument" runtime error
00421      * because of the inability to parse a sample file whose name contains that
00422      * of another sample file.  This typically seems to happen when callgraph
00423      * data is being collected.
00424      *
00425      * The next several lines of code checks if the passed filename
00426      * contains <session-dir>/samples; if so, we discard it as an
00427      * invalid sample file.
00428      */
00429 
00430     unsigned int j = base_dir.rfind('/');
00431     string session_samples_dir = base_dir.substr(0, j);
00432     if (sub.find(session_samples_dir) != string::npos) {
00433         invalid_sample_file = true;
00434         return false;
00435     }
00436 
00437     // strip out generated JIT object files for samples of anonymous regions
00438     if (is_jit_sample(sub))
00439         return false;
00440 
00441     filename_spec file_spec(filename, spec.extra_found_images);
00442     if (spec.match(file_spec)) {
00443         if (exclude_dependent && file_spec.is_dependent())
00444             return false;
00445         return true;
00446     }
00447 
00448     return false;
00449 }
00450 
00451 
00456 void warn_if_kern_buffs_overflow(string const & session_samples_dir)
00457 {
00458     DIR * dir;
00459     struct dirent * dirent;
00460     string stats_path;
00461     int ret = 0;
00462     
00463     stats_path = session_samples_dir + "stats/";
00464     ret = op_read_int_from_file((stats_path + "event_lost_overflow").
00465                     c_str(), 0);
00466 
00467     if (!(dir = opendir(stats_path.c_str()))) {
00468         ret = -1;
00469         goto done;
00470     }
00471 
00472     while ((dirent = readdir(dir)) && !ret) {
00473         int cpu_nr;
00474         string path;
00475         if (sscanf(dirent->d_name, "cpu%d", &cpu_nr) != 1)
00476             continue;
00477         path = stats_path + dirent->d_name + "/";
00478         ret = op_read_int_from_file((path + "sample_lost_overflow").
00479                         c_str(), 0);
00480     }
00481     closedir(dir);
00482 
00483  done:
00484     if (ret > 0) {
00485         cerr << "WARNING! The OProfile kernel driver reports sample "
00486              << "buffer overflows." << endl;
00487         cerr << "Such overflows can result in incorrect sample attribution"
00488              << ", invalid sample" << endl
00489              << "files and other symptoms.  "
00490              << "See the oprofiled.log for details." << endl;
00491         cerr << "You should adjust your sampling frequency to eliminate"
00492              << " (or at least minimize)" << endl
00493              << "these overflows." << endl;
00494     }
00495 }
00496 
00497 
00498 }  // anonymous namespace
00499 
00500 
00501 list<string> profile_spec::generate_file_list(bool exclude_dependent,
00502   bool exclude_cg) const
00503 {
00504     // FIXME: isn't remove_duplicates faster than doing this, then copy() ?
00505     set<string> unique_files;
00506 
00507     vector<string> sessions = filter_session(session, session_exclude);
00508 
00509     if (sessions.empty()) {
00510         ostringstream os;
00511         os << "No session given\n"
00512            << "included session was:\n";
00513         copy(session.begin(), session.end(),
00514              ostream_iterator<string>(os, "\n"));
00515         os << "excluded session was:\n";
00516         copy(session_exclude.begin(), session_exclude.end(),
00517              ostream_iterator<string>(os, "\n"));
00518         throw invalid_argument(os.str());
00519     }
00520 
00521     bool found_file = false;
00522 
00523     vector<string>::const_iterator cit = sessions.begin();
00524     vector<string>::const_iterator end = sessions.end();
00525 
00526     for (; cit != end; ++cit) {
00527         if (cit->empty())
00528             continue;
00529 
00530         string base_dir;
00531         invalid_sample_file = false;
00532         if ((*cit)[0] != '.' && (*cit)[0] != '/')
00533             base_dir = archive_path + op_samples_dir;
00534         base_dir += *cit;
00535 
00536         base_dir = op_realpath(base_dir);
00537 
00538         list<string> files;
00539         create_file_list(files, base_dir, "*", true);
00540 
00541         if (!files.empty()) {
00542             found_file = true;
00543             warn_if_kern_buffs_overflow(base_dir + "/");
00544         }
00545 
00546         list<string>::const_iterator it = files.begin();
00547         list<string>::const_iterator fend = files.end();
00548         for (; it != fend; ++it) {
00549             if (valid_candidate(base_dir, *it, *this,
00550                 exclude_dependent, exclude_cg)) {
00551                 unique_files.insert(*it);
00552             }
00553         }
00554         if (invalid_sample_file) {
00555             cerr << "Warning: Invalid sample files found in "
00556                  << base_dir << endl;
00557             cerr << "This problem can be caused by too high of a sampling rate."
00558                  << endl;
00559         }
00560     }
00561 
00562     if (!found_file) {
00563         ostringstream os;
00564         os  << "No sample file found: If using opcontrol for profiling,\n"
00565             << "try running 'opcontrol --dump'; otherwise, specify a session containing\n"
00566             << "sample files.\n";
00567         throw op_fatal_error(os.str());
00568     }
00569 
00570     list<string> result;
00571     copy(unique_files.begin(), unique_files.end(), back_inserter(result));
00572 
00573     return result;
00574 }

Generated on 8 Nov 2012 for Oprofile by  doxygen 1.6.1