operf_utils.cpp

Go to the documentation of this file.
00001 
00017 #include <errno.h>
00018 #include <dirent.h>
00019 #include <stdlib.h>
00020 #include <stdint.h>
00021 #include <string.h>
00022 #include <fcntl.h>
00023 #include <cverb.h>
00024 #include <iostream>
00025 #include "operf_counter.h"
00026 #include "operf_utils.h"
00027 #ifdef HAVE_LIBPFM
00028 #include <perfmon/pfmlib.h>
00029 #endif
00030 #include "op_types.h"
00031 #include "operf_process_info.h"
00032 #include "file_manip.h"
00033 #include "operf_kernel.h"
00034 #include "operf_sfile.h"
00035 #include "op_fileio.h"
00036 #include "op_libiberty.h"
00037 #include "operf_stats.h"
00038 
00039 
00040 extern verbose vmisc;
00041 extern volatile bool quit;
00042 extern volatile bool read_quit;
00043 extern operf_read operfRead;
00044 extern int sample_reads;
00045 extern unsigned int pagesize;
00046 extern char * app_name;
00047 extern pid_t app_PID;
00048 extern verbose vrecord;
00049 extern verbose vconvert;
00050 
00051 using namespace std;
00052 
// Process bookkeeping keyed by PID; the FORK, COMM, MMAP and hypervisor-sample
// handlers in this file look entries up here and insert new ones.
00053 map<pid_t, operf_process_info *> process_map;
// Mappings recorded by the MMAP handler for events whose misc field has the
// PERF_RECORD_MISC_USER bit set, keyed by op_basename() of the mmap filename.
00054 multimap<string, struct operf_mmap *> all_images_map;
// Kernel-module mappings, keyed by the mapping's start address.
00055 map<u64, struct operf_mmap *> kernel_modules;
// Mapping whose name matched operf_get_vmlinux_name(); consulted first when
// resolving kernel-mode samples.
00056 struct operf_mmap * kernel_mmap;
// Read by the sample-resolution code below to suppress "dropped sample"
// diagnostics and to trigger hypervisor mapping creation.
// NOTE(review): presumably true during an initial pass over the data -- it is
// set outside the visible part of this file; confirm.
00057 bool first_time_processing;
// NOTE(review): throttled, mmap_size and pg_sz are not referenced in the
// visible part of this file; their meaning should be confirmed at their users.
00058 bool throttled;
00059 size_t mmap_size;
00060 size_t pg_sz;
00061 
// NOTE(review): not referenced in the visible part of this file.
00062 static list<event_t *> unresolved_events;
// State of the most recently resolved sample (process, image, pc, ...);
// __get_operf_trans() reuses its cached process info when the PID repeats.
00063 static struct operf_transient trans;
// NOTE(review): not referenced in the visible part of this file.
00064 static bool sfile_init_done;
00065 
00066 /* The handling of mmap's for a process was a bit tricky to get right, in particular,
00067  * the handling of what I refer to as "deferred mmap's" -- i.e., when we receive an
00068  * mmap event for which we've not yet received a comm event (so we don't know app name
00069  * for the process).  I have left in some debugging code here (compiled out via #ifdef)
00070  * so we can easily test and validate any changes we ever may need to make to this code.
00071  */
00072 //#define _TEST_DEFERRED_MAPPING
00073 #ifdef _TEST_DEFERRED_MAPPING
00074 static bool do_comm_event;
00075 static event_t comm_event;
00076 #endif
00077 
00078 
00079 /* Some architectures (e.g., ppc64) do not use the same event value (code) for oprofile
00080  * and for perf_events.  The operf-record process requires event values that perf_events
00081  * understands, but the operf-read process requires oprofile event values.  The purpose of
00082  * the following method is to map the operf-record event value to a value that
00083  * opreport can understand.
00084  */
00085 #if (defined(__powerpc__) || defined(__powerpc64__))
00086 #define NIL_CODE ~0U
00087 
00088 #if HAVE_LIBPFM3
00089 static bool _get_codes_for_match(unsigned int pfm_idx, const char name[],
00090                                  vector<operf_event_t> * evt_vec)
00091 {
00092     unsigned int num_events = evt_vec->size();
00093     int tmp_code, ret;
00094     char evt_name[OP_MAX_EVT_NAME_LEN];
00095     char * grp_name;
00096     unsigned int events_converted = 0;
00097     for (unsigned int i = 0; i < num_events; i++) {
00098         operf_event_t event = (*evt_vec)[i];
00099         if (event.evt_code != NIL_CODE) {
00100             events_converted++;
00101             continue;
00102         }
00103         memset(evt_name, 0, OP_MAX_EVT_NAME_LEN);
00104         if (!strcmp(event.name, "CYCLES")) {
00105             strcpy(evt_name ,"PM_CYC") ;
00106         } else if ((grp_name = strstr(event.name, "_GRP"))) {
00107             strncpy(evt_name, event.name, grp_name - event.name);
00108         } else {
00109             strncpy(evt_name, event.name, strlen(event.name));
00110         }
00111         if (strncmp(name, evt_name, OP_MAX_EVT_NAME_LEN))
00112             continue;
00113         ret = pfm_get_event_code(pfm_idx, &tmp_code);
00114         if (ret != PFMLIB_SUCCESS) {
00115             string evt_name_str = event.name;
00116             string msg = "libpfm cannot find event code for " + evt_name_str +
00117                     "; cannot continue";
00118             throw runtime_error(msg);
00119         }
00120         event.evt_code = tmp_code;
00121         (*evt_vec)[i] = event;
00122         events_converted++;
00123         cverb << vrecord << "Successfully converted " << event.name << " to perf_event code "
00124               << hex << tmp_code << endl;
00125     }
00126     return (events_converted == num_events);
00127 }
00128 #else
00129 static bool _op_get_event_codes(vector<operf_event_t> * evt_vec)
00130 {
00131     int ret, i;
00132     unsigned int num_events = evt_vec->size();
00133     char evt_name[OP_MAX_EVT_NAME_LEN];
00134     char * grp_name;
00135     unsigned int events_converted = 0;
00136     uint64_t code[1];
00137 
00138     typedef struct {
00139         uint64_t    *codes;
00140         char        **fstr;
00141         size_t      size;
00142         int         count;
00143         int         idx;
00144     } pfm_raw_pmu_encode_t;
00145 
00146     pfm_raw_pmu_encode_t raw;
00147     raw.codes = code;
00148     raw.count = 1;
00149     raw.fstr = NULL;
00150 
00151     if (pfm_initialize() != PFM_SUCCESS)
00152         throw runtime_error("Unable to initialize libpfm; cannot continue");
00153 
00154     for (unsigned int i = 0; i < num_events; i++) {
00155         operf_event_t event = (*evt_vec)[i];
00156         memset(evt_name, 0, OP_MAX_EVT_NAME_LEN);
00157         if (!strcmp(event.name, "CYCLES")) {
00158             strcpy(evt_name ,"PM_CYC") ;
00159         } else if ((grp_name = strstr(event.name, "_GRP"))) {
00160             strncpy(evt_name, event.name, grp_name - event.name);
00161         } else {
00162             strncpy(evt_name, event.name, strlen(event.name));
00163         }
00164 
00165         memset(&raw, 0, sizeof(raw));
00166         ret = pfm_get_os_event_encoding(evt_name, PFM_PLM3, PFM_OS_NONE, &raw);
00167         if (ret != PFM_SUCCESS) {
00168             string evt_name_str = event.name;
00169             string msg = "libpfm cannot find event code for " + evt_name_str +
00170                     "; cannot continue";
00171             throw runtime_error(msg);
00172         }
00173 
00174         event.evt_code = raw.codes[0];
00175         (*evt_vec)[i] = event;
00176         events_converted++;
00177         cverb << vrecord << "Successfully converted " << event.name << " to perf_event code "
00178               << hex << event.evt_code << endl;
00179     }
00180     return (events_converted == num_events);
00181 }
00182 #endif
00183 
00184 bool OP_perf_utils::op_convert_event_vals(vector<operf_event_t> * evt_vec)
00185 {
00186     unsigned int i, count;
00187     char name[256];
00188     int ret;
00189     for (unsigned int i = 0; i < evt_vec->size(); i++) {
00190         operf_event_t event = (*evt_vec)[i];
00191         event.evt_code = NIL_CODE;
00192         (*evt_vec)[i] = event;
00193     }
00194 
00195 #if HAVE_LIBPFM3
00196     if (pfm_initialize() != PFMLIB_SUCCESS)
00197         throw runtime_error("Unable to initialize libpfm; cannot continue");
00198 
00199     ret = pfm_get_num_events(&count);
00200     if (ret != PFMLIB_SUCCESS)
00201         throw runtime_error("Unable to use libpfm to obtain event code; cannot continue");
00202     for(i =0 ; i < count; i++)
00203     {
00204         ret = pfm_get_event_name(i, name, 256);
00205         if (ret != PFMLIB_SUCCESS)
00206             continue;
00207         if (_get_codes_for_match(i, name, evt_vec))
00208             break;
00209     }
00210     return (i != count);
00211 #else
00212     return _op_get_event_codes(evt_vec);
00213 #endif
00214 }
00215 
00216 #endif
00217 
00218 
00219 static inline void update_trans_last(struct operf_transient * trans)
00220 {
00221     trans->last = trans->current;
00222     trans->last_pc = trans->pc;
00223 }
00224 
00225 static inline void clear_trans(struct operf_transient * trans)
00226 {
00227     trans->tgid = ~0U;
00228     trans->cur_procinfo = NULL;
00229 }
00230 
00231 static void __handle_fork_event(event_t * event)
00232 {
00233     if (cverb << vconvert)
00234         cout << "PERF_RECORD_FORK for tgid/tid = " << event->fork.pid
00235              << "/" << event->fork.tid << endl;
00236 
00237     map<pid_t, operf_process_info *>::iterator it;
00238     operf_process_info * parent = NULL;
00239     operf_process_info * forked_proc = NULL;
00240 
00241     it = process_map.find(event->fork.ppid);
00242     if (it != process_map.end()) {
00243         parent = it->second;
00244     } else {
00245         // Create a new proc info object for the parent, but mark it invalid since we have
00246         // not yet received a COMM event for this PID.
00247         parent = new operf_process_info(event->fork.ppid, app_name ? app_name : NULL,
00248                                                                    app_name != NULL, false);
00249         if (cverb << vconvert)
00250             cout << "Adding new proc info to collection for PID " << event->fork.ppid << endl;
00251         process_map[event->fork.ppid] = parent;
00252     }
00253 
00254     it = process_map.find(event->fork.pid);
00255     if (it == process_map.end()) {
00256         forked_proc = new operf_process_info(event->fork.pid,
00257                                              parent->get_app_name().c_str(),
00258                                              parent->is_appname_valid(), parent->is_valid());
00259         if (cverb << vconvert)
00260             cout << "Adding new proc info to collection for PID " << event->fork.pid << endl;
00261         process_map[event->fork.pid] = forked_proc;
00262         forked_proc->connect_forked_process_to_parent(parent);
00263         parent->add_forked_pid_association(forked_proc);
00264         if (cverb << vconvert)
00265             cout << "Connecting forked proc " << event->fork.pid << " to parent" << endl;
00266     } else {
00267         /* There are two ways that we may get to this point. One way is if
00268          * we've received a COMM event for the forked process before the FORK event.
00269          * Normally, if parent process A forks child process B which then does an exec, we
00270          * first see a FORK event, followed by a COMM event. But apparently there's no
00271          * guarantee in what order these events may be seen by userspace. No matter -- since
00272          * the exec'ed process is now a standalone process (which will get MMAP events
00273          * for all of its mmappings, there's no need to re-associate it back to the parent
00274          * as we do for a non-exec'ed forked process.  So we'll just ignore it.
00275          *
00276          * But the second way that there may be an existing operf_process_info object is if
00277          * a new mmap event (a real MMAP event or a synthesized event (e.g. for hypervisor
00278          * mmapping) occurred for the forked process before a COMM event was received for it.
00279          * In this case, the forked process will be marked invalid until the COMM event
00280          * is received. But if this process does *not* do an exec, there will never be a
00281          * COMM event for it.  Such forked processes should be tightly connected to their
00282          * parent, so we'll go ahead and associate the forked process with its parent.
00283          * If a COMM event comes later for the forked process, we'll disassociate them.
00284          */
00285         forked_proc = it->second;
00286         if (!forked_proc->is_valid()) {
00287             forked_proc->connect_forked_process_to_parent(parent);
00288             parent->add_forked_pid_association(forked_proc);
00289             if (cverb << vconvert)
00290                 cout << "Connecting existing incomplete forked proc " << event->fork.pid
00291                      << " to parent" << endl;
00292         }
00293     }
00294 }
00295 
00296 static void __handle_comm_event(event_t * event)
00297 {
00298 #ifdef _TEST_DEFERRED_MAPPING
00299     if (!do_comm_event) {
00300         comm_event = event;
00301         return;
00302     }
00303 #endif
00304     if (cverb << vconvert)
00305         cout << "PERF_RECORD_COMM for " << event->comm.comm << ", tgid/tid = "
00306              << event->comm.pid << "/" << event->comm.tid << endl;
00307 
00308     map<pid_t, operf_process_info *>::iterator it;
00309     it = process_map.find(event->comm.pid);
00310     if (it == process_map.end()) {
00311         /* TODO: Handle system housekeeping tasks.  For certain kinds of processes,
00312          * we will get a COMM event, but never get an MMAP event (e.g, kpsmoused).
00313          * Without receiving an MMAP event, we have no clue whether the name given
00314          * with the COMM event is a full "appname" or not, so the operf_process_info
00315          * is marked invalid.  We end up dropping all samples for such tasks when
00316          * doing a system-wide profile.
00317          */
00318 
00319         /* A COMM event can occur as the result of the app doing a fork/exec,
00320          * where the COMM event is for the forked process.  In that case, we
00321          * pass the event->comm field as the appname argument to the ctor.
00322          */
00323         const char * appname_arg;
00324         bool is_complete_appname;
00325         if (app_name && (app_PID == event->comm.pid)) {
00326             appname_arg = app_name;
00327             is_complete_appname = true;
00328         } else {
00329             appname_arg = event->comm.comm;
00330             is_complete_appname = false;
00331         }
00332         operf_process_info * proc = new operf_process_info(event->comm.pid,appname_arg,
00333                                                            is_complete_appname, true);
00334         if (cverb << vconvert)
00335             cout << "Adding new proc info to collection for PID " << event->comm.pid << endl;
00336         process_map[event->comm.pid] = proc;
00337     } else {
00338         if (it->second->is_valid()) {
00339             if (it->second->is_forked()) {
00340                 /* If the operf_process_info object we found was created as a result of
00341                  * a FORK event, then it was associated with the parent process and contains
00342                  * the parent's appname.  But now we're getting a COMM event for this forked
00343                  * process, which means it did an exec, so we need to change the appname
00344                  * to the executable associated with this COMM event, which is done via
00345                  * calling disassociate_from_parent().
00346                  */
00347                 if (cverb << vconvert)
00348                     cout << "Disassociating forked proc " << event->comm.pid
00349                          << " from parent" << endl;
00350                 it->second->disassociate_from_parent(event->comm.comm);
00351             } else {
00352                 if (cverb << vconvert)
00353                     cout << "Received extraneous COMM event for " << event->comm.comm
00354                     << ", PID " << event->comm.pid << endl;
00355             }
00356         } else {
00357             if (cverb << vconvert)
00358                 cout << "Processing deferred mappings" << endl;
00359             it->second->process_deferred_mappings(event->comm.comm);
00360         }
00361     }
00362 }
00363 
00364 static void __handle_mmap_event(event_t * event)
00365 {
00366     static bool kptr_restrict_warning_displayed_already = false;
00367     string image_basename = op_basename(event->mmap.filename);
00368     struct operf_mmap * mapping = NULL;
00369     multimap<string, struct operf_mmap *>::iterator it;
00370     pair<multimap<string, struct operf_mmap *>::iterator,
00371          multimap<string, struct operf_mmap *>::iterator> range;
00372 
00373     range = all_images_map.equal_range(image_basename);
00374     for (it = range.first; it != range.second; it++) {
00375         if (((strcmp((*it).second->filename, image_basename.c_str())) == 0)
00376                 && ((*it).second->start_addr == event->mmap.start)) {
00377             mapping = (*it).second;
00378             break;
00379         }
00380     }
00381     if (!mapping) {
00382         mapping = new struct operf_mmap;
00383         memset(mapping, 0, sizeof(struct operf_mmap));
00384         mapping->start_addr = event->mmap.start;
00385             strcpy(mapping->filename, event->mmap.filename);
00386         /* Mappings starting with "/" are for either a file or shared memory object.
00387          * From the kernel's perf_events subsystem, anon maps have labels like:
00388          *     [heap], [stack], [vdso], //anon
00389          */
00390         if (mapping->filename[0] == '[') {
00391             mapping->is_anon_mapping = true;
00392         } else if ((strncmp(mapping->filename, "//anon",
00393                             strlen("//anon")) == 0)) {
00394             mapping->is_anon_mapping = true;
00395             strcpy(mapping->filename, "anon");
00396         }
00397         mapping->end_addr = (event->mmap.len == 0ULL)? 0ULL : mapping->start_addr + event->mmap.len - 1;
00398         mapping->pgoff = event->mmap.pgoff;
00399 
00400         if (cverb << vconvert) {
00401             cout << "PERF_RECORD_MMAP for " << event->mmap.filename << endl;
00402             cout << "\tstart_addr: " << hex << mapping->start_addr;
00403             cout << "; end addr: " << mapping->end_addr << endl;
00404         }
00405 
00406         if (event->header.misc & PERF_RECORD_MISC_USER)
00407             all_images_map.insert(pair<string, struct operf_mmap *>(image_basename, mapping));
00408     }
00409 
00410     if (event->header.misc & PERF_RECORD_MISC_KERNEL) {
00411         if (!strncmp(mapping->filename, operf_get_vmlinux_name(),
00412                     strlen(mapping->filename))) {
00413             /* The kernel_mmap is just a convenience variable
00414              * for use when mapping samples to kernel space, since
00415              * most of the kernel samples will be attributable to
00416              * the vmlinux file versus kernel modules.
00417              */
00418             kernel_mmap = mapping;
00419         } else {
00420             if ((kptr_restrict == 1) && !no_vmlinux && (my_uid != 0)) {
00421                 if (!kptr_restrict_warning_displayed_already) {
00422                     kptr_restrict_warning_displayed_already = true;
00423                     cerr << endl << "< < < WARNING > > >" << endl;
00424                     cerr << "Samples for vmlinux kernel will be recorded, but kernel module profiling"
00425                          << endl << "is not possible with current system config." << endl;
00426                     cerr << "Set /proc/sys/kernel/kptr_restrict to 0 to see samples for kernel modules."
00427                          << endl << "< < < < < > > > > >" << endl << endl;
00428                 }
00429             } else {
00430                 operf_create_module(mapping->filename,
00431                                     mapping->start_addr,
00432                                     mapping->end_addr);
00433                 kernel_modules[mapping->start_addr] = mapping;
00434             }
00435         }
00436     } else {
00437         map<pid_t, operf_process_info *>::iterator it;
00438         it = process_map.find(event->mmap.pid);
00439         if (it == process_map.end()) {
00440             /* Create a new proc info object, but mark it invalid since we have
00441              * not yet received a COMM event for this PID. This MMAP event may
00442              * be on behalf of a process created as a result of a fork/exec.
00443              * The order of delivery of events is not guaranteed so we may see
00444              * this MMAP event before getting the COMM event for that process.
00445              * If this is the case here, we just pass NULL for appname arg.
00446              * It will get fixed up later when the COMM event occurs.
00447              */
00448             const char * appname_arg;
00449             bool is_complete_appname;
00450             if (app_name && (app_PID == event->mmap.pid)) {
00451                 appname_arg = app_name;
00452                 is_complete_appname = true;
00453             } else {
00454                 appname_arg = NULL;
00455                 is_complete_appname = false;
00456             }
00457 
00458             operf_process_info * proc = new operf_process_info(event->mmap.pid, appname_arg,
00459                                                                is_complete_appname, false);
00460             proc->add_deferred_mapping(mapping);
00461             if (cverb << vconvert)
00462                 cout << "Added deferred mapping " << event->mmap.filename
00463                       << " for new process_info object" << endl;
00464             process_map[event->mmap.pid] = proc;
00465 #ifdef _TEST_DEFERRED_MAPPING
00466             if (!do_comm_event) {
00467                 do_comm_event = true;
00468                 __handle_comm_event(comm_event, out);
00469             }
00470 #endif
00471         } else if (!it->second->is_valid()) {
00472             it->second->add_deferred_mapping(mapping);
00473             if (cverb << vconvert)
00474                 cout << "Added deferred mapping " << event->mmap.filename
00475                       << " for existing but incomplete process_info object" << endl;
00476         } else {
00477             if (cverb << vconvert)
00478                 cout << "Process mapping for " << event->mmap.filename << " on behalf of "
00479                      << event->mmap.pid << endl;
00480             it->second->process_new_mapping(mapping);
00481         }
00482     }
00483 }
00484 
00485 static struct operf_transient * __get_operf_trans(struct sample_data * data, bool hypervisor_domain,
00486                                                   bool kernel_mode)
00487 {
00488     operf_process_info * proc = NULL;
00489     const struct operf_mmap * op_mmap = NULL;
00490     struct operf_transient * retval = NULL;
00491 
00492     if (trans.tgid == data->pid) {
00493         proc = trans.cur_procinfo;
00494         if (cverb << vconvert)
00495             cout << "trans.tgid == data->pid : " << data->pid << endl;
00496 
00497     } else {
00498         // Find operf_process info for data.tgid.
00499         std::map<pid_t, operf_process_info *>::const_iterator it = process_map.find(data->pid);
00500         if (it != process_map.end() && (it->second->is_appname_valid())) {
00501             proc = it->second;
00502         } else {
00503             /* This can happen for the following reasons:
00504              *   - We get a sample before getting a COMM or MMAP
00505              *     event for the process being profiled
00506              *   - The COMM event has been processed, but since that
00507              *     only gives 16 chars of the app name, we don't
00508              *     have a valid app name yet
00509              *   - The kernel incorrectly records a sample for a
00510              *     process other than the one we requested (not
00511              *     likely -- this would be a kernel bug if it did)
00512              *
00513             */
00514             if ((cverb << vconvert) && !first_time_processing) {
00515                 cerr << "Dropping sample -- process info unavailable" << endl;
00516                 if (kernel_mode)
00517                     operf_stats[OPERF_NO_APP_KERNEL_SAMPLE]++;
00518                 else
00519                     operf_stats[OPERF_NO_APP_USER_SAMPLE]++;
00520             }
00521             goto out;
00522         }
00523     }
00524 
00525     // Now find mmapping that contains the data.ip address.
00526     // Use that mmapping to set fields in trans.
00527     if (kernel_mode) {
00528         if (data->ip >= kernel_mmap->start_addr &&
00529                 data->ip <= kernel_mmap->end_addr) {
00530             op_mmap = kernel_mmap;
00531         } else {
00532             map<u64, struct operf_mmap *>::iterator it;
00533             it = kernel_modules.begin();
00534             while (it != kernel_modules.end()) {
00535                 if (data->ip >= it->second->start_addr &&
00536                         data->ip <= it->second->end_addr) {
00537                     op_mmap = it->second;
00538                     break;
00539                 }
00540                 it++;
00541             }
00542         } if (!op_mmap) {
00543             if ((kernel_mmap->start_addr == 0ULL) &&
00544                     (kernel_mmap->end_addr == 0ULL))
00545                 op_mmap = kernel_mmap;
00546         }
00547         if (!op_mmap) {
00548             /* This can happen if a kernel module is loaded after profiling
00549              * starts, and then we get samples for that kernel module.
00550              * TODO:  Fix this.
00551              */
00552         }
00553     } else {
00554         op_mmap = proc->find_mapping_for_sample(data->ip);
00555         if (op_mmap && op_mmap->is_hypervisor && !hypervisor_domain) {
00556             cverb << vconvert << "Invalid sample: Address falls within hypervisor address range, but is not a hypervisor domain sample." << endl;
00557             operf_stats[OPERF_INVALID_CTX]++;
00558             op_mmap = NULL;
00559         }
00560     }
00561     if (op_mmap) {
00562         if (cverb << vconvert)
00563             cout << "Found mmap for sample; image_name is " << op_mmap->filename <<
00564             " and app name is " << proc->get_app_name() << endl;
00565         trans.image_name = op_mmap->filename;
00566         trans.app_filename = proc->get_app_name().c_str();
00567         trans.image_len = strlen(trans.image_name);
00568         trans.app_len = strlen(trans.app_filename);
00569         trans.start_addr = op_mmap->start_addr;
00570         trans.end_addr = op_mmap->end_addr;
00571         trans.tgid = data->pid;
00572         trans.tid = data->tid;
00573         trans.cur_procinfo = proc;
00574         trans.cpu = data->cpu;
00575         trans.is_anon = op_mmap->is_anon_mapping;
00576         trans.in_kernel = kernel_mode;
00577         if (trans.in_kernel || trans.is_anon)
00578             trans.pc = data->ip;
00579         else
00580             trans.pc = data->ip - trans.start_addr;
00581 
00582         trans.sample_id = data->id;
00583         retval = &trans;
00584     } else {
00585         if ((cverb << vconvert) && !first_time_processing) {
00586             string domain = trans.in_kernel ? "kernel" : "userspace";
00587             cerr << "Discarding " << domain << " sample for process " << data->pid
00588                  << " where no appropriate mapping was found. (pc=0x"
00589                  << hex << data->ip <<")" << endl;
00590             operf_stats[OPERF_LOST_NO_MAPPING]++;
00591         }
00592         retval = NULL;
00593     }
00594 out:
00595     return retval;
00596 }
00597 
00598 static void __handle_callchain(u64 * array, struct sample_data * data)
00599 {
00600     bool in_kernel = false;
00601     data->callchain = (struct ip_callchain *) array;
00602     if (data->callchain->nr) {
00603         if (cverb << vconvert)
00604             cout << "Processing callchain" << endl;
00605         for (int i = 0; i < data->callchain->nr; i++) {
00606             data->ip = data->callchain->ips[i];
00607             if (data->ip >= PERF_CONTEXT_MAX) {
00608                 switch (data->ip) {
00609                     case PERF_CONTEXT_HV:
00610                         // hypervisor samples are not supported for callgraph
00611                         // TODO: log lost callgraph arc
00612                         break;
00613                     case PERF_CONTEXT_KERNEL:
00614                         in_kernel = true;
00615                         break;
00616                     case PERF_CONTEXT_USER:
00617                         in_kernel = false;
00618                         break;
00619                     default:
00620                         break;
00621                 }
00622                 continue;
00623             }
00624             if (data->ip && __get_operf_trans(data, false, in_kernel)) {
00625                 if ((trans.current = operf_sfile_find(&trans))) {
00626                     operf_sfile_log_arc(&trans);
00627                     update_trans_last(&trans);
00628                 }
00629             } else {
00630                 if (data->ip)
00631                     operf_stats[OPERF_BT_LOST_NO_MAPPING]++;
00632             }
00633         }
00634     }
00635 }
00636 
00637 static void __map_hypervisor_sample(u64 ip, u32 pid)
00638 {
00639     operf_process_info * proc;
00640     map<pid_t, operf_process_info *>::iterator it;
00641     it = process_map.find(pid);
00642     if (it == process_map.end()) {
00643         /* Create a new proc info object, but mark it invalid since we have
00644          * not yet received a COMM event for this PID. This sample may be
00645          * on behalf of a process created as a result of a fork/exec.
00646          * The order of delivery of events is not guaranteed so we may see
00647          * this sample event before getting the COMM event for that process.
00648          * If this is the case here, we just pass NULL for appname arg.
00649          * It will get fixed up later when the COMM event occurs.
00650          */
00651         const char * appname_arg;
00652         bool is_complete_appname;
00653         if (app_name && (app_PID == pid)) {
00654             appname_arg = app_name;
00655             is_complete_appname = true;
00656         } else {
00657             appname_arg = NULL;
00658             is_complete_appname = false;
00659         }
00660 
00661         proc = new operf_process_info(pid, appname_arg,
00662                                       is_complete_appname, false);
00663 
00664         if (cverb << vconvert)
00665             cout << "Adding new proc info to collection for PID " << pid << endl;
00666         process_map[pid] = proc;
00667 
00668     } else {
00669         proc = it->second;
00670     }
00671     proc->process_hypervisor_mapping(ip);
00672 }
00673 
00674 static void __handle_sample_event(event_t * event, u64 sample_type)
00675 {
00676     struct sample_data data;
00677     bool found_trans = false;
00678     bool in_kernel;
00679     const struct operf_mmap * op_mmap = NULL;
00680     bool hypervisor = (event->header.misc == PERF_RECORD_MISC_HYPERVISOR);
00681     u64 *array = event->sample.array;
00682 
00683     if (sample_type & PERF_SAMPLE_IP) {
00684         data.ip = event->ip.ip;
00685         array++;
00686     }
00687 
00688     if (sample_type & PERF_SAMPLE_TID) {
00689         u_int32_t *p = (u_int32_t *)array;
00690         data.pid = p[0];
00691         data.tid = p[1];
00692         array++;
00693     }
00694 
00695     data.id = ~0ULL;
00696     if (sample_type & PERF_SAMPLE_ID) {
00697         data.id = *array;
00698         array++;
00699     }
00700 
00701     if (sample_type & PERF_SAMPLE_CPU) {
00702         u_int32_t *p = (u_int32_t *)array;
00703         data.cpu = *p;
00704         array++;
00705     }
00706     if (event->header.misc == PERF_RECORD_MISC_KERNEL) {
00707         in_kernel = true;
00708     } else if (event->header.misc == PERF_RECORD_MISC_USER) {
00709         in_kernel = false;
00710     }
00711 #if (defined(__powerpc__) || defined(__powerpc64__))
00712     else if (event->header.misc == PERF_RECORD_MISC_HYPERVISOR) {
00713 #define MAX_HYPERVISOR_ADDRESS 0xfffffffULL
00714         if (data.ip > MAX_HYPERVISOR_ADDRESS) {
00715             cverb << vconvert << "Discarding out-of-range hypervisor sample: "
00716                   << hex << data.ip << endl;
00717             operf_stats[OPERF_LOST_INVALID_HYPERV_ADDR]++;
00718             goto out;
00719         }
00720         in_kernel = false;
00721         if (first_time_processing) {
00722             __map_hypervisor_sample(data.ip, data.pid);
00723         }
00724     }
00725 #endif
00726     else {
00727         // TODO: Unhandled types are the guest kernel and guest user samples.
00728         // We should at least log what we're throwing away.
00729         if (cverb << vconvert) {
00730             const char * domain;
00731             switch (event->header.misc) {
00732             case PERF_RECORD_MISC_HYPERVISOR:
00733                 domain = "hypervisor";
00734                 break;
00735             case PERF_RECORD_MISC_GUEST_KERNEL:
00736                 domain = "guest OS";
00737                 break;
00738             case PERF_RECORD_MISC_GUEST_USER:
00739                 domain = "guest user";
00740                 break;
00741             default:
00742                 domain = "unknown";
00743                 break;
00744             }
00745             cerr << "Discarding sample from " << domain << " domain: "
00746                  << hex << data.ip << endl;
00747         }
00748         goto out;
00749     }
00750 
00751         /* If the static variable trans.tgid is still holding its initial value of 0,
00752          * then we would incorrectly find trans.tgid and data.pid matching, and
00753          * and make wrong assumptions from that match -- ending seg fault.  So we
00754          * will bail out early if we see a sample for PID 0 coming in and trans.image_name
00755          * is NULL (implying the trans object is still in its initial state).
00756          */
00757     if (!trans.image_name && (data.pid == 0)) {
00758         cverb << vconvert << "Discarding sample for PID 0" << endl;
00759         goto out;
00760     }
00761 
00762     if (cverb << vconvert)
00763         cout << "(IP, " <<  event->header.misc << "): " << dec << data.pid << "/"
00764               << data.tid << ": " << hex << (unsigned long long)data.ip
00765               << endl << "\tdata ID: " << data.id << endl;
00766 
00767     // Verify the sample.
00768     trans.event = operfRead.get_eventnum_by_perf_event_id(data.id);
00769     if (trans.event < 0) {
00770         cerr << "Event num " << trans.event << " for id " << data.id
00771              << " is invalid. Skipping sample." << endl;
00772         goto out;
00773     }
00774 
00775     /* Only need to check for "no_user" since "no_kernel" is done by
00776          * perf_events code.
00777          */
00778         if ((operfRead.get_event_by_counter(trans.event)->no_user) &&
00779                         (event->header.misc == PERF_RECORD_MISC_USER)) {
00780                 // Dropping user domain sample by user request in event spec.
00781                 goto out;
00782         }
00783 
00784     if ((event->header.misc == PERF_RECORD_MISC_HYPERVISOR) && first_time_processing) {
00785         /* We defer processing hypervisor samples until all the samples
00786          * are processed.  We do this because we synthesize an mmapping
00787          * for hypervisor samples and need to modify it (start_addr and/or
00788          * end_addr) as new hypervisor samples arrive.  If we completely
00789          * processed the hypervisor samples during "first_time_processing",
00790          * we would end up (usually) with multiple "[hypervisor_bucket]" sample files,
00791          * each with a unique address range.  So we'll stick the event on
00792          * the unresolved_events list to be re-processed later.
00793          */
00794         event_t * ev = (event_t *)xmalloc(event->header.size);
00795         memcpy(ev, event, event->header.size);
00796         unresolved_events.push_back(ev);
00797         if (cverb << vconvert)
00798             cout << "Deferring processing of hypervisor sample." << endl;
00799         goto out;
00800     }
00801     /* Check for the common case first -- i.e., where the current sample is from
00802      * the same context as the previous sample.  For the "no-vmlinux" case, start_addr
00803      * and end_addr will be zero, so need to make sure we detect that.
00804      * The last resort (and most expensive) is to call __get_operf_trans() if the
00805      * sample cannot be matched up with a previous tran object.
00806      */
00807     if (in_kernel) {
00808         if (trans.image_name && trans.tgid == data.pid) {
00809             // For the no-vmlinux case . . .
00810             if ((trans.start_addr == 0ULL) && (trans.end_addr == 0ULL)) {
00811                 trans.pc = data.ip;
00812                 found_trans = true;
00813             // For samples in vmlinux or kernel module
00814             } else if (data.ip >= trans.start_addr && data.ip <= trans.end_addr) {
00815                 trans.pc = data.ip;
00816                 found_trans = true;
00817             }
00818         }
00819     } else if (trans.tgid == data.pid && data.ip >= trans.start_addr && data.ip <= trans.end_addr) {
00820         trans.tid = data.tid;
00821         if (trans.is_anon)
00822             trans.pc = data.ip;
00823         else
00824             trans.pc = data.ip - trans.start_addr;
00825         found_trans = true;
00826     }
00827 
00828     if (!found_trans && __get_operf_trans(&data, hypervisor, in_kernel)) {
00829         trans.current = operf_sfile_find(&trans);
00830         found_trans = true;
00831     }
00832 
00833     /*
00834      * trans.current may be NULL if a kernel sample falls through
00835      * the cracks, or if it's a sample from an anon region we couldn't find
00836      */
00837     if (found_trans && trans.current) {
00838         /* log the sample or arc */
00839         operf_sfile_log_sample(&trans);
00840 
00841         update_trans_last(&trans);
00842         if (sample_type & PERF_SAMPLE_CALLCHAIN)
00843             __handle_callchain(array, &data);
00844         goto done;
00845     }
00846 
00847     if (first_time_processing) {
00848         event_t * ev = (event_t *)xmalloc(event->header.size);
00849         memcpy(ev, event, event->header.size);
00850         unresolved_events.push_back(ev);
00851     }
00852 
00853 out:
00854     clear_trans(&trans);
00855 done:
00856     return;
00857 }
00858 
00859 
00860 /* This function is used by operf_read::convertPerfData() to convert perf-formatted
00861  * data to oprofile sample data files.  After the header information in the perf sample data,
00862  * the next piece of data is typically the PERF_RECORD_COMM record which tells us the name of the
00863  * application/command being profiled.  This is followed by PERF_RECORD_MMAP records
00864  * which indicate what binary executables and libraries were mmap'ed into process memory
00865  * when profiling began.  Additional PERF_RECORD_MMAP records may appear later in the data
00866  * stream (e.g., dlopen for single-process profiling or new process startup for system-wide
00867  * profiling.
00868  */
00869 void OP_perf_utils::op_write_event(event_t * event, u64 sample_type)
00870 {
00871 #if 0
00872     if (event->header.type < PERF_RECORD_MAX) {
00873         cverb << vconvert << "PERF_RECORD type " << hex << event->header.type << endl;
00874     }
00875 #endif
00876 
00877     switch (event->header.type) {
00878     case PERF_RECORD_SAMPLE:
00879         __handle_sample_event(event, sample_type);
00880         return;
00881     case PERF_RECORD_MMAP:
00882         __handle_mmap_event(event);
00883         return;
00884     case PERF_RECORD_COMM:
00885         if (!sfile_init_done) {
00886             operf_sfile_init();
00887             sfile_init_done = true;
00888         }
00889         __handle_comm_event(event);
00890         return;
00891     case PERF_RECORD_FORK:
00892         __handle_fork_event(event);
00893         return;
00894     case PERF_RECORD_THROTTLE:
00895         throttled = true;
00896         return;
00897     case PERF_RECORD_LOST:
00898         operf_stats[OPERF_RECORD_LOST_SAMPLE] += event->lost.lost;
00899         return;
00900     case PERF_RECORD_EXIT:
00901         return;
00902     default:
00903         // OK, ignore all other header types.
00904         cverb << vconvert << "No matching event type for " << hex << event->header.type << endl;
00905         return;
00906     }
00907 }
00908 
00909 void OP_perf_utils::op_reprocess_unresolved_events(u64 sample_type)
00910 {
00911     cverb << vconvert << "Reprocessing samples" << endl;
00912     list<event_t *>::const_iterator it = unresolved_events.begin();
00913     for (; it != unresolved_events.end(); it++) {
00914         event_t * evt = (*it);
00915         // This is just a sanity check, since all events in this list
00916         // are unresolved sample events.
00917         if (evt->header.type == PERF_RECORD_SAMPLE) {
00918             __handle_sample_event(evt, sample_type);
00919             free(evt);
00920         }
00921     }
00922 }
00923 
00924 void OP_perf_utils::op_release_resources(void)
00925 {
00926     map<pid_t, operf_process_info *>::iterator it = process_map.begin();
00927     while (it != process_map.end())
00928         delete it++->second;
00929     process_map.clear();
00930 
00931     multimap<string, struct operf_mmap *>::iterator images_it = all_images_map.begin();
00932     while (images_it != all_images_map.end())
00933         delete images_it++->second;
00934     all_images_map.clear();
00935     delete kernel_mmap;
00936 
00937     operf_sfile_close_files();
00938     operf_free_modules_list();
00939 
00940 }
00941 
00942 void OP_perf_utils::op_perfrecord_sigusr1_handler(int sig __attribute__((unused)),
00943         siginfo_t * siginfo __attribute__((unused)),
00944         void *u_context __attribute__((unused)))
00945 {
00946     quit = true;
00947 }
00948 
00949 void OP_perf_utils::op_perfread_sigusr1_handler(int sig __attribute__((unused)),
00950         siginfo_t * siginfo __attribute__((unused)),
00951         void *u_context __attribute__((unused)))
00952 {
00953     read_quit = true;
00954 }
00955 
00956 int OP_perf_utils::op_read_from_stream(ifstream & is, char * buf, streamsize sz)
00957 {
00958     int rc = 0;
00959     is.read(buf, sz);
00960     if (!is.eof() && is.fail()) {
00961         cerr << "Internal error:  Failed to read from input file." << endl;
00962         rc = -1;
00963     } else {
00964         rc = is.gcount();
00965     }
00966     return rc;
00967 }
00968 
00969 
00970 static int __mmap_trace_file(struct mmap_info & info)
00971 {
00972     int mmap_prot  = PROT_READ;
00973     int mmap_flags = MAP_SHARED;
00974 
00975     info.buf = (char *) mmap(NULL, mmap_size, mmap_prot,
00976                              mmap_flags, info.traceFD, info.offset);
00977     if (info.buf == MAP_FAILED) {
00978         cerr << "Error: mmap failed with errno:\n\t" << strerror(errno) << endl;
00979         return -1;
00980     }
00981     else {
00982         cverb << vconvert << hex << "mmap with the following parameters" << endl
00983               << "\tinfo.head: " << info.head << endl
00984               << "\tinfo.offset: " << info.offset << endl;
00985         return 0;
00986     }
00987 }
00988 
00989 
00990 int OP_perf_utils::op_mmap_trace_file(struct mmap_info & info, bool init)
00991 {
00992     u64 shift;
00993     if (init) {
00994         if (!pg_sz)
00995             pg_sz = sysconf(_SC_PAGESIZE);
00996         if (!mmap_size) {
00997             if (MMAP_WINDOW_SZ > info.file_data_size) {
00998                 mmap_size = info.file_data_size;
00999             } else {
01000                 mmap_size = MMAP_WINDOW_SZ;
01001             }
01002         }
01003         info.offset = 0;
01004         info.head = info.file_data_offset;
01005         shift = pg_sz * (info.head / pg_sz);
01006         info.offset += shift;
01007         info.head -= shift;
01008     }
01009     return __mmap_trace_file(info);
01010 }
01011 
01012 
01013 int OP_perf_utils::op_write_output(int output, void *buf, size_t size)
01014 {
01015     int sum = 0;
01016     while (size) {
01017         int ret = write(output, buf, size);
01018 
01019         if (ret < 0) {
01020             string errmsg = "Internal error:  Failed to write sample data to pipe. errno is ";
01021             errmsg += strerror(errno);
01022             throw runtime_error(errmsg);
01023         }
01024 
01025         size -= ret;
01026         buf = (char *)buf + ret;
01027         sum  += ret;
01028     }
01029     return sum;
01030 }
01031 
01032 
01033 static void op_record_process_exec_mmaps(pid_t pid, pid_t tgid, int output_fd, operf_record * pr)
01034 {
01035     char fname[PATH_MAX];
01036     FILE *fp;
01037 
01038     snprintf(fname, sizeof(fname), "/proc/%d/maps", tgid);
01039 
01040     fp = fopen(fname, "r");
01041     if (fp == NULL) {
01042         // Process must have exited already or invalid pid.
01043         cverb << vrecord << "couldn't open " << fname << endl;
01044         return;
01045     }
01046 
01047     while (1) {
01048         char line_buffer[BUFSIZ];
01049         char perms[5], pathname[PATH_MAX], dev[16];
01050         unsigned long long start_addr, end_addr, offset;
01051         u_int32_t inode;
01052 
01053         memset(pathname, '\0', sizeof(pathname));
01054         struct mmap_event mmap;
01055         size_t size;
01056         memset(&mmap, 0, sizeof(mmap));
01057         mmap.pgoff = 0;
01058         mmap.header.type = PERF_RECORD_MMAP;
01059         mmap.header.misc = PERF_RECORD_MISC_USER;
01060 
01061         if (fgets(line_buffer, sizeof(line_buffer), fp) == NULL)
01062             break;
01063 
01064         sscanf(line_buffer, "%llx-%llx %s %llx %s %d %s",
01065                 &start_addr, &end_addr, perms, &offset, dev, &inode, pathname);
01066         if (perms[2] == 'x') {
01067             char *imagename = strchr(pathname, '/');
01068 
01069             if (imagename == NULL)
01070                 imagename = strstr(pathname, "[vdso]");
01071 
01072             if (imagename == NULL)
01073                 continue;
01074 
01075             size = strlen(imagename) + 1;
01076             strcpy(mmap.filename, imagename);
01077             size = align_64bit(size);
01078             mmap.start = start_addr;
01079             mmap.len = end_addr - mmap.start;
01080             mmap.pid = tgid;
01081             mmap.tid = pid;
01082             mmap.header.size = (sizeof(mmap) -
01083                     (sizeof(mmap.filename) - size));
01084             int num = OP_perf_utils::op_write_output(output_fd, &mmap, mmap.header.size);
01085             if (cverb << vrecord)
01086                 cout << "Created MMAP event for " << imagename << endl;
01087             pr->add_to_total(num);
01088         }
01089     }
01090 
01091     fclose(fp);
01092     return;
01093 }
01094 
01095 static int _record_one_process_info(pid_t pid, bool sys_wide, operf_record * pr,
01096                                     int output_fd)
01097 {
01098     struct comm_event comm;
01099     char fname[PATH_MAX];
01100     char buff[BUFSIZ];
01101     FILE *fp;
01102     pid_t tgid = 0;
01103     size_t size = 0;
01104     DIR *tids;
01105     struct dirent dirent, *next;
01106     int ret = 0;
01107 
01108     snprintf(fname, sizeof(fname), "/proc/%d/status", pid);
01109     fp = fopen(fname, "r");
01110     if (fp == NULL) {
01111         /* Process must have finished or invalid PID passed into us.
01112          * If we're doing system-wide profiling, this case can naturally
01113          * occur, and it's not an error.  But if profiling on a single
01114          * application, we can't continue after this, so we'll bail out now.
01115          */
01116         if (!sys_wide) {
01117             cerr << "Unable to find process information for process " << pid << "." << endl;
01118             cverb << vrecord << "couldn't open " << fname << endl;
01119             return -1;
01120         } else {
01121             return 0;
01122         }
01123     }
01124 
01125     memset(&comm, 0, sizeof(comm));
01126     while (!comm.comm[0] || !comm.pid) {
01127         if (fgets(buff, sizeof(buff), fp) == NULL) {
01128             ret = -1;
01129             cverb << vrecord << "Did not find Name or PID field in status file." << endl;
01130             goto out;
01131         }
01132         if (!strncmp(buff, "Name:", 5)) {
01133             char *name = buff + 5;
01134             while (*name && isspace(*name))
01135                 ++name;
01136             size = strlen(name) - 1;
01137             // The "Name" field in /proc/pid/status currently only allows for 16 characters,
01138             // but I'm not going to count on that being stable.  We'll ensure we copy no more
01139             // than 16 chars  since the comm.comm char array only holds 16.
01140             size = size > 16 ? 16 : size;
01141             memcpy(comm.comm, name, size++);
01142         } else if (memcmp(buff, "Tgid:", 5) == 0) {
01143             char *tgids = buff + 5;
01144             while (*tgids && isspace(*tgids))
01145                 ++tgids;
01146             tgid = comm.pid = atoi(tgids);
01147         }
01148     }
01149 
01150     comm.header.type = PERF_RECORD_COMM;
01151     size = align_64bit(size);
01152     comm.header.size = sizeof(comm) - (sizeof(comm.comm) - size);
01153     if (tgid != pid) {
01154         // passed pid must have been a secondary thread
01155         comm.tid = pid;
01156         int num = OP_perf_utils::op_write_output(output_fd, &comm, comm.header.size);
01157         pr->add_to_total(num);
01158         goto out;
01159     }
01160 
01161     snprintf(fname, sizeof(fname), "/proc/%d/task", pid);
01162     tids = opendir(fname);
01163     if (tids == NULL) {
01164         // process must have exited
01165         ret = -1;
01166         cverb << vrecord << "opendir returned NULL" << endl;
01167         goto out;
01168     }
01169 
01170     while (!readdir_r(tids, &dirent, &next) && next) {
01171         char *end;
01172         pid = strtol(dirent.d_name, &end, 10);
01173         if (*end)
01174             continue;
01175 
01176         comm.tid = pid;
01177 
01178         int num = OP_perf_utils::op_write_output(output_fd, &comm, comm.header.size);
01179         pr->add_to_total(num);
01180     }
01181     closedir(tids);
01182     if (cverb << vrecord)
01183         cout << "Created COMM event for " << comm.comm << endl;
01184 
01185 out:
01186     op_record_process_exec_mmaps(pid, tgid, output_fd, pr);
01187 
01188     fclose(fp);
01189     if (ret) {
01190         cverb << vrecord << "couldn't get app name and tgid for pid "
01191               << dec << pid << " from /proc fs." << endl;
01192     }
01193     return ret;
01194 
01195 }
01196 
01197 /* Obtain process information for an active process (where the user has
01198  * passed in a process ID via the --pid option) or all active processes
01199  * (where system_wide==true).  Then generate the necessary PERF_RECORD_COMM
01200  * and PERF_RECORD_MMAP entries into the profile data stream.
01201  */
01202 int OP_perf_utils::op_record_process_info(bool system_wide, pid_t pid, operf_record * pr,
01203                                           int output_fd)
01204 {
01205     int ret = 0;
01206     if (cverb << vrecord)
01207         cout << "op_record_process_info" << endl;
01208     if (!system_wide) {
01209         ret = _record_one_process_info(pid, system_wide, pr, output_fd);
01210     } else {
01211         char buff[BUFSIZ];
01212         pid_t tgid = 0;
01213         size_t size = 0;
01214         DIR *pids;
01215         struct dirent dirent, *next;
01216 
01217         pids = opendir("/proc");
01218         if (pids == NULL) {
01219             cerr << "Unable to open /proc." << endl;
01220             return -1;
01221         }
01222 
01223         while (!readdir_r(pids, &dirent, &next) && next) {
01224             char *end;
01225             pid = strtol(dirent.d_name, &end, 10);
01226             if (((errno == ERANGE && (pid == LONG_MAX || pid == LONG_MIN))
01227                     || (errno != 0 && pid == 0)) || (end == dirent.d_name)) {
01228                 cverb << vmisc << "/proc entry " << dirent.d_name << " is not a PID" << endl;
01229                 continue;
01230             }
01231             if ((ret = _record_one_process_info(pid, system_wide, pr, output_fd)) < 0)
01232                 break;
01233         }
01234         closedir(pids);
01235     }
01236     return ret;
01237 }
01238 
01239 
01240 /*
01241  * each line is in the format:
01242  *
01243  * module_name 16480 1 dependencies Live 0xe091e000
01244  *
01245  * without any blank space in each field
01246  */
01247 static void _record_module_info(int output_fd, operf_record * pr)
01248 {
01249     const char * fname = "/proc/modules";
01250     FILE *fp;
01251     char * line;
01252     struct operf_kernel_image * image;
01253     int module_size;
01254     char ref_count[32+1];
01255     int ret;
01256     char module_name[256+1];
01257     char live_info[32+1];
01258     char dependencies[4096+1];
01259     unsigned long long start_address;
01260 
01261     fp = fopen(fname, "r");
01262     if (fp == NULL) {
01263         cerr << "Error opening /proc/modules. Unable to process module samples" << endl;
01264         cerr << strerror(errno) << endl;
01265         return;
01266     }
01267 
01268     while (1) {
01269         struct mmap_event mmap;
01270         size_t size;
01271         memset(&mmap, 0, sizeof(mmap));
01272         mmap.pgoff = 0;
01273         line = op_get_line(fp);
01274 
01275         if (!line)
01276             break;
01277 
01278         if (line[0] == '\0') {
01279             free(line);
01280             continue;
01281         }
01282 
01283         ret = sscanf(line, "%256s %u %32s %4096s %32s %llx",
01284                  module_name, &module_size, ref_count,
01285                  dependencies, live_info, &start_address);
01286         if (ret != 6) {
01287             cerr << "op_record_kernel_info: Bad /proc/modules entry: \n\t" << line << endl;
01288             free(line);
01289             continue;
01290         }
01291 
01292         mmap.header.type = PERF_RECORD_MMAP;
01293         mmap.header.misc = PERF_RECORD_MISC_KERNEL;
01294         size = strlen(module_name) + 1;
01295         strncpy(mmap.filename, module_name, size);
01296         size = align_64bit(size);
01297         mmap.start = start_address;
01298         mmap.len = module_size;
01299         mmap.pid = 0;
01300         mmap.tid = 0;
01301         mmap.header.size = (sizeof(mmap) -
01302                 (sizeof(mmap.filename) - size));
01303         int num = OP_perf_utils::op_write_output(output_fd, &mmap, mmap.header.size);
01304         if (cverb << vrecord)
01305             cout << "Created MMAP event for " << module_name << ". Size: "
01306                   << module_size << "; start addr: " << start_address << endl;
01307         pr->add_to_total(num);
01308         free(line);
01309     }
01310     fclose(fp);
01311     return;
01312 }
01313 
01314 void OP_perf_utils::op_record_kernel_info(string vmlinux_file, u64 start_addr, u64 end_addr,
01315                                           int output_fd, operf_record * pr)
01316 {
01317     struct mmap_event mmap;
01318     size_t size;
01319     memset(&mmap, 0, sizeof(mmap));
01320     mmap.pgoff = 0;
01321     mmap.header.type = PERF_RECORD_MMAP;
01322     mmap.header.misc = PERF_RECORD_MISC_KERNEL;
01323     if (vmlinux_file.empty()) {
01324         size = strlen( "no_vmlinux") + 1;
01325         strncpy(mmap.filename, "no-vmlinux", size);
01326         mmap.start = 0ULL;
01327         mmap.len = 0ULL;
01328     } else {
01329         size = vmlinux_file.length() + 1;
01330         strncpy(mmap.filename, vmlinux_file.c_str(), size);
01331         mmap.start = start_addr;
01332         mmap.len = end_addr - mmap.start;
01333     }
01334     size = align_64bit(size);
01335     mmap.pid = 0;
01336     mmap.tid = 0;
01337     mmap.header.size = (sizeof(mmap) -
01338             (sizeof(mmap.filename) - size));
01339     int num = op_write_output(output_fd, &mmap, mmap.header.size);
01340     if (cverb << vrecord)
01341         cout << "Created MMAP event of size " << mmap.header.size << " for " <<mmap.filename << ". length: "
01342              << hex << mmap.len << "; start addr: " << mmap.start << endl;
01343     pr->add_to_total(num);
01344     _record_module_info(output_fd, pr);
01345 }
01346 
01347 void OP_perf_utils::op_get_kernel_event_data(struct mmap_data *md, operf_record * pr)
01348 {
01349     struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)md->base;
01350     int out_fd = pr->out_fd();
01351 
01352     uint64_t head = pc->data_head;
01353     // Comment in perf_event.h says "User-space reading the @data_head value should issue
01354     // an rmb(), on SMP capable platforms, after reading this value."
01355     rmb();
01356 
01357     uint64_t old = md->prev;
01358     unsigned char *data = ((unsigned char *)md->base) + pagesize;
01359     uint64_t size;
01360     void *buf;
01361     int64_t diff;
01362 
01363     diff = head - old;
01364     if (diff < 0) {
01365         throw runtime_error("ERROR: event buffer wrapped, which should NEVER happen.");
01366     }
01367 
01368     if (old != head)
01369         sample_reads++;
01370 
01371     size = head - old;
01372 
01373     if ((old & md->mask) + size != (head & md->mask)) {
01374         buf = &data[old & md->mask];
01375         size = md->mask + 1 - (old & md->mask);
01376         old += size;
01377         pr->add_to_total(op_write_output(out_fd, buf, size));
01378     }
01379 
01380     buf = &data[old & md->mask];
01381     size = head - old;
01382     old += size;
01383     pr->add_to_total(op_write_output(out_fd, buf, size));
01384     md->prev = old;
01385     pc->data_tail = old;
01386 }
01387 
01388 
01389 int OP_perf_utils::op_get_next_online_cpu(DIR * dir, struct dirent *entry)
01390 {
01391 #define OFFLINE 0x30
01392     unsigned int cpu_num;
01393     char cpu_online_pathname[40];
01394     int res;
01395     FILE * online;
01396     again:
01397     do {
01398         entry = readdir(dir);
01399         if (!entry)
01400             return -1;
01401     } while (entry->d_type != DT_DIR);
01402 
01403     res = sscanf(entry->d_name, "cpu%u", &cpu_num);
01404     if (res <= 0)
01405         goto again;
01406 
01407     errno = 0;
01408     snprintf(cpu_online_pathname, 40, "/sys/devices/system/cpu/cpu%u/online", cpu_num);
01409     if ((online = fopen(cpu_online_pathname, "r")) == NULL) {
01410         cerr << "Unable to open " << cpu_online_pathname << endl;
01411         if (errno)
01412             cerr << strerror(errno) << endl;
01413         return -1;
01414     }
01415     res = fgetc(online);
01416     fclose(online);
01417     if (res == OFFLINE)
01418         goto again;
01419     else
01420         return cpu_num;
01421 }

Generated on 8 Nov 2012 for Oprofile by  doxygen 1.6.1