00001
00017 #include <errno.h>
00018 #include <dirent.h>
00019 #include <stdlib.h>
00020 #include <stdint.h>
00021 #include <string.h>
00022 #include <fcntl.h>
00023 #include <cverb.h>
00024 #include <iostream>
00025 #include "operf_counter.h"
00026 #include "operf_utils.h"
00027 #ifdef HAVE_LIBPFM
00028 #include <perfmon/pfmlib.h>
00029 #endif
00030 #include "op_types.h"
00031 #include "operf_process_info.h"
00032 #include "file_manip.h"
00033 #include "operf_kernel.h"
00034 #include "operf_sfile.h"
00035 #include "op_fileio.h"
00036 #include "op_libiberty.h"
00037 #include "operf_stats.h"
00038
00039
00040 extern verbose vmisc;
00041 extern volatile bool quit;
00042 extern volatile bool read_quit;
00043 extern operf_read operfRead;
00044 extern int sample_reads;
00045 extern unsigned int pagesize;
00046 extern char * app_name;
00047 extern pid_t app_PID;
00048 extern verbose vrecord;
00049 extern verbose vconvert;
00050
00051 using namespace std;
00052
00053 map<pid_t, operf_process_info *> process_map;
00054 multimap<string, struct operf_mmap *> all_images_map;
00055 map<u64, struct operf_mmap *> kernel_modules;
00056 struct operf_mmap * kernel_mmap;
00057 bool first_time_processing;
00058 bool throttled;
00059 size_t mmap_size;
00060 size_t pg_sz;
00061
00062 static list<event_t *> unresolved_events;
00063 static struct operf_transient trans;
00064 static bool sfile_init_done;
00065
00066
00067
00068
00069
00070
00071
00072
00073 #ifdef _TEST_DEFERRED_MAPPING
00074 static bool do_comm_event;
00075 static event_t comm_event;
00076 #endif
00077
00078
00079
00080
00081
00082
00083
00084
00085 #if (defined(__powerpc__) || defined(__powerpc64__))
00086 #define NIL_CODE ~0U
00087
00088 #if HAVE_LIBPFM3
00089 static bool _get_codes_for_match(unsigned int pfm_idx, const char name[],
00090 vector<operf_event_t> * evt_vec)
00091 {
00092 unsigned int num_events = evt_vec->size();
00093 int tmp_code, ret;
00094 char evt_name[OP_MAX_EVT_NAME_LEN];
00095 char * grp_name;
00096 unsigned int events_converted = 0;
00097 for (unsigned int i = 0; i < num_events; i++) {
00098 operf_event_t event = (*evt_vec)[i];
00099 if (event.evt_code != NIL_CODE) {
00100 events_converted++;
00101 continue;
00102 }
00103 memset(evt_name, 0, OP_MAX_EVT_NAME_LEN);
00104 if (!strcmp(event.name, "CYCLES")) {
00105 strcpy(evt_name ,"PM_CYC") ;
00106 } else if ((grp_name = strstr(event.name, "_GRP"))) {
00107 strncpy(evt_name, event.name, grp_name - event.name);
00108 } else {
00109 strncpy(evt_name, event.name, strlen(event.name));
00110 }
00111 if (strncmp(name, evt_name, OP_MAX_EVT_NAME_LEN))
00112 continue;
00113 ret = pfm_get_event_code(pfm_idx, &tmp_code);
00114 if (ret != PFMLIB_SUCCESS) {
00115 string evt_name_str = event.name;
00116 string msg = "libpfm cannot find event code for " + evt_name_str +
00117 "; cannot continue";
00118 throw runtime_error(msg);
00119 }
00120 event.evt_code = tmp_code;
00121 (*evt_vec)[i] = event;
00122 events_converted++;
00123 cverb << vrecord << "Successfully converted " << event.name << " to perf_event code "
00124 << hex << tmp_code << endl;
00125 }
00126 return (events_converted == num_events);
00127 }
00128 #else
00129 static bool _op_get_event_codes(vector<operf_event_t> * evt_vec)
00130 {
00131 int ret, i;
00132 unsigned int num_events = evt_vec->size();
00133 char evt_name[OP_MAX_EVT_NAME_LEN];
00134 char * grp_name;
00135 unsigned int events_converted = 0;
00136 uint64_t code[1];
00137
00138 typedef struct {
00139 uint64_t *codes;
00140 char **fstr;
00141 size_t size;
00142 int count;
00143 int idx;
00144 } pfm_raw_pmu_encode_t;
00145
00146 pfm_raw_pmu_encode_t raw;
00147 raw.codes = code;
00148 raw.count = 1;
00149 raw.fstr = NULL;
00150
00151 if (pfm_initialize() != PFM_SUCCESS)
00152 throw runtime_error("Unable to initialize libpfm; cannot continue");
00153
00154 for (unsigned int i = 0; i < num_events; i++) {
00155 operf_event_t event = (*evt_vec)[i];
00156 memset(evt_name, 0, OP_MAX_EVT_NAME_LEN);
00157 if (!strcmp(event.name, "CYCLES")) {
00158 strcpy(evt_name ,"PM_CYC") ;
00159 } else if ((grp_name = strstr(event.name, "_GRP"))) {
00160 strncpy(evt_name, event.name, grp_name - event.name);
00161 } else {
00162 strncpy(evt_name, event.name, strlen(event.name));
00163 }
00164
00165 memset(&raw, 0, sizeof(raw));
00166 ret = pfm_get_os_event_encoding(evt_name, PFM_PLM3, PFM_OS_NONE, &raw);
00167 if (ret != PFM_SUCCESS) {
00168 string evt_name_str = event.name;
00169 string msg = "libpfm cannot find event code for " + evt_name_str +
00170 "; cannot continue";
00171 throw runtime_error(msg);
00172 }
00173
00174 event.evt_code = raw.codes[0];
00175 (*evt_vec)[i] = event;
00176 events_converted++;
00177 cverb << vrecord << "Successfully converted " << event.name << " to perf_event code "
00178 << hex << event.evt_code << endl;
00179 }
00180 return (events_converted == num_events);
00181 }
00182 #endif
00183
00184 bool OP_perf_utils::op_convert_event_vals(vector<operf_event_t> * evt_vec)
00185 {
00186 unsigned int i, count;
00187 char name[256];
00188 int ret;
00189 for (unsigned int i = 0; i < evt_vec->size(); i++) {
00190 operf_event_t event = (*evt_vec)[i];
00191 event.evt_code = NIL_CODE;
00192 (*evt_vec)[i] = event;
00193 }
00194
00195 #if HAVE_LIBPFM3
00196 if (pfm_initialize() != PFMLIB_SUCCESS)
00197 throw runtime_error("Unable to initialize libpfm; cannot continue");
00198
00199 ret = pfm_get_num_events(&count);
00200 if (ret != PFMLIB_SUCCESS)
00201 throw runtime_error("Unable to use libpfm to obtain event code; cannot continue");
00202 for(i =0 ; i < count; i++)
00203 {
00204 ret = pfm_get_event_name(i, name, 256);
00205 if (ret != PFMLIB_SUCCESS)
00206 continue;
00207 if (_get_codes_for_match(i, name, evt_vec))
00208 break;
00209 }
00210 return (i != count);
00211 #else
00212 return _op_get_event_codes(evt_vec);
00213 #endif
00214 }
00215
00216 #endif
00217
00218
00219 static inline void update_trans_last(struct operf_transient * trans)
00220 {
00221 trans->last = trans->current;
00222 trans->last_pc = trans->pc;
00223 }
00224
00225 static inline void clear_trans(struct operf_transient * trans)
00226 {
00227 trans->tgid = ~0U;
00228 trans->cur_procinfo = NULL;
00229 }
00230
00231 static void __handle_fork_event(event_t * event)
00232 {
00233 if (cverb << vconvert)
00234 cout << "PERF_RECORD_FORK for tgid/tid = " << event->fork.pid
00235 << "/" << event->fork.tid << endl;
00236
00237 map<pid_t, operf_process_info *>::iterator it;
00238 operf_process_info * parent = NULL;
00239 operf_process_info * forked_proc = NULL;
00240
00241 it = process_map.find(event->fork.ppid);
00242 if (it != process_map.end()) {
00243 parent = it->second;
00244 } else {
00245
00246
00247 parent = new operf_process_info(event->fork.ppid, app_name ? app_name : NULL,
00248 app_name != NULL, false);
00249 if (cverb << vconvert)
00250 cout << "Adding new proc info to collection for PID " << event->fork.ppid << endl;
00251 process_map[event->fork.ppid] = parent;
00252 }
00253
00254 it = process_map.find(event->fork.pid);
00255 if (it == process_map.end()) {
00256 forked_proc = new operf_process_info(event->fork.pid,
00257 parent->get_app_name().c_str(),
00258 parent->is_appname_valid(), parent->is_valid());
00259 if (cverb << vconvert)
00260 cout << "Adding new proc info to collection for PID " << event->fork.pid << endl;
00261 process_map[event->fork.pid] = forked_proc;
00262 forked_proc->connect_forked_process_to_parent(parent);
00263 parent->add_forked_pid_association(forked_proc);
00264 if (cverb << vconvert)
00265 cout << "Connecting forked proc " << event->fork.pid << " to parent" << endl;
00266 } else {
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285 forked_proc = it->second;
00286 if (!forked_proc->is_valid()) {
00287 forked_proc->connect_forked_process_to_parent(parent);
00288 parent->add_forked_pid_association(forked_proc);
00289 if (cverb << vconvert)
00290 cout << "Connecting existing incomplete forked proc " << event->fork.pid
00291 << " to parent" << endl;
00292 }
00293 }
00294 }
00295
00296 static void __handle_comm_event(event_t * event)
00297 {
00298 #ifdef _TEST_DEFERRED_MAPPING
00299 if (!do_comm_event) {
00300 comm_event = event;
00301 return;
00302 }
00303 #endif
00304 if (cverb << vconvert)
00305 cout << "PERF_RECORD_COMM for " << event->comm.comm << ", tgid/tid = "
00306 << event->comm.pid << "/" << event->comm.tid << endl;
00307
00308 map<pid_t, operf_process_info *>::iterator it;
00309 it = process_map.find(event->comm.pid);
00310 if (it == process_map.end()) {
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323 const char * appname_arg;
00324 bool is_complete_appname;
00325 if (app_name && (app_PID == event->comm.pid)) {
00326 appname_arg = app_name;
00327 is_complete_appname = true;
00328 } else {
00329 appname_arg = event->comm.comm;
00330 is_complete_appname = false;
00331 }
00332 operf_process_info * proc = new operf_process_info(event->comm.pid,appname_arg,
00333 is_complete_appname, true);
00334 if (cverb << vconvert)
00335 cout << "Adding new proc info to collection for PID " << event->comm.pid << endl;
00336 process_map[event->comm.pid] = proc;
00337 } else {
00338 if (it->second->is_valid()) {
00339 if (it->second->is_forked()) {
00340
00341
00342
00343
00344
00345
00346
00347 if (cverb << vconvert)
00348 cout << "Disassociating forked proc " << event->comm.pid
00349 << " from parent" << endl;
00350 it->second->disassociate_from_parent(event->comm.comm);
00351 } else {
00352 if (cverb << vconvert)
00353 cout << "Received extraneous COMM event for " << event->comm.comm
00354 << ", PID " << event->comm.pid << endl;
00355 }
00356 } else {
00357 if (cverb << vconvert)
00358 cout << "Processing deferred mappings" << endl;
00359 it->second->process_deferred_mappings(event->comm.comm);
00360 }
00361 }
00362 }
00363
00364 static void __handle_mmap_event(event_t * event)
00365 {
00366 static bool kptr_restrict_warning_displayed_already = false;
00367 string image_basename = op_basename(event->mmap.filename);
00368 struct operf_mmap * mapping = NULL;
00369 multimap<string, struct operf_mmap *>::iterator it;
00370 pair<multimap<string, struct operf_mmap *>::iterator,
00371 multimap<string, struct operf_mmap *>::iterator> range;
00372
00373 range = all_images_map.equal_range(image_basename);
00374 for (it = range.first; it != range.second; it++) {
00375 if (((strcmp((*it).second->filename, image_basename.c_str())) == 0)
00376 && ((*it).second->start_addr == event->mmap.start)) {
00377 mapping = (*it).second;
00378 break;
00379 }
00380 }
00381 if (!mapping) {
00382 mapping = new struct operf_mmap;
00383 memset(mapping, 0, sizeof(struct operf_mmap));
00384 mapping->start_addr = event->mmap.start;
00385 strcpy(mapping->filename, event->mmap.filename);
00386
00387
00388
00389
00390 if (mapping->filename[0] == '[') {
00391 mapping->is_anon_mapping = true;
00392 } else if ((strncmp(mapping->filename, "//anon",
00393 strlen("//anon")) == 0)) {
00394 mapping->is_anon_mapping = true;
00395 strcpy(mapping->filename, "anon");
00396 }
00397 mapping->end_addr = (event->mmap.len == 0ULL)? 0ULL : mapping->start_addr + event->mmap.len - 1;
00398 mapping->pgoff = event->mmap.pgoff;
00399
00400 if (cverb << vconvert) {
00401 cout << "PERF_RECORD_MMAP for " << event->mmap.filename << endl;
00402 cout << "\tstart_addr: " << hex << mapping->start_addr;
00403 cout << "; end addr: " << mapping->end_addr << endl;
00404 }
00405
00406 if (event->header.misc & PERF_RECORD_MISC_USER)
00407 all_images_map.insert(pair<string, struct operf_mmap *>(image_basename, mapping));
00408 }
00409
00410 if (event->header.misc & PERF_RECORD_MISC_KERNEL) {
00411 if (!strncmp(mapping->filename, operf_get_vmlinux_name(),
00412 strlen(mapping->filename))) {
00413
00414
00415
00416
00417
00418 kernel_mmap = mapping;
00419 } else {
00420 if ((kptr_restrict == 1) && !no_vmlinux && (my_uid != 0)) {
00421 if (!kptr_restrict_warning_displayed_already) {
00422 kptr_restrict_warning_displayed_already = true;
00423 cerr << endl << "< < < WARNING > > >" << endl;
00424 cerr << "Samples for vmlinux kernel will be recorded, but kernel module profiling"
00425 << endl << "is not possible with current system config." << endl;
00426 cerr << "Set /proc/sys/kernel/kptr_restrict to 0 to see samples for kernel modules."
00427 << endl << "< < < < < > > > > >" << endl << endl;
00428 }
00429 } else {
00430 operf_create_module(mapping->filename,
00431 mapping->start_addr,
00432 mapping->end_addr);
00433 kernel_modules[mapping->start_addr] = mapping;
00434 }
00435 }
00436 } else {
00437 map<pid_t, operf_process_info *>::iterator it;
00438 it = process_map.find(event->mmap.pid);
00439 if (it == process_map.end()) {
00440
00441
00442
00443
00444
00445
00446
00447
00448 const char * appname_arg;
00449 bool is_complete_appname;
00450 if (app_name && (app_PID == event->mmap.pid)) {
00451 appname_arg = app_name;
00452 is_complete_appname = true;
00453 } else {
00454 appname_arg = NULL;
00455 is_complete_appname = false;
00456 }
00457
00458 operf_process_info * proc = new operf_process_info(event->mmap.pid, appname_arg,
00459 is_complete_appname, false);
00460 proc->add_deferred_mapping(mapping);
00461 if (cverb << vconvert)
00462 cout << "Added deferred mapping " << event->mmap.filename
00463 << " for new process_info object" << endl;
00464 process_map[event->mmap.pid] = proc;
00465 #ifdef _TEST_DEFERRED_MAPPING
00466 if (!do_comm_event) {
00467 do_comm_event = true;
00468 __handle_comm_event(comm_event, out);
00469 }
00470 #endif
00471 } else if (!it->second->is_valid()) {
00472 it->second->add_deferred_mapping(mapping);
00473 if (cverb << vconvert)
00474 cout << "Added deferred mapping " << event->mmap.filename
00475 << " for existing but incomplete process_info object" << endl;
00476 } else {
00477 if (cverb << vconvert)
00478 cout << "Process mapping for " << event->mmap.filename << " on behalf of "
00479 << event->mmap.pid << endl;
00480 it->second->process_new_mapping(mapping);
00481 }
00482 }
00483 }
00484
00485 static struct operf_transient * __get_operf_trans(struct sample_data * data, bool hypervisor_domain,
00486 bool kernel_mode)
00487 {
00488 operf_process_info * proc = NULL;
00489 const struct operf_mmap * op_mmap = NULL;
00490 struct operf_transient * retval = NULL;
00491
00492 if (trans.tgid == data->pid) {
00493 proc = trans.cur_procinfo;
00494 if (cverb << vconvert)
00495 cout << "trans.tgid == data->pid : " << data->pid << endl;
00496
00497 } else {
00498
00499 std::map<pid_t, operf_process_info *>::const_iterator it = process_map.find(data->pid);
00500 if (it != process_map.end() && (it->second->is_appname_valid())) {
00501 proc = it->second;
00502 } else {
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514 if ((cverb << vconvert) && !first_time_processing) {
00515 cerr << "Dropping sample -- process info unavailable" << endl;
00516 if (kernel_mode)
00517 operf_stats[OPERF_NO_APP_KERNEL_SAMPLE]++;
00518 else
00519 operf_stats[OPERF_NO_APP_USER_SAMPLE]++;
00520 }
00521 goto out;
00522 }
00523 }
00524
00525
00526
00527 if (kernel_mode) {
00528 if (data->ip >= kernel_mmap->start_addr &&
00529 data->ip <= kernel_mmap->end_addr) {
00530 op_mmap = kernel_mmap;
00531 } else {
00532 map<u64, struct operf_mmap *>::iterator it;
00533 it = kernel_modules.begin();
00534 while (it != kernel_modules.end()) {
00535 if (data->ip >= it->second->start_addr &&
00536 data->ip <= it->second->end_addr) {
00537 op_mmap = it->second;
00538 break;
00539 }
00540 it++;
00541 }
00542 } if (!op_mmap) {
00543 if ((kernel_mmap->start_addr == 0ULL) &&
00544 (kernel_mmap->end_addr == 0ULL))
00545 op_mmap = kernel_mmap;
00546 }
00547 if (!op_mmap) {
00548
00549
00550
00551
00552 }
00553 } else {
00554 op_mmap = proc->find_mapping_for_sample(data->ip);
00555 if (op_mmap && op_mmap->is_hypervisor && !hypervisor_domain) {
00556 cverb << vconvert << "Invalid sample: Address falls within hypervisor address range, but is not a hypervisor domain sample." << endl;
00557 operf_stats[OPERF_INVALID_CTX]++;
00558 op_mmap = NULL;
00559 }
00560 }
00561 if (op_mmap) {
00562 if (cverb << vconvert)
00563 cout << "Found mmap for sample; image_name is " << op_mmap->filename <<
00564 " and app name is " << proc->get_app_name() << endl;
00565 trans.image_name = op_mmap->filename;
00566 trans.app_filename = proc->get_app_name().c_str();
00567 trans.image_len = strlen(trans.image_name);
00568 trans.app_len = strlen(trans.app_filename);
00569 trans.start_addr = op_mmap->start_addr;
00570 trans.end_addr = op_mmap->end_addr;
00571 trans.tgid = data->pid;
00572 trans.tid = data->tid;
00573 trans.cur_procinfo = proc;
00574 trans.cpu = data->cpu;
00575 trans.is_anon = op_mmap->is_anon_mapping;
00576 trans.in_kernel = kernel_mode;
00577 if (trans.in_kernel || trans.is_anon)
00578 trans.pc = data->ip;
00579 else
00580 trans.pc = data->ip - trans.start_addr;
00581
00582 trans.sample_id = data->id;
00583 retval = &trans;
00584 } else {
00585 if ((cverb << vconvert) && !first_time_processing) {
00586 string domain = trans.in_kernel ? "kernel" : "userspace";
00587 cerr << "Discarding " << domain << " sample for process " << data->pid
00588 << " where no appropriate mapping was found. (pc=0x"
00589 << hex << data->ip <<")" << endl;
00590 operf_stats[OPERF_LOST_NO_MAPPING]++;
00591 }
00592 retval = NULL;
00593 }
00594 out:
00595 return retval;
00596 }
00597
00598 static void __handle_callchain(u64 * array, struct sample_data * data)
00599 {
00600 bool in_kernel = false;
00601 data->callchain = (struct ip_callchain *) array;
00602 if (data->callchain->nr) {
00603 if (cverb << vconvert)
00604 cout << "Processing callchain" << endl;
00605 for (int i = 0; i < data->callchain->nr; i++) {
00606 data->ip = data->callchain->ips[i];
00607 if (data->ip >= PERF_CONTEXT_MAX) {
00608 switch (data->ip) {
00609 case PERF_CONTEXT_HV:
00610
00611
00612 break;
00613 case PERF_CONTEXT_KERNEL:
00614 in_kernel = true;
00615 break;
00616 case PERF_CONTEXT_USER:
00617 in_kernel = false;
00618 break;
00619 default:
00620 break;
00621 }
00622 continue;
00623 }
00624 if (data->ip && __get_operf_trans(data, false, in_kernel)) {
00625 if ((trans.current = operf_sfile_find(&trans))) {
00626 operf_sfile_log_arc(&trans);
00627 update_trans_last(&trans);
00628 }
00629 } else {
00630 if (data->ip)
00631 operf_stats[OPERF_BT_LOST_NO_MAPPING]++;
00632 }
00633 }
00634 }
00635 }
00636
00637 static void __map_hypervisor_sample(u64 ip, u32 pid)
00638 {
00639 operf_process_info * proc;
00640 map<pid_t, operf_process_info *>::iterator it;
00641 it = process_map.find(pid);
00642 if (it == process_map.end()) {
00643
00644
00645
00646
00647
00648
00649
00650
00651 const char * appname_arg;
00652 bool is_complete_appname;
00653 if (app_name && (app_PID == pid)) {
00654 appname_arg = app_name;
00655 is_complete_appname = true;
00656 } else {
00657 appname_arg = NULL;
00658 is_complete_appname = false;
00659 }
00660
00661 proc = new operf_process_info(pid, appname_arg,
00662 is_complete_appname, false);
00663
00664 if (cverb << vconvert)
00665 cout << "Adding new proc info to collection for PID " << pid << endl;
00666 process_map[pid] = proc;
00667
00668 } else {
00669 proc = it->second;
00670 }
00671 proc->process_hypervisor_mapping(ip);
00672 }
00673
00674 static void __handle_sample_event(event_t * event, u64 sample_type)
00675 {
00676 struct sample_data data;
00677 bool found_trans = false;
00678 bool in_kernel;
00679 const struct operf_mmap * op_mmap = NULL;
00680 bool hypervisor = (event->header.misc == PERF_RECORD_MISC_HYPERVISOR);
00681 u64 *array = event->sample.array;
00682
00683 if (sample_type & PERF_SAMPLE_IP) {
00684 data.ip = event->ip.ip;
00685 array++;
00686 }
00687
00688 if (sample_type & PERF_SAMPLE_TID) {
00689 u_int32_t *p = (u_int32_t *)array;
00690 data.pid = p[0];
00691 data.tid = p[1];
00692 array++;
00693 }
00694
00695 data.id = ~0ULL;
00696 if (sample_type & PERF_SAMPLE_ID) {
00697 data.id = *array;
00698 array++;
00699 }
00700
00701 if (sample_type & PERF_SAMPLE_CPU) {
00702 u_int32_t *p = (u_int32_t *)array;
00703 data.cpu = *p;
00704 array++;
00705 }
00706 if (event->header.misc == PERF_RECORD_MISC_KERNEL) {
00707 in_kernel = true;
00708 } else if (event->header.misc == PERF_RECORD_MISC_USER) {
00709 in_kernel = false;
00710 }
00711 #if (defined(__powerpc__) || defined(__powerpc64__))
00712 else if (event->header.misc == PERF_RECORD_MISC_HYPERVISOR) {
00713 #define MAX_HYPERVISOR_ADDRESS 0xfffffffULL
00714 if (data.ip > MAX_HYPERVISOR_ADDRESS) {
00715 cverb << vconvert << "Discarding out-of-range hypervisor sample: "
00716 << hex << data.ip << endl;
00717 operf_stats[OPERF_LOST_INVALID_HYPERV_ADDR]++;
00718 goto out;
00719 }
00720 in_kernel = false;
00721 if (first_time_processing) {
00722 __map_hypervisor_sample(data.ip, data.pid);
00723 }
00724 }
00725 #endif
00726 else {
00727
00728
00729 if (cverb << vconvert) {
00730 const char * domain;
00731 switch (event->header.misc) {
00732 case PERF_RECORD_MISC_HYPERVISOR:
00733 domain = "hypervisor";
00734 break;
00735 case PERF_RECORD_MISC_GUEST_KERNEL:
00736 domain = "guest OS";
00737 break;
00738 case PERF_RECORD_MISC_GUEST_USER:
00739 domain = "guest user";
00740 break;
00741 default:
00742 domain = "unknown";
00743 break;
00744 }
00745 cerr << "Discarding sample from " << domain << " domain: "
00746 << hex << data.ip << endl;
00747 }
00748 goto out;
00749 }
00750
00751
00752
00753
00754
00755
00756
00757 if (!trans.image_name && (data.pid == 0)) {
00758 cverb << vconvert << "Discarding sample for PID 0" << endl;
00759 goto out;
00760 }
00761
00762 if (cverb << vconvert)
00763 cout << "(IP, " << event->header.misc << "): " << dec << data.pid << "/"
00764 << data.tid << ": " << hex << (unsigned long long)data.ip
00765 << endl << "\tdata ID: " << data.id << endl;
00766
00767
00768 trans.event = operfRead.get_eventnum_by_perf_event_id(data.id);
00769 if (trans.event < 0) {
00770 cerr << "Event num " << trans.event << " for id " << data.id
00771 << " is invalid. Skipping sample." << endl;
00772 goto out;
00773 }
00774
00775
00776
00777
00778 if ((operfRead.get_event_by_counter(trans.event)->no_user) &&
00779 (event->header.misc == PERF_RECORD_MISC_USER)) {
00780
00781 goto out;
00782 }
00783
00784 if ((event->header.misc == PERF_RECORD_MISC_HYPERVISOR) && first_time_processing) {
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794 event_t * ev = (event_t *)xmalloc(event->header.size);
00795 memcpy(ev, event, event->header.size);
00796 unresolved_events.push_back(ev);
00797 if (cverb << vconvert)
00798 cout << "Deferring processing of hypervisor sample." << endl;
00799 goto out;
00800 }
00801
00802
00803
00804
00805
00806
00807 if (in_kernel) {
00808 if (trans.image_name && trans.tgid == data.pid) {
00809
00810 if ((trans.start_addr == 0ULL) && (trans.end_addr == 0ULL)) {
00811 trans.pc = data.ip;
00812 found_trans = true;
00813
00814 } else if (data.ip >= trans.start_addr && data.ip <= trans.end_addr) {
00815 trans.pc = data.ip;
00816 found_trans = true;
00817 }
00818 }
00819 } else if (trans.tgid == data.pid && data.ip >= trans.start_addr && data.ip <= trans.end_addr) {
00820 trans.tid = data.tid;
00821 if (trans.is_anon)
00822 trans.pc = data.ip;
00823 else
00824 trans.pc = data.ip - trans.start_addr;
00825 found_trans = true;
00826 }
00827
00828 if (!found_trans && __get_operf_trans(&data, hypervisor, in_kernel)) {
00829 trans.current = operf_sfile_find(&trans);
00830 found_trans = true;
00831 }
00832
00833
00834
00835
00836
00837 if (found_trans && trans.current) {
00838
00839 operf_sfile_log_sample(&trans);
00840
00841 update_trans_last(&trans);
00842 if (sample_type & PERF_SAMPLE_CALLCHAIN)
00843 __handle_callchain(array, &data);
00844 goto done;
00845 }
00846
00847 if (first_time_processing) {
00848 event_t * ev = (event_t *)xmalloc(event->header.size);
00849 memcpy(ev, event, event->header.size);
00850 unresolved_events.push_back(ev);
00851 }
00852
00853 out:
00854 clear_trans(&trans);
00855 done:
00856 return;
00857 }
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869 void OP_perf_utils::op_write_event(event_t * event, u64 sample_type)
00870 {
00871 #if 0
00872 if (event->header.type < PERF_RECORD_MAX) {
00873 cverb << vconvert << "PERF_RECORD type " << hex << event->header.type << endl;
00874 }
00875 #endif
00876
00877 switch (event->header.type) {
00878 case PERF_RECORD_SAMPLE:
00879 __handle_sample_event(event, sample_type);
00880 return;
00881 case PERF_RECORD_MMAP:
00882 __handle_mmap_event(event);
00883 return;
00884 case PERF_RECORD_COMM:
00885 if (!sfile_init_done) {
00886 operf_sfile_init();
00887 sfile_init_done = true;
00888 }
00889 __handle_comm_event(event);
00890 return;
00891 case PERF_RECORD_FORK:
00892 __handle_fork_event(event);
00893 return;
00894 case PERF_RECORD_THROTTLE:
00895 throttled = true;
00896 return;
00897 case PERF_RECORD_LOST:
00898 operf_stats[OPERF_RECORD_LOST_SAMPLE] += event->lost.lost;
00899 return;
00900 case PERF_RECORD_EXIT:
00901 return;
00902 default:
00903
00904 cverb << vconvert << "No matching event type for " << hex << event->header.type << endl;
00905 return;
00906 }
00907 }
00908
00909 void OP_perf_utils::op_reprocess_unresolved_events(u64 sample_type)
00910 {
00911 cverb << vconvert << "Reprocessing samples" << endl;
00912 list<event_t *>::const_iterator it = unresolved_events.begin();
00913 for (; it != unresolved_events.end(); it++) {
00914 event_t * evt = (*it);
00915
00916
00917 if (evt->header.type == PERF_RECORD_SAMPLE) {
00918 __handle_sample_event(evt, sample_type);
00919 free(evt);
00920 }
00921 }
00922 }
00923
00924 void OP_perf_utils::op_release_resources(void)
00925 {
00926 map<pid_t, operf_process_info *>::iterator it = process_map.begin();
00927 while (it != process_map.end())
00928 delete it++->second;
00929 process_map.clear();
00930
00931 multimap<string, struct operf_mmap *>::iterator images_it = all_images_map.begin();
00932 while (images_it != all_images_map.end())
00933 delete images_it++->second;
00934 all_images_map.clear();
00935 delete kernel_mmap;
00936
00937 operf_sfile_close_files();
00938 operf_free_modules_list();
00939
00940 }
00941
00942 void OP_perf_utils::op_perfrecord_sigusr1_handler(int sig __attribute__((unused)),
00943 siginfo_t * siginfo __attribute__((unused)),
00944 void *u_context __attribute__((unused)))
00945 {
00946 quit = true;
00947 }
00948
00949 void OP_perf_utils::op_perfread_sigusr1_handler(int sig __attribute__((unused)),
00950 siginfo_t * siginfo __attribute__((unused)),
00951 void *u_context __attribute__((unused)))
00952 {
00953 read_quit = true;
00954 }
00955
/**
 * Read up to @sz bytes from @is into @buf.
 *
 * @return the number of bytes actually read (possibly short when end of file
 *         is reached), or -1 if the read failed for a reason other than
 *         end of file.
 */
int OP_perf_utils::op_read_from_stream(ifstream & is, char * buf, streamsize sz)
{
	is.read(buf, sz);

	// A failure that coincides with EOF is just a short read.
	const bool hard_failure = is.fail() && !is.eof();
	if (hard_failure) {
		cerr << "Internal error: Failed to read from input file." << endl;
		return -1;
	}

	int bytes_read = is.gcount();
	return bytes_read;
}
00968
00969
00970 static int __mmap_trace_file(struct mmap_info & info)
00971 {
00972 int mmap_prot = PROT_READ;
00973 int mmap_flags = MAP_SHARED;
00974
00975 info.buf = (char *) mmap(NULL, mmap_size, mmap_prot,
00976 mmap_flags, info.traceFD, info.offset);
00977 if (info.buf == MAP_FAILED) {
00978 cerr << "Error: mmap failed with errno:\n\t" << strerror(errno) << endl;
00979 return -1;
00980 }
00981 else {
00982 cverb << vconvert << hex << "mmap with the following parameters" << endl
00983 << "\tinfo.head: " << info.head << endl
00984 << "\tinfo.offset: " << info.offset << endl;
00985 return 0;
00986 }
00987 }
00988
00989
00990 int OP_perf_utils::op_mmap_trace_file(struct mmap_info & info, bool init)
00991 {
00992 u64 shift;
00993 if (init) {
00994 if (!pg_sz)
00995 pg_sz = sysconf(_SC_PAGESIZE);
00996 if (!mmap_size) {
00997 if (MMAP_WINDOW_SZ > info.file_data_size) {
00998 mmap_size = info.file_data_size;
00999 } else {
01000 mmap_size = MMAP_WINDOW_SZ;
01001 }
01002 }
01003 info.offset = 0;
01004 info.head = info.file_data_offset;
01005 shift = pg_sz * (info.head / pg_sz);
01006 info.offset += shift;
01007 info.head -= shift;
01008 }
01009 return __mmap_trace_file(info);
01010 }
01011
01012
/**
 * Write exactly @size bytes from @buf to file descriptor @output, looping
 * over short writes.
 *
 * A write interrupted by a signal before any data was transferred (EINTR) is
 * retried rather than treated as an error.
 *
 * @return the total number of bytes written (== @size).
 * @throws runtime_error if write() fails for any other reason.
 */
int OP_perf_utils::op_write_output(int output, void *buf, size_t size)
{
	int sum = 0;
	char * cursor = static_cast<char *>(buf);

	while (size) {
		// ssize_t: write() may return more than an int can hold.
		ssize_t ret = write(output, cursor, size);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			std::string errmsg = "Internal error: Failed to write sample data to pipe. errno is ";
			errmsg += strerror(errno);
			throw std::runtime_error(errmsg);
		}

		size -= ret;
		cursor += ret;
		sum += ret;
	}
	return sum;
}
01031
01032
01033 static void op_record_process_exec_mmaps(pid_t pid, pid_t tgid, int output_fd, operf_record * pr)
01034 {
01035 char fname[PATH_MAX];
01036 FILE *fp;
01037
01038 snprintf(fname, sizeof(fname), "/proc/%d/maps", tgid);
01039
01040 fp = fopen(fname, "r");
01041 if (fp == NULL) {
01042
01043 cverb << vrecord << "couldn't open " << fname << endl;
01044 return;
01045 }
01046
01047 while (1) {
01048 char line_buffer[BUFSIZ];
01049 char perms[5], pathname[PATH_MAX], dev[16];
01050 unsigned long long start_addr, end_addr, offset;
01051 u_int32_t inode;
01052
01053 memset(pathname, '\0', sizeof(pathname));
01054 struct mmap_event mmap;
01055 size_t size;
01056 memset(&mmap, 0, sizeof(mmap));
01057 mmap.pgoff = 0;
01058 mmap.header.type = PERF_RECORD_MMAP;
01059 mmap.header.misc = PERF_RECORD_MISC_USER;
01060
01061 if (fgets(line_buffer, sizeof(line_buffer), fp) == NULL)
01062 break;
01063
01064 sscanf(line_buffer, "%llx-%llx %s %llx %s %d %s",
01065 &start_addr, &end_addr, perms, &offset, dev, &inode, pathname);
01066 if (perms[2] == 'x') {
01067 char *imagename = strchr(pathname, '/');
01068
01069 if (imagename == NULL)
01070 imagename = strstr(pathname, "[vdso]");
01071
01072 if (imagename == NULL)
01073 continue;
01074
01075 size = strlen(imagename) + 1;
01076 strcpy(mmap.filename, imagename);
01077 size = align_64bit(size);
01078 mmap.start = start_addr;
01079 mmap.len = end_addr - mmap.start;
01080 mmap.pid = tgid;
01081 mmap.tid = pid;
01082 mmap.header.size = (sizeof(mmap) -
01083 (sizeof(mmap.filename) - size));
01084 int num = OP_perf_utils::op_write_output(output_fd, &mmap, mmap.header.size);
01085 if (cverb << vrecord)
01086 cout << "Created MMAP event for " << imagename << endl;
01087 pr->add_to_total(num);
01088 }
01089 }
01090
01091 fclose(fp);
01092 return;
01093 }
01094
01095 static int _record_one_process_info(pid_t pid, bool sys_wide, operf_record * pr,
01096 int output_fd)
01097 {
01098 struct comm_event comm;
01099 char fname[PATH_MAX];
01100 char buff[BUFSIZ];
01101 FILE *fp;
01102 pid_t tgid = 0;
01103 size_t size = 0;
01104 DIR *tids;
01105 struct dirent dirent, *next;
01106 int ret = 0;
01107
01108 snprintf(fname, sizeof(fname), "/proc/%d/status", pid);
01109 fp = fopen(fname, "r");
01110 if (fp == NULL) {
01111
01112
01113
01114
01115
01116 if (!sys_wide) {
01117 cerr << "Unable to find process information for process " << pid << "." << endl;
01118 cverb << vrecord << "couldn't open " << fname << endl;
01119 return -1;
01120 } else {
01121 return 0;
01122 }
01123 }
01124
01125 memset(&comm, 0, sizeof(comm));
01126 while (!comm.comm[0] || !comm.pid) {
01127 if (fgets(buff, sizeof(buff), fp) == NULL) {
01128 ret = -1;
01129 cverb << vrecord << "Did not find Name or PID field in status file." << endl;
01130 goto out;
01131 }
01132 if (!strncmp(buff, "Name:", 5)) {
01133 char *name = buff + 5;
01134 while (*name && isspace(*name))
01135 ++name;
01136 size = strlen(name) - 1;
01137
01138
01139
01140 size = size > 16 ? 16 : size;
01141 memcpy(comm.comm, name, size++);
01142 } else if (memcmp(buff, "Tgid:", 5) == 0) {
01143 char *tgids = buff + 5;
01144 while (*tgids && isspace(*tgids))
01145 ++tgids;
01146 tgid = comm.pid = atoi(tgids);
01147 }
01148 }
01149
01150 comm.header.type = PERF_RECORD_COMM;
01151 size = align_64bit(size);
01152 comm.header.size = sizeof(comm) - (sizeof(comm.comm) - size);
01153 if (tgid != pid) {
01154
01155 comm.tid = pid;
01156 int num = OP_perf_utils::op_write_output(output_fd, &comm, comm.header.size);
01157 pr->add_to_total(num);
01158 goto out;
01159 }
01160
01161 snprintf(fname, sizeof(fname), "/proc/%d/task", pid);
01162 tids = opendir(fname);
01163 if (tids == NULL) {
01164
01165 ret = -1;
01166 cverb << vrecord << "opendir returned NULL" << endl;
01167 goto out;
01168 }
01169
01170 while (!readdir_r(tids, &dirent, &next) && next) {
01171 char *end;
01172 pid = strtol(dirent.d_name, &end, 10);
01173 if (*end)
01174 continue;
01175
01176 comm.tid = pid;
01177
01178 int num = OP_perf_utils::op_write_output(output_fd, &comm, comm.header.size);
01179 pr->add_to_total(num);
01180 }
01181 closedir(tids);
01182 if (cverb << vrecord)
01183 cout << "Created COMM event for " << comm.comm << endl;
01184
01185 out:
01186 op_record_process_exec_mmaps(pid, tgid, output_fd, pr);
01187
01188 fclose(fp);
01189 if (ret) {
01190 cverb << vrecord << "couldn't get app name and tgid for pid "
01191 << dec << pid << " from /proc fs." << endl;
01192 }
01193 return ret;
01194
01195 }
01196
01197
01198
01199
01200
01201
01202 int OP_perf_utils::op_record_process_info(bool system_wide, pid_t pid, operf_record * pr,
01203 int output_fd)
01204 {
01205 int ret = 0;
01206 if (cverb << vrecord)
01207 cout << "op_record_process_info" << endl;
01208 if (!system_wide) {
01209 ret = _record_one_process_info(pid, system_wide, pr, output_fd);
01210 } else {
01211 char buff[BUFSIZ];
01212 pid_t tgid = 0;
01213 size_t size = 0;
01214 DIR *pids;
01215 struct dirent dirent, *next;
01216
01217 pids = opendir("/proc");
01218 if (pids == NULL) {
01219 cerr << "Unable to open /proc." << endl;
01220 return -1;
01221 }
01222
01223 while (!readdir_r(pids, &dirent, &next) && next) {
01224 char *end;
01225 pid = strtol(dirent.d_name, &end, 10);
01226 if (((errno == ERANGE && (pid == LONG_MAX || pid == LONG_MIN))
01227 || (errno != 0 && pid == 0)) || (end == dirent.d_name)) {
01228 cverb << vmisc << "/proc entry " << dirent.d_name << " is not a PID" << endl;
01229 continue;
01230 }
01231 if ((ret = _record_one_process_info(pid, system_wide, pr, output_fd)) < 0)
01232 break;
01233 }
01234 closedir(pids);
01235 }
01236 return ret;
01237 }
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247 static void _record_module_info(int output_fd, operf_record * pr)
01248 {
01249 const char * fname = "/proc/modules";
01250 FILE *fp;
01251 char * line;
01252 struct operf_kernel_image * image;
01253 int module_size;
01254 char ref_count[32+1];
01255 int ret;
01256 char module_name[256+1];
01257 char live_info[32+1];
01258 char dependencies[4096+1];
01259 unsigned long long start_address;
01260
01261 fp = fopen(fname, "r");
01262 if (fp == NULL) {
01263 cerr << "Error opening /proc/modules. Unable to process module samples" << endl;
01264 cerr << strerror(errno) << endl;
01265 return;
01266 }
01267
01268 while (1) {
01269 struct mmap_event mmap;
01270 size_t size;
01271 memset(&mmap, 0, sizeof(mmap));
01272 mmap.pgoff = 0;
01273 line = op_get_line(fp);
01274
01275 if (!line)
01276 break;
01277
01278 if (line[0] == '\0') {
01279 free(line);
01280 continue;
01281 }
01282
01283 ret = sscanf(line, "%256s %u %32s %4096s %32s %llx",
01284 module_name, &module_size, ref_count,
01285 dependencies, live_info, &start_address);
01286 if (ret != 6) {
01287 cerr << "op_record_kernel_info: Bad /proc/modules entry: \n\t" << line << endl;
01288 free(line);
01289 continue;
01290 }
01291
01292 mmap.header.type = PERF_RECORD_MMAP;
01293 mmap.header.misc = PERF_RECORD_MISC_KERNEL;
01294 size = strlen(module_name) + 1;
01295 strncpy(mmap.filename, module_name, size);
01296 size = align_64bit(size);
01297 mmap.start = start_address;
01298 mmap.len = module_size;
01299 mmap.pid = 0;
01300 mmap.tid = 0;
01301 mmap.header.size = (sizeof(mmap) -
01302 (sizeof(mmap.filename) - size));
01303 int num = OP_perf_utils::op_write_output(output_fd, &mmap, mmap.header.size);
01304 if (cverb << vrecord)
01305 cout << "Created MMAP event for " << module_name << ". Size: "
01306 << module_size << "; start addr: " << start_address << endl;
01307 pr->add_to_total(num);
01308 free(line);
01309 }
01310 fclose(fp);
01311 return;
01312 }
01313
01314 void OP_perf_utils::op_record_kernel_info(string vmlinux_file, u64 start_addr, u64 end_addr,
01315 int output_fd, operf_record * pr)
01316 {
01317 struct mmap_event mmap;
01318 size_t size;
01319 memset(&mmap, 0, sizeof(mmap));
01320 mmap.pgoff = 0;
01321 mmap.header.type = PERF_RECORD_MMAP;
01322 mmap.header.misc = PERF_RECORD_MISC_KERNEL;
01323 if (vmlinux_file.empty()) {
01324 size = strlen( "no_vmlinux") + 1;
01325 strncpy(mmap.filename, "no-vmlinux", size);
01326 mmap.start = 0ULL;
01327 mmap.len = 0ULL;
01328 } else {
01329 size = vmlinux_file.length() + 1;
01330 strncpy(mmap.filename, vmlinux_file.c_str(), size);
01331 mmap.start = start_addr;
01332 mmap.len = end_addr - mmap.start;
01333 }
01334 size = align_64bit(size);
01335 mmap.pid = 0;
01336 mmap.tid = 0;
01337 mmap.header.size = (sizeof(mmap) -
01338 (sizeof(mmap.filename) - size));
01339 int num = op_write_output(output_fd, &mmap, mmap.header.size);
01340 if (cverb << vrecord)
01341 cout << "Created MMAP event of size " << mmap.header.size << " for " <<mmap.filename << ". length: "
01342 << hex << mmap.len << "; start addr: " << mmap.start << endl;
01343 pr->add_to_total(num);
01344 _record_module_info(output_fd, pr);
01345 }
01346
01347 void OP_perf_utils::op_get_kernel_event_data(struct mmap_data *md, operf_record * pr)
01348 {
01349 struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)md->base;
01350 int out_fd = pr->out_fd();
01351
01352 uint64_t head = pc->data_head;
01353
01354
01355 rmb();
01356
01357 uint64_t old = md->prev;
01358 unsigned char *data = ((unsigned char *)md->base) + pagesize;
01359 uint64_t size;
01360 void *buf;
01361 int64_t diff;
01362
01363 diff = head - old;
01364 if (diff < 0) {
01365 throw runtime_error("ERROR: event buffer wrapped, which should NEVER happen.");
01366 }
01367
01368 if (old != head)
01369 sample_reads++;
01370
01371 size = head - old;
01372
01373 if ((old & md->mask) + size != (head & md->mask)) {
01374 buf = &data[old & md->mask];
01375 size = md->mask + 1 - (old & md->mask);
01376 old += size;
01377 pr->add_to_total(op_write_output(out_fd, buf, size));
01378 }
01379
01380 buf = &data[old & md->mask];
01381 size = head - old;
01382 old += size;
01383 pr->add_to_total(op_write_output(out_fd, buf, size));
01384 md->prev = old;
01385 pc->data_tail = old;
01386 }
01387
01388
01389 int OP_perf_utils::op_get_next_online_cpu(DIR * dir, struct dirent *entry)
01390 {
01391 #define OFFLINE 0x30
01392 unsigned int cpu_num;
01393 char cpu_online_pathname[40];
01394 int res;
01395 FILE * online;
01396 again:
01397 do {
01398 entry = readdir(dir);
01399 if (!entry)
01400 return -1;
01401 } while (entry->d_type != DT_DIR);
01402
01403 res = sscanf(entry->d_name, "cpu%u", &cpu_num);
01404 if (res <= 0)
01405 goto again;
01406
01407 errno = 0;
01408 snprintf(cpu_online_pathname, 40, "/sys/devices/system/cpu/cpu%u/online", cpu_num);
01409 if ((online = fopen(cpu_online_pathname, "r")) == NULL) {
01410 cerr << "Unable to open " << cpu_online_pathname << endl;
01411 if (errno)
01412 cerr << strerror(errno) << endl;
01413 return -1;
01414 }
01415 res = fgetc(online);
01416 fclose(online);
01417 if (res == OFFLINE)
01418 goto again;
01419 else
01420 return cpu_num;
01421 }