HPCToolkit
PathFindMgr.cpp
Go to the documentation of this file.
1 // -*-Mode: C++;-*-
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // $HeadURL$
6 // $Id$
7 //
8 // --------------------------------------------------------------------------
9 // Part of HPCToolkit (hpctoolkit.org)
10 //
11 // Information about sources of support for research and development of
12 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
13 // --------------------------------------------------------------------------
14 //
15 // Copyright ((c)) 2002-2019, Rice University
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions are
20 // met:
21 //
22 // * Redistributions of source code must retain the above copyright
23 // notice, this list of conditions and the following disclaimer.
24 //
25 // * Redistributions in binary form must reproduce the above copyright
26 // notice, this list of conditions and the following disclaimer in the
27 // documentation and/or other materials provided with the distribution.
28 //
29 // * Neither the name of Rice University (RICE) nor the names of its
30 // contributors may be used to endorse or promote products derived from
31 // this software without specific prior written permission.
32 //
33 // This software is provided by RICE and contributors "as is" and any
34 // express or implied warranties, including, but not limited to, the
35 // implied warranties of merchantability and fitness for a particular
36 // purpose are disclaimed. In no event shall RICE or contributors be
37 // liable for any direct, indirect, incidental, special, exemplary, or
38 // consequential damages (including, but not limited to, procurement of
39 // substitute goods or services; loss of use, data, or profits; or
40 // business interruption) however caused and on any theory of liability,
41 // whether in contract, strict liability, or tort (including negligence
42 // or otherwise) arising in any way out of the use of this software, even
43 // if advised of the possibility of such damage.
44 //
45 // ******************************************************* EndRiceCopyright *
46 
47 //***************************************************************************
48 //
49 // File:
50 // $HeadURL$
51 //
52 // Purpose:
53 // [The purpose of this file]
54 //
55 // Description:
56 // [The set of functions, macros, etc. defined in the file]
57 //
58 //***************************************************************************
59 
60 //************************* System Include Files ****************************
61 
62 #include <string>
63 using std::string;
64 
65 #include <cstring>
66 #include <sys/stat.h>
67 #include <dirent.h>
68 
69 //*************************** User Include Files ****************************
70 
71 #include <include/gcc-attr.h>
72 
73 #include "FileUtil.hpp"
74 #include "PathFindMgr.hpp"
75 #include "StrUtil.hpp"
76 
77 #include "diagnostics.h"
78 #include "pathfind.h"
79 #include "realpath.h"
80 
81 //***************************************************************************
82 
83 //***************************************************************************
84 // private operations
85 //***************************************************************************
86 
87 // if there is a ".." path component in a path that is not absolute,
88 // then the path may lie outside the path cache computed. if that is the
89 // case, inform PathFind::pathfind that it should try the slow lookup
90 // method, which looks outside the cache.
91 bool try_slow_lookup_for_relative_paths(const char *path)
92 {
93  bool contains_relative = false;
94 
95  if (path[0] != '/') {
96  contains_relative = strstr(path, "..") != NULL;
97  }
98  return contains_relative;
99 }
100 
101 //***************************************************************************
102 // PathFindMgr
103 //***************************************************************************
104 
106 
108 {
109  m_isPopulated = false;
110  m_isFull = false;
111  m_size = 0;
112 }
113 
114 
116 {
117 }
118 
119 
122 {
123  return s_singleton;
124 }
125 
126 
127 const char*
128 PathFindMgr::pathfind(const char* pathList, const char* name, const char* mode)
129 {
130  // -------------------------------------------------------
131  // 0. Cache files found using 'pathList'
132  // -------------------------------------------------------
133  if (!m_isPopulated) {
134  m_isPopulated = true;
135  std::vector<std::string> pathVec; // will contain all -I paths
136  StrUtil::tokenize_str(std::string(pathList), ":", pathVec);
137 
138  std::set<std::string> seenPaths;
139  std::vector<std::string> recursionStack;
140  while (!m_isFull && !pathVec.empty()) {
141  if (pathVec.back() != ".") { // do not cache within CWD
142  scan(pathVec.back(), seenPaths, &recursionStack);
143  seenPaths.clear();
144  recursionStack.clear();
145  }
146  pathVec.pop_back();
147  }
148  }
149 
150  // -------------------------------------------------------
151  // 1. Resolve 'name' either by pathfind cache or by pathfind_slow
152  // -------------------------------------------------------
153  std::string name_real = name;
154 
155  bool found = find(name_real);
156 
157  if (!found) {
159  std::set<std::string> seenPaths;
160  const char* temp = pathfind_slow(pathList, name, mode, seenPaths);
161  if (temp) {
162  found = true;
163  name_real = temp;
164  }
165  }
166  }
167 
168  // -------------------------------------------------------
169  // 2. Resolve (a) non-real-paths found by pathfind, (b) absolute
170  // paths not found by pathfind() and (c) paths relative to the
171  // current-working-directory.
172  // -------------------------------------------------------
173 
174  // FIXME: static buffer (per object) for pathfind answer
175  m_pathfind_ans = RealPath(name_real.c_str());
176 
177  if (found || m_pathfind_ans[0] == '/') {
178  return m_pathfind_ans.c_str();
179  }
180  else {
181  return NULL; // failure
182  }
183 }
184 
185 
186 const char*
187 PathFindMgr::pathfind_slow(const char* pathList, const char* name,
188  const char* mode,
189  std::set<std::string>& seenPaths)
190 {
191  // -------------------------------------------------------
192  // *. Collect all recursive and non-recursive paths in separate lists
193  // -------------------------------------------------------
194 
195  const char* result = NULL; // 'result' will point to storage in ::pathfind
196 
197  int len = strlen(pathList) + 1;
198  char* myPathList = new char[len];
199  char* pathList_nr = new char[len];
200  char* pathList_r = new char[len];
201  char* saveptr = NULL;
202  strcpy(myPathList, pathList);
203  pathList_nr[0] = '\0';
204  pathList_r[0] = '\0';
205 
206  char* aPath = strtok_r(myPathList, ":", &saveptr);
207  int first_nr = 1;
208  while (aPath != NULL) {
209  if (PathFindMgr::isRecursivePath(aPath)) {
210  // copy to recursive path list (Do not copy trailing '/*' )
211  int l = strlen(aPath);
212  strncat(pathList_r, aPath, l - RecursivePathSfxLn);
213  strcat(pathList_r, ":"); // will have a trailing ':' for 'strchr'
214  }
215  else {
216  // copy to non-recurisve path list
217  if (first_nr == 0) { strcat(pathList_nr, ":"); }
218  strcat(pathList_nr, aPath);
219  first_nr = 0; // the first copy has been made
220  }
221 
222  aPath = strtok_r((char*)NULL, ":", &saveptr);
223  }
224  delete[] myPathList;
225 
226  // -------------------------------------------------------
227  // 1. Try a ::pathfind on all non-recursive paths
228  // -------------------------------------------------------
229  char* sep; // pre-declaration because of 'goto'
230  result = ::pathfind(pathList_nr, name, mode);
231  if (result) {
232  goto fini;
233  }
234 
235  // -------------------------------------------------------
236  // 2. Try a pathfind on all recursive paths
237  // -------------------------------------------------------
238  // For every recursive path... (cannot use 'strtok' because of recursion)
239  aPath = pathList_r; // beginning of token string
240  sep = strchr(aPath, ':'); // end of token
241  while (sep != NULL) {
242  *sep = '\0'; // now 'aPath' points to only the current token
243 
244  // 2a. Do a pathfind in this directory
245  result = ::pathfind(aPath, name, mode);
246  if (result) {
247  goto fini;
248  }
249 
250  // 2b. If no match, open the directory and do a pathfind
251  // on every sub-directory
252  std::string myPath = aPath;
253  std::string dirPathList = scan(myPath, seenPaths);
254 
255  if (!dirPathList.empty()) {
256  result = pathfind_slow(dirPathList.c_str(), name, mode, seenPaths);
257  if (result) {
258  goto fini;
259  }
260  }
261 
262  *sep = ':'; // restore full token string
263  aPath = sep + 1; // points to beginning of next token or '\0'
264  sep = strchr(aPath, ':'); // points to end of next token or NULL
265  }
266 
267  fini:
268  delete[] pathList_nr;
269  delete[] pathList_r;
270  return result;
271 }
272 
273 
274 bool
275 PathFindMgr::find(std::string& pathNm)
276 {
277  std::string fileName = FileUtil::basename(pathNm);
278  PathMap::iterator it = m_cache.find(fileName);
279 
280  if (it != m_cache.end()) {
281  int levelsDeep = resolve(pathNm); // min depth a path must be
282  const std::vector<std::string>& pathVec = it->second;
283 
284  // -----------------------------------------------------
285  // short-circuit if only one result is possible
286  // -----------------------------------------------------
287  if ((pathNm.find_first_of('/') == pathNm.npos) // only filename given
288  || (pathVec.size() == 1)) { // only 1 string in pathVec
289  pathNm = pathVec[0];
290  return true;
291  }
292 
293  // -----------------------------------------------------
294  // general case
295  // -----------------------------------------------------
296  std::string toReturn;
297  int comparisonDepth = 0;
298  std::vector<std::string>::const_iterator it1;
299 
300  for (it1 = pathVec.begin(); it1 != pathVec.end(); it1++) {
301  const std::string& currentPath = *it1;
302 
303  if (currentPath == toReturn) {
304  continue;
305  }
306 
307  size_t cTrailing = currentPath.length(); //trailing index
308  // cIn points to first char after last '/'
309  size_t cIn = currentPath.find_last_of("/") + 1;
310 
311  // these number will be same for all iterations, consider caching.
312  size_t fpTrailing = pathNm.length();
313  size_t fpIn = pathNm.find_last_of("/") + 1;
314 
315  int level = -1; // since the filename will always match
316  int totalLevels = 0; // total levels in currentPath
317  bool loopedOnce = false;
318  while (cIn < cTrailing && cTrailing != currentPath.npos) {
319  // checks how deep the 2 strings are congruent
320  // also counts how many levels currentPath is
321  if (!loopedOnce) {
322  std::string comp1 = currentPath.substr(cIn, cTrailing - cIn);
323  std::string comp2 = pathNm.substr(fpIn, fpTrailing - fpIn);
324 
325  if (comp1 == comp2) {
326  level++;
327  // b/c the fp vars change here, comp2 only changes once
328  // the segments are equal
329  fpTrailing = fpIn - 1;
330  fpIn = pathNm.find_last_of("/", fpTrailing - 1) + 1;
331 
332  if (fpIn >= fpTrailing || fpTrailing == pathNm.npos) {
333  loopedOnce = true;
334  }
335  }
336  }
337 
338  cTrailing = cIn - 1; //points to previous '/'
339  // cIn points to first char after next '/'
340  cIn = currentPath.find_last_of("/", cTrailing - 1) + 1;
341 
342  totalLevels++;
343  }
344 
345  // only allow toReturn to change if level is greater than the current
346  // comparisonDepth and if there are enough levels in currentPath to
347  // satisfy levelsDeep
348  if (level > comparisonDepth && totalLevels >= levelsDeep) {
349  comparisonDepth = level;
350  toReturn = currentPath;
351  }
352  }
353 
354  pathNm = toReturn;
355 
356  if (pathNm.empty()) {
357  pathNm = pathVec[0];
358  }
359 
360  return true;
361  }
362 
363  return false;
364 }
365 
366 
367 void
368 PathFindMgr::insert(const std::string& path)
369 {
370  std::string fnm = FileUtil::basename(path);
371 
372  // -------------------------------------------------------
373  // Insert <fnm, path> in m_cache if there is still space
374  // -------------------------------------------------------
375  if (m_size < s_sizeMax) {
376  PathMap::iterator it = m_cache.find(fnm);
377  if (it == m_cache.end()) {
378  // 0. There is no entry for 'fnm'
379  std::vector<std::string> pathVec;
380  pathVec.push_back(path);
381  m_cache.insert(std::make_pair(fnm, pathVec));
382 
383  m_size += sizeof(pathVec);
384  m_size += fnm.size() + 1 + path.size() + 1;
385  }
386  else {
387  // 1. There is already an entry for 'fnm'
388  std::vector<std::string>& pathVec = it->second;
389 
390  for (std::vector<std::string>::const_iterator it1 = pathVec.begin();
391  it1 != pathVec.end(); ++it1) {
392  const std::string& x = *it1;
393  if (x == path) {
394  return;
395  }
396  }
397  pathVec.push_back(path);
398 
399  m_size += path.size() + 1;
400  }
401  }
402  else {
403  // Not smart caching. We are not expecting there to be more data
404  // than can fit in the cache, but in case there is, this will
405  // prevent the program from crashing.
406  DIAG_WMsgIf(m_size >= s_sizeMax,
407  "PathFindMgr::insert(): cache size limit reached");
408  m_isFull = true;
409  }
410 }
411 
412 
413 std::string
414 PathFindMgr::scan(std::string& path, std::set<std::string>& seenPaths,
415  std::vector<std::string>* recursionStack)
416 {
417  bool doCacheFiles = (recursionStack != NULL);
418 
419  bool doRecursiveScan = isRecursivePath(path.c_str());
420  if (doRecursiveScan) {
421  path = path.substr(0, path.length() - RecursivePathSfxLn);
422  }
423 
424  std::string localPaths;
425 
426  if (path.empty()) {
427  return localPaths;
428  }
429 
430  // -------------------------------------------------------
431  // ensure we have not already seen 'path' (e.g., through a symlink)
432  // -------------------------------------------------------
433 
434  // 0. ensure we are using a canonical path by realpath-ing (a) the
435  // root path and (b) symlink directories (below)
436  if ((recursionStack && recursionStack->empty())
437  || !recursionStack) {
438  path = RealPath(path.c_str());
439  }
440 
441  // 1. check for a past occurance
442  std::set<std::string>::iterator it = seenPaths.find(path);
443  if (it != seenPaths.end()) {
444  return localPaths;
445  }
446 
447  seenPaths.insert(path);
448 
449 
450  // -------------------------------------------------------
451  // Scan 'path'
452  // -------------------------------------------------------
453  DIR* dir = opendir(path.c_str());
454  if (!dir) {
455  return localPaths;
456  }
457 
458  bool isFirstDir = true;
459  struct dirent* x;
460  while ( (x = readdir(dir)) ) {
461  // skip "." and ".."
462  if (strcmp(x->d_name, ".") == 0 || strcmp(x->d_name, "..") == 0) {
463  continue;
464  }
465 
466  std::string x_fnm = path + "/" + x->d_name;
467 
468  // --------------------------------------------------
469  // compute type of 'x_fnm'
470  // --------------------------------------------------
471  unsigned char x_type = DT_UNKNOWN;
472 #if defined(_DIRENT_HAVE_D_TYPE)
473  x_type = x->d_type;
474 #endif
475 
476  // Even if 'd_type' is available, it may be bogus. Try stat().
477  if (x_type == DT_UNKNOWN) {
478  struct stat statbuf;
479  int ret = lstat(x_fnm.c_str(), &statbuf); // do not follow symlinks!
480  if (ret != 0) {
481  continue; // error
482  }
483 
484  if (S_ISLNK(statbuf.st_mode)) {
485  x_type = DT_LNK; // S_IFLNK
486  }
487  else if (S_ISREG(statbuf.st_mode)) {
488  x_type = DT_REG; // S_IFREG
489  }
490  else if (S_ISDIR(statbuf.st_mode)) {
491  x_type = DT_DIR; // S_IFDIR
492  }
493  }
494 
495  // --------------------------------------------------
496  // special case: resolve symlink to regular file or directory
497  // --------------------------------------------------
498  if (x_type == DT_LNK) {
499  struct stat statbuf;
500  int ret = stat(x_fnm.c_str(), &statbuf); // 'stat' resolves symlinks
501  if (ret != 0) {
502  continue; // error
503  }
504 
505  if (S_ISREG(statbuf.st_mode)) {
506  x_type = DT_REG;
507  }
508  else if (S_ISDIR(statbuf.st_mode)) {
509  x_type = DT_DIR;
510  x_fnm = RealPath(x_fnm.c_str());
511  if (seenPaths.find(x_fnm) != seenPaths.end()) {
512  continue; // avoid cycles
513  }
514  }
515  }
516 
517  // --------------------------------------------------
518  // case 1: regular file
519  // --------------------------------------------------
520  if (x_type == DT_REG) {
521  if (doCacheFiles && !m_isFull) {
522  insert(x_fnm);
523  }
524  }
525 
526  // --------------------------------------------------
527  // case 2: directory
528  // --------------------------------------------------
529  else if (x_type == DT_DIR) {
530  if (recursionStack) {
531  if (doRecursiveScan) {
532  x_fnm += "/*";
533  recursionStack->push_back(x_fnm);
534  }
535  }
536  else {
537  x_fnm += "/*";
538  if (!isFirstDir) {
539  localPaths += ":";
540  }
541  localPaths += x_fnm;
542  isFirstDir = false;
543  }
544  }
545  }
546  closedir(dir);
547 
548  if (recursionStack && !recursionStack->empty()) {
549  std::string nextPath = recursionStack->back();
550  recursionStack->pop_back();
551  scan(nextPath, seenPaths, recursionStack);
552  }
553 
554  return localPaths;
555 }
556 
557 
558 int
559 PathFindMgr::resolve(std::string& path)
560 {
561  if (path[0] == '/') { // path in resolved form
562  return 0;
563  }
564 
565  std::string result;
566  int trailing = path.length();
567  int in = path.find_last_of("/") + 1; // add 1 to move past '/'
568  int levelsBack = 0;
569 
570  while (trailing != -1) {
571  std::string section = path.substr(in, trailing - in + 1 );
572 
573  if (section == "../") { // don't include it in result yet.
574  levelsBack++;
575  }
576  else if (section != "./") { // here, section is some directory/file name
577  if (levelsBack == 0) {
578  result = section + result; // append section to the beginning
579  }
580  else { // here, have encountered at least 1 ".." so don't include section
581  levelsBack--; // since we didnt include section, we went back a level
582  }
583  }
584 
585  trailing = in - 1;
586  in = path.find_last_of("/", trailing - 1) + 1;
587  }
588 
589  if (!result.empty()) {
590  path = result;
591  }
592  return levelsBack;
593 }
594 
595 
596 //-----------------------------------------------------------
597 // PathFindMgr::isRecursivePath
598 //
599 // Description
600 // If the last two characters on a path are '/*' or '/+'
601 // then treat this as a path that needs to be recursively
602 // expanded. The '+' was added as a superior alternative to
603 // '*' because it sidesteps problems with quoting '*' to avoid
604 // interaction with shell expansion when wrapper scripts
605 // are employed.
606 //
607 // Modification history:
608 // 2012/03/02 - johnmc
609 //-----------------------------------------------------------
610 int
612 {
613  int l = strlen(path);
615  (path[l - 1] == '*' || path[l - 1] == '+') &&
616  path[l - 2] == '/') {
617  return 1;
618  }
619  return 0;
620 }
621 
622 
623 //***************************************************************************
624 
625 string
627 {
628  std::ostringstream os;
629  dump(os, flags);
630  return os.str();
631 }
632 
633 
634 std::ostream&
635 PathFindMgr::dump(std::ostream& os, uint GCC_ATTR_UNUSED flags,
636  const char* pfx) const
637 {
638  using std::endl;
639 
640  os << pfx << "[ PathFindMgr: " << endl
641  << pfx << " isPopulated: " << m_isPopulated << endl
642  << pfx << " isFull: " << m_isFull << endl
643  << pfx << " size: " << m_size << " (max: " << s_sizeMax << ")" << endl;
644  for (PathMap::const_iterator it = m_cache.begin();
645  it != m_cache.end(); ++it) {
646  const string& x = it->first;
647  const std::vector<string>& y = it->second;
648 
649  os << pfx << " " << x << " => {";
650  for (size_t i = 0; i < y.size(); ++i) {
651  if (i != 0) {
652  os << ", ";
653  }
654  os << y[i];
655  }
656  os << "}" << endl;
657  }
658  os << pfx << "]" << endl;
659  return os;
660 }
661 
662 
663 void
665 {
666  dump(std::cerr, flags);
667 }
bool find(std::string &filePath)
static int isRecursivePath(const char *path)
static PathFindMgr & singleton()
std::string toString(uint flags=0) const
const char * pathfind(const char *pathList, const char *name, const char *mode)
std::string scan(std::string &path, std::set< std::string > &seenPaths, std::vector< std::string > *recursionStack=NULL)
std::string m_pathfind_ans
PathMap m_cache
static PathFindMgr s_singleton
void tokenize_str(const std::string &tokenstr, const char *delim, std::vector< std::string > &tokenvec)
Definition: StrUtil.cpp:122
int resolve(std::string &path)
unsigned int uint
Definition: uint.h:124
uint64_t m_size
void ddump(uint flags=0) const
static const int RecursivePathSfxLn
Definition: PathFindMgr.hpp:86
bool found
Definition: cct.c:129
static const uint64_t s_sizeMax
bool m_isPopulated
const char * RealPath(const char *nm)
Definition: realpath.c:69
bool try_slow_lookup_for_relative_paths(const char *path)
Definition: PathFindMgr.cpp:91
#define NULL
Definition: ElfHelper.cpp:85
const char * pathfind_slow(const char *pathList, const char *name, const char *mode, std::set< std::string > &seenPaths)
string basename(const char *fName)
Definition: FileUtil.cpp:90
std::ostream & dump(std::ostream &os, uint flags=0, const char *pfx="") const
#define GCC_ATTR_UNUSED
Definition: gcc-attr.h:80
void insert(const std::string &path)