HPCToolkit
PathFindMgr.hpp
Go to the documentation of this file.
1 // -*-Mode: C++;-*-
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // $HeadURL$
6 // $Id$
7 //
8 // --------------------------------------------------------------------------
9 // Part of HPCToolkit (hpctoolkit.org)
10 //
11 // Information about sources of support for research and development of
12 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
13 // --------------------------------------------------------------------------
14 //
15 // Copyright ((c)) 2002-2019, Rice University
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions are
20 // met:
21 //
22 // * Redistributions of source code must retain the above copyright
23 // notice, this list of conditions and the following disclaimer.
24 //
25 // * Redistributions in binary form must reproduce the above copyright
26 // notice, this list of conditions and the following disclaimer in the
27 // documentation and/or other materials provided with the distribution.
28 //
29 // * Neither the name of Rice University (RICE) nor the names of its
30 // contributors may be used to endorse or promote products derived from
31 // this software without specific prior written permission.
32 //
33 // This software is provided by RICE and contributors "as is" and any
34 // express or implied warranties, including, but not limited to, the
35 // implied warranties of merchantability and fitness for a particular
36 // purpose are disclaimed. In no event shall RICE or contributors be
37 // liable for any direct, indirect, incidental, special, exemplary, or
38 // consequential damages (including, but not limited to, procurement of
39 // substitute goods or services; loss of use, data, or profits; or
40 // business interruption) however caused and on any theory of liability,
41 // whether in contract, strict liability, or tort (including negligence
42 // or otherwise) arising in any way out of the use of this software, even
43 // if advised of the possibility of such damage.
44 //
45 // ******************************************************* EndRiceCopyright *
46 
47 //***************************************************************************
48 //
49 // File:
50 // $HeadURL$
51 //
52 // Purpose:
53 // [The purpose of this file]
54 //
55 // Description:
56 // [The set of functions, macros, etc. defined in the file]
57 //
58 //***************************************************************************
59 
60 #ifndef PathFindMgr_hpp
61 #define PathFindMgr_hpp
62 
63 //************************* System Include Files ****************************
64 
65 #include <iostream>
66 #include <map>
67 #include <set>
68 #include <string>
69 #include <vector>
70 
71 #include <stdint.h>
72 
73 //*************************** User Include Files ****************************
74 
75 #include <include/uint.h>
76 
77 //*************************** Forward Declarations **************************
78 
79 //***************************************************************************
80 // PathFindMgr
81 //***************************************************************************
82 
84 {
85 public:
86  static const int RecursivePathSfxLn = 2;
87 
88 public:
89  PathFindMgr();
90  ~PathFindMgr();
91 
92 
93  static PathFindMgr&
94  singleton();
95 
96 
97  // pathfind - (recursively) search for file 'name' in given
98  // colon-separated (possibly recursive) pathlist. If found,
99  // returns the fully resolved 'real path', otherwise NULL.
100  //
101  // First searches for 'name' in the PathFindMgr::singleton member
102  // 'm_cache'. If that search is unsuccessful and the cache is full,
103  // it then searches for a file named "name" in each directory in the
104  // colon-separated pathlist given as the first argument, and returns
105  // the full pathname to the first occurence that has at least the
106  // mode bits specified by mode. For any 'recursive-path', it
107  // recursively searches all of that paths descendents as well. An
108  // empty path in the pathlist is interpreted as the current
109  // directory. Returns NULL if 'name' is not found.
110  //
111  // A 'recursive-path' is specified by appending a single '*' at the
112  // end of the directory. /home/.../dir/\*
113  //
114  // ** Note: the '*' is escaped with '\' so it does not look like a
115  // C-style comment; in reality it should not be escaped! **
116  //
117  // The following mode bits are understood:
118  // "r" - read access
119  // "w" - write access
120  // "x" - execute access
121  //
122  // FIXME: better not to use a static buffer for the pathfind answer,
123  // but that requires changing the API. (This is a hack to avoid
124  // dealing with malloc/free issues.)
125  //
126  // The returned pointer points to an area that will be reused on subsequent
127  // calls to this function, and must not be freed by the caller.
128  const char*
129  pathfind(const char* pathList, const char* name, const char* mode);
130 
131 
132  // Is this a valid recursive path of the form '.../path/\*' ?
133  static int
134  isRecursivePath(const char* path);
135 
136 
137  // -------------------------------------------------------
138  // debugging
139  // -------------------------------------------------------
140  std::string
141  toString(uint flags = 0) const;
142 
143  std::ostream&
144  dump(std::ostream& os, uint flags = 0, const char* pfx = "") const;
145 
146  void
147  ddump(uint flags = 0) const;
148 
149 
150 private:
151 
152  // Retreives the highest priority and closest matching real path to
153  // "filePath" from 'm_cache', where priority is defined by how close
154  // the real path is to the front of the vector of real paths. If
155  // the file name exists in 'm_cache', but none of the real paths
156  // match it beyond the file name, the first path in the vector will
157  // be returned.
158  //
159  // Notes:
160  // * We cache all files in a recursive seach path such that:
161  // - the path portion of the file is fully resolved
162  // - the filename portion may or may not be a symlink
163  // - with forward-sym links, there can be multiple paths that lead to
164  // the same file. We only cache the real-pathed path + the file
165  //
166  // * Will not go into an infinite loop when symlinks form a loop.
167  //
168  // * Ambiguous case 1:
169  // input: ../../zoo.c
170  // cache for entry 'zoo.c'
171  // 0. p0/p1/zoo.c
172  // 1. p0/p2/zoo.c
173  // 2. p3/p4/zoo.c
174  //
175  // In this case the input matches all three entries. We return
176  // entry 0.
177  //
178  // * Ambiguous case 2:
179  // input: ../p5/zoo.c
180  // cache:
181  // 0. p0/p1/zoo.c
182  // 1. p0/p2/zoo.c
183  // 2. p3/p4/zoo.c
184  //
185  // In this case, the input matches no entry. However, because p5
186  // could be a symlink, it could potentially match one or *or* no
187  // entry. In this case we still return entry 0.
188  //
189  // @param filePath: A partial path to the file we are searching for. Will be
190  // altered to be its associated path.
191  // *note* - 'filePath' can range from a file name to a full path
192  //
193  // @return: A bool indicating whether 'filePath' was found in the list.
194  //
195  bool
196  find(std::string& filePath);
197 
198 
199  // This method adds a file name and its associated real path to
200  // 'm_cache'. Paths are store according to the file it is
201  // associated with. 'path' is not added if it is already in the
202  // vector associated with the file name.
203  //
204  // @param path: The path a file is located at.
205  void
206  insert(const std::string& path);
207 
208 
209  // Scans the directory designated by 'path' and does one of two
210  // things, depending on the value of 'recursionStack'.
211  // - If 'recursionStack' is non-NULL, cache all the files in 'path'
212  // (until the cache is full). If 'path' is recursive, scan
213  // recursively. (Use 'recursionStack' to implement recursion.)
214  // - Otherwise, ignore files and return a (non-recursive)
215  // colon-separated list of all subdirectories within 'path'. Each
216  // path is suffixed with '*' (a recursive path).
217  //
218  // Assumes that path has been real-pathed.
219  //
220  // ** N.B.: This is a confusing conflation of two functions that are
221  // similar; there should have been more abstraction. **
222  //
223  // @param path: The path to the directory whose contents are to
224  // be cached. If it is recursive, a '*' will be
225  // appended at the end.
226  //
227  // @param seenPaths: Set of paths already seen. Used to avoid
228  // cycles caused by symlinks.
229  //
230  // @param resultpathVec: If non-NULL and 'path' is recursive, all
231  // sub-directories of 'path' will be added to
232  // this vector, which is used in a LIFO manner.
233  std::string
234  scan(std::string& path, std::set<std::string>& seenPaths,
235  std::vector<std::string>* recursionStack = NULL);
236 
237 
238  // If the cache is full and a path cannot be found from the cache,
239  // pathfind_slow is called to try to resolve the path. Searches
240  // through all the directories in 'pathList', attempting to find
241  // 'name'. Touches the disk alot, making this a very slow, and last
242  // resort, method. Returns NULL if the file cannot be found.
243  //
244  // @param pathList: List of all the paths to search through. Each
245  // path is separated by a ":".
246  //
247  // @param name: File to be found.
248  //
249  // @param mode: "r" - read access
250  // "w" - write access
251  // "x" - execute access
252  //
253  // @param seenPaths: Set of paths already seen. Used to avoid cycles caused
254  // by symlinks.
255  const char*
256  pathfind_slow(const char* pathList, const char* name, const char* mode,
257  std::set<std::string>& seenPaths);
258 
259 
260  // Resolves all '..' and '.' in 'path' in reference to itself. Does
261  // NOT find the unique real path of 'path'. Returns how many '..'
262  // are left in 'path'. Helps make sure more accurate results are
263  // returned from find().
264  //
265  // Ex: if path = 'src/../../lib/src/./../ex.c' then resolve(path) would turn
266  // path into '../lib/ex.c'.
267  //
268  // @param path: The file path to resolve.
269  // @return: The number of '..' in 'path' after it has been resolved.
270  int
271  resolve(std::string& path);
272 
273 
274 private:
275  typedef std::map<std::string, std::vector<std::string> > PathMap;
276 
277  PathMap m_cache;
278  bool m_isPopulated; // cache has been populated
279  bool m_isFull; // max size has been reached
280 
281  static const uint64_t s_sizeMax = 20 * 1024 * 1024; // default is 20 MB
282  uint64_t m_size;
283 
284  std::string m_pathfind_ans;
285 };
286 
287 #endif
bool find(std::string &filePath)
static int isRecursivePath(const char *path)
static PathFindMgr & singleton()
std::string toString(uint flags=0) const
const char * pathfind(const char *pathList, const char *name, const char *mode)
std::string scan(std::string &path, std::set< std::string > &seenPaths, std::vector< std::string > *recursionStack=NULL)
std::string m_pathfind_ans
std::map< std::string, std::vector< std::string > > PathMap
PathMap m_cache
int resolve(std::string &path)
unsigned int uint
Definition: uint.h:124
uint64_t m_size
void ddump(uint flags=0) const
static const int RecursivePathSfxLn
Definition: PathFindMgr.hpp:86
static const uint64_t s_sizeMax
bool m_isPopulated
#define NULL
Definition: ElfHelper.cpp:85
const char * pathfind_slow(const char *pathList, const char *name, const char *mode, std::set< std::string > &seenPaths)
std::ostream & dump(std::ostream &os, uint flags=0, const char *pfx="") const
void insert(const std::string &path)