extractExternal.cpp

Go to the documentation of this file.
00001 /*
00002  * extractExternal.cpp
00003  * $Revision: 42181 $
00004  * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $
00005  */
00006 
00007 /* <copyright>
00008     Copyright (c) 2006-2013 Intel Corporation.  All Rights Reserved.
00009 
00010     Redistribution and use in source and binary forms, with or without
00011     modification, are permitted provided that the following conditions
00012     are met:
00013 
00014       * Redistributions of source code must retain the above copyright
00015         notice, this list of conditions and the following disclaimer.
00016       * Redistributions in binary form must reproduce the above copyright
00017         notice, this list of conditions and the following disclaimer in the
00018         documentation and/or other materials provided with the distribution.
00019       * Neither the name of Intel Corporation nor the names of its
00020         contributors may be used to endorse or promote products derived
00021         from this software without specific prior written permission.
00022 
00023     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00024     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00025     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00026     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00027     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00028     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00029     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00030     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00031     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00032     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00033     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00034 
00035 
00036 ------------------------------------------------------------------------
00037 
00038     Portions of this software are protected under the following patents:
00039         U.S. Patent 5,812,852
00040         U.S. Patent 6,792,599
00041         U.S. Patent 7,069,556
00042         U.S. Patent 7,328,433
00043         U.S. Patent 7,500,242
00044 
00045 </copyright> */
00046 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <strstream>
#include <fstream>
#include <string>
#include <set>
#include <map>
#include <utility>
00054 
00055 /* Given a set of n object files h ('external' object files) and a set of m
00056    object files o ('internal' object files),
00057    1. Determines r, the subset of h that o depends on, directly or indirectly
00058    2. Removes the files in h - r from the file system
00059    3. For each external symbol defined in some file in r, rename it in r U o
00060       by prefixing it with "__kmp_external_"
00061    Usage:
00062    hide.exe <n> <filenames for h> <filenames for o>
00063 
00064    Thus, the prefixed symbols become hidden in the sense that they now have a special
00065    prefix.
00066 */
00067 
00068 using namespace std;
00069 
/* Prints <errorMsg> followed by a newline to standard output and terminates
   the process with exit status 1.  Does not return.
   The parameter is a pointer to const so that string literals, which is what
   every caller in this file passes, can be supplied without a deprecated
   (and since C++11 ill-formed) conversion to char*.
*/
void stop(const char* errorMsg) {
    printf("%s\n", errorMsg);
    exit(1);
}
00074 
// An entry in the symbol table of a .OBJ file.
// Entries are read from and written to object files as the first 18 bytes of
// this object, so the members must stay in this order; each one falls on its
// natural alignment, which leaves no padding inside those 18 bytes.
class Symbol {
public:
    // 8 bytes: either the name itself (zero padded), or a zero 32-bit word
    // followed by a 32-bit offset into the string table.
    // 'long long' is the portable spelling of the MSVC-only '__int64'.
    long long name;
    unsigned value;
    // sectionNum == 0 marks a symbol that is not defined in this file
    unsigned short sectionNum, type;
    // storage class 2 marks an extern symbol
    unsigned char storageClass;
    // Number of auxiliary entries that follow this entry.  Unsigned so that
    // counts above 127 are not turned into negative skip distances on
    // platforms where plain char is signed.
    unsigned char nAux;
};
00083 
/* Base of rstream: an input stream over a memory buffer that also takes
   ownership of that buffer.  The buffer is handed over as a (pointer, size)
   pair and is released with delete[] when the stream is destroyed.
*/
class _rstream : public std::istrstream {
private:
    // start of the buffer the stream reads from; freed in the destructor
    const char *buf;
protected:
    _rstream(std::pair<const char*, std::streamsize> content)
        : std::istrstream(content.first, content.second),
          buf(content.first)
    {
    }
    ~_rstream() {
        delete[] buf;
    }
};
00093 
00094 /* A stream encapuslating the content of a file or the content of a string, overriding the
00095    >> operator to read various integer types in binary form, as well as a symbol table
00096    entry.
00097 */
00098 class rstream : public _rstream {
00099 private:
00100     template<class T>
00101     inline rstream& doRead(T &x) {
00102     read((char*)&x, sizeof(T));
00103     return *this;
00104     }
00105     static pair<const char*, streamsize> getBuf(const char *fileName) {
00106     ifstream raw(fileName,ios::binary | ios::in);
00107     if(!raw.is_open())
00108         stop("rstream.getBuf: Error opening file");
00109     raw.seekg(0,ios::end);
00110     streampos fileSize = raw.tellg();
00111     if(fileSize < 0)
00112         stop("rstream.getBuf: Error reading file");
00113     char *buf = new char[fileSize];
00114     raw.seekg(0,ios::beg);
00115     raw.read(buf, fileSize);
00116     return pair<const char*, streamsize>(buf,fileSize);
00117     }
00118 public:
00119     // construct from a string
00120     rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
00121     /* construct from a file whole content is fully read once to initialize the content of
00122        this stream
00123     */
00124     rstream(const char *fileName):_rstream(getBuf(fileName)){}
00125     rstream& operator>>(int &x) {
00126     return doRead(x);
00127     }
00128     rstream& operator>>(unsigned &x) {
00129     return doRead(x);
00130     }
00131     rstream& operator>>(short &x) {
00132     return doRead(x);
00133     }
00134     rstream& operator>>(unsigned short &x) {
00135     return doRead(x);
00136     }
00137     rstream& operator>>(Symbol &e) {
00138     read((char*)&e, 18);
00139     return *this;
00140     }
00141 };
00142 
00143 // string table in a .OBJ file
00144 class StringTable {
00145 private:
00146     map<string, unsigned> directory;
00147     size_t length;
00148     char *data;
00149 
00150     // make <directory> from <length> bytes in <data>
00151     void makeDirectory(void) {
00152     unsigned i = 4;
00153     while(i < length) {
00154         string s = string(data + i);
00155         directory.insert(make_pair(s, i));
00156         i += s.size() + 1;
00157     }
00158     }
00159     // initialize <length> and <data> with contents specified by the arguments
00160     void init(const char *_data) {
00161     unsigned _length = *(unsigned*)_data;
00162 
00163     if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
00164         stop("StringTable.init: Invalid symbol table");
00165     if(_data[_length - 1]) {
00166         // to prevent runaway strings, make sure the data ends with a zero
00167         data = new char[length = _length + 1];
00168         data[_length] = 0;
00169     } else {
00170         data = new char[length = _length];
00171     }
00172     *(unsigned*)data = length;
00173     memcpy(data + sizeof(unsigned), _data + sizeof(unsigned),
00174            length - sizeof(unsigned));
00175     makeDirectory();
00176     }
00177 public:
00178     StringTable(rstream &f) {
00179     /* Construct string table by reading from f.
00180      */
00181     streampos s;
00182     unsigned strSize;
00183     char *strData;
00184 
00185     s = f.tellg();
00186     f>>strSize;
00187     if(strSize < sizeof(unsigned))
00188         stop("StringTable: Invalid string table");
00189     strData = new char[strSize];
00190     *(unsigned*)strData = strSize;
00191     // read the raw data into <strData>
00192     f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
00193     s = f.tellg() - s;
00194     if(s < strSize)
00195         stop("StringTable: Unexpected EOF");
00196     init(strData);
00197     delete[]strData;
00198     }
00199     StringTable(const set<string> &strings) {
00200     /* Construct string table from given strings.
00201      */
00202     char *p;
00203     set<string>::const_iterator it;
00204     size_t s;
00205 
00206     // count required size for data
00207     for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
00208         size_t l = (*it).size();
00209 
00210         if(l > (unsigned) 0xFFFFFFFF)
00211         stop("StringTable: String too long");
00212         if(l > 8) {
00213         length += l + 1;
00214         if(length > (unsigned) 0xFFFFFFFF)
00215             stop("StringTable: Symbol table too long");
00216         }
00217     }
00218     data = new char[length];
00219     *(unsigned*)data = length;
00220     // populate data and directory
00221     for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
00222         const string &str = *it;
00223         size_t l = str.size();
00224         if(l > 8) {
00225         directory.insert(make_pair(str, p - data));
00226         memcpy(p, str.c_str(), l);
00227         p[l] = 0;
00228         p += l + 1;
00229         }
00230     }
00231     }
00232     ~StringTable() {
00233     delete[] data;
00234     }
00235     /* Returns encoding for given string based on this string table.
00236        Error if string length is greater than 8 but string is not in
00237        the string table--returns 0.
00238     */
00239     __int64 encode(const string &str) {
00240     __int64 r;
00241 
00242     if(str.size() <= 8) {
00243         // encoded directly
00244         ((char*)&r)[7] = 0;
00245         strncpy((char*)&r, str.c_str(), 8);
00246         return r;
00247     } else {
00248         // represented as index into table
00249         map<string,unsigned>::const_iterator it = directory.find(str);
00250         if(it == directory.end())
00251         stop("StringTable::encode: String now found in string table");
00252         ((unsigned*)&r)[0] = 0;
00253         ((unsigned*)&r)[1] = (*it).second;
00254         return r;
00255     }
00256     }
00257     /* Returns string represented by x based on this string table.
00258        Error if x references an invalid position in the table--returns
00259        the empty string.
00260     */
00261     string decode(__int64 x) const {
00262     if(*(unsigned*)&x == 0) {
00263         // represented as index into table
00264         unsigned &p = ((unsigned*)&x)[1];
00265         if(p >= length)
00266         stop("StringTable::decode: Invalid string table lookup");
00267         return string(data + p);
00268     } else {
00269         // encoded directly
00270         char *p = (char*)&x;
00271         int i;
00272 
00273         for(i = 0; i < 8 && p[i]; ++i);
00274         return string(p, i);
00275     }
00276     }
00277     void write(ostream &os) {
00278     os.write(data, length);
00279     }
00280 };
00281 
00282 /* for the named object file, determines the set of defined symbols and the set of undefined external symbols
00283    and writes them to <defined> and <undefined> respectively
00284 */
00285 void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
00286     streampos fileSize;
00287     size_t strTabStart;
00288     unsigned symTabStart, symNEntries;
00289     rstream f(fileName);
00290 
00291     f.seekg(0,ios::end);
00292     fileSize = f.tellg();
00293 
00294     f.seekg(8);
00295     f >> symTabStart >> symNEntries;
00296     // seek to the string table
00297     f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
00298     if(f.eof()) {
00299     printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
00300            fileName, (unsigned long) fileSize, symTabStart, symNEntries);
00301     stop("computeExternalSymbols: Unexpected EOF 1");
00302     }
00303     StringTable stringTable(f); // read the string table
00304     if(f.tellg() != fileSize)
00305     stop("computeExternalSymbols: Unexpected data after string table");
00306 
00307     f.clear();
00308     f.seekg(symTabStart); // seek to the symbol table
00309 
00310     defined->clear(); undefined->clear();
00311     for(int i = 0; i < symNEntries; ++i) {
00312     // process each entry
00313     Symbol e;
00314 
00315     if(f.eof())
00316         stop("computeExternalSymbols: Unexpected EOF 2");
00317     f>>e;
00318     if(f.fail())
00319         stop("computeExternalSymbols: File read error");
00320     if(e.nAux) { // auxiliary entry: skip
00321         f.seekg(e.nAux * 18, ios::cur);
00322         i += e.nAux;
00323     }
00324     // if symbol is extern and defined in the current file, insert it
00325     if(e.storageClass == 2)
00326         if(e.sectionNum)
00327         defined->insert(stringTable.decode(e.name));
00328         else
00329         undefined->insert(stringTable.decode(e.name));
00330     }
00331 }
00332 
00333 /* For each occurence of an external symbol in the object file named by
00334    by <fileName> that is a member of <hide>, renames it by prefixing
00335    with "__kmp_external_", writing back the file in-place
00336 */
00337 void hideSymbols(char *fileName, const set<string> &hide) {
00338     static const string prefix("__kmp_external_");
00339     set<string> strings; // set of all occurring symbols, appropriately prefixed
00340     streampos fileSize;
00341     size_t strTabStart;
00342     unsigned symTabStart, symNEntries;
00343     int i;
00344     rstream in(fileName);
00345 
00346     in.seekg(0,ios::end);
00347     fileSize = in.tellg();
00348 
00349     in.seekg(8);
00350     in >> symTabStart >> symNEntries;
00351     in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
00352     if(in.eof())
00353     stop("hideSymbols: Unexpected EOF");
00354     StringTable stringTableOld(in); // read original string table
00355 
00356     if(in.tellg() != fileSize)
00357     stop("hideSymbols: Unexpected data after string table");
00358 
00359     // compute set of occurring strings with prefix added
00360     for(i = 0; i < symNEntries; ++i) {
00361     Symbol e;
00362 
00363     in.seekg(symTabStart + i * 18);
00364     if(in.eof())
00365         stop("hideSymbols: Unexpected EOF");
00366     in >> e;
00367     if(in.fail())
00368         stop("hideSymbols: File read error");
00369     if(e.nAux)
00370         i += e.nAux;
00371     const string &s = stringTableOld.decode(e.name);
00372     // if symbol is extern and found in <hide>, prefix and insert into strings,
00373     // otherwise, just insert into strings without prefix
00374     strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
00375             prefix + s : s);
00376     }
00377 
00378     ofstream out(fileName, ios::trunc | ios::out | ios::binary);
00379     if(!out.is_open())
00380     stop("hideSymbols: Error opening output file");
00381 
00382     // make new string table from string set
00383     StringTable stringTableNew = StringTable(strings);
00384 
00385     // copy input file to output file up to just before the symbol table
00386     in.seekg(0);
00387     char *buf = new char[symTabStart];
00388     in.read(buf, symTabStart);
00389     out.write(buf, symTabStart);
00390     delete []buf;
00391 
00392     // copy input symbol table to output symbol table with name translation
00393     for(i = 0; i < symNEntries; ++i) {
00394     Symbol e;
00395 
00396     in.seekg(symTabStart + i*18);
00397     if(in.eof())
00398         stop("hideSymbols: Unexpected EOF");
00399     in >> e;
00400     if(in.fail())
00401         stop("hideSymbols: File read error");
00402     const string &s = stringTableOld.decode(e.name);
00403     out.seekp(symTabStart + i*18);
00404     e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
00405                     prefix + s : s);
00406     out.write((char*)&e, 18);
00407     if(out.fail())
00408         stop("hideSymbols: File write error");
00409     if(e.nAux) {
00410         // copy auxiliary symbol table entries
00411         int nAux = e.nAux;
00412         for(int j = 1; j <= nAux; ++j) {
00413         in >> e;
00414         out.seekp(symTabStart + (i + j) * 18);
00415         out.write((char*)&e, 18);
00416         }
00417         i += nAux;
00418     }
00419     }
00420     // output string table
00421     stringTableNew.write(out);
00422 }
00423 
// Returns true iff <a> and <b> have no common element.
// Both sets are walked once in ascending order, always advancing the side that
// holds the smaller element, so only operator< of T is needed.
template <class T>
bool isDisjoint(const std::set<T> &a, const std::set<T> &b) {
    // 'typename' is required: the iterator type depends on the template parameter
    typename std::set<T>::const_iterator ita = a.begin();
    typename std::set<T>::const_iterator itb = b.begin();

    while(ita != a.end() && itb != b.end()) {
        if(*ita < *itb)
            ++ita;
        else if(*itb < *ita)
            ++itb;
        else
            return false; // neither is smaller: common element
    }
    return true;
}
00440 
00441 /* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
00442    <nTotal> >= <nExternal>.  The first <nExternal> elements correspond to the external object
00443    files and the rest correspond to the internal object files.
00444    postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
00445    disjoint.  Returns the transitive closure of the set of internal object files, as a set of
00446    file indexes, under the 'depends on' relation, minus the set of internal object files.
00447 */
00448 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
00449     set<int> *required = new set<int>;
00450     set<int> fresh[2];
00451     int i, cur = 0;
00452     bool changed;
00453 
00454     for(i = nTotal - 1; i >= nExternal; --i)
00455     fresh[cur].insert(i);
00456     do {
00457     changed = false;
00458     for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
00459         set<string> &s = undefined[*it];
00460 
00461         for(i = 0; i < nExternal; ++i) {
00462         if(required->find(i) == required->end()) {
00463             if(!isDisjoint(defined[i], s)) {
00464             // found a new qualifying element
00465             required->insert(i);
00466             fresh[1 - cur].insert(i);
00467             changed = true;
00468             }
00469         }
00470         }
00471     }
00472     fresh[cur].clear();
00473     cur = 1 - cur;
00474     } while(changed);
00475     return required;
00476 }
00477 
00478 int main(int argc, char **argv) {
00479     int nExternal, nInternal, i;
00480     set<string> *defined, *undefined;
00481     set<int>::iterator it;
00482 
00483     if(argc < 3)
00484     stop("Please specify a positive integer followed by a list of object filenames");
00485     nExternal = atoi(argv[1]);
00486     if(nExternal <= 0)
00487     stop("Please specify a positive integer followed by a list of object filenames");
00488     if(nExternal +  2 > argc)
00489     stop("Too few external objects");
00490     nInternal = argc - nExternal - 2;
00491     defined = new set<string>[argc - 2];
00492     undefined = new set<string>[argc - 2];
00493 
00494     // determine the set of defined and undefined external symbols
00495     for(i = 2; i < argc; ++i)
00496     computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
00497 
00498     // determine the set of required external files
00499     set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
00500     set<string> hide;
00501 
00502     /* determine the set of symbols to hide--namely defined external symbols of the
00503        required external files
00504     */
00505     for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
00506     int idx = *it;
00507     set<string>::iterator it2;
00508     /* We have to insert one element at a time instead of inserting a range because
00509        the insert member function taking a range doesn't exist on Windows* OS, at least
00510        at the time of this writing.
00511     */
00512     for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
00513         hide.insert(*it2);
00514     }
00515 
00516     /* process the external files--removing those that are not required and hiding
00517        the appropriate symbols in the others
00518     */
00519     for(i = 0; i < nExternal; ++i)
00520     if(requiredExternal->find(i) != requiredExternal->end())
00521         hideSymbols(argv[2 + i], hide);
00522     else
00523         remove(argv[2 + i]);
00524     // hide the appropriate symbols in the internal files
00525     for(i = nExternal + 2; i < argc; ++i)
00526     hideSymbols(argv[i], hide);
00527     return 0;
00528 }

Generated on 25 Aug 2013 for libomp_oss by  doxygen 1.6.1