kmp_affinity.cpp

/*
 * kmp_affinity.cpp -- affinity management
 * $Revision: 42195 $
 * $Date: 2013-03-27 16:10:35 -0500 (Wed, 27 Mar 2013) $
 */

/* <copyright>
    Copyright (c) 1997-2013 Intel Corporation.  All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


------------------------------------------------------------------------

    Portions of this software are protected under the following patents:
        U.S. Patent 5,812,852
        U.S. Patent 6,792,599
        U.S. Patent 7,069,556
        U.S. Patent 7,328,433
        U.S. Patent 7,500,242

</copyright> */

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"


#if KMP_OS_WINDOWS || KMP_OS_LINUX

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        sprintf(scan, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    sprintf(scan, "{%d", i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow.  A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print, for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        sprintf(scan, ",%-d", i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        sprintf(scan, ",...");
        while (*scan != '\0') scan++;
    }
    sprintf(scan, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
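
//
// Illustrative sketch (editor's addition, compiled out): the routine above
// produces a brace-delimited, comma-separated list, e.g. "{0,1,2,3}" for a
// mask with bits 0-3 set, "{<empty>}" for an empty mask, and a trailing
// ",...}" when the buffer budget runs out.  A minimal stand-alone model of
// the same budgeting logic, using a hypothetical plain byte-array bitset
// instead of kmp_affin_mask_t:
//
#if 0
#include <stdio.h>

static void example_print_bits(char *buf, int buf_len,
                               const unsigned char *bits, int nbits) {
    char *scan = buf;
    char *end = buf + buf_len - 1;                  // reserve room for '\0'
    int i;
    for (i = 0; i < nbits; i++)
        if (bits[i / 8] & (1 << (i % 8))) break;    // find first set bit
    if (i == nbits) { sprintf(scan, "{<empty>}"); return; }
    scan += sprintf(scan, "{%d", i);
    for (i++; i < nbits; i++) {
        if (! (bits[i / 8] & (1 << (i % 8)))) continue;
        if (end - scan < 15) break;                 // same ",<n>" + ",...}" budget
        scan += sprintf(scan, ",%d", i);
    }
    if (i < nbits) scan += sprintf(scan, ",...");
    sprintf(scan, "}");
}
#endif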


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_OS_WINDOWS && KMP_ARCH_X86_64

    if (__kmp_num_proc_groups > 1) {
        int group;
        struct GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}
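
//
// Illustrative note (editor's addition): with multiple Windows* OS
// processor groups, an OS proc id is modeled as
// group * (CHAR_BIT * sizeof(DWORD_PTR)) + offset, i.e. group * 64 + offset
// on a 64-bit system.  For example, with 2 groups of 48 active procs each,
// the loop above sets bits 0..47 (group 0) and 64..111 (group 1); bits
// 48..63 stay clear because they do not correspond to active procs.
//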


//
// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
// functions.
//
// The icc codegen emits sections with extremely long names, of the form
// ".gnu.linkonce.<mangled_name>".  There seems to have been a linker bug
// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
// some sort of memory corruption or table overflow that is triggered by
// these long strings.  I checked the latest version of the linker -
// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is
// not fixed.
//
// Unfortunately, my attempts to reproduce it in a smaller example have
// failed - I'm not sure what the prospects are of getting it fixed
// properly - but we need a reproducer smaller than all of libiomp.
//
// Work around the problem by avoiding inline constructors in such builds.
// We do this for all platforms, not just Linux* OS - non-inline functions
// are more debuggable and provide better coverage information than inline
// functions.  Use inline functions in shipping libs, for performance.
//

# if !defined(KMP_DEBUG) && !defined(COVER)

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if (level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};

# else

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
{
    depth = _depth;
    leader = FALSE;
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if (level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}

# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
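
//
// Illustrative sketch (editor's addition, compiled out): an Address is a
// vector of topology labels ordered from the coarsest level to the finest.
// operator== compares the full label path, while isClose(b, level) ignores
// the 'level' finest levels, so two hardware threads on the same core are
// distinct but "close" at level 1:
//
#if 0
static bool example_addresses() {
    Address a(3), b(3);
    a.labels[0] = 0; a.labels[1] = 2; a.labels[2] = 0;  // pkg 0, core 2, thread 0
    b.labels[0] = 0; b.labels[1] = 2; b.labels[2] = 1;  // pkg 0, core 2, thread 1
    return (a != b) && a.isClose(b, 1);                 // true && true
}
#endif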


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact <= depth);
    for (i = 0; i < __kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
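
//
// Illustrative sketch (editor's addition, compiled out):
// __kmp_affinity_compact controls how many of the finest levels are moved
// to the front of the comparison.  The index order used by the comparator
// above:
//
#if 0
static void example_compact_order(int depth, int compact, int *order) {
    // Same index arithmetic as __kmp_affinity_cmp_Address_child_num():
    // the 'compact' finest levels come first, followed by the remaining
    // levels from coarsest to finest.
    int i, k = 0;
    for (i = 0; i < compact; i++) order[k++] = depth - i - 1;
    for (; i < depth; i++)        order[k++] = i - compact;
    // depth = 3 (pkg, core, thread), compact = 2  ->  order = {2, 1, 0}
    //   i.e. sort by thread, then core, then package ("scatter"-like).
    // depth = 3, compact = 0                      ->  order = {0, 1, 2}
    //   i.e. plain level order; packages vary slowest ("compact"-like).
}
#endif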


//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object.  This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level.  Example: suppose the machine has 2 nodes
// with 2 packages each.  The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604.  If we try to sort the
// table for "scatter" affinity, the table will still be sorted 601, 602,
// 603, 604 because we are paying attention to the labels themselves, not
// the ordinal child numbers.  By using the child numbers in the sort, the
// result is {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }

    //
    // Free the scratch arrays before returning.
    //
    __kmp_free(counts);
    __kmp_free(lastLabel);
}
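
//
// Worked example (editor's addition): the 2-node machine from the comment
// above, with depth-2 addresses (node, package).  Sorted by labels, the
// input is
//     {0, 601}, {0, 602}, {1, 603}, {1, 604}
// and __kmp_affinity_assign_child_nums() yields childNums
//     {0, 0},   {0, 1},   {1, 0},   {1, 1}
// so a "scatter" sort on child numbers visits 601, 603, 602, 604 instead
// of the label order 601, 602, 603, 604.
//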


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should also set
// *fullMask to the affinity mask for the initialization thread.  They need
// to save and restore the mask anyway, and the mask could be needed later,
// so saving it here is just an optimization that avoids calling
// __kmp_get_system_affinity() again.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }


static int nCoresPerPkg, nPackages;
int __kmp_nThreadsPerCore;

//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
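
//
// Worked example (editor's addition): a machine with 2 packages, 4 cores
// per package, and 2 threads per core is uniform when all 16 OS procs are
// available (2 * 4 * 2 == 16 == __kmp_avail_proc).  If one core's 2 procs
// are excluded from the affinity mask, __kmp_avail_proc drops to 14 and
// the topology is treated as non-uniform.
//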


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}


//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ht_enabled & __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ht_enabled & __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    __kmp_ht_enabled = FALSE;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}
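
//
// Illustrative note (editor's addition): the flat map models each available
// OS proc as its own package at depth 1.  With procs {0, 2, 3} available in
// fullMask, the table built above is conceptually
//     (*address2os) = { ({0}, 0), ({2}, 2), ({3}, 3) }
// i.e. each entry's single topology label is just its OS proc id.
//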


# if KMP_OS_WINDOWS && KMP_ARCH_X86_64

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr, i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity "gran" with group
            // topology method; falling back to granularity "thread".
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */


# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while ((1 << r) < count)
        ++r;
    return r;
}
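
//
// Worked example (editor's addition): __kmp_cpuid_mask_width() returns the
// number of bits needed to encode 'count' distinct ids, i.e. ceil(log2):
//     __kmp_cpuid_mask_width(1) == 0
//     __kmp_cpuid_mask_width(2) == 1
//     __kmp_cpuid_mask_width(6) == 3      // 1<<2 == 4 < 6 <= 8 == 1<<3
//     __kmp_cpuid_mask_width(8) == 3
//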


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

#  if KMP_MIC
    {
        // The code below will use cpuid(4).
        // Check if cpuid(4) is supported.
        // FIXME? - this really doesn't need to be specific to MIC.
        kmp_cpuid buf;
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax < 4) {
            *msg_id = kmp_i18n_str_NoLeaf4Support;
            return -1;
        }
    }
#  endif // KMP_MIC

    //
    // Even if __kmp_affinity_type == affinity_none, this routine is still
    // called to set __kmp_ht_enabled & __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling
    // __kmp_get_system_affinity()/__kmp_set_system_affinity(), then we
    // need to do something else.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        kmp_cpuid buf;
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off.  We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly
        //   set to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1).  The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id.  It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact.  In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be
    //    2 (for a single core chip).  On other OS/chip combinations
    //    supporting Intel(R) Hyper-Threading Technology, the value of this
    //    field will be 1 when Intel(R) Hyper-Threading Technology is
    //    disabled and 2 when it is enabled.
    //
    // Max Cores Per Pkg:  Bits 26:31 of eax after issuing cpuid(4).  The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id.  The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        kmp_cpuid buf;
        __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }
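
    //
    // Worked example (editor's addition): decoding a legacy 8-bit Apic Id.
    // Suppose cpuid(1) reports maxThreadsPerPkg = 8 and cpuid(4) reports
    // maxCoresPerPkg = 4.  Then
    //     widthCT = __kmp_cpuid_mask_width(8) = 3   (core# + thread# bits)
    //     widthC  = __kmp_cpuid_mask_width(4) = 2   (core# bits)
    //     widthT  = 3 - 2 = 1                       (thread# bits)
    // and for apicId = 0x0B (binary 1011):
    //     pkgId    = 0x0B >> 3         = 1
    //     coreId   = (0x0B >> 1) & 0x3 = 1
    //     threadId = 0x0B & 0x1        = 1
    //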

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  pkgId's may be sparsely
    // assigned among the chips on a system.  Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now.  We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;                         // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

                                                // intra-pkg consist checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if (coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if (threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks.  Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if (threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if (coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if (threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ht_enabled & __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington, and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology.  While we're at
    // it, get the default values for __kmp_nThreadsPerCore & nCoresPerPkg.
    // We will try to get more accurate values later by explicitly counting
    // them, but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine
            // topology, but so far, the only machine we have seen which
            // does not exit this loop before iteration 32 has fubar
            // x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest.  The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;
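
    //
    // Worked example (editor's addition): a plausible leaf 11 enumeration
    // for a 2-thread/core part.  Sub-leaf 0 reports kind 1 (SMT) with
    // shift 1; sub-leaf 1 reports kind 2 (core) with, say, shift 5 (up to
    // 32 logical procs per package); sub-leaf 2 reports ebx == 0, so the
    // package level is inferred and depth = 3.  After the inversion above,
    // labels[0] is the package, labels[1] the core, and labels[2] the
    // thread, and the extraction loop below decomposes the x2APIC id as
    //     thread =  apicId & 0x1            (bit 0)
    //     core   = (apicId & 0x1f) >> 1     (bits 1..4)
    //     pkg    =  apicId >> 5             (bits 5 and up)
    //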

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling
    // __kmp_get_system_affinity()/__kmp_set_system_affinity(), then we
    // need to do something else - use the defaults that we calculated from
    // issuing cpuid without binding to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    __kmp_free(retval);
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
        if (level != depth) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }
01435 
01436     //
01437     // We've collected all the info we need.
01438     // Restore the old affinity mask for this thread.
01439     //
01440     __kmp_set_system_affinity(oldMask, TRUE);
01441 
01442     //
01443     // If there's only one thread context to bind to, return now.
01444     //
01445     KMP_ASSERT(nApics > 0);
01446     if (nApics == 1) {
01447         __kmp_ncores = nPackages = 1;
01448         __kmp_nThreadsPerCore = nCoresPerPkg = 1;
01449         __kmp_ht_enabled = FALSE;
01450         if (__kmp_affinity_verbose) {
01451             char buf[KMP_AFFIN_MASK_PRINT_LEN];
01452             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
01453 
01454             KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
01455             if (__kmp_affinity_respect_mask) {
01456                 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
01457             } else {
01458                 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
01459             }
01460             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
01461             KMP_INFORM(Uniform, "KMP_AFFINITY");
01462             KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
01463               __kmp_nThreadsPerCore, __kmp_ncores);
01464         }
01465 
01466         if (__kmp_affinity_type == affinity_none) {
01467             __kmp_free(retval);
01468             KMP_CPU_FREE(oldMask);
01469             return 0;
01470         }
01471 
01472         //
01473         // Form an Address object which only includes the package level.
01474         //
01475         Address addr(1);
01476         addr.labels[0] = retval[0].first.labels[pkgLevel];
01477         retval[0].first = addr;
01478 
01479         if (__kmp_affinity_gran_levels < 0) {
01480             __kmp_affinity_gran_levels = 0;
01481         }
01482 
01483         if (__kmp_affinity_verbose) {
01484             __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
01485         }
01486 
01487         *address2os = retval;
01488         KMP_CPU_FREE(oldMask);
01489         return 1;
01490     }
01491 
01492     //
01493     // Sort the table by physical Id.
01494     //
01495     qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
01496 
01497     //
01498     // Find the radix at each of the levels.
01499     //
01500     unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
01501     unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
01502     unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
01503     unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
01504     for (level = 0; level < depth; level++) {
01505         totals[level] = 1;
01506         maxCt[level] = 1;
01507         counts[level] = 1;
01508         last[level] = retval[0].first.labels[level];
01509     }
01510 
01511     //
01512     // From here on, the iteration variable "level" runs from the finest
01513     // level to the coarsest, i.e. we iterate forward through
01514     // (*address2os)[].first.labels[] - in the previous loops, we iterated
01515     // backwards.
01516     //
01517     for (proc = 1; proc < nApics; proc++) {
01518         int level;
01519         for (level = 0; level < depth; level++) {
01520             if (retval[proc].first.labels[level] != last[level]) {
01521                 unsigned j;
01522                 for (j = level + 1; j < depth; j++) {
01523                     totals[j]++;
01524                     counts[j] = 1;
01525                     // Resetting maxCt[j] here (the commented-out line below) would
01526                     // print incorrect topology information whenever the maximum count
01527                     // for a level is encountered before a smaller one while walking
01528                     // the sorted array.  For example, if pkg0 has 4 cores and pkg1
01529                     // has 2, maxCt[1] would end up as 2 when it should be 4.
01530                     // TODO: verify that leaving this line commented out is safe.
01531                     //maxCt[j] = 1;
01532                     last[j] = retval[proc].first.labels[j];
01533                 }
01534                 totals[level]++;
01535                 counts[level]++;
01536                 if (counts[level] > maxCt[level]) {
01537                     maxCt[level] = counts[level];
01538                 }
01539                 last[level] = retval[proc].first.labels[level];
01540                 break;
01541             }
01542             else if (level == depth - 1) {
01543                 __kmp_free(last);
01544                 __kmp_free(maxCt);
01545                 __kmp_free(counts);
01546                 __kmp_free(totals);
01547                 __kmp_free(retval);
01548                 KMP_CPU_FREE(oldMask);
01549                 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
01550                 return -1;
01551             }
01552         }
01553     }
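//
// Worked trace of the loop above: for a sorted depth-2 label table
// {0,0}, {0,1}, {1,0}, {1,1} (package, thread), it finishes with
// totals = {2, 4} (2 packages, 4 threads overall) and maxCt = {2, 2}
// (at most 2 packages at the root, 2 threads per package).
//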
01554 
01555     //
01556     // When affinity is off, this routine will still be called to set
01557     // __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
01558     // nCoresPerPkg, & nPackages.  Make sure all these vars are set
01559     // correctly, and return if affinity is not enabled.
01560     //
01561     if (threadLevel >= 0) {
01562         __kmp_nThreadsPerCore = maxCt[threadLevel];
01563     }
01564     else {
01565         __kmp_nThreadsPerCore = 1;
01566     }
01567     __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
01568 
01569     nPackages = totals[pkgLevel];
01570 
01571     if (coreLevel >= 0) {
01572         __kmp_ncores = totals[coreLevel];
01573         nCoresPerPkg = maxCt[coreLevel];
01574     }
01575     else {
01576         __kmp_ncores = nPackages;
01577         nCoresPerPkg = 1;
01578     }
01579 
01580     //
01581     // Check to see if the machine topology is uniform
01582     //
01583     unsigned prod = maxCt[0];
01584     for (level = 1; level < depth; level++) {
01585        prod *= maxCt[level];
01586     }
01587     bool uniform = (prod == totals[level - 1]);
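//
// Continuing the depth-2 trace above: prod = 2 * 2 = 4 equals
// totals[depth - 1] = 4, so that machine is uniform.  A mix of a 4-core
// package and a 2-core package instead gives prod = 2 * 4 = 8 against
// 6 actual leaves, flagging the topology as non-uniform.
//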
01588 
01589     //
01590     // Print the machine topology summary.
01591     //
01592     if (__kmp_affinity_verbose) {
01593         char mask[KMP_AFFIN_MASK_PRINT_LEN];
01594         __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
01595 
01596         KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
01597         if (__kmp_affinity_respect_mask) {
01598             KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
01599         } else {
01600             KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
01601         }
01602         KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
01603         if (uniform) {
01604             KMP_INFORM(Uniform, "KMP_AFFINITY");
01605         } else {
01606             KMP_INFORM(NonUniform, "KMP_AFFINITY");
01607         }
01608 
01609         kmp_str_buf_t buf;
01610         __kmp_str_buf_init(&buf);
01611 
01612         __kmp_str_buf_print(&buf, "%d", totals[0]);
01613         for (level = 1; level <= pkgLevel; level++) {
01614             __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
01615         }
01616         KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
01617           __kmp_nThreadsPerCore, __kmp_ncores);
01618 
01619         __kmp_str_buf_free(&buf);
01620     }
01621 
01622     if (__kmp_affinity_type == affinity_none) {
01623         __kmp_free(last);
01624         __kmp_free(maxCt);
01625         __kmp_free(counts);
01626         __kmp_free(totals);
01627         __kmp_free(retval);
01628         KMP_CPU_FREE(oldMask);
01629         return 0;
01630     }
01631 
01632     //
01633     // Find any levels with radix 1, and remove them from the map
01634     // (except for the package level).
01635     //
01636     int new_depth = 0;
01637     for (level = 0; level < depth; level++) {
01638         if ((maxCt[level] == 1) && (level != pkgLevel)) {
01639            continue;
01640         }
01641         new_depth++;
01642     }
01643 
01644     //
01645     // If we are removing any levels, allocate a new vector to return,
01646     // and copy the relevant information to it.
01647     //
01648     if (new_depth != depth) {
01649         AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
01650           sizeof(AddrUnsPair) * nApics);
01651         for (proc = 0; proc < nApics; proc++) {
01652             Address addr(new_depth);
01653             new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
01654         }
01655         int new_level = 0;
01656         for (level = 0; level < depth; level++) {
01657             if ((maxCt[level] == 1) && (level != pkgLevel)) {
01658                if (level == threadLevel) {
01659                    threadLevel = -1;
01660                }
01661                else if ((threadLevel >= 0) && (level < threadLevel)) {
01662                    threadLevel--;
01663                }
01664                if (level == coreLevel) {
01665                    coreLevel = -1;
01666                }
01667                else if ((coreLevel >= 0) && (level < coreLevel)) {
01668                    coreLevel--;
01669                }
01670                if (level < pkgLevel) {
01671                    pkgLevel--;
01672                }
01673                continue;
01674             }
01675             for (proc = 0; proc < nApics; proc++) {
01676                 new_retval[proc].first.labels[new_level]
01677                   = retval[proc].first.labels[level];
01678             }
01679             new_level++;
01680         }
01681 
01682         __kmp_free(retval);
01683         retval = new_retval;
01684         depth = new_depth;
01685     }
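//
// For example, on a non-SMT machine detected with depth 3
// (package/core/thread), maxCt[threadLevel] == 1, so the thread level is
// dropped here: depth becomes 2, threadLevel is set to -1, and
// pkgLevel/coreLevel keep their positions since no coarser level above
// them was removed.
//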
01686 
01687     if (__kmp_affinity_gran_levels < 0) {
01688         //
01689         // Set the granularity level based on what levels are modeled
01690         // in the machine topology map.
01691         //
01692         __kmp_affinity_gran_levels = 0;
01693         if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
01694             __kmp_affinity_gran_levels++;
01695         }
01696         if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
01697             __kmp_affinity_gran_levels++;
01698         }
01699         if (__kmp_affinity_gran > affinity_gran_package) {
01700             __kmp_affinity_gran_levels++;
01701         }
01702     }
01703 
01704     if (__kmp_affinity_verbose) {
01705         __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
01706           coreLevel, threadLevel);
01707     }
01708 
01709     __kmp_free(last);
01710     __kmp_free(maxCt);
01711     __kmp_free(counts);
01712     __kmp_free(totals);
01713     KMP_CPU_FREE(oldMask);
01714     *address2os = retval;
01715     return depth;
01716 }
01717 
01718 
01719 # endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
01720 
01721 
01722 #define osIdIndex       0
01723 #define threadIdIndex   1
01724 #define coreIdIndex     2
01725 #define pkgIdIndex      3
01726 #define nodeIdIndex     4
01727 
01728 typedef unsigned *ProcCpuInfo;
01729 static unsigned maxIndex = pkgIdIndex;
01730 
01731 
01732 static int
01733 __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
01734 {
01735     const unsigned *aa = (const unsigned *)a;
01736     const unsigned *bb = (const unsigned *)b;
01737     if (aa[osIdIndex] < bb[osIdIndex]) return -1;
01738     if (aa[osIdIndex] > bb[osIdIndex]) return 1;
01739     return 0;
01740 }
01741 
01742 
01743 static int
01744 __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
01745 {
01746     unsigned i;
01747     const unsigned *aa = *((const unsigned **)a);
01748     const unsigned *bb = *((const unsigned **)b);
01749     for (i = maxIndex; ; i--) {
01750         if (aa[i] < bb[i]) return -1;
01751         if (aa[i] > bb[i]) return 1;
01752         if (i == osIdIndex) break;
01753     }
01754     return 0;
01755 }
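//
// Note: this comparator walks from the most significant field (the highest
// node id) down to osIdIndex, so qsort() groups records by node, then
// package, then core, then thread - e.g. a record with
// (pkg, core, thread) = (0, 1, 1) sorts before one with (1, 0, 0).
//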
01756 
01757 
01758 //
01759 // Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
01760 // affinity map.
01761 //
01762 static int
01763 __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
01764   kmp_i18n_id_t *const msg_id, FILE *f)
01765 {
01766     *address2os = NULL;
01767     *msg_id = kmp_i18n_null;
01768 
01769     //
01770     // Scan the file once to count the number of "processor" (osId) fields,
01771     // and to find the highest value of <n> for a node_<n> field.
01772     //
01773     char buf[256];
01774     unsigned num_records = 0;
01775     while (! feof(f)) {
01776         buf[sizeof(buf) - 1] = 1;
01777         if (! fgets(buf, sizeof(buf), f)) {
01778             //
01779             // Read error, presumably due to EOF.
01780             //
01781             break;
01782         }
01783 
01784         char s1[] = "processor";
01785         if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
01786             num_records++;
01787             continue;
01788         }
01789 
01790         //
01791         // FIXME - this will match "node_<n> <garbage>"
01792         //
01793         int level;
01794         if (sscanf(buf, "node_%d id", &level) == 1) {
01795             if (nodeIdIndex + level >= maxIndex) {
01796                 maxIndex = nodeIdIndex + level;
01797             }
01798             continue;
01799         }
01800     }
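//
// For reference, the fields this parser consumes look roughly like the
// following within a /proc/cpuinfo record (all other lines are skipped,
// and a blank line terminates each record; the "thread id" field only
// appears on some systems - see the auto-assignment logic further below):
//
//     processor       : 0
//     physical id     : 0
//     core id         : 0
//     thread id       : 0
//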
01801 
01802     //
01803     // Check for empty file / no valid processor records, or too many.
01804     // The number of records can't exceed the number of valid bits in the
01805     // affinity mask.
01806     //
01807     if (num_records == 0) {
01808         *line = 0;
01809         *msg_id = kmp_i18n_str_NoProcRecords;
01810         return -1;
01811     }
01812     if (num_records > __kmp_xproc) {
01813         *line = 0;
01814         *msg_id = kmp_i18n_str_TooManyProcRecords;
01815         return -1;
01816     }
01817 
01818     //
01819     // Set the file pointer back to the beginning, so that we can scan the
01820     // file again, this time performing a full parse of the data.
01821     // Allocate a vector of ProcCpuInfo objects, where we will place the data.
01822     // Adding an extra element at the end allows us to remove a lot of extra
01823     // checks for termination conditions.
01824     //
01825     if (fseek(f, 0, SEEK_SET) != 0) {
01826         *line = 0;
01827         *msg_id = kmp_i18n_str_CantRewindCpuinfo;
01828         return -1;
01829     }
01830 
01831     //
01832     // Allocate the array of records to store the proc info in.  The dummy
01833     // element at the end makes the logic in filling them out easier to code.
01834     //
01835     unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
01836       * sizeof(unsigned *));
01837     unsigned i;
01838     for (i = 0; i <= num_records; i++) {
01839         threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
01840           * sizeof(unsigned));
01841     }
01842 
01843 #define CLEANUP_THREAD_INFO \
01844     for (i = 0; i <= num_records; i++) {                                \
01845         __kmp_free(threadInfo[i]);                                      \
01846     }                                                                   \
01847     __kmp_free(threadInfo);
01848 
01849     //
01850     // A value of UINT_MAX means that we didn't find the field
01851     //
01852     unsigned __index;
01853 
01854 #define INIT_PROC_INFO(p) \
01855     for (__index = 0; __index <= maxIndex; __index++) {                 \
01856         (p)[__index] = UINT_MAX;                                        \
01857     }
01858 
01859     for (i = 0; i <= num_records; i++) {
01860         INIT_PROC_INFO(threadInfo[i]);
01861     }
01862 
01863     unsigned num_avail = 0;
01864     *line = 0;
01865     while (! feof(f)) {
01866         //
01867         // Create an inner scoping level, so that all the goto targets at the
01868         // end of the loop appear in an outer scoping level.  This avoids
01869         // warnings about jumping past an initialization to a target in the
01870         // same block.
01871         //
01872         {
01873             buf[sizeof(buf) - 1] = 1;
01874             bool long_line = false;
01875             if (! fgets(buf, sizeof(buf), f)) {
01876                 //
01877                 // Read errors presumably because of EOF
01878                 // Read error, presumably due to EOF.
01879                 //
01880                 // If there is valid data in threadInfo[num_avail], then fake
01881                 // a blank line to ensure that the last address gets parsed.
01882                 bool valid = false;
01883                 for (i = 0; i <= maxIndex; i++) {
01884                     if (threadInfo[num_avail][i] != UINT_MAX) {
01885                         valid = true;
01886                     }
01887                 }
01888                 if (! valid) {
01889                     break;
01890                 }
01891                 buf[0] = 0;
01892             } else if (!buf[sizeof(buf) - 1]) {
01893                 //
01894                 // The line is longer than the buffer.  Set a flag and don't
01895                 // emit an error if we were going to ignore the line, anyway.
01896                 //
01897                 long_line = true;
01898 
01899 #define CHECK_LINE \
01900     if (long_line) {                                                    \
01901         CLEANUP_THREAD_INFO;                                            \
01902         *msg_id = kmp_i18n_str_LongLineCpuinfo;                         \
01903         return -1;                                                      \
01904     }
01905             }
01906             (*line)++;
01907 
01908             char s1[] = "processor";
01909             if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
01910                 CHECK_LINE;
01911                 char *p = strchr(buf + sizeof(s1) - 1, ':');
01912                 unsigned val;
01913                 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
01914                 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
01915                 threadInfo[num_avail][osIdIndex] = val;
01916                 continue;
01917             }
01918             char s2[] = "physical id";
01919             if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
01920                 CHECK_LINE;
01921                 char *p = strchr(buf + sizeof(s2) - 1, ':');
01922                 unsigned val;
01923                 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
01924                 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
01925                 threadInfo[num_avail][pkgIdIndex] = val;
01926                 continue;
01927             }
01928             char s3[] = "core id";
01929             if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
01930                 CHECK_LINE;
01931                 char *p = strchr(buf + sizeof(s3) - 1, ':');
01932                 unsigned val;
01933                 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
01934                 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
01935                 threadInfo[num_avail][coreIdIndex] = val;
01936                 continue;
01937             }
01938             char s4[] = "thread id";
01939             if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
01940                 CHECK_LINE;
01941                 char *p = strchr(buf + sizeof(s4) - 1, ':');
01942                 unsigned val;
01943                 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
01944                 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
01945                 threadInfo[num_avail][threadIdIndex] = val;
01946                 continue;
01947             }
01948             int level;
01949             if (sscanf(buf, "node_%d id", &level) == 1) {
01950                 CHECK_LINE;
01951                 char *p = strchr(buf + sizeof(s4) - 1, ':');  // strchr() scans forward, so reusing s4's length still finds the ':'
01952                 unsigned val;
01953                 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
01954                 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
01955                 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
01956                 threadInfo[num_avail][nodeIdIndex + level] = val;
01957                 continue;
01958             }
01959 
01960             //
01961             // We didn't recognize the leading token on the line.
01962             // There are lots of leading tokens that we don't recognize -
01963             // if the line isn't empty, go on to the next line.
01964             //
01965             if ((*buf != 0) && (*buf != '\n')) {
01966                 //
01967                 // If the line is longer than the buffer, read characters
01968                 // until we find a newline.
01969                 //
01970                 if (long_line) {
01971                     int ch;
01972                     while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
01973                 }
01974                 continue;
01975             }
01976 
01977             //
01978             // A newline has signalled the end of the processor record.
01979             // Check that there aren't too many procs specified.
01980             //
01981             if (num_avail == __kmp_xproc) {
01982                 CLEANUP_THREAD_INFO;
01983                 *msg_id = kmp_i18n_str_TooManyEntries;
01984                 return -1;
01985             }
01986 
01987             //
01988             // Check for missing fields.  The osId field must be there, and we
01989             // currently require that the physical id field is specified, also.
01990             //
01991             if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
01992                 CLEANUP_THREAD_INFO;
01993                 *msg_id = kmp_i18n_str_MissingProcField;
01994                 return -1;
01995             }
01996             if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
01997                 CLEANUP_THREAD_INFO;
01998                 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
01999                 return -1;
02000             }
02001 
02002             //
02003             // Skip this proc if it is not included in the machine model.
02004             //
02005             if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
02006                 INIT_PROC_INFO(threadInfo[num_avail]);
02007                 continue;
02008             }
02009 
02010             //
02011             // We have a successful parse of this proc's info.
02012             // Increment the counter, and prepare for the next proc.
02013             //
02014             num_avail++;
02015             KMP_ASSERT(num_avail <= num_records);
02016             INIT_PROC_INFO(threadInfo[num_avail]);
02017         }
02018         continue;
02019 
02020         no_val:
02021         CLEANUP_THREAD_INFO;
02022         *msg_id = kmp_i18n_str_MissingValCpuinfo;
02023         return -1;
02024 
02025         dup_field:
02026         CLEANUP_THREAD_INFO;
02027         *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
02028         return -1;
02029     }
02030     *line = 0;
02031 
02032 # if KMP_MIC && REDUCE_TEAM_SIZE
02033     unsigned teamSize = 0;
02034 # endif // KMP_MIC && REDUCE_TEAM_SIZE
02035 
02036     // check for num_records == __kmp_xproc ???
02037 
02038     //
02039     // If there's only one thread context to bind to, form an Address object
02040     // with depth 1 and return immediately (or, if affinity is off, set
02041     // address2os to NULL and return).
02042     //
02043     // If it is configured to omit the package level when there is only a
02044     // single package, the logic at the end of this routine won't work if
02045     // there is only a single thread - it would try to form an Address
02046     // object with depth 0.
02047     //
02048     KMP_ASSERT(num_avail > 0);
02049     KMP_ASSERT(num_avail <= num_records);
02050     if (num_avail == 1) {
02051         __kmp_ncores = 1;
02052         __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
02053         __kmp_ht_enabled = FALSE;
02054         if (__kmp_affinity_verbose) {
02055             if (! KMP_AFFINITY_CAPABLE()) {
02056                 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
02057                 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
02058                 KMP_INFORM(Uniform, "KMP_AFFINITY");
02059             }
02060             else {
02061                 char buf[KMP_AFFIN_MASK_PRINT_LEN];
02062                 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
02063                   fullMask);
02064                 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
02065                 if (__kmp_affinity_respect_mask) {
02066                     KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
02067                 } else {
02068                     KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
02069                 }
02070                 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
02071                 KMP_INFORM(Uniform, "KMP_AFFINITY");
02072             }
02073             int index;
02074             kmp_str_buf_t buf;
02075             __kmp_str_buf_init(&buf);
02076             __kmp_str_buf_print(&buf, "1");
02077             for (index = maxIndex - 1; index > pkgIdIndex; index--) {
02078                 __kmp_str_buf_print(&buf, " x 1");
02079             }
02080             KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
02081             __kmp_str_buf_free(&buf);
02082         }
02083 
02084         if (__kmp_affinity_type == affinity_none) {
02085             CLEANUP_THREAD_INFO;
02086             return 0;
02087         }
02088 
02089         *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
02090         Address addr(1);
02091         addr.labels[0] = threadInfo[0][pkgIdIndex];
02092         (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
02093 
02094         if (__kmp_affinity_gran_levels < 0) {
02095             __kmp_affinity_gran_levels = 0;
02096         }
02097 
02098         if (__kmp_affinity_verbose) {
02099             __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
02100         }
02101 
02102         CLEANUP_THREAD_INFO;
02103         return 1;
02104     }
02105 
02106     //
02107     // Sort the threadInfo table by physical Id.
02108     //
02109     qsort(threadInfo, num_avail, sizeof(*threadInfo),
02110       __kmp_affinity_cmp_ProcCpuInfo_phys_id);
02111 
02112     //
02113     // The table is now sorted by pkgId / coreId / threadId, but we really
02114     // don't know the radix of any of the fields.  pkgId's may be sparsely
02115     // assigned among the chips on a system.  Although coreId's are usually
02116     // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
02117     // [0..threadsPerCore-1], we don't want to make any such assumptions.
02118     //
02119     // For that matter, we don't know what coresPerPkg and threadsPerCore
02120     // (or the total # packages) are at this point - we want to determine
02121     // that now.  We only have an upper bound on the first two figures.
02122     //
02123     unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
02124       * sizeof(unsigned));
02125     unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
02126       * sizeof(unsigned));
02127     unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
02128       * sizeof(unsigned));
02129     unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
02130       * sizeof(unsigned));
02131 
02132     bool assign_thread_ids = false;
02133     int threadIdCt;
02134     int index;
02135 
02136     restart_radix_check:
02137     threadIdCt = 0;
02138 
02139     //
02140     // Initialize the counter arrays with data from threadInfo[0].
02141     //
02142     if (assign_thread_ids) {
02143         if (threadInfo[0][threadIdIndex] == UINT_MAX) {
02144             threadInfo[0][threadIdIndex] = threadIdCt++;
02145         }
02146         else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
02147             threadIdCt = threadInfo[0][threadIdIndex] + 1;
02148         }
02149     }
02150     for (index = 0; index <= maxIndex; index++) {
02151         counts[index] = 1;
02152         maxCt[index] = 1;
02153         totals[index] = 1;
02154         lastId[index] = threadInfo[0][index];
02155     }
02156 
02157     //
02158     // Run through the rest of the OS procs.
02159     //
02160     for (i = 1; i < num_avail; i++) {
02161         //
02162         // Find the most significant index whose id differs
02163         // from the id for the previous OS proc.
02164         //
02165         for (index = maxIndex; index >= threadIdIndex; index--) {
02166             if (assign_thread_ids && (index == threadIdIndex)) {
02167                 //
02168                 // Auto-assign the thread id field if it wasn't specified.
02169                 //
02170                 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
02171                     threadInfo[i][threadIdIndex] = threadIdCt++;
02172                 }
02173 
02174                 //
02175                 // Apparently the thread id field was specified for some
02176                 // entries and not others.  Start the thread id counter
02177                 // off at the next higher thread id.
02178                 //
02179                 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
02180                     threadIdCt = threadInfo[i][threadIdIndex] + 1;
02181                 }
02182             }
02183             if (threadInfo[i][index] != lastId[index]) {
02184                 //
02185                 // Run through all indices which are less significant,
02186                 // and reset the counts to 1.
02187                 //
02188                 // At all levels up to and including index, we need to
02189                 // increment the totals and record the last id.
02190                 //
02191                 int index2;
02192                 for (index2 = threadIdIndex; index2 < index; index2++) {
02193                     totals[index2]++;
02194                     if (counts[index2] > maxCt[index2]) {
02195                         maxCt[index2] = counts[index2];
02196                     }
02197                     counts[index2] = 1;
02198                     lastId[index2] = threadInfo[i][index2];
02199                 }
02200                 counts[index]++;
02201                 totals[index]++;
02202                 lastId[index] = threadInfo[i][index];
02203 
02204                 if (assign_thread_ids && (index > threadIdIndex)) {
02205 
02206 # if KMP_MIC && REDUCE_TEAM_SIZE
02207                     //
02208                     // The default team size is the total #threads in the machine
02209                     // minus 1 thread for every core that has 3 or more threads.
02210                     //
02211                     teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
02212 # endif // KMP_MIC && REDUCE_TEAM_SIZE
02213 
02214                     //
02215                     // Restart the thread counter, as we are on a new core.
02216                     //
02217                     threadIdCt = 0;
02218 
02219                     //
02220                     // Auto-assign the thread id field if it wasn't specified.
02221                     //
02222                     if (threadInfo[i][threadIdIndex] == UINT_MAX) {
02223                         threadInfo[i][threadIdIndex] = threadIdCt++;
02224                     }
02225 
02226                     //
02227                     // Apparently the thread id field was specified for some
02228                     // entries and not others.  Start the thread id counter
02229                     // off at the next higher thread id.
02230                     //
02231                     else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
02232                         threadIdCt = threadInfo[i][threadIdIndex] + 1;
02233                     }
02234                 }
02235                 break;
02236             }
02237         }
02238         if (index < threadIdIndex) {
02239             //
02240             // If thread ids were specified, it is an error if they are not
02241             // unique.  Also, check that we haven't already restarted the
02242             // loop (to be safe - shouldn't need to).
02243             //
02244             if ((threadInfo[i][threadIdIndex] != UINT_MAX)
02245               || assign_thread_ids) {
02246                 __kmp_free(lastId);
02247                 __kmp_free(totals);
02248                 __kmp_free(maxCt);
02249                 __kmp_free(counts);
02250                 CLEANUP_THREAD_INFO;
02251                 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
02252                 return -1;
02253             }
02254 
02255             //
02256             // If the thread ids were not specified and we see entries
02257             // that are duplicates, start the loop over and
02258             // assign the thread ids manually.
02259             //
02260             assign_thread_ids = true;
02261             goto restart_radix_check;
02262         }
02263     }
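//
// Example: if two records share the same physical id and core id and
// neither carries a "thread id" field, the first pass above reaches
// index < threadIdIndex, sets assign_thread_ids, and jumps back to
// restart_radix_check; the second pass then labels those records as
// thread 0 and thread 1 on that core via threadIdCt.
//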
02264 
02265 # if KMP_MIC && REDUCE_TEAM_SIZE
02266     //
02267     // The default team size is the total #threads in the machine
02268     // minus 1 thread for every core that has 3 or more threads.
02269     //
02270     teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
02271 # endif // KMP_MIC && REDUCE_TEAM_SIZE
02272 
02273     for (index = threadIdIndex; index <= maxIndex; index++) {
02274         if (counts[index] > maxCt[index]) {
02275             maxCt[index] = counts[index];
02276         }
02277     }
02278 
02279     __kmp_nThreadsPerCore = maxCt[threadIdIndex];
02280     nCoresPerPkg = maxCt[coreIdIndex];
02281     nPackages = totals[pkgIdIndex];
02282 
02283     //
02284     // Check to see if the machine topology is uniform
02285     //
02286     unsigned prod = totals[maxIndex];
02287     for (index = threadIdIndex; index < maxIndex; index++) {
02288        prod *= maxCt[index];
02289     }
02290     bool uniform = (prod == totals[threadIdIndex]);
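//
// Here totals[maxIndex] is the package (or topmost node) count and
// maxCt[] the widest fan-out at each lower level, so a machine with
// 2 packages x 4 cores x 2 threads gives prod = 2 * 4 * 2 = 16, which
// matches totals[threadIdIndex] = 16 OS procs: uniform.
//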
02291 
02292     //
02293     // When affinity is off, this routine will still be called to set
02294     // __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
02295     // nCoresPerPkg, & nPackages.  Make sure all these vars are set
02296     // correctly, and return now if affinity is not enabled.
02297     //
02298     __kmp_ht_enabled = (maxCt[threadIdIndex] > 1); // threads per core > 1
02299     __kmp_ncores = totals[coreIdIndex];
02300 
02301     if (__kmp_affinity_verbose) {
02302         if (! KMP_AFFINITY_CAPABLE()) {
02303             KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
02304             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
02305             if (uniform) {
02306                 KMP_INFORM(Uniform, "KMP_AFFINITY");
02307             } else {
02308                 KMP_INFORM(NonUniform, "KMP_AFFINITY");
02309             }
02310         }
02311         else {
02312             char buf[KMP_AFFIN_MASK_PRINT_LEN];
02313             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
02314             KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
02315             if (__kmp_affinity_respect_mask) {
02316                 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
02317             } else {
02318                 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
02319             }
02320             KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
02321             if (uniform) {
02322                 KMP_INFORM(Uniform, "KMP_AFFINITY");
02323             } else {
02324                 KMP_INFORM(NonUniform, "KMP_AFFINITY");
02325             }
02326         }
02327         kmp_str_buf_t buf;
02328         __kmp_str_buf_init(&buf);
02329 
02330         __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
02331         for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
02332             __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
02333         }
02334         KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str,  maxCt[coreIdIndex],
02335           maxCt[threadIdIndex], __kmp_ncores);
02336 
02337         __kmp_str_buf_free(&buf);
02338     }
02339 
02340 # if KMP_MIC && REDUCE_TEAM_SIZE
02341     //
02342     // Set the default team size.
02343     //
02344     if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
02345         __kmp_dflt_team_nth = teamSize;
02346         KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
02347           __kmp_dflt_team_nth));
02348     }
02349 # endif // KMP_MIC && REDUCE_TEAM_SIZE
02350 
02351     if (__kmp_affinity_type == affinity_none) {
02352         __kmp_free(lastId);
02353         __kmp_free(totals);
02354         __kmp_free(maxCt);
02355         __kmp_free(counts);
02356         CLEANUP_THREAD_INFO;
02357         return 0;
02358     }
02359 
02360     //
02361     // Count the number of levels which have more nodes at that level than
02362     // at the parent's level (with there being an implicit root node of
02363     // the top level).  This is equivalent to saying that there is at least
02364     // one node at this level which has a sibling.  These levels are in the
02365     // map, and the package level is always in the map.
02366     //
02367     bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
02368     int level = 0;
02369     for (index = threadIdIndex; index < maxIndex; index++) {
02370         KMP_ASSERT(totals[index] >= totals[index + 1]);
02371         inMap[index] = (totals[index] > totals[index + 1]);
02372     }
02373     inMap[maxIndex] = (totals[maxIndex] > 1);
02374     inMap[pkgIdIndex] = true;
02375 
02376     int depth = 0;
02377     for (index = threadIdIndex; index <= maxIndex; index++) {
02378         if (inMap[index]) {
02379             depth++;
02380         }
02381     }
02382     KMP_ASSERT(depth > 0);
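//
// Example: with 2 packages x 4 cores x 2 threads (16 OS procs),
// totals[threadIdIndex..pkgIdIndex] = {16, 8, 2}, so every level is kept
// (16 > 8, 8 > 2, and the package level is always in the map) and
// depth = 3.  On a single-package machine without SMT,
// totals = {8, 8, 1} leaves only the core and package levels: depth = 2.
//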
02383 
02384     //
02385     // Construct the data structure that is to be returned.
02386     //
02387     *address2os = (AddrUnsPair*)
02388       __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
02389     int pkgLevel = -1;
02390     int coreLevel = -1;
02391     int threadLevel = -1;
02392 
02393     for (i = 0; i < num_avail; ++i) {
02394         Address addr(depth);
02395         unsigned os = threadInfo[i][osIdIndex];
02396         int src_index;
02397         int dst_index = 0;
02398 
02399         for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
02400             if (! inMap[src_index]) {
02401                 continue;
02402             }
02403             addr.labels[dst_index] = threadInfo[i][src_index];
02404             if (src_index == pkgIdIndex) {
02405                 pkgLevel = dst_index;
02406             }
02407             else if (src_index == coreIdIndex) {
02408                 coreLevel = dst_index;
02409             }
02410             else if (src_index == threadIdIndex) {
02411                 threadLevel = dst_index;
02412             }
02413             dst_index++;
02414         }
02415         (*address2os)[i] = AddrUnsPair(addr, os);
02416     }
02417 
02418     if (__kmp_affinity_gran_levels < 0) {
02419         //
02420         // Set the granularity level based on what levels are modeled
02421         // in the machine topology map.
02422         //
02423         int src_index;
02424         __kmp_affinity_gran_levels = 0;
02425         for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
02426             if (! inMap[src_index]) {
02427                 continue;
02428             }
02429             switch (src_index) {
02430                 case threadIdIndex:
02431                 if (__kmp_affinity_gran > affinity_gran_thread) {
02432                     __kmp_affinity_gran_levels++;
02433                 }
02434 
02435                 break;
02436                 case coreIdIndex:
02437                 if (__kmp_affinity_gran > affinity_gran_core) {
02438                     __kmp_affinity_gran_levels++;
02439                 }
02440                 break;
02441 
02442                 case pkgIdIndex:
02443                 if (__kmp_affinity_gran > affinity_gran_package) {
02444                     __kmp_affinity_gran_levels++;
02445                 }
02446                 break;
02447             }
02448         }
02449     }
02450 
02451     if (__kmp_affinity_verbose) {
02452         __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
02453           coreLevel, threadLevel);
02454     }
02455 
02456     __kmp_free(inMap);
02457     __kmp_free(lastId);
02458     __kmp_free(totals);
02459     __kmp_free(maxCt);
02460     __kmp_free(counts);
02461     CLEANUP_THREAD_INFO;
02462     return depth;
02463 }
02464 
02465 
02466 //
02467 // Create and return a table of affinity masks, indexed by OS thread ID.
02468 // This routine handles OR'ing together all the affinity masks of threads
02469 // that are sufficiently close, if granularity > fine.
02470 //
02471 static kmp_affin_mask_t *
02472 __kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
02473   AddrUnsPair *address2os, unsigned numAddrs)
02474 {
02475     //
02476     // First form a table of affinity masks in order of OS thread id.
02477     //
02478     unsigned depth;
02479     unsigned maxOsId;
02480     unsigned i;
02481 
02482     KMP_ASSERT(numAddrs > 0);
02483     depth = address2os[0].first.depth;
02484 
02485     maxOsId = 0;
02486     for (i = 0; i < numAddrs; i++) {
02487         unsigned osId = address2os[i].second;
02488         if (osId > maxOsId) {
02489             maxOsId = osId;
02490         }
02491     }
02492     kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
02493       (maxOsId + 1) * __kmp_affin_mask_size);
02494 
02495     //
02496     // Sort the address2os table according to physical order.  Doing so
02497     // will put all threads on the same core/package/node in consecutive
02498     // locations.
02499     //
02500     qsort(address2os, numAddrs, sizeof(*address2os),
02501       __kmp_affinity_cmp_Address_labels);
02502 
02503     KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
02504     if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
02505         KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY",  __kmp_affinity_gran_levels);
02506     }
02507     if (__kmp_affinity_gran_levels >= depth) {
02508         if (__kmp_affinity_verbose || (__kmp_affinity_warnings
02509           && (__kmp_affinity_type != affinity_none))) {
02510             KMP_WARNING(AffThreadsMayMigrate);
02511         }
02512     }
02513 
02514     //
02515     // Run through the table, forming the masks for all threads on each
02516     // core.  Threads on the same core will have identical "Address"
02517     // objects, not considering the last level, which must be the thread
02518     // id.  All threads on a core will appear consecutively.
02519     //
02520     unsigned unique = 0;
02521     unsigned j = 0;                             // index of 1st thread on core
02522     unsigned leader = 0;
02523     Address *leaderAddr = &(address2os[0].first);
02524     kmp_affin_mask_t *sum
02525       = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
02526     KMP_CPU_ZERO(sum);
02527     KMP_CPU_SET(address2os[0].second, sum);
02528     for (i = 1; i < numAddrs; i++) {
02529         //
02530         // If this thread is sufficiently close to the leader (within the
02531         // granularity setting), then set the bit for this os thread in the
02532         // affinity mask for this group, and go on to the next thread.
02533         //
02534         if (leaderAddr->isClose(address2os[i].first,
02535           __kmp_affinity_gran_levels)) {
02536             KMP_CPU_SET(address2os[i].second, sum);
02537             continue;
02538         }
02539 
02540         //
02541         // For every thread in this group, copy the mask to the thread's
02542         // entry in the osId2Mask table.  Mark the first address as a
02543         // leader.
02544         //
02545         for (; j < i; j++) {
02546             unsigned osId = address2os[j].second;
02547             KMP_DEBUG_ASSERT(osId <= maxOsId);
02548             kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
02549             KMP_CPU_COPY(mask, sum);
02550             address2os[j].first.leader = (j == leader);
02551         }
02552         unique++;
02553 
02554         //
02555         // Start a new mask.
02556         //
02557         leader = i;
02558         leaderAddr = &(address2os[i].first);
02559         KMP_CPU_ZERO(sum);
02560         KMP_CPU_SET(address2os[i].second, sum);
02561     }
02562 
02563     //
02564     // For every thread in last group, copy the mask to the thread's
02565     // entry in the osId2Mask table.
02566     //
02567     for (; j < i; j++) {
02568         unsigned osId = address2os[j].second;
02569         KMP_DEBUG_ASSERT(osId <= maxOsId);
02570         kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
02571         KMP_CPU_COPY(mask, sum);
02572         address2os[j].first.leader = (j == leader);
02573     }
02574     unique++;
02575 
02576     *maxIndex = maxOsId;
02577     *numUnique = unique;
02578     return osId2Mask;
02579 }
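//
// Usage sketch, assuming granularity "core" (__kmp_affinity_gran_levels
// == 1) on a machine with 2 threads per core: sibling threads such as
// (pkg0, core0, thr0) on OS proc 0 and (pkg0, core0, thr1) on OS proc 8
// satisfy isClose(), so both of their osId2Mask entries come out as the
// set {0,8}, and *numUnique counts one distinct mask per core rather
// than per thread.
//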
02580 
02581 
02582 //
02583 // Stuff for the affinity proclist parsers.  It's easier to declare these vars
02584 // as file-static than to try and pass them through the calling sequence of
02585 // the recursive-descent OMP_PLACES parser.
02586 //
02587 static kmp_affin_mask_t *newMasks;
02588 static int numNewMasks;
02589 static int nextNewMask;
02590 
02591 #define ADD_MASK(_mask) \
02592     {                                                                   \
02593         if (nextNewMask >= numNewMasks) {                               \
02594             numNewMasks *= 2;                                           \
02595             newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
02596               numNewMasks * __kmp_affin_mask_size);                     \
02597         }                                                               \
02598         KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));    \
02599         nextNewMask++;                                                  \
02600     }
02601 
02602 #define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
02603     {                                                                   \
02604         if (((_osId) > _maxOsId) ||                                     \
02605           (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX(_osId2Mask, (_osId))))) {\
02606             if (__kmp_affinity_verbose || (__kmp_affinity_warnings      \
02607               && (__kmp_affinity_type != affinity_none))) {             \
02608                 KMP_WARNING(AffIgnoreInvalidProcID, _osId);             \
02609             }                                                           \
02610         }                                                               \
02611         else {                                                          \
02612             ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));               \
02613         }                                                               \
02614     }
02615 
02616 
02617 //
02618 // Re-parse the proclist (for the explicit affinity type), and form the list
02619 // of affinity newMasks indexed by gtid.
02620 //
02621 static void
02622 __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
02623   unsigned int *out_numMasks, const char *proclist,
02624   kmp_affin_mask_t *osId2Mask, int maxOsId)
02625 {
02626     const char *scan = proclist;
02627     const char *next = proclist;
02628 
02629     //
02630     // We use malloc() for the temporary mask vector,
02631     // so that we can use realloc() to extend it.
02632     //
02633     numNewMasks = 2;
02634     newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
02635       * __kmp_affin_mask_size);
02636     nextNewMask = 0;
02637     kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
02638       __kmp_affin_mask_size);
02639     int setSize = 0;
02640 
02641     for (;;) {
02642         int start, end, stride;
02643 
02644         SKIP_WS(scan);
02645         next = scan;
02646         if (*next == '\0') {
02647             break;
02648         }
02649 
02650         if (*next == '{') {
02651             int num;
02652             setSize = 0;
02653             next++;     // skip '{'
02654             SKIP_WS(next);
02655             scan = next;
02656 
02657             //
02658             // Read the first integer in the set.
02659             //
02660             KMP_ASSERT2((*next >= '0') && (*next <= '9'),
02661               "bad explicit proc list");
02662             SKIP_DIGITS(next);
02663             num = __kmp_str_to_int(scan, *next);
02664             KMP_ASSERT2(num >= 0, "bad explicit proc list");
02665 
02666             //
02667             // Copy the mask for that osId to the sum (union) mask.
02668             //
02669             if ((num > maxOsId) ||
02670               (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
02671                 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
02672                   && (__kmp_affinity_type != affinity_none))) {
02673                     KMP_WARNING(AffIgnoreInvalidProcID, num);
02674                 }
02675                 KMP_CPU_ZERO(sumMask);
02676             }
02677             else {
02678                 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
02679                 setSize = 1;
02680             }
02681 
02682             for (;;) {
02683                 //
02684                 // Check for end of set.
02685                 //
02686                 SKIP_WS(next);
02687                 if (*next == '}') {
02688                     next++;     // skip '}'
02689                     break;
02690                 }
02691 
02692                 //
02693                 // Skip optional comma.
02694                 //
02695                 if (*next == ',') {
02696                     next++;
02697                 }
02698                 SKIP_WS(next);
02699 
02700                 //
02701                 // Read the next integer in the set.
02702                 //
02703                 scan = next;
02704                 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
02705                   "bad explicit proc list");
02706 
02707                 SKIP_DIGITS(next);
02708                 num = __kmp_str_to_int(scan, *next);
02709                 KMP_ASSERT2(num >= 0, "bad explicit proc list");
02710 
02711                 //
02712                 // Add the mask for that osId to the sum mask.
02713                 //
02714                 if ((num > maxOsId) ||
02715                   (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
02716                     if (__kmp_affinity_verbose || (__kmp_affinity_warnings
02717                       && (__kmp_affinity_type != affinity_none))) {
02718                         KMP_WARNING(AffIgnoreInvalidProcID, num);
02719                     }
02720                 }
02721                 else {
02722                     KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
02723                     setSize++;
02724                 }
02725             }
02726             if (setSize > 0) {
02727                 ADD_MASK(sumMask);
02728             }
02729 
02730             SKIP_WS(next);
02731             if (*next == ',') {
02732                 next++;
02733             }
02734             scan = next;
02735             continue;
02736         }
02737 
02738         //
02739         // Read the first integer.
02740         //
02741         KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
02742         SKIP_DIGITS(next);
02743         start = __kmp_str_to_int(scan, *next);
02744         KMP_ASSERT2(start >= 0, "bad explicit proc list");
02745         SKIP_WS(next);
02746 
02747         //
02748         // If this isn't a range, then add a mask to the list and go on.
02749         //
02750         if (*next != '-') {
02751             ADD_MASK_OSID(start, osId2Mask, maxOsId);
02752 
02753             //
02754             // Skip optional comma.
02755             //
02756             if (*next == ',') {
02757                 next++;
02758             }
02759             scan = next;
02760             continue;
02761         }
02762 
02763         //
02764         // This is a range.  Skip over the '-' and read in the 2nd int.
02765         //
02766         next++;         // skip '-'
02767         SKIP_WS(next);
02768         scan = next;
02769         KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
02770         SKIP_DIGITS(next);
02771         end = __kmp_str_to_int(scan, *next);
02772         KMP_ASSERT2(end >= 0, "bad explicit proc list");
02773 
02774         //
02775         // Check for a stride parameter
02776         //
02777         stride = 1;
02778         SKIP_WS(next);
02779         if (*next == ':') {
02780             //
02781             // A stride is specified.  Skip over the ':' and read the 3rd int.
02782             //
02783             int sign = +1;
02784             next++;         // skip ':'
02785             SKIP_WS(next);
02786             scan = next;
02787             if (*next == '-') {
02788                 sign = -1;
02789                 next++;
02790                 SKIP_WS(next);
02791                 scan = next;
02792             }
02793             KMP_ASSERT2((*next >=  '0') && (*next <= '9'),
02794               "bad explicit proc list");
02795             SKIP_DIGITS(next);
02796             stride = __kmp_str_to_int(scan, *next);
02797             KMP_ASSERT2(stride >= 0, "bad explicit proc list");
02798             stride *= sign;
02799         }
02800 
02801         //
02802         // Do some range checks.
02803         //
02804         KMP_ASSERT2(stride != 0, "bad explicit proc list");
02805         if (stride > 0) {
02806             KMP_ASSERT2(start <= end, "bad explicit proc list");
02807         }
02808         else {
02809             KMP_ASSERT2(start >= end, "bad explicit proc list");
02810         }
02811         KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
02812 
02813         //
02814         // Add the mask for each OS proc # to the list.
02815         //
02816         if (stride > 0) {
02817             do {
02818                 ADD_MASK_OSID(start, osId2Mask, maxOsId);
02819                 start += stride;
02820             } while (start <= end);
02821         }
02822         else {
02823             do {
02824                 ADD_MASK_OSID(start, osId2Mask, maxOsId);
02825                 start += stride;
02826             } while (start >= end);
02827         }
02828 
02829         //
02830         // Skip optional comma.
02831         //
02832         SKIP_WS(next);
02833         if (*next == ',') {
02834             next++;
02835         }
02836         scan = next;
02837     }
02838 
02839     *out_numMasks = nextNewMask;
02840     if (nextNewMask == 0) {
02841         *out_masks = NULL;
02842         KMP_INTERNAL_FREE(newMasks);
02843         return;
02844     }
02845     *out_masks
02846       = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
02847     memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
02848     __kmp_free(sumMask);
02849     KMP_INTERNAL_FREE(newMasks);
02850 }
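//
// Example input (the surrounding "explicit,proclist=[...]" syntax is
// described in the KMP_AFFINITY documentation): the proclist
//
//     0,2-6:2,{8,9}
//
// is parsed above as the single proc 0, the strided range 2,4,6, and one
// braced set whose per-proc masks are OR'ed together, yielding five
// masks: {0}, {2}, {4}, {6}, {8,9}.
//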
02851 
02852 
02853 # if OMP_40_ENABLED
02854 
02855 /*-----------------------------------------------------------------------------
02856 
02857 Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
02858 places.  Again, here is the grammar:
02859 
02860 place_list := place
02861 place_list := place , place_list
02862 place := num
02863 place := place : num
02864 place := place : num : signed
02865 place := { subplace_list }
02866 place := ! place                  // (lowest priority)
02867 subplace_list := subplace
02868 subplace_list := subplace , subplace_list
02869 subplace := num
02870 subplace := num : num
02871 subplace := num : num : signed
02872 signed := num
02873 signed := + signed
02874 signed := - signed
02875 
02876 -----------------------------------------------------------------------------*/
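//
// For example, by this grammar OMP_PLACES="{0:2}:4:2" denotes the
// subplace {0:2} (procs 0 and 1) replicated 4 times with a stride of 2,
// i.e. the places {0,1}, {2,3}, {4,5}, {6,7}.
//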
02877 
02878 static void
02879 __kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
02880   int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
02881 {
02882     const char *next;
02883 
02884     for (;;) {
02885         int start, count, stride, i;
02886 
02887         //
02888         // Read in the starting proc id
02889         //
02890         SKIP_WS(*scan);
02891         KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
02892           "bad explicit places list");
02893         next = *scan;
02894         SKIP_DIGITS(next);
02895         start = __kmp_str_to_int(*scan, *next);
02896         KMP_ASSERT(start >= 0);
02897         *scan = next;
02898 
02899         //
02900         // valid follow sets are ',' ':' and '}'
02901         //
02902         SKIP_WS(*scan);
02903         if (**scan == '}' || **scan == ',') {
02904             if ((start > maxOsId) ||
02905               (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
02906                 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
02907                   && (__kmp_affinity_type != affinity_none))) {
02908                     KMP_WARNING(AffIgnoreInvalidProcID, start);
02909                 }
02910             }
02911             else {
02912                 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
02913                 (*setSize)++;
02914             }
02915             if (**scan == '}') {
02916                 break;
02917             }
02918             (*scan)++;  // skip ','
02919             continue;
02920         }
02921         KMP_ASSERT2(**scan == ':', "bad explicit places list");
02922         (*scan)++;      // skip ':'
02923 
02924         //
02925         // Read count parameter
02926         //
02927         SKIP_WS(*scan);
02928         KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
02929           "bad explicit places list");
02930         next = *scan;
02931         SKIP_DIGITS(next);
02932         count = __kmp_str_to_int(*scan, *next);
02933         KMP_ASSERT(count >= 0);
02934         *scan = next;
02935 
02936         //
02937         // valid follow sets are ',' ':' and '}'
02938         //
02939         SKIP_WS(*scan);
02940         if (**scan == '}' || **scan == ',') {
02941             for (i = 0; i < count; i++) {
02942                 if ((start > maxOsId) ||
02943                   (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
02944                     if (__kmp_affinity_verbose || (__kmp_affinity_warnings
02945                       && (__kmp_affinity_type != affinity_none))) {
02946                         KMP_WARNING(AffIgnoreInvalidProcID, start);
02947                     }
02948                     break;  // don't proliferate warnings for large count
02949                 }
02950                 else {
02951                     KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
02952                     start++;
02953                     (*setSize)++;
02954                 }
02955             }
02956             if (**scan == '}') {
02957                 break;
02958             }
02959             (*scan)++;  // skip ','
02960             continue;
02961         }
02962         KMP_ASSERT2(**scan == ':', "bad explicit places list");
02963         (*scan)++;      // skip ':'
02964 
02965         //
02966         // Read stride parameter
02967         //
02968         int sign = +1;
02969         for (;;) {
02970             SKIP_WS(*scan);
02971             if (**scan == '+') {
02972                 (*scan)++; // skip '+'
02973                 continue;
02974             }
02975             if (**scan == '-') {
02976                 sign *= -1;
02977                 (*scan)++; // skip '-'
02978                 continue;
02979             }
02980             break;
02981         }
02982         SKIP_WS(*scan);
02983         KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
02984           "bad explicit places list");
02985         next = *scan;
02986         SKIP_DIGITS(next);
02987         stride = __kmp_str_to_int(*scan, *next);
02988         KMP_ASSERT(stride >= 0);
02989         *scan = next;
02990         stride *= sign;
02991 
02992         //
02993         // valid follow sets are ',' and '}'
02994         //
02995         SKIP_WS(*scan);
02996         if (**scan == '}' || **scan == ',') {
02997             for (i = 0; i < count; i++) {
02998                 if ((start > maxOsId) ||
02999                   (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
03000                     if (__kmp_affinity_verbose || (__kmp_affinity_warnings
03001                       && (__kmp_affinity_type != affinity_none))) {
03002                         KMP_WARNING(AffIgnoreInvalidProcID, start);
03003                     }
03004                     break;  // don't proliferate warnings for large count
03005                 }
03006                 else {
03007                     KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
03008                     start += stride;
03009                     (*setSize)++;
03010                 }
03011             }
03012             if (**scan == '}') {
03013                 break;
03014             }
03015             (*scan)++;  // skip ','
03016             continue;
03017         }
03018 
03019         KMP_ASSERT2(0, "bad explicit places list");
03020     }
03021 }
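
//
// Added annotation (not in the original source): worked subplace examples
// for the routine above.  "5" adds OS proc 5 to tempMask; "5:3" adds procs
// 5, 6, 7 (start=5, count=3, implicit stride 1); "5:3:2" adds procs 5, 7, 9
// (start=5, count=3, stride=2).  Invalid or out-of-range proc ids are
// skipped with a warning rather than treated as fatal.
//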
03022 
03023 
03024 static void
03025 __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
03026   int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
03027 {
03028     const char *next;
03029 
03030     //
03031     // valid follow sets are '{' '!' and num
03032     //
03033     SKIP_WS(*scan);
03034     if (**scan == '{') {
03035         (*scan)++;      // skip '{'
03036         __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask,
03037           setSize);
03038         KMP_ASSERT2(**scan == '}', "bad explicit places list");
03039         (*scan)++;      // skip '}'
03040     }
03041     else if (**scan == '!') {
03042         (*scan)++;      // skip '!' before recursing, so the place that
03043                         // follows is parsed and then complemented
03044         __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
03045         KMP_CPU_COMPLEMENT(tempMask);
03046     }
03046     else if ((**scan >= '0') && (**scan <= '9')) {
03047         next = *scan;
03048         SKIP_DIGITS(next);
03049         int num = __kmp_str_to_int(*scan, *next);
03050         KMP_ASSERT(num >= 0);
03051         if ((num > maxOsId) ||
03052           (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
03053             if (__kmp_affinity_verbose || (__kmp_affinity_warnings
03054               && (__kmp_affinity_type != affinity_none))) {
03055                 KMP_WARNING(AffIgnoreInvalidProcID, num);
03056             }
03057         }
03058         else {
03059             KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
03060             (*setSize)++;
03061         }
03062         *scan = next;  // skip num
03063     }
03064     else {
03065         KMP_ASSERT2(0, "bad explicit places list");
03066     }
03067 }
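
//
// Added annotation (not in the original source): e.g. "!{0,1}" is handled
// by the '!' branch above -- the place {0,1} is parsed first, then
// KMP_CPU_COMPLEMENT flips tempMask, leaving every OS proc the mask can
// represent except procs 0 and 1.
//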
03068 
03069 
03070 void
03071 __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
03072   unsigned int *out_numMasks, const char *placelist,
03073   kmp_affin_mask_t *osId2Mask, int maxOsId)
03074 {
03075     const char *scan = placelist;
03076     const char *next = placelist;
03077 
03078     numNewMasks = 2;
03079     newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
03080       * __kmp_affin_mask_size);
03081     nextNewMask = 0;
03082 
03083     kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
03084       __kmp_affin_mask_size);
03085     KMP_CPU_ZERO(tempMask);
03086     int setSize = 0;
03087 
03088     for (;;) {
03089         int count, stride;
03090 
03091         __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
03092 
03093         //
03094         // valid follow sets are ',' ':' and EOL
03095         //
03096         SKIP_WS(scan);
03097         if (*scan == '\0' || *scan == ',') {
03098             if (setSize > 0) {
03099                 ADD_MASK(tempMask);
03100             }
03101             KMP_CPU_ZERO(tempMask);
03102             setSize = 0;
03103             if (*scan == '\0') {
03104                 break;
03105             }
03106             scan++;     // skip ','
03107             continue;
03108         }
03109 
03110         KMP_ASSERT2(*scan == ':', "bad explicit places list");
03111         scan++;         // skip ':'
03112 
03113         //
03114         // Read count parameter
03115         //
03116         SKIP_WS(scan);
03117         KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
03118           "bad explicit places list");
03119         next = scan;
03120         SKIP_DIGITS(next);
03121         count = __kmp_str_to_int(scan, *next);
03122         KMP_ASSERT(count >= 0);
03123         scan = next;
03124 
03125         //
03126         // valid follow sets are ',' ':' and EOL
03127         //
03128         SKIP_WS(scan);
03129         if (*scan == '\0' || *scan == ',') {
03130             int i;
03131             for (i = 0; i < count; i++) {
03132                 int j;
03133                 if (setSize == 0) {
03134                     break;
03135                 }
03136                 ADD_MASK(tempMask);
03137                 setSize = 0;
03138                 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j > 0; j--) {
03139                     //
03140                     // No stride was specified, so the implicit stride is
03141                     // +1: shift the whole mask up by one bit per place.
03142                     //
03143                     if (KMP_CPU_ISSET(j - 1, tempMask)) {
03144                         KMP_CPU_SET(j, tempMask);
03145                         setSize++;
03146                     }
03147                     else {
03148                         KMP_CPU_CLR(j, tempMask);
03149                     }
03150                 }
03151                 for (; j >= 0; j--) {
03152                     KMP_CPU_CLR(j, tempMask);
03153                 }
03154             }
03155             KMP_CPU_ZERO(tempMask);
03156             setSize = 0;
03157 
03158             if (*scan == '\0') {
03159                 break;
03160             }
03161             scan++;     // skip ','
03162             continue;
03163         }
03164 
03165         KMP_ASSERT2(*scan == ':', "bad explicit places list");
03166         scan++;         // skip ':'
03167 
03168         //
03169         // Read stride parameter
03170         //
03171         int sign = +1;
03172         for (;;) {
03173             SKIP_WS(scan);
03174             if (*scan == '+') {
03175                 scan++; // skip '+'
03176                 continue;
03177             }
03178             if (*scan == '-') {
03179                 sign *= -1;
03180                 scan++; // skip '-'
03181                 continue;
03182             }
03183             break;
03184         }
03185         SKIP_WS(scan);
03186         KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
03187           "bad explicit places list");
03188         next = scan;
03189         SKIP_DIGITS(next);
03190         stride = __kmp_str_to_int(scan, *next);
03191         KMP_DEBUG_ASSERT(stride >= 0);
03192         scan = next;
03193         stride *= sign;
03194 
03195         if (stride > 0) {
03196             int i;
03197             for (i = 0; i < count; i++) {
03198                 int j;
03199                 if (setSize == 0) {
03200                     break;
03201                 }
03202                 ADD_MASK(tempMask);
03203                 setSize = 0;
03204                 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
03205                     if (KMP_CPU_ISSET(j - stride, tempMask)) {
03206                         KMP_CPU_SET(j, tempMask);
03207                         setSize++;
03208                     }
03209                     else {
03210                         KMP_CPU_CLR(j, tempMask);
03211                     }
03212                 }
03213                 for (; j >= 0; j--) {
03214                     KMP_CPU_CLR(j, tempMask);
03215                 }
03216             }
03217         }
03218         else {
03219             int i;
03220             for (i = 0; i < count; i++) {
03221                 int j;
03222                 if (setSize == 0) {
03223                     break;
03224                 }
03225                 ADD_MASK(tempMask);
03226                 setSize = 0;
03227                 for (j = 0; j < (__kmp_affin_mask_size * CHAR_BIT) + stride;
03228                   j++) {
03229                     if (KMP_CPU_ISSET(j - stride, tempMask)) {
03230                         KMP_CPU_SET(j, tempMask);
03231                         setSize++;
03232                     }
03233                     else {
03234                         KMP_CPU_CLR(j, tempMask);
03235                     }
03236                 }
03237                 for (; j < __kmp_affin_mask_size * CHAR_BIT; j++) {
03238                     KMP_CPU_CLR(j, tempMask);
03239                 }
03240             }
03241         }
03242         KMP_CPU_ZERO(tempMask);
03243         setSize = 0;
03244 
03245         //
03246         // valid follow sets are ',' and EOL
03247         //
03248         SKIP_WS(scan);
03249         if (*scan == '\0') {
03250             break;
03251         }
03252         if (*scan == ',') {
03253             scan++;     // skip ','
03254             continue;
03255         }
03256 
03257         KMP_ASSERT2(0, "bad explicit places list");
03258     }
03259 
03260     *out_numMasks = nextNewMask;
03261     if (nextNewMask == 0) {
03262         *out_masks = NULL;
03263         KMP_INTERNAL_FREE(newMasks);
03264         return;
03265     }
03266     *out_masks
03267       = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
03268     memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
03269     __kmp_free(tempMask);
03270     KMP_INTERNAL_FREE(newMasks);
03271 }
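
//
// Added illustration (not part of the original source): a minimal
// standalone sketch of one "place : count : stride" replication step from
// the loops above, using a plain byte-per-bit array in place of
// kmp_affin_mask_t.  The function name is hypothetical.
//
#if 0
static void
example_shift_place(unsigned char *bits, int nbits, int stride)
{
    // For stride > 0: bit j takes the old value of bit j - stride, and the
    // low-order bits that have no source are cleared -- the whole place
    // moves up by `stride` OS procs, just like the KMP_CPU_SET/CLR loop.
    int j;
    for (j = nbits - 1; j >= stride; j--) {
        bits[j] = bits[j - stride];
    }
    for (; j >= 0; j--) {
        bits[j] = 0;
    }
}
#endif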
03272 
03273 
03274 # endif /* OMP_40_ENABLED */
03275 
03276 # undef ADD_MASK
03277 # undef ADD_MASK_OSID
03278 
03279 
03280 # if KMP_MIC
03281 
03282 static void
03283 __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
03284 {
03285     if ( __kmp_place_num_cores == 0 ) {
03286         if ( __kmp_place_num_threads_per_core == 0 ) {
03287             return;   // no core-limiting actions requested, exit
03288         }
03289         __kmp_place_num_cores = nCoresPerPkg;   // use all available cores
03290     }
03291     if ( !__kmp_affinity_uniform_topology() || depth != 3 ) {
03292         KMP_WARNING( AffThrPlaceUnsupported );
03293         return; // non-uniform topologies and non-3-level topologies are not supported
03294     }
03295     if ( __kmp_place_num_threads_per_core == 0 ) {
03296         __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore;  // use all HW contexts
03297     }
03298     if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
03299         KMP_WARNING( AffThrPlaceManyCores );
03300         return;
03301     }
03302 
03303     AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
03304                             nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
03305     int i, j, k, n_old = 0, n_new = 0;
03306     for ( i = 0; i < nPackages; ++i ) {
03307         for ( j = 0; j < nCoresPerPkg; ++j ) {
03308             if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
03309                 n_old += __kmp_nThreadsPerCore;   // skip not-requested core
03310             } else {
03311                 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
03312                     if ( k < __kmp_place_num_threads_per_core ) {
03313                         newAddr[n_new] = (*pAddr)[n_old];   // copy requested core's data to new location
03314                         n_new++;
03315                     }
03316                     n_old++;
03317                 }
03318             }
03319         }
03320     }
03321     nCoresPerPkg = __kmp_place_num_cores;                     // correct nCoresPerPkg
03322     __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
03323     __kmp_avail_proc = n_new;                                 // correct avail_proc
03324     __kmp_ncores = nPackages * __kmp_place_num_cores;         // correct ncores
03325 
03326     __kmp_free( *pAddr );
03327     *pAddr = newAddr;      // replace old topology with new one
03328 }
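
//
// Added annotation (not in the original source): a worked example of the
// filtering above, assuming it is driven by KMP_PLACE_THREADS-style
// settings.  On a uniform 1-package, 61-core, 4-threads-per-core machine,
// __kmp_place_core_offset=1, __kmp_place_num_cores=60 and
// __kmp_place_num_threads_per_core=2 keep 1 * 60 * 2 = 120 of the 244
// hardware contexts: thread contexts 0-1 of cores 1-60.
//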
03329 # endif
03330 
03331 static AddrUnsPair *address2os = NULL;
03332 static int           * procarr = NULL;
03333 static int     __kmp_aff_depth = 0;
03334 
03335 static void
03336 __kmp_aux_affinity_initialize(void)
03337 {
03338     if (__kmp_affinity_masks != NULL) {
03339         KMP_ASSERT(fullMask != NULL);
03340         return;
03341     }
03342 
03343     //
03344     // Create the "full" mask - this defines all of the processors that we
03345     // consider to be in the machine model.  If respect is set, then it is
03346     // the initialization thread's affinity mask.  Otherwise, it is all
03347     // processors that we know about on the machine.
03348     //
03349     if (fullMask == NULL) {
03350         fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
03351     }
03352     if (KMP_AFFINITY_CAPABLE()) {
03353         if (__kmp_affinity_respect_mask) {
03354             __kmp_get_system_affinity(fullMask, TRUE);
03355 
03356             //
03357             // Count the number of available processors.
03358             //
03359             unsigned i;
03360             __kmp_avail_proc = 0;
03361             for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
03362                 if (! KMP_CPU_ISSET(i, fullMask)) {
03363                     continue;
03364                 }
03365                 __kmp_avail_proc++;
03366             }
03367             if (__kmp_avail_proc > __kmp_xproc) {
03368                 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
03369                   && (__kmp_affinity_type != affinity_none))) {
03370                     KMP_WARNING(ErrorInitializeAffinity);
03371                 }
03372                 __kmp_affinity_type = affinity_none;
03373                 __kmp_affin_mask_size = 0;
03374                 return;
03375             }
03376         }
03377         else {
03378             __kmp_affinity_entire_machine_mask(fullMask);
03379             __kmp_avail_proc = __kmp_xproc;
03380         }
03381     }
03382 
03383     int depth = -1;
03384     kmp_i18n_id_t msg_id = kmp_i18n_null;
03385 
03386     //
03387     // For backward compatibility, setting KMP_CPUINFO_FILE =>
03388     // KMP_TOPOLOGY_METHOD=cpuinfo
03389     //
03390     if ((__kmp_cpuinfo_file != NULL) &&
03391       (__kmp_affinity_top_method == affinity_top_method_all)) {
03392         __kmp_affinity_top_method = affinity_top_method_cpuinfo;
03393     }
03394 
03395     if (__kmp_affinity_top_method == affinity_top_method_all) {
03396         //
03397         // In the default code path, errors are not fatal - we just try using
03398         // another method.  We only emit a warning message if affinity is on,
03399         // or the verbose flag is set, and the nowarnings flag was not set.
03400         //
03401         const char *file_name = NULL;
03402         int line = 0;
03403 
03404 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
03405 
03406         if (__kmp_affinity_verbose) {
03407             KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
03408         }
03409 
03410         file_name = NULL;
03411         depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
03412         if (depth == 0) {
03413             KMP_ASSERT(__kmp_affinity_type == affinity_none);
03414             KMP_ASSERT(address2os == NULL);
03415             return;
03416         }
03417 
03418         if (depth < 0) {
03419             if ((msg_id != kmp_i18n_null)
03420               && (__kmp_affinity_verbose || (__kmp_affinity_warnings
03421               && (__kmp_affinity_type != affinity_none)))) {
03422 #  if KMP_MIC
03423                 if (__kmp_affinity_verbose) {
03424                     KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
03425                       KMP_I18N_STR(DecodingLegacyAPIC));
03426                 }
03427 #  else
03428                 KMP_WARNING(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
03429                   KMP_I18N_STR(DecodingLegacyAPIC));
03430 #  endif
03431             }
03432 
03433             file_name = NULL;
03434             depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
03435             if (depth == 0) {
03436                 KMP_ASSERT(__kmp_affinity_type == affinity_none);
03437                 KMP_ASSERT(address2os == NULL);
03438                 return;
03439             }
03440         }
03441 
03442 # endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
03443 
03444 # if KMP_OS_LINUX
03445 
03446         if (depth < 0) {
03447             if ((msg_id != kmp_i18n_null)
03448               && (__kmp_affinity_verbose || (__kmp_affinity_warnings
03449               && (__kmp_affinity_type != affinity_none)))) {
03450 #   if KMP_MIC
03451                 if (__kmp_affinity_verbose) {
03452                     KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
03453                 }
03454 #   else
03455                 KMP_WARNING(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
03456 #   endif
03457             }
03458             else if (__kmp_affinity_verbose) {
03459                 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
03460             }
03461 
03462             FILE *f = fopen("/proc/cpuinfo", "r");
03463             if (f == NULL) {
03464                 msg_id = kmp_i18n_str_CantOpenCpuinfo;
03465             }
03466             else {
03467                 file_name = "/proc/cpuinfo";
03468                 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
03469                 fclose(f);
03470                 if (depth == 0) {
03471                     KMP_ASSERT(__kmp_affinity_type == affinity_none);
03472                     KMP_ASSERT(address2os == NULL);
03473                     return;
03474                 }
03475             }
03476         }
03477 
03478 # endif /* KMP_OS_LINUX */
03479 
03480         if (depth < 0) {
03481             if (msg_id != kmp_i18n_null
03482               && (__kmp_affinity_verbose || (__kmp_affinity_warnings
03483               && (__kmp_affinity_type != affinity_none)))) {
03484                 if (file_name == NULL) {
03485                     KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id));
03486                 }
03487                 else if (line == 0) {
03488                     KMP_WARNING(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
03489                 }
03490                 else {
03491                     KMP_WARNING(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
03492                 }
03493             }
03494 
03495             file_name = "";
03496             depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
03497             if (depth == 0) {
03498                 KMP_ASSERT(__kmp_affinity_type == affinity_none);
03499                 KMP_ASSERT(address2os == NULL);
03500                 return;
03501             }
03502             KMP_ASSERT(depth > 0);
03503             KMP_ASSERT(address2os != NULL);
03504         }
03505     }
03506 
03507     //
03508     // If the user has specified that a particular topology discovery method
03509     // is to be used, then we abort if that method fails.  The exception is
03510     // group affinity, which might have been implicitly set.
03511     //
03512 
03513 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
03514 
03515     else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
03516         if (__kmp_affinity_verbose) {
03517             KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
03518               KMP_I18N_STR(Decodingx2APIC));
03519         }
03520 
03521         depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
03522         if (depth == 0) {
03523             KMP_ASSERT(__kmp_affinity_type == affinity_none);
03524             KMP_ASSERT(address2os == NULL);
03525             return;
03526         }
03527 
03528         if (depth < 0) {
03529             KMP_ASSERT(msg_id != kmp_i18n_null);
03530             KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
03531         }
03532     }
03533     else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
03534         if (__kmp_affinity_verbose) {
03535             KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
03536               KMP_I18N_STR(DecodingLegacyAPIC));
03537         }
03538 
03539         depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
03540         if (depth == 0) {
03541             KMP_ASSERT(__kmp_affinity_type == affinity_none);
03542             KMP_ASSERT(address2os == NULL);
03543             return;
03544         }
03545 
03546         if (depth < 0) {
03547             KMP_ASSERT(msg_id != kmp_i18n_null);
03548             KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
03549         }
03550     }
03551 
03552 # endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
03553 
03554     else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
03555         const char *filename;
03556         if (__kmp_cpuinfo_file != NULL) {
03557             filename = __kmp_cpuinfo_file;
03558         }
03559         else {
03560             filename = "/proc/cpuinfo";
03561         }
03562 
03563         if (__kmp_affinity_verbose) {
03564             KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
03565         }
03566 
03567         FILE *f = fopen(filename, "r");
03568         if (f == NULL) {
03569             int code = errno;
03570             if (__kmp_cpuinfo_file != NULL) {
03571                 __kmp_msg(
03572                     kmp_ms_fatal,
03573                     KMP_MSG(CantOpenFileForReading, filename),
03574                     KMP_ERR(code),
03575                     KMP_HNT(NameComesFrom_CPUINFO_FILE),
03576                     __kmp_msg_null
03577                 );
03578             }
03579             else {
03580                 __kmp_msg(
03581                     kmp_ms_fatal,
03582                     KMP_MSG(CantOpenFileForReading, filename),
03583                     KMP_ERR(code),
03584                     __kmp_msg_null
03585                 );
03586             }
03587         }
03588         int line = 0;
03589         depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
03590         fclose(f);
03591         if (depth < 0) {
03592             KMP_ASSERT(msg_id != kmp_i18n_null);
03593             if (line > 0) {
03594                 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
03595             }
03596             else {
03597                 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
03598             }
03599         }
03600         if (__kmp_affinity_type == affinity_none) {
03601             KMP_ASSERT(depth == 0);
03602             KMP_ASSERT(address2os == NULL);
03603             return;
03604         }
03605     }
03606 
03607 # if KMP_OS_WINDOWS && KMP_ARCH_X86_64
03608 
03609     else if (__kmp_affinity_top_method == affinity_top_method_group) {
03610         if (__kmp_affinity_verbose) {
03611             KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
03612         }
03613 
03614         depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
03615         KMP_ASSERT(depth != 0);
03616 
03617         if (depth < 0) {
03618             if ((msg_id != kmp_i18n_null)
03619               && (__kmp_affinity_verbose || (__kmp_affinity_warnings
03620               && (__kmp_affinity_type != affinity_none)))) {
03621                 KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id));
03622             }
03623 
03624             depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
03625             if (depth == 0) {
03626                 KMP_ASSERT(__kmp_affinity_type == affinity_none);
03627                 KMP_ASSERT(address2os == NULL);
03628                 return;
03629             }
03630             // should not fail
03631             KMP_ASSERT(depth > 0);
03632             KMP_ASSERT(address2os != NULL);
03633         }
03634     }
03635 
03636 # endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
03637 
03638     else if (__kmp_affinity_top_method == affinity_top_method_flat) {
03639         if (__kmp_affinity_verbose) {
03640             KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
03641         }
03642 
03643         depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
03644         if (depth == 0) {
03645             KMP_ASSERT(__kmp_affinity_type == affinity_none);
03646             KMP_ASSERT(address2os == NULL);
03647             return;
03648         }
03649         // should not fail
03650         KMP_ASSERT(depth > 0);
03651         KMP_ASSERT(address2os != NULL);
03652     }
03653 
03654     if (address2os == NULL) {
03655         if (KMP_AFFINITY_CAPABLE()
03656           && (__kmp_affinity_verbose || (__kmp_affinity_warnings
03657           && (__kmp_affinity_type != affinity_none)))) {
03658             KMP_WARNING(ErrorInitializeAffinity);
03659         }
03660         __kmp_affinity_type = affinity_none;
03661         __kmp_affin_mask_size = 0;
03662         return;
03663     }
03664 
03665 # if KMP_MIC
03666     __kmp_apply_thread_places(&address2os, depth);
03667 # endif
03668 
03669     //
03670     // Create the table of masks, indexed by thread Id.
03671     //
03672     unsigned maxIndex;
03673     unsigned numUnique;
03674     kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
03675       address2os, __kmp_avail_proc);
03676     if (__kmp_affinity_gran_levels == 0) {
03677         KMP_DEBUG_ASSERT(numUnique == __kmp_avail_proc);
03678     }
03679 
03680     //
03681     // Set the childNums vector in all Address objects.  This must be done
03682     // before we can sort using __kmp_affinity_cmp_Address_child_num(),
03683     // which takes into account the setting of __kmp_affinity_compact.
03684     //
03685     __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
03686 
03687     switch (__kmp_affinity_type) {
03688 
03689         case affinity_explicit:
03690         KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
03691 # if OMP_40_ENABLED
03692         if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
03693 # endif
03694         {
03695             __kmp_affinity_process_proclist(&__kmp_affinity_masks,
03696               &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
03697               maxIndex);
03698         }
03699 # if OMP_40_ENABLED
03700         else {
03701             __kmp_affinity_process_placelist(&__kmp_affinity_masks,
03702               &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
03703               maxIndex);
03704         }
03705 # endif
03706         if (__kmp_affinity_num_masks == 0) {
03707             if (__kmp_affinity_verbose || (__kmp_affinity_warnings
03708               && (__kmp_affinity_type != affinity_none))) {
03709                 KMP_WARNING(AffNoValidProcID);
03710             }
03711             __kmp_affinity_type = affinity_none;
03712             return;
03713         }
03714         break;
03715 
03716         //
03717         // The other affinity types rely on sorting the Addresses according
03718         // to some permutation of the machine topology tree.  Set
03719         // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
03720         // then jump to a common code fragment to do the sort and create
03721         // the array of affinity masks.
03722         //
03723 
03724         case affinity_logical:
03725         __kmp_affinity_compact = 0;
03726         if (__kmp_affinity_offset) {
03727             __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
03728               % __kmp_avail_proc;
03729         }
03730         goto sortAddresses;
03731 
03732         case affinity_physical:
03733         if (__kmp_nThreadsPerCore > 1) {
03734             __kmp_affinity_compact = 1;
03735             if (__kmp_affinity_compact >= depth) {
03736                 __kmp_affinity_compact = 0;
03737             }
03738         } else {
03739             __kmp_affinity_compact = 0;
03740         }
03741         if (__kmp_affinity_offset) {
03742             __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
03743               % __kmp_avail_proc;
03744         }
03745         goto sortAddresses;
03746 
03747         case affinity_scatter:
03748         if (__kmp_affinity_compact >= depth) {
03749             __kmp_affinity_compact = 0;
03750         }
03751         else {
03752             __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
03753         }
03754         goto sortAddresses;
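
        //
        // Added annotation (not in the original source): scatter inverts
        // the compact value (depth - 1 - compact), so after the sort below
        // consecutive masks end up far apart in the topology tree (e.g. on
        // different packages), whereas affinity_compact keeps consecutive
        // masks as close together as the topology allows.
        //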
03755 
03756         case affinity_compact:
03757         if (__kmp_affinity_compact >= depth) {
03758             __kmp_affinity_compact = depth - 1;
03759         }
03760         goto sortAddresses;
03761 
03762 # if KMP_MIC
03763         case affinity_balanced:
03764         // Balanced works only for the case of a single package and uniform topology
03765         if( nPackages > 1 ) {
03766             if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
03767                 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
03768             }
03769             __kmp_affinity_type = affinity_none;
03770             return;
03771         } else if( __kmp_affinity_uniform_topology() ) {
03772             break;
03773         } else { // Non-uniform topology
03774 
03775             // Save the depth for further usage
03776             __kmp_aff_depth = depth;
03777 
03778             // Number of hyper threads per core in HT machine
03779             int nth_per_core = __kmp_nThreadsPerCore;
03780 
03781             int core_level;
03782             if( nth_per_core > 1 ) {
03783                 core_level = depth - 2;
03784             } else {
03785                 core_level = depth - 1;
03786             }
03787             int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
03788             int nproc = nth_per_core * ncores;
03789 
03790             procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
03791             for( int i = 0; i < nproc; i++ ) {
03792                 procarr[ i ] = -1;
03793             }
03794 
03795             int k = 0;
03796             for( int i = 0; i < __kmp_avail_proc; i++ ) {
03797                 int proc = address2os[ i ].second;
03798                 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
03799                 // If there is only one thread per core then depth == 2: level 0 - package,
03800                 // level 1 - core.
03801                 int level = depth - 1;
03802 
03803                 // __kmp_nth_per_core == 1
03804                 int thread = 0;
03805                 int core = address2os[ i ].first.labels[ level ];
03806                 // If the thread level exists, that is we have more than one thread context per core
03807                 if( nth_per_core > 1 ) {
03808                     thread = address2os[ i ].first.labels[ level ] % nth_per_core;
03809                     core = address2os[ i ].first.labels[ level - 1 ];
03810                 }
03811                 k = core * nth_per_core + thread;
03812                 procarr[ k ] = proc;
03813                 k++;
03814             }
03815 
03816             break;
03817         }
03818 # endif
03819 
03820         sortAddresses:
03821         //
03822         // Allocate the gtid->affinity mask table.
03823         //
03824         if (__kmp_affinity_dups) {
03825             __kmp_affinity_num_masks = __kmp_avail_proc;
03826         }
03827         else {
03828             __kmp_affinity_num_masks = numUnique;
03829         }
03830 
03831 # if OMP_40_ENABLED
03832         if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
03833           && ( __kmp_affinity_num_places > 0 )
03834           && ( __kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
03835             __kmp_affinity_num_masks = __kmp_affinity_num_places;
03836         }
03837 # endif
03838 
03839         __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
03840           __kmp_affinity_num_masks * __kmp_affin_mask_size);
03841 
03842         //
03843         // Sort the address2os table according to the current setting of
03844         // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
03845         //
03846         qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
03847           __kmp_affinity_cmp_Address_child_num);
03848         {
03849             unsigned i;
03850             unsigned j;
03851             for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
03852                 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
03853                     continue;
03854                 }
03855                 unsigned osId = address2os[i].second;
03856                 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
03857                 kmp_affin_mask_t *dest
03858                   = KMP_CPU_INDEX(__kmp_affinity_masks, j);
03859                 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
03860                 KMP_CPU_COPY(dest, src);
03861                 if (++j >= __kmp_affinity_num_masks) {
03862                     break;
03863                 }
03864             }
03865             KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
03866         }
03867         break;
03868 
03869         default:
03870         KMP_ASSERT2(0, "Unexpected affinity setting");
03871     }
03872 
03873     __kmp_free(osId2Mask);
03874 }
03875 
03876 
03877 void
03878 __kmp_affinity_initialize(void)
03879 {
03880     //
03881     // Much of the code above was written assuming that if a machine was not
03882     // affinity capable, then __kmp_affinity_type == affinity_none.  We now
03883     // explicitly represent this as __kmp_affinity_type == affinity_disabled.
03884     //
03885     // There are too many checks for __kmp_affinity_type == affinity_none
03886     // in this code.  Instead of trying to change them all, check if
03887     // __kmp_affinity_type == affinity_disabled, and if so, slam it with
03888     // affinity_none, call the real initialization routine, then restore
03889     // __kmp_affinity_type to affinity_disabled.
03890     //
03891     int disabled = (__kmp_affinity_type == affinity_disabled);
03892     if (! KMP_AFFINITY_CAPABLE()) {
03893         KMP_ASSERT(disabled);
03894     }
03895     if (disabled) {
03896         __kmp_affinity_type = affinity_none;
03897     }
03898     __kmp_aux_affinity_initialize();
03899     if (disabled) {
03900         __kmp_affinity_type = affinity_disabled;
03901     }
03902 }
03903 
03904 
03905 void
03906 __kmp_affinity_uninitialize(void)
03907 {
03908     if (__kmp_affinity_masks != NULL) {
03909         __kmp_free(__kmp_affinity_masks);
03910         __kmp_affinity_masks = NULL;
03911     }
03912     if (fullMask != NULL) {
03913         KMP_CPU_FREE(fullMask);
03914         fullMask = NULL;
03915     }
03916     __kmp_affinity_num_masks = 0;
03917 # if OMP_40_ENABLED
03918     __kmp_affinity_num_places = 0;
03919 # endif
03920     if (__kmp_affinity_proclist != NULL) {
03921         __kmp_free(__kmp_affinity_proclist);
03922         __kmp_affinity_proclist = NULL;
03923     }
03924     if( address2os != NULL ) {
03925         __kmp_free( address2os );
03926         address2os = NULL;
03927     }
03928     if( procarr != NULL ) {
03929         __kmp_free( procarr );
03930         procarr = NULL;
03931     }
03932 }
03933 
03934 
03935 void
03936 __kmp_affinity_set_init_mask(int gtid, int isa_root)
03937 {
03938     if (! KMP_AFFINITY_CAPABLE()) {
03939         return;
03940     }
03941 
03942     kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
03943     if (th->th.th_affin_mask == NULL) {
03944         KMP_CPU_ALLOC(th->th.th_affin_mask);
03945     }
03946     else {
03947         KMP_CPU_ZERO(th->th.th_affin_mask);
03948     }
03949 
03950     //
03951     // Copy the thread mask to the kmp_info_t structure.
03952     // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
03953     // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
03954     // is set, then the full mask is the same as the mask of the initialization
03955     // thread.
03956     //
03957     kmp_affin_mask_t *mask;
03958     int i;
03959 
03960 # if OMP_40_ENABLED
03961     if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
03962 # endif
03963     {
03964         if ((__kmp_affinity_type == affinity_none)
03965 # if KMP_MIC
03966           || (__kmp_affinity_type == affinity_balanced)
03967 # endif
03968           ) {
03969 # if KMP_OS_WINDOWS && KMP_ARCH_X86_64
03970             if (__kmp_num_proc_groups > 1) {
03971                 return;
03972             }
03973 # endif
03974             KMP_ASSERT(fullMask != NULL);
03975             i = -1;
03976             mask = fullMask;
03977         }
03978         else {
03979             KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
03980             i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
03981             mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
03982         }
03983     }
03984 # if OMP_40_ENABLED
03985     else {
03986         if ((! isa_root)
03987           || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
03988 #  if KMP_OS_WINDOWS && KMP_ARCH_X86_64
03989             if (__kmp_num_proc_groups > 1) {
03990                 return;
03991             }
03992 #  endif
03993             KMP_ASSERT(fullMask != NULL);
03994             i = KMP_PLACE_ALL;
03995             mask = fullMask;
03996         }
03997         else {
03998             //
03999             // int i = some hash function or just a counter that doesn't
04000             // always start at 0.  Use gtid for now.
04001             //
04002             KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
04003             i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
04004             mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
04005         }
04006     }
04007 # endif
04008 
04009 # if OMP_40_ENABLED
04010     th->th.th_current_place = i;
04011     if (isa_root) {
04012         th->th.th_new_place = i;
04013         th->th.th_first_place = 0;
04014         th->th.th_last_place = __kmp_affinity_num_masks - 1;
04015     }
04016 
04017     if (i == KMP_PLACE_ALL) {
04018         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
04019           gtid));
04020     }
04021     else {
04022         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
04023           gtid, i));
04024     }
04025 # else
04026     if (i == -1) {
04027         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
04028           gtid));
04029     }
04030     else {
04031         KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
04032           gtid, i));
04033     }
04034 # endif /* OMP_40_ENABLED */
04035 
04036     KMP_CPU_COPY(th->th.th_affin_mask, mask);
04037 
04038     if (__kmp_affinity_verbose) {
04039         char buf[KMP_AFFIN_MASK_PRINT_LEN];
04040         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04041           th->th.th_affin_mask);
04042         KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", gtid, buf);
04043     }
04044 
04045 # if KMP_OS_WINDOWS
04046     //
04047     // On Windows* OS, the process affinity mask might have changed.
04048     // If the user didn't request affinity and this call fails,
04049     // just continue silently.  See CQ171393.
04050     //
04051     if ( __kmp_affinity_type == affinity_none ) {
04052         __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
04053     }
04054     else
04055 # endif
04056     __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
04057 }
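
//
// Added annotation (not in the original source): the place index chosen
// above is i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks,
// so with e.g. 4 masks and offset 1, threads T#0..T#5 are bound to places
// 1, 2, 3, 0, 1, 2 -- the offset rotates the assignment and the modulus
// wraps it around.
//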
04058 
04059 
04060 # if OMP_40_ENABLED
04061 
04062 void
04063 __kmp_affinity_set_place(int gtid)
04064 {
04066 
04067     if (! KMP_AFFINITY_CAPABLE()) {
04068         return;
04069     }
04070 
04071     kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
04072 
04073     KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
04074       gtid, th->th.th_new_place, th->th.th_current_place));
04075 
04076     //
04077     // Check that the new place is within this thread's partition.
04078     //
04079     KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
04080     KMP_DEBUG_ASSERT(th->th.th_new_place >= 0);
04081     KMP_DEBUG_ASSERT(th->th.th_new_place <= __kmp_affinity_num_masks);
04082     if (th->th.th_first_place <= th->th.th_last_place) {
04083         KMP_DEBUG_ASSERT((th->th.th_new_place >= th->th.th_first_place)
04084          && (th->th.th_new_place <= th->th.th_last_place));
04085     }
04086     else {
04087         KMP_DEBUG_ASSERT((th->th.th_new_place <= th->th.th_first_place)
04088          || (th->th.th_new_place >= th->th.th_last_place));
04089     }
04090 
04091     //
04092     // Copy the thread mask to the kmp_info_t structure,
04093     // and set this thread's affinity.
04094     //
04095     kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
04096       th->th.th_new_place);
04097     KMP_CPU_COPY(th->th.th_affin_mask, mask);
04098     th->th.th_current_place = th->th.th_new_place;
04099 
04100     if (__kmp_affinity_verbose) {
04101         char buf[KMP_AFFIN_MASK_PRINT_LEN];
04102         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04103           th->th.th_affin_mask);
04104         KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", gtid, buf);
04105     }
04106     __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
04107 }
04108 
04109 # endif /* OMP_40_ENABLED */
04110 
04111 
04112 int
04113 __kmp_aux_set_affinity(void **mask)
04114 {
04115     int gtid;
04116     kmp_info_t *th;
04117     int retval;
04118 
04119     if (! KMP_AFFINITY_CAPABLE()) {
04120         return -1;
04121     }
04122 
04123     gtid = __kmp_entry_gtid();
04124     KA_TRACE(1000, ;{
04125         char buf[KMP_AFFIN_MASK_PRINT_LEN];
04126         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04127           (kmp_affin_mask_t *)(*mask));
04128         __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
04129           gtid, buf);
04130     });
04131 
04132     if (__kmp_env_consistency_check) {
04133         if ((mask == NULL) || (*mask == NULL)) {
04134             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
04135         }
04136         else {
04137             int proc;
04138             int num_procs = 0;
04139 
04140             for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
04141                 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
04142                     continue;
04143                 }
04144                 num_procs++;
04145                 if (! KMP_CPU_ISSET(proc, fullMask)) {
04146                     KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
04147                     break;
04148                 }
04149             }
04150             if (num_procs == 0) {
04151                 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
04152             }
04153 
04154 # if KMP_OS_WINDOWS && KMP_ARCH_X86_64
04155             if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
04156                 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
04157             }
04158 # endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
04159 
04160         }
04161     }
04162 
04163     th = __kmp_threads[gtid];
04164     KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
04165     retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
04166     if (retval == 0) {
04167         KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
04168     }
04169 
04170 # if OMP_40_ENABLED
04171     th->th.th_current_place = KMP_PLACE_UNDEFINED;
04172     th->th.th_new_place = KMP_PLACE_UNDEFINED;
04173     th->th.th_first_place = 0;
04174     th->th.th_last_place = __kmp_affinity_num_masks - 1;
04175 # endif
04176 
04177     return retval;
04178 }
04179 
04180 
04181 int
04182 __kmp_aux_get_affinity(void **mask)
04183 {
04184     int gtid;
04185     int retval;
04186     kmp_info_t *th;
04187 
04188     if (! KMP_AFFINITY_CAPABLE()) {
04189         return -1;
04190     }
04191 
04192     gtid = __kmp_entry_gtid();
04193     th = __kmp_threads[gtid];
04194     KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
04195 
04196     KA_TRACE(1000, ;{
04197         char buf[KMP_AFFIN_MASK_PRINT_LEN];
04198         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04199           th->th.th_affin_mask);
04200         __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
04201     });
04202 
04203     if (__kmp_env_consistency_check) {
04204         if ((mask == NULL) || (*mask == NULL)) {
04205             KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
04206         }
04207     }
04208 
04209 # if ! KMP_OS_WINDOWS
04210 
04211     retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
04212     KA_TRACE(1000, ;{
04213         char buf[KMP_AFFIN_MASK_PRINT_LEN];
04214         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04215           (kmp_affin_mask_t *)(*mask));
04216         __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
04217     });
04218     return retval;
04219 
04220 # else
04221 
04222     KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
04223     return 0;
04224 
04225 # endif /* KMP_OS_WINDOWS */
04226 
04227 }
04228 
04229 
04230 int
04231 __kmp_aux_set_affinity_mask_proc(int proc, void **mask)
04232 {
04234 
04235     if (! KMP_AFFINITY_CAPABLE()) {
04236         return -1;
04237     }
04238 
04239     KA_TRACE(1000, ;{
04240         int gtid = __kmp_entry_gtid();
04241         char buf[KMP_AFFIN_MASK_PRINT_LEN];
04242         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04243           (kmp_affin_mask_t *)(*mask));
04244         __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
04245           proc, gtid, buf);
04246     });
04247 
04248     if (__kmp_env_consistency_check) {
04249         if ((mask == NULL) || (*mask == NULL)) {
04250             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
04251         }
04252     }
04253 
04254     if ((proc < 0) || (proc >= KMP_CPU_SETSIZE)) {
04255         return -1;
04256     }
04257     if (! KMP_CPU_ISSET(proc, fullMask)) {
04258         return -2;
04259     }
04260 
04261     KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
04262     return 0;
04263 }
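
//
// Added illustration (not part of the original source): a sketch of how
// the public API that these __kmp_aux_* routines back is typically used
// from user code (error handling elided; see omp.h for the declarations).
//
#if 0
#include <omp.h>
static void
example_pin_self_to_proc(int proc)
{
    kmp_affinity_mask_t mask;
    kmp_create_affinity_mask(&mask);          // start with an empty mask
    kmp_set_affinity_mask_proc(proc, &mask);  // add one OS proc id
    kmp_set_affinity(&mask);                  // bind the calling thread
    kmp_destroy_affinity_mask(&mask);
}
#endif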
04264 
04265 
04266 int
04267 __kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
04268 {
04270 
04271     if (! KMP_AFFINITY_CAPABLE()) {
04272         return -1;
04273     }
04274 
04275     KA_TRACE(1000, ;{
04276         int gtid = __kmp_entry_gtid();
04277         char buf[KMP_AFFIN_MASK_PRINT_LEN];
04278         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04279           (kmp_affin_mask_t *)(*mask));
04280         __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
04281           proc, gtid, buf);
04282     });
04283 
04284     if (__kmp_env_consistency_check) {
04285         if ((mask == NULL) || (*mask == NULL)) {
04286             KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
04287         }
04288     }
04289 
04290     if ((proc < 0) || (proc >= KMP_CPU_SETSIZE)) {
04291         return -1;
04292     }
04293     if (! KMP_CPU_ISSET(proc, fullMask)) {
04294         return -2;
04295     }
04296 
04297     KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
04298     return 0;
04299 }
04300 
04301 
04302 int
04303 __kmp_aux_get_affinity_mask_proc(int proc, void **mask)
04304 {
04306 
04307     if (! KMP_AFFINITY_CAPABLE()) {
04308         return -1;
04309     }
04310 
04311     KA_TRACE(1000, ;{
04312         int gtid = __kmp_entry_gtid();
04313         char buf[KMP_AFFIN_MASK_PRINT_LEN];
04314         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04315           (kmp_affin_mask_t *)(*mask));
04316         __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
04317           proc, gtid, buf);
04318     });
04319 
04320     if (__kmp_env_consistency_check) {
04321         if ((mask == NULL) || (*mask == NULL)) {
04322             KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
04323         }
04324     }
04325 
04326     if ((proc < 0) || (proc >= KMP_CPU_SETSIZE)) {
04327         return 0;
04328     }
04329     if (! KMP_CPU_ISSET(proc, fullMask)) {
04330         return 0;
04331     }
04332 
04333     return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
04334 }
04335 
04336 # if KMP_MIC
04337 
04338 // Dynamic affinity settings - Affinity balanced
04339 void __kmp_balanced_affinity( int tid, int nthreads )
04340 {
04341     if( __kmp_affinity_uniform_topology() ) {
04342         int coreID;
04343         int threadID;
04344         // Number of hyper threads per core in HT machine
04345         int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
04346         // Number of cores
04347         int ncores = __kmp_ncores;
04348         // How many threads will be bound to each core
04349         int chunk = nthreads / ncores;
04350         // How many cores will have an additional thread bound to it - "big cores"
04351         int big_cores = nthreads % ncores;
04352         // Number of threads on the big cores
04353         int big_nth = ( chunk + 1 ) * big_cores;
04354         if( tid < big_nth ) {
04355             coreID = tid / (chunk + 1 );
04356             threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
04357         } else { //tid >= big_nth
04358             coreID = ( tid - big_cores ) / chunk;
04359             threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
04360         }
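
        //
        // Added annotation (not in the original source): e.g. nthreads=10
        // on ncores=4 gives chunk=2, big_cores=2, big_nth=6; tids 0-5 fill
        // the two "big" cores three threads apiece (tid 4 -> coreID
        // 4 / 3 = 1), and tid 7 lands on coreID (7 - 2) / 2 = 2.
        //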
04361 
04362         KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
04363           "Illegal set affinity operation when not capable");
04364 
04365         kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
04366         KMP_CPU_ZERO(mask);
04367 
04368         // Granularity == thread
04369         if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
04370             int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
04371             KMP_CPU_SET( osID, mask);
04372         } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
04373             for( int i = 0; i < __kmp_nth_per_core; i++ ) {
04374                 int osID;
04375                 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
04376                 KMP_CPU_SET( osID, mask);
04377             }
04378         }
04379         if (__kmp_affinity_verbose) {
04380             char buf[KMP_AFFIN_MASK_PRINT_LEN];
04381             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
04382             KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf);
04383         }
04384         __kmp_set_system_affinity( mask, TRUE );
04385     } else { // Non-uniform topology
04386 
04387         kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
04388         KMP_CPU_ZERO(mask);
04389 
04390         // Number of hyper threads per core in HT machine
04391         int nth_per_core = __kmp_nThreadsPerCore;
04392         int core_level;
04393         if( nth_per_core > 1 ) {
04394             core_level = __kmp_aff_depth - 2;
04395         } else {
04396             core_level = __kmp_aff_depth - 1;
04397         }
04398 
04399         // Number of cores - maximum value; it does not count trailing cores with 0 processors
04400         int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
04401 
04402         // As a performance optimization, handle the special case nthreads == __kmp_avail_proc separately
04403         if( nthreads == __kmp_avail_proc ) {
04404             if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
04405                 int osID = address2os[ tid ].second;
04406                 KMP_CPU_SET( osID, mask);
04407             } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
04408                 int coreID = address2os[ tid ].first.labels[ core_level ];
04409                 // Count the osIDs found for the current core; at most nth_per_core exist,
04410                 // and since address2os is sorted we can break once cnt == nth_per_core
04411                 int cnt = 0;
04412                 for( int i = 0; i < __kmp_avail_proc; i++ ) {
04413                     int osID = address2os[ i ].second;
04414                     int core = address2os[ i ].first.labels[ core_level ];
04415                     if( core == coreID ) {
04416                         KMP_CPU_SET( osID, mask);
04417                         cnt++;
04418                         if( cnt == nth_per_core ) {
04419                             break;
04420                         }
04421                     }
04422                 }
04423             }
04424         } else if( nthreads <= __kmp_ncores ) {
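            // One thread per occupied core: "core" counts cores that still
            // have available processors, and thread tid binds to the tid-th
            // such core (all of its contexts for granularity=core, the first
            // available one for granularity=thread).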
04425 
04426             int core = 0;
04427             for( int i = 0; i < ncores; i++ ) {
04428                 // Check whether this core has any available processors in procarr[]
04429                 int in_mask = 0;
04430                 for( int j = 0; j < nth_per_core; j++ ) {
04431                     if( procarr[ i * nth_per_core + j ] != -1 ) {
04432                         in_mask = 1;
04433                         break;
04434                     }
04435                 }
04436                 if( in_mask ) {
04437                     if( tid == core ) {
04438                         for( int j = 0; j < nth_per_core; j++ ) {
04439                             int osID = procarr[ i * nth_per_core + j ];
04440                             if( osID != -1 ) {
04441                                 KMP_CPU_SET( osID, mask );
04442                                 // For granularity=thread it is enough to set the first available osID for this core
04443                                 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
04444                                     break;
04445                                 }
04446                             }
04447                         }
04448                         break;
04449                     } else {
04450                         core++;
04451                     }
04452                 }
04453             }
04454 
04455         } else { // nthreads > __kmp_ncores
04456 
04457             // Array holding the number of available processors on each core
04458             int nproc_at_core[ ncores ];
04459             // Array holding the number of cores with exactly x available processors
04460             int ncores_with_x_procs[ nth_per_core + 1 ];
04461             // Array holding the number of cores with between x and nth_per_core available processors
04462             int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
04463 
04464             for( int i = 0; i <= nth_per_core; i++ ) {
04465                 ncores_with_x_procs[ i ] = 0;
04466                 ncores_with_x_to_max_procs[ i ] = 0;
04467             }
04468 
04469             for( int i = 0; i < ncores; i++ ) {
04470                 int cnt = 0;
04471                 for( int j = 0; j < nth_per_core; j++ ) {
04472                     if( procarr[ i * nth_per_core + j ] != -1 ) {
04473                         cnt++;
04474                     }
04475                 }
04476                 nproc_at_core[ i ] = cnt;
04477                 ncores_with_x_procs[ cnt ]++;
04478             }
04479 
04480             for( int i = 0; i <= nth_per_core; i++ ) {
04481                 for( int j = i; j <= nth_per_core; j++ ) {
04482                     ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
04483                 }
04484             }
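            // Worked example with illustrative numbers: nth_per_core = 4 and
            // four cores exposing 4, 4, 2 and 0 available procs yield
            //   ncores_with_x_procs        = { 1, 0, 1, 0, 2 }   (indexed 0..4)
            //   ncores_with_x_to_max_procs = { 4, 3, 3, 2, 2 }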
04485 
04486             // Maximum possible number of hardware contexts
04487             int nproc = nth_per_core * ncores;
04488             // Array holding the number of threads assigned to each context
04489             int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
04489             int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
04490             for( int i = 0; i < nproc; i++ ) {
04491                 newarr[ i ] = 0;
04492             }
04493 
04494             int nth = nthreads;
04495             int flag = 0;
04496             while( nth > 0 ) {
04497                 for( int j = 1; j <= nth_per_core; j++ ) {
04498                     int cnt = ncores_with_x_to_max_procs[ j ];
04499                     for( int i = 0; i < ncores; i++ ) {
04500                         // Skip cores with 0 available processors
04501                         if( nproc_at_core[ i ] == 0 ) {
04502                             continue;
04503                         }
04504                         for( int k = 0; k < nth_per_core; k++ ) {
04505                             if( procarr[ i * nth_per_core + k ] != -1 ) {
04506                                 if( newarr[ i * nth_per_core + k ] == 0 ) {
04507                                     newarr[ i * nth_per_core + k ] = 1;
04508                                     cnt--;
04509                                     nth--;
04510                                     break;
04511                                 } else {
04512                                     if( flag != 0 ) {
04513                                         newarr[ i * nth_per_core + k ] ++;
04514                                         cnt--;
04515                                         nth--;
04516                                         break;
04517                                     }
04518                                 }
04519                             }
04520                         }
04521                         if( cnt == 0 || nth == 0 ) {
04522                             break;
04523                         }
04524                     }
04525                     if( nth == 0 ) {
04526                         break;
04527                     }
04528                 }
04529                 flag = 1;
04530             }
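            // newarr[ctx] now holds how many threads landed on each hardware
            // context: the first sweep (flag == 0) places at most one thread
            // per free context, and later sweeps (flag == 1) stack additional
            // threads onto contexts that are already in use.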
04531             int sum = 0;
04532             for( int i = 0; i < nproc; i++ ) {
04533                 sum += newarr[ i ];
04534                 if( sum > tid ) {
04535                     // Granularity == thread
04536                     if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
04537                         int osID = procarr[ i ];
04538                         KMP_CPU_SET( osID, mask);
04539                     } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
04540                         int coreID = i / nth_per_core;
04541                         for( int ii = 0; ii < nth_per_core; ii++ ) {
04542                             int osID = procarr[ coreID * nth_per_core + ii ];
04543                             if( osID != -1 ) {
04544                                 KMP_CPU_SET( osID, mask);
04545                             }
04546                         }
04547                     }
04548                     break;
04549                 }
04550             }
04551             __kmp_free( newarr );
04552         }
04553 
04554         if (__kmp_affinity_verbose) {
04555             char buf[KMP_AFFIN_MASK_PRINT_LEN];
04556             __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
04557             KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf);
04558         }
04559         __kmp_set_system_affinity( mask, TRUE );
04560     }
04561 }
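
The uniform-topology arithmetic above is self-contained enough to check in
isolation. A standalone sketch (hypothetical machine parameters; the mapping
mirrors the uniform branch of __kmp_balanced_affinity):

    #include <stdio.h>

    // Mirror of the uniform-topology tid -> (coreID, threadID) mapping.
    static void balanced_place(int tid, int nthreads, int ncores,
                               int nth_per_core, int *coreID, int *threadID)
    {
        int chunk = nthreads / ncores;      // threads on each ordinary core
        int big_cores = nthreads % ncores;  // cores carrying one extra thread
        int big_nth = ( chunk + 1 ) * big_cores;
        if( tid < big_nth ) {
            *coreID = tid / ( chunk + 1 );
            *threadID = ( tid % ( chunk + 1 ) ) % nth_per_core;
        } else {
            *coreID = ( tid - big_cores ) / chunk;
            *threadID = ( ( tid - big_cores ) % chunk ) % nth_per_core;
        }
    }

    int main()
    {
        // Hypothetical machine: 4 cores x 4 hardware threads, 10 OpenMP threads.
        for( int tid = 0; tid < 10; tid++ ) {
            int c, t;
            balanced_place( tid, 10, 4, 4, &c, &t );
            printf( "tid %d -> core %d, thread %d\n", tid, c, t );
        }
        return 0;
    }
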
04562 # endif // KMP_MIC
04563 
04564 #elif KMP_OS_DARWIN
04565     // affinity not supported
04566 #else
04567     #error "Unknown or unsupported OS"
04568 #endif // KMP_OS_WINDOWS || KMP_OS_LINUX
