00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "kmp.h"
00048 #include "kmp_i18n.h"
00049 #include "kmp_io.h"
00050 #include "kmp_str.h"
00051
00052
00053 #if KMP_OS_WINDOWS || KMP_OS_LINUX
00054
00055
00056
00057
00058 char *
00059 __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
00060 {
00061 KMP_ASSERT(buf_len >= 40);
00062 char *scan = buf;
00063 char *end = buf + buf_len - 1;
00064
00065
00066
00067
00068 int i;
00069 for (i = 0; i < KMP_CPU_SETSIZE; i++) {
00070 if (KMP_CPU_ISSET(i, mask)) {
00071 break;
00072 }
00073 }
00074 if (i == KMP_CPU_SETSIZE) {
00075 sprintf(scan, "{<empty>}");
00076 while (*scan != '\0') scan++;
00077 KMP_ASSERT(scan <= end);
00078 return buf;
00079 }
00080
00081 sprintf(scan, "{%d", i);
00082 while (*scan != '\0') scan++;
00083 i++;
00084 for (; i < KMP_CPU_SETSIZE; i++) {
00085 if (! KMP_CPU_ISSET(i, mask)) {
00086 continue;
00087 }
00088
00089
00090
00091
00092
00093
00094
00095 if (end - scan < 15) {
00096 break;
00097 }
00098 sprintf(scan, ",%-d", i);
00099 while (*scan != '\0') scan++;
00100 }
00101 if (i < KMP_CPU_SETSIZE) {
00102 sprintf(scan, ",...");
00103 while (*scan != '\0') scan++;
00104 }
00105 sprintf(scan, "}");
00106 while (*scan != '\0') scan++;
00107 KMP_ASSERT(scan <= end);
00108 return buf;
00109 }
00110
00111
// Set 'mask' to cover every processor on the machine.
void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_OS_WINDOWS && KMP_ARCH_X86_64

    // With multiple Windows processor groups, a proc's global number is
    // group * (CHAR_BIT * sizeof(DWORD_PTR)) + its number within the group.
    if (__kmp_num_proc_groups > 1) {
        int group;
        struct GROUP_AFFINITY ga;   // NOTE(review): unused — candidate for removal
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif

    // Single group (or non-Windows): set bits 0 .. __kmp_xproc - 1.
    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166 # if !defined(KMP_DEBUG) && !defined(COVER)
00167
// A multi-level machine-topology address.  labels[0] is the outermost
// (package) level; only the first 'depth' entries of labels/childNums
// are meaningful.
class Address {
public:
    static const unsigned maxDepth = 32;  // upper bound on topology depth
    unsigned labels[maxDepth];     // hardware id at each level, outermost first
    unsigned childNums[maxDepth];  // ordinal among siblings at each level
                                   // (filled in by __kmp_affinity_assign_child_nums)
    unsigned depth;                // number of valid levels
    unsigned leader;               // presumably marks the first proc of a
                                   // granularity group — set by callers; TODO confirm
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    // Copies labels/childNums up to 'depth'; note that 'leader' is
    // deliberately reset to FALSE rather than copied from 'b'.
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    // Equal iff the depths match and the labels agree at every level.
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if(labels[i] != b.labels[i])
                return false;
        return true;
    }
    // True when the two addresses agree on all but the innermost 'level'
    // levels, i.e. they share an ancestor 'level' levels up the tree.
    // NOTE(review): 'level' (int) is compared against 'depth' (unsigned);
    // a negative level converts to a huge unsigned value here — confirm
    // callers never pass one.
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if (level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if(labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};
00209
// Pairs a topology Address with the OS processor id it describes; the
// address2os tables built below are arrays of these.
class AddrUnsPair {
public:
    Address first;    // topology address of the proc
    unsigned second;  // OS processor id
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};
00224
00225 # else
00226
// Out-of-line variant of Address used for debug/coverage builds (selected
// by the preprocessor condition above); behavior is identical to the
// inline version.
class Address {
public:
    static const unsigned maxDepth = 32;  // upper bound on topology depth
    unsigned labels[maxDepth];     // hardware id at each level, outermost first
    unsigned childNums[maxDepth];  // ordinal among siblings at each level
    unsigned depth;                // number of valid levels
    unsigned leader;               // presumably marks the first proc of a
                                   // granularity group — TODO confirm
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
{
    depth = _depth;
    leader = FALSE;
}

// Copies labels/childNums up to 'depth'; 'leader' is reset, not copied.
Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

// Equal iff the depths match and the labels agree at every level.
bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if(labels[i] != b.labels[i])
            return false;
    return true;
}

// True when the two addresses agree on all but the innermost 'level'
// levels.  NOTE(review): int 'level' compared against unsigned 'depth'.
bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if (level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if(labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}
00280
// Out-of-line variant of AddrUnsPair for debug/coverage builds; pairs a
// topology Address with the OS processor id it describes.
class AddrUnsPair {
public:
    Address first;    // topology address of the proc
    unsigned second;  // OS processor id
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}
00300
00301 # endif
00302
00303
00304 static int
00305 __kmp_affinity_cmp_Address_labels(const void *a, const void *b)
00306 {
00307 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
00308 ->first);
00309 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
00310 ->first);
00311 unsigned depth = aa->depth;
00312 unsigned i;
00313 KMP_DEBUG_ASSERT(depth == bb->depth);
00314 for (i = 0; i < depth; i++) {
00315 if (aa->labels[i] < bb->labels[i]) return -1;
00316 if (aa->labels[i] > bb->labels[i]) return 1;
00317 }
00318 return 0;
00319 }
00320
00321
// qsort() comparator for "compact" thread placement: compares two
// AddrUnsPairs by sibling ordinals (childNums), giving priority to the
// __kmp_affinity_compact innermost levels (compared first, starting with
// the very innermost), and only then the remaining outer levels,
// outermost first.
static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    // NOTE(review): unsigned i is compared against __kmp_affinity_compact;
    // presumably that global is a small non-negative int — confirm.
    KMP_DEBUG_ASSERT(__kmp_affinity_compact <= depth);
    // The innermost __kmp_affinity_compact levels, innermost level first.
    for (i = 0; i < __kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    // The remaining outer levels, outermost level first.
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360 static void
00361 __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
00362 int numAddrs)
00363 {
00364 KMP_DEBUG_ASSERT(numAddrs > 0);
00365 int depth = address2os->first.depth;
00366 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
00367 unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
00368 * sizeof(unsigned));
00369 int labCt;
00370 for (labCt = 0; labCt < depth; labCt++) {
00371 address2os[0].first.childNums[labCt] = counts[labCt] = 0;
00372 lastLabel[labCt] = address2os[0].first.labels[labCt];
00373 }
00374 int i;
00375 for (i = 1; i < numAddrs; i++) {
00376 for (labCt = 0; labCt < depth; labCt++) {
00377 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
00378 int labCt2;
00379 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
00380 counts[labCt2] = 0;
00381 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
00382 }
00383 counts[labCt]++;
00384 lastLabel[labCt] = address2os[i].first.labels[labCt];
00385 break;
00386 }
00387 }
00388 for (labCt = 0; labCt < depth; labCt++) {
00389 address2os[i].first.childNums[labCt] = counts[labCt];
00390 }
00391 for (; labCt < Address::maxDepth; labCt++) {
00392 address2os[i].first.childNums[labCt] = 0;
00393 }
00394 }
00395 }
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
// The set of OS procs the process may use: assigned during affinity
// initialization (not shown in this chunk) and consulted by the
// topology-detection routines below.
static kmp_affin_mask_t *fullMask = NULL;

// Accessor for the full mask.
kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }

// Machine shape discovered by the topology-detection routines below.
static int nCoresPerPkg, nPackages;
int __kmp_nThreadsPerCore;
00418
00419
00420
00421
00422
00423
00424
00425 inline static bool
00426 __kmp_affinity_uniform_topology()
00427 {
00428 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
00429 }
00430
00431
00432
00433
00434
00435
// Print one KMP_AFFINITY verbose line per OS proc, listing the proc's
// label at every topology level.  pkgLevel / coreLevel / threadLevel give
// the index of the corresponding level within each address, or -1 if that
// level is absent.  Levels between the package level and the top are
// printed as numbered nodes; levels not matching any known kind are
// printed as "L<level>".
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        // Build "<kind> <label> " pairs for every level of this address.
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}
00472
00473
00474
00475
00476
00477
00478
// Build a "flat" topology map: every available OS proc is treated as its
// own package.  Returns the map depth (1) with *address2os filled in, or
// 0 if only the global shape variables were set (affinity not capable, or
// affinity_none).  *msg_id is always left as kmp_i18n_null here.
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    // Without affinity support we can only report totals derived from
    // __kmp_xproc; no per-proc table is built.
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    // Affinity is supported: one package per proc in the full mask.
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    __kmp_ht_enabled = FALSE;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    // Construct the table: depth-1 addresses whose single label is the
    // OS proc id.
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        // Skip procs outside the process' initial affinity mask.
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        // Only one topology level exists, so the granularity mask either
        // collapses that level (package or coarser => 1) or keeps the
        // individual proc (0).
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}
00571
00572
00573 # if KMP_OS_WINDOWS && KMP_ARCH_X86_64
00574
00575
00576
00577
00578
00579
00580
00581
00582
// Windows x64 only: build a two-level map of (processor group,
// proc-within-group) when the process spans multiple processor groups.
// Returns the map depth (2) on success, or -1 when this method does not
// apply (affinity not capable, or the full mask fits in a single group).
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    // __kmp_get_proc_group() presumably returns the containing group
    // number (>= 0) when a single group covers the whole mask — in that
    // case there is no group structure worth mapping.
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        return -1;
    }

    // Depth-2 addresses: labels[0] = group, labels[1] = proc in group
    // (CHAR_BIT * sizeof(DWORD_PTR) procs per group).
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        // Skip procs outside the process' initial affinity mask.
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // NOTE(review): gran_str is computed but never used — it looks
            // like a diagnostic message (warning that this granularity is
            // unsupported with proc groups) was dropped here; confirm
            // against upstream before removing it.
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}
00654
00655 # endif
00656
00657
00658 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
00659
// Return the number of bits needed to hold 'count' distinct values, i.e.
// ceil(log2(count)); returns 0 for count <= 1.  Used to decode the
// bit-field layout of APIC ids.
static int
__kmp_cpuid_mask_width(int count)
{
    int width;
    for (width = 0; (1 << width) < count; ++width)
        ;
    return width;
}
00668
00669
// Per-OS-proc data gathered via cpuid while bound to that proc
// (legacy leaf 1 / leaf 4 topology method below).
class apicThreadInfo {
public:
    unsigned osId;             // OS processor id the data was read on
    unsigned apicId;           // initial APIC id: cpuid leaf 1, EBX bits 31:24
    unsigned maxCoresPerPkg;   // cpuid leaf 4, EAX bits 31:26 plus 1 (or 1)
    unsigned maxThreadsPerPkg; // cpuid leaf 1, EBX bits 23:16 (or 1)
    unsigned pkgId;            // apicId with the thread/core fields shifted out
    unsigned coreId;           // core field of apicId
    unsigned threadId;         // thread field of apicId
};
00680
00681
00682 static int
00683 __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
00684 {
00685 const apicThreadInfo *aa = (const apicThreadInfo *)a;
00686 const apicThreadInfo *bb = (const apicThreadInfo *)b;
00687 if (aa->osId < bb->osId) return -1;
00688 if (aa->osId > bb->osId) return 1;
00689 return 0;
00690 }
00691
00692
00693 static int
00694 __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
00695 {
00696 const apicThreadInfo *aa = (const apicThreadInfo *)a;
00697 const apicThreadInfo *bb = (const apicThreadInfo *)b;
00698 if (aa->pkgId < bb->pkgId) return -1;
00699 if (aa->pkgId > bb->pkgId) return 1;
00700 if (aa->coreId < bb->coreId) return -1;
00701 if (aa->coreId > bb->coreId) return 1;
00702 if (aa->threadId < bb->threadId) return -1;
00703 if (aa->threadId > bb->threadId) return 1;
00704 return 0;
00705 }
00706
00707
00708
00709
00710
00711
00712
00713
00714 static int
00715 __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
00716 kmp_i18n_id_t *const msg_id)
00717 {
00718 int rc;
00719 *address2os = NULL;
00720 *msg_id = kmp_i18n_null;
00721
00722 # if KMP_MIC
00723 {
00724
00725
00726
00727 kmp_cpuid buf;
00728 __kmp_x86_cpuid(0, 0, &buf);
00729 if (buf.eax < 4) {
00730 *msg_id = kmp_i18n_str_NoLeaf4Support;
00731 return -1;
00732 }
00733 }
00734 # endif // KMP_MIC
00735
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746 if (! KMP_AFFINITY_CAPABLE()) {
00747
00748
00749
00750
00751 KMP_ASSERT(__kmp_affinity_type == affinity_none);
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761 kmp_cpuid buf;
00762 __kmp_x86_cpuid(1, 0, &buf);
00763 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
00764 if (maxThreadsPerPkg == 0) {
00765 maxThreadsPerPkg = 1;
00766 }
00767
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781 __kmp_x86_cpuid(0, 0, &buf);
00782 if (buf.eax >= 4) {
00783 __kmp_x86_cpuid(4, 0, &buf);
00784 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
00785 }
00786 else {
00787 nCoresPerPkg = 1;
00788 }
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801
00802
00803
00804
00805
00806
00807 __kmp_ncores = __kmp_xproc;
00808 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
00809 __kmp_nThreadsPerCore = 1;
00810 __kmp_ht_enabled = FALSE;
00811 if (__kmp_affinity_verbose) {
00812 KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
00813 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
00814 if (__kmp_affinity_uniform_topology()) {
00815 KMP_INFORM(Uniform, "KMP_AFFINITY");
00816 } else {
00817 KMP_INFORM(NonUniform, "KMP_AFFINITY");
00818 }
00819 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
00820 __kmp_nThreadsPerCore, __kmp_ncores);
00821 }
00822 return 0;
00823 }
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835 kmp_affin_mask_t *oldMask;
00836 KMP_CPU_ALLOC(oldMask);
00837 KMP_ASSERT(oldMask != NULL);
00838 __kmp_get_system_affinity(oldMask, TRUE);
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872 unsigned i;
00873 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
00874 __kmp_avail_proc * sizeof(apicThreadInfo));
00875 unsigned nApics = 0;
00876 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
00877
00878
00879
00880 if (! KMP_CPU_ISSET(i, fullMask)) {
00881 continue;
00882 }
00883 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
00884
00885 __kmp_affinity_bind_thread(i);
00886 threadInfo[nApics].osId = i;
00887
00888
00889
00890
00891 kmp_cpuid buf;
00892 __kmp_x86_cpuid(1, 0, &buf);
00893 if (! (buf.edx >> 9) & 1) {
00894 __kmp_set_system_affinity(oldMask, TRUE);
00895 __kmp_free(threadInfo);
00896 KMP_CPU_FREE(oldMask);
00897 *msg_id = kmp_i18n_str_ApicNotPresent;
00898 return -1;
00899 }
00900 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
00901 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
00902 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
00903 threadInfo[nApics].maxThreadsPerPkg = 1;
00904 }
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914 __kmp_x86_cpuid(0, 0, &buf);
00915 if (buf.eax >= 4) {
00916 __kmp_x86_cpuid(4, 0, &buf);
00917 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
00918 }
00919 else {
00920 threadInfo[nApics].maxCoresPerPkg = 1;
00921 }
00922
00923
00924
00925
00926
00927 int widthCT = __kmp_cpuid_mask_width(
00928 threadInfo[nApics].maxThreadsPerPkg);
00929 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
00930
00931 int widthC = __kmp_cpuid_mask_width(
00932 threadInfo[nApics].maxCoresPerPkg);
00933 int widthT = widthCT - widthC;
00934 if (widthT < 0) {
00935
00936
00937
00938
00939
00940 __kmp_set_system_affinity(oldMask, TRUE);
00941 __kmp_free(threadInfo);
00942 KMP_CPU_FREE(oldMask);
00943 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
00944 return -1;
00945 }
00946
00947 int maskC = (1 << widthC) - 1;
00948 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
00949 &maskC;
00950
00951 int maskT = (1 << widthT) - 1;
00952 threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
00953
00954 nApics++;
00955 }
00956
00957
00958
00959
00960
00961 __kmp_set_system_affinity(oldMask, TRUE);
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973 KMP_ASSERT(nApics > 0);
00974 if (nApics == 1) {
00975 __kmp_ncores = nPackages = 1;
00976 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
00977 __kmp_ht_enabled = FALSE;
00978 if (__kmp_affinity_verbose) {
00979 char buf[KMP_AFFIN_MASK_PRINT_LEN];
00980 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
00981
00982 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
00983 if (__kmp_affinity_respect_mask) {
00984 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
00985 } else {
00986 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
00987 }
00988 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
00989 KMP_INFORM(Uniform, "KMP_AFFINITY");
00990 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
00991 __kmp_nThreadsPerCore, __kmp_ncores);
00992 }
00993
00994 if (__kmp_affinity_type == affinity_none) {
00995 __kmp_free(threadInfo);
00996 KMP_CPU_FREE(oldMask);
00997 return 0;
00998 }
00999
01000 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
01001 Address addr(1);
01002 addr.labels[0] = threadInfo[0].pkgId;
01003 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
01004
01005 if (__kmp_affinity_gran_levels < 0) {
01006 __kmp_affinity_gran_levels = 0;
01007 }
01008
01009 if (__kmp_affinity_verbose) {
01010 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
01011 }
01012
01013 __kmp_free(threadInfo);
01014 KMP_CPU_FREE(oldMask);
01015 return 1;
01016 }
01017
01018
01019
01020
01021 qsort(threadInfo, nApics, sizeof(*threadInfo),
01022 __kmp_affinity_cmp_apicThreadInfo_phys_id);
01023
01024
01025
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039 nPackages = 1;
01040 nCoresPerPkg = 1;
01041 __kmp_nThreadsPerCore = 1;
01042 unsigned nCores = 1;
01043
01044 unsigned pkgCt = 1;
01045 unsigned lastPkgId = threadInfo[0].pkgId;
01046 unsigned coreCt = 1;
01047 unsigned lastCoreId = threadInfo[0].coreId;
01048 unsigned threadCt = 1;
01049 unsigned lastThreadId = threadInfo[0].threadId;
01050
01051
01052 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
01053 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
01054
01055 for (i = 1; i < nApics; i++) {
01056 if (threadInfo[i].pkgId != lastPkgId) {
01057 nCores++;
01058 pkgCt++;
01059 lastPkgId = threadInfo[i].pkgId;
01060 if (coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
01061 coreCt = 1;
01062 lastCoreId = threadInfo[i].coreId;
01063 if (threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
01064 threadCt = 1;
01065 lastThreadId = threadInfo[i].threadId;
01066
01067
01068
01069
01070
01071
01072 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
01073 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
01074 continue;
01075 }
01076
01077 if (threadInfo[i].coreId != lastCoreId) {
01078 nCores++;
01079 coreCt++;
01080 lastCoreId = threadInfo[i].coreId;
01081 if (threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
01082 threadCt = 1;
01083 lastThreadId = threadInfo[i].threadId;
01084 }
01085 else if (threadInfo[i].threadId != lastThreadId) {
01086 threadCt++;
01087 lastThreadId = threadInfo[i].threadId;
01088 }
01089 else {
01090 __kmp_free(threadInfo);
01091 KMP_CPU_FREE(oldMask);
01092 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
01093 return -1;
01094 }
01095
01096
01097
01098
01099
01100 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
01101 || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
01102 __kmp_free(threadInfo);
01103 KMP_CPU_FREE(oldMask);
01104 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
01105 return -1;
01106 }
01107 }
01108 nPackages = pkgCt;
01109 if (coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
01110 if (threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
01111
01112
01113
01114
01115
01116
01117
01118 __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
01119 __kmp_ncores = nCores;
01120 if (__kmp_affinity_verbose) {
01121 char buf[KMP_AFFIN_MASK_PRINT_LEN];
01122 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
01123
01124 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
01125 if (__kmp_affinity_respect_mask) {
01126 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
01127 } else {
01128 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
01129 }
01130 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
01131 if (__kmp_affinity_uniform_topology()) {
01132 KMP_INFORM(Uniform, "KMP_AFFINITY");
01133 } else {
01134 KMP_INFORM(NonUniform, "KMP_AFFINITY");
01135 }
01136 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
01137 __kmp_nThreadsPerCore, __kmp_ncores);
01138
01139 }
01140
01141 if (__kmp_affinity_type == affinity_none) {
01142 __kmp_free(threadInfo);
01143 KMP_CPU_FREE(oldMask);
01144 return 0;
01145 }
01146
01147
01148
01149
01150
01151
01152 int pkgLevel = 0;
01153 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
01154 int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
01155 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
01156
01157 KMP_ASSERT(depth > 0);
01158 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
01159
01160 for (i = 0; i < nApics; ++i) {
01161 Address addr(depth);
01162 unsigned os = threadInfo[i].osId;
01163 int d = 0;
01164
01165 if (pkgLevel >= 0) {
01166 addr.labels[d++] = threadInfo[i].pkgId;
01167 }
01168 if (coreLevel >= 0) {
01169 addr.labels[d++] = threadInfo[i].coreId;
01170 }
01171 if (threadLevel >= 0) {
01172 addr.labels[d++] = threadInfo[i].threadId;
01173 }
01174 (*address2os)[i] = AddrUnsPair(addr, os);
01175 }
01176
01177 if (__kmp_affinity_gran_levels < 0) {
01178
01179
01180
01181
01182 __kmp_affinity_gran_levels = 0;
01183 if ((threadLevel >= 0)
01184 && (__kmp_affinity_gran > affinity_gran_thread)) {
01185 __kmp_affinity_gran_levels++;
01186 }
01187 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
01188 __kmp_affinity_gran_levels++;
01189 }
01190 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
01191 __kmp_affinity_gran_levels++;
01192 }
01193 }
01194
01195 if (__kmp_affinity_verbose) {
01196 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
01197 coreLevel, threadLevel);
01198 }
01199
01200 __kmp_free(threadInfo);
01201 KMP_CPU_FREE(oldMask);
01202 return depth;
01203 }
01204
01205
01206
01207
01208
01209
01210
01211 static int
01212 __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
01213 kmp_i18n_id_t *const msg_id)
01214 {
01215 kmp_cpuid buf;
01216
01217 *address2os = NULL;
01218 *msg_id = kmp_i18n_null;
01219
01220
01221
01222
01223 __kmp_x86_cpuid(0, 0, &buf);
01224 if (buf.eax < 11) {
01225 *msg_id = kmp_i18n_str_NoLeaf11Support;
01226 return -1;
01227 }
01228 __kmp_x86_cpuid(11, 0, &buf);
01229 if (buf.ebx == 0) {
01230 *msg_id = kmp_i18n_str_NoLeaf11Support;
01231 return -1;
01232 }
01233
01234
01235
01236
01237
01238
01239
01240 int level;
01241 int threadLevel = -1;
01242 int coreLevel = -1;
01243 int pkgLevel = -1;
01244 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
01245
01246 for (level = 0;; level++) {
01247 if (level > 31) {
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
01260 return -1;
01261 }
01262 __kmp_x86_cpuid(11, level, &buf);
01263 if (buf.ebx == 0) {
01264 if (pkgLevel < 0) {
01265
01266
01267
01268 pkgLevel = level;
01269 level++;
01270 }
01271 break;
01272 }
01273 int kind = (buf.ecx >> 8) & 0xff;
01274 if (kind == 1) {
01275
01276
01277
01278 threadLevel = level;
01279 coreLevel = -1;
01280 pkgLevel = -1;
01281 __kmp_nThreadsPerCore = buf.ebx & 0xff;
01282 if (__kmp_nThreadsPerCore == 0) {
01283 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
01284 return -1;
01285 }
01286 }
01287 else if (kind == 2) {
01288
01289
01290
01291 coreLevel = level;
01292 pkgLevel = -1;
01293 nCoresPerPkg = buf.ebx & 0xff;
01294 if (nCoresPerPkg == 0) {
01295 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
01296 return -1;
01297 }
01298 }
01299 else {
01300 if (level <= 0) {
01301 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
01302 return -1;
01303 }
01304 if (pkgLevel >= 0) {
01305 continue;
01306 }
01307 pkgLevel = level;
01308 nPackages = buf.ebx & 0xff;
01309 if (nPackages == 0) {
01310 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
01311 return -1;
01312 }
01313 }
01314 }
01315 int depth = level;
01316
01317
01318
01319
01320
01321
01322
01323 if (threadLevel >= 0) {
01324 threadLevel = depth - threadLevel - 1;
01325 }
01326 if (coreLevel >= 0) {
01327 coreLevel = depth - coreLevel - 1;
01328 }
01329 KMP_DEBUG_ASSERT(pkgLevel >= 0);
01330 pkgLevel = depth - pkgLevel - 1;
01331
01332
01333
01334
01335
01336
01337
01338
01339 if (! KMP_AFFINITY_CAPABLE())
01340 {
01341
01342
01343
01344
01345 KMP_ASSERT(__kmp_affinity_type == affinity_none);
01346
01347 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
01348 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
01349 __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
01350 if (__kmp_affinity_verbose) {
01351 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
01352 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
01353 if (__kmp_affinity_uniform_topology()) {
01354 KMP_INFORM(Uniform, "KMP_AFFINITY");
01355 } else {
01356 KMP_INFORM(NonUniform, "KMP_AFFINITY");
01357 }
01358 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
01359 __kmp_nThreadsPerCore, __kmp_ncores);
01360 }
01361 return 0;
01362 }
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374 kmp_affin_mask_t *oldMask;
01375 KMP_CPU_ALLOC(oldMask);
01376 __kmp_get_system_affinity(oldMask, TRUE);
01377
01378
01379
01380
01381 AddrUnsPair *retval = (AddrUnsPair *)
01382 __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
01383
01384
01385
01386
01387
01388 int proc;
01389 int nApics = 0;
01390 for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
01391
01392
01393
01394 if (! KMP_CPU_ISSET(proc, fullMask)) {
01395 continue;
01396 }
01397 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
01398
01399 __kmp_affinity_bind_thread(proc);
01400
01401
01402
01403
01404
01405 Address addr(depth);
01406 int prev_shift = 0;
01407
01408 for (level = 0; level < depth; level++) {
01409 __kmp_x86_cpuid(11, level, &buf);
01410 unsigned apicId = buf.edx;
01411 if (buf.ebx == 0) {
01412 if (level != depth - 1) {
01413 KMP_CPU_FREE(oldMask);
01414 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
01415 return -1;
01416 }
01417 addr.labels[depth - level - 1] = apicId >> prev_shift;
01418 level++;
01419 break;
01420 }
01421 int shift = buf.eax & 0x1f;
01422 int mask = (1 << shift) - 1;
01423 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
01424 prev_shift = shift;
01425 }
01426 if (level != depth) {
01427 KMP_CPU_FREE(oldMask);
01428 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
01429 return -1;
01430 }
01431
01432 retval[nApics] = AddrUnsPair(addr, proc);
01433 nApics++;
01434 }
01435
01436
01437
01438
01439
01440 __kmp_set_system_affinity(oldMask, TRUE);
01441
01442
01443
01444
01445 KMP_ASSERT(nApics > 0);
01446 if (nApics == 1) {
01447 __kmp_ncores = nPackages = 1;
01448 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
01449 __kmp_ht_enabled = FALSE;
01450 if (__kmp_affinity_verbose) {
01451 char buf[KMP_AFFIN_MASK_PRINT_LEN];
01452 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
01453
01454 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
01455 if (__kmp_affinity_respect_mask) {
01456 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
01457 } else {
01458 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
01459 }
01460 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
01461 KMP_INFORM(Uniform, "KMP_AFFINITY");
01462 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
01463 __kmp_nThreadsPerCore, __kmp_ncores);
01464 }
01465
01466 if (__kmp_affinity_type == affinity_none) {
01467 __kmp_free(retval);
01468 KMP_CPU_FREE(oldMask);
01469 return 0;
01470 }
01471
01472
01473
01474
01475 Address addr(1);
01476 addr.labels[0] = retval[0].first.labels[pkgLevel];
01477 retval[0].first = addr;
01478
01479 if (__kmp_affinity_gran_levels < 0) {
01480 __kmp_affinity_gran_levels = 0;
01481 }
01482
01483 if (__kmp_affinity_verbose) {
01484 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
01485 }
01486
01487 *address2os = retval;
01488 KMP_CPU_FREE(oldMask);
01489 return 1;
01490 }
01491
01492
01493
01494
01495 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
01496
01497
01498
01499
01500 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
01501 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
01502 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
01503 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
01504 for (level = 0; level < depth; level++) {
01505 totals[level] = 1;
01506 maxCt[level] = 1;
01507 counts[level] = 1;
01508 last[level] = retval[0].first.labels[level];
01509 }
01510
01511
01512
01513
01514
01515
01516
01517 for (proc = 1; proc < nApics; proc++) {
01518 int level;
01519 for (level = 0; level < depth; level++) {
01520 if (retval[proc].first.labels[level] != last[level]) {
01521 unsigned j;
01522 for (j = level + 1; j < depth; j++) {
01523 totals[j]++;
01524 counts[j] = 1;
01525
01526
01527
01528
01529
01530
01531
01532 last[j] = retval[proc].first.labels[j];
01533 }
01534 totals[level]++;
01535 counts[level]++;
01536 if (counts[level] > maxCt[level]) {
01537 maxCt[level] = counts[level];
01538 }
01539 last[level] = retval[proc].first.labels[level];
01540 break;
01541 }
01542 else if (level == depth - 1) {
01543 __kmp_free(last);
01544 __kmp_free(maxCt);
01545 __kmp_free(counts);
01546 __kmp_free(totals);
01547 __kmp_free(retval);
01548 KMP_CPU_FREE(oldMask);
01549 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
01550 return -1;
01551 }
01552 }
01553 }
01554
01555
01556
01557
01558
01559
01560
01561 if (threadLevel >= 0) {
01562 __kmp_nThreadsPerCore = maxCt[threadLevel];
01563 }
01564 else {
01565 __kmp_nThreadsPerCore = 1;
01566 }
01567 __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
01568
01569 nPackages = totals[pkgLevel];
01570
01571 if (coreLevel >= 0) {
01572 __kmp_ncores = totals[coreLevel];
01573 nCoresPerPkg = maxCt[coreLevel];
01574 }
01575 else {
01576 __kmp_ncores = nPackages;
01577 nCoresPerPkg = 1;
01578 }
01579
01580
01581
01582
01583 unsigned prod = maxCt[0];
01584 for (level = 1; level < depth; level++) {
01585 prod *= maxCt[level];
01586 }
01587 bool uniform = (prod == totals[level - 1]);
01588
01589
01590
01591
01592 if (__kmp_affinity_verbose) {
01593 char mask[KMP_AFFIN_MASK_PRINT_LEN];
01594 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
01595
01596 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
01597 if (__kmp_affinity_respect_mask) {
01598 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
01599 } else {
01600 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
01601 }
01602 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
01603 if (uniform) {
01604 KMP_INFORM(Uniform, "KMP_AFFINITY");
01605 } else {
01606 KMP_INFORM(NonUniform, "KMP_AFFINITY");
01607 }
01608
01609 kmp_str_buf_t buf;
01610 __kmp_str_buf_init(&buf);
01611
01612 __kmp_str_buf_print(&buf, "%d", totals[0]);
01613 for (level = 1; level <= pkgLevel; level++) {
01614 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
01615 }
01616 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
01617 __kmp_nThreadsPerCore, __kmp_ncores);
01618
01619 __kmp_str_buf_free(&buf);
01620 }
01621
01622 if (__kmp_affinity_type == affinity_none) {
01623 __kmp_free(last);
01624 __kmp_free(maxCt);
01625 __kmp_free(counts);
01626 __kmp_free(totals);
01627 __kmp_free(retval);
01628 KMP_CPU_FREE(oldMask);
01629 return 0;
01630 }
01631
01632
01633
01634
01635
01636 int new_depth = 0;
01637 for (level = 0; level < depth; level++) {
01638 if ((maxCt[level] == 1) && (level != pkgLevel)) {
01639 continue;
01640 }
01641 new_depth++;
01642 }
01643
01644
01645
01646
01647
01648 if (new_depth != depth) {
01649 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
01650 sizeof(AddrUnsPair) * nApics);
01651 for (proc = 0; proc < nApics; proc++) {
01652 Address addr(new_depth);
01653 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
01654 }
01655 int new_level = 0;
01656 for (level = 0; level < depth; level++) {
01657 if ((maxCt[level] == 1) && (level != pkgLevel)) {
01658 if (level == threadLevel) {
01659 threadLevel = -1;
01660 }
01661 else if ((threadLevel >= 0) && (level < threadLevel)) {
01662 threadLevel--;
01663 }
01664 if (level == coreLevel) {
01665 coreLevel = -1;
01666 }
01667 else if ((coreLevel >= 0) && (level < coreLevel)) {
01668 coreLevel--;
01669 }
01670 if (level < pkgLevel) {
01671 pkgLevel--;
01672 }
01673 continue;
01674 }
01675 for (proc = 0; proc < nApics; proc++) {
01676 new_retval[proc].first.labels[new_level]
01677 = retval[proc].first.labels[level];
01678 }
01679 new_level++;
01680 }
01681
01682 __kmp_free(retval);
01683 retval = new_retval;
01684 depth = new_depth;
01685 }
01686
01687 if (__kmp_affinity_gran_levels < 0) {
01688
01689
01690
01691
01692 __kmp_affinity_gran_levels = 0;
01693 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
01694 __kmp_affinity_gran_levels++;
01695 }
01696 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
01697 __kmp_affinity_gran_levels++;
01698 }
01699 if (__kmp_affinity_gran > affinity_gran_package) {
01700 __kmp_affinity_gran_levels++;
01701 }
01702 }
01703
01704 if (__kmp_affinity_verbose) {
01705 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
01706 coreLevel, threadLevel);
01707 }
01708
01709 __kmp_free(last);
01710 __kmp_free(maxCt);
01711 __kmp_free(counts);
01712 __kmp_free(totals);
01713 KMP_CPU_FREE(oldMask);
01714 *address2os = retval;
01715 return depth;
01716 }
01717
01718
01719 # endif
01720
01721
01722 #define osIdIndex 0
01723 #define threadIdIndex 1
01724 #define coreIdIndex 2
01725 #define pkgIdIndex 3
01726 #define nodeIdIndex 4
01727
01728 typedef unsigned *ProcCpuInfo;
01729 static unsigned maxIndex = pkgIdIndex;
01730
01731
01732 static int
01733 __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
01734 {
01735 const unsigned *aa = (const unsigned *)a;
01736 const unsigned *bb = (const unsigned *)b;
01737 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
01738 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
01739 return 0;
01740 };
01741
01742
01743 static int
01744 __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
01745 {
01746 unsigned i;
01747 const unsigned *aa = *((const unsigned **)a);
01748 const unsigned *bb = *((const unsigned **)b);
01749 for (i = maxIndex; ; i--) {
01750 if (aa[i] < bb[i]) return -1;
01751 if (aa[i] > bb[i]) return 1;
01752 if (i == osIdIndex) break;
01753 }
01754 return 0;
01755 }
01756
01757
01758
01759
01760
01761
/*
 * Parse /proc/cpuinfo (or a file in the same format) and build the
 * address-to-os-proc-id mapping used by the affinity mechanism.
 *
 * On success, stores a __kmp_allocate'd array of (Address, os id) pairs in
 * *address2os and returns the depth of the discovered topology (1 for a
 * single proc, or 0 with *address2os == NULL when affinity type is "none").
 * On failure, returns -1, sets *msg_id to the i18n id of the diagnostic,
 * and leaves *line at the cpuinfo line number at fault (0 if none).
 */
static int
__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
  kmp_i18n_id_t *const msg_id, FILE *f)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Pass 1: scan the file counting "processor" records, and raise maxIndex
    // to cover the deepest "node_<n> id" (NUMA level) field present.
    //
    char buf[256];
    unsigned num_records = 0;
    while (! feof(f)) {
        //
        // buf[sizeof(buf)-1] is a sentinel: fgets() overwrites it with '\0'
        // only when the input line fills the entire buffer.
        //
        buf[sizeof(buf) - 1] = 1;
        if (! fgets(buf, sizeof(buf), f)) {
            //
            // Read error, presumably EOF.
            //
            break;
        }

        char s1[] = "processor";
        if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
            num_records++;
            continue;
        }

        //
        // Track the highest NUMA node level mentioned in the file so the
        // per-proc field arrays are sized large enough.
        //
        int level;
        if (sscanf(buf, "node_%d id", &level) == 1) {
            if (nodeIdIndex + level >= maxIndex) {
                maxIndex = nodeIdIndex + level;
            }
            continue;
        }
    }

    //
    // Sanity checks: there must be at least one "processor" record, and the
    // count cannot exceed the number of procs the system reports.
    //
    if (num_records == 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_NoProcRecords;
        return -1;
    }
    if (num_records > __kmp_xproc) {
        *line = 0;
        *msg_id = kmp_i18n_str_TooManyProcRecords;
        return -1;
    }

    //
    // Rewind for pass 2, the full parse.
    //
    if (fseek(f, 0, SEEK_SET) != 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_CantRewindCpuinfo;
        return -1;
    }

    //
    // Allocate one row of fields per processor record.  One extra row is
    // allocated at the end so the "current record" slot is always valid,
    // which removes extra termination checks from the parse loop below.
    //
    unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
      * sizeof(unsigned *));
    unsigned i;
    for (i = 0; i <= num_records; i++) {
        threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
          * sizeof(unsigned));
    }

#define CLEANUP_THREAD_INFO \
    for (i = 0; i <= num_records; i++) { \
        __kmp_free(threadInfo[i]); \
    } \
    __kmp_free(threadInfo);

    //
    // A field value of UINT_MAX means "not seen yet" for this record.
    //
    unsigned __index;

#define INIT_PROC_INFO(p) \
    for (__index = 0; __index <= maxIndex; __index++) { \
        (p)[__index] = UINT_MAX; \
    }

    for (i = 0; i <= num_records; i++) {
        INIT_PROC_INFO(threadInfo[i]);
    }

    unsigned num_avail = 0;
    *line = 0;
    while (! feof(f)) {
        //
        // Inner scoping block so that the goto targets (no_val / dup_field)
        // at the bottom of the loop live in an outer scope -- avoids
        // "jump past initialization" issues.
        //
        {
            buf[sizeof(buf) - 1] = 1;       // long-line sentinel, as in pass 1
            bool long_line = false;
            if (! fgets(buf, sizeof(buf), f)) {
                //
                // Read error, presumably EOF.  If the current record already
                // has fields filled in, fake a blank line so the last record
                // still gets finalized below.
                //
                bool valid = false;
                for (i = 0; i <= maxIndex; i++) {
                    if (threadInfo[num_avail][i] != UINT_MAX) {
                        valid = true;
                    }
                }
                if (! valid) {
                    break;
                }
                buf[0] = 0;
            } else if (!buf[sizeof(buf) - 1]) {
                //
                // Line is longer than the buffer.  Only an error if a field
                // we care about appears on this line (checked via CHECK_LINE).
                //
                long_line = true;

#define CHECK_LINE \
    if (long_line) { \
        CLEANUP_THREAD_INFO; \
        *msg_id = kmp_i18n_str_LongLineCpuinfo; \
        return -1; \
    }
            }
            (*line)++;

            //
            // Each recognized field is parsed as "<name> : <unsigned>" and
            // stored into the current record; seeing the same field twice in
            // one record is an error (dup_field).
            //
            char s1[] = "processor";
            if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s1) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][osIdIndex] = val;
                continue;
            }
            char s2[] = "physical id";
            if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s2) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][pkgIdIndex] = val;
                continue;
            }
            char s3[] = "core id";
            if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s3) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][coreIdIndex] = val;
                continue;
            }
            char s4[] = "thread id";
            if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][threadIdIndex] = val;
                continue;
            }
            int level;
            if (sscanf(buf, "node_%d id", &level) == 1) {
                CHECK_LINE;
                //
                // NOTE(review): the strchr offset reuses sizeof(s4) ("thread
                // id") rather than the length of the "node_<n> id" prefix.
                // Harmless since strchr scans forward to the ':' anyway, but
                // confirm this is intentional.
                //
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                KMP_ASSERT(nodeIdIndex + level <= maxIndex);
                if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][nodeIdIndex + level] = val;
                continue;
            }

            //
            // Unrecognized leading token.  If the line isn't empty, skip it
            // (draining the rest of an over-long line first) and move on.
            //
            if ((*buf != 0) && (*buf != '\n')) {
                //
                // If the line was longer than the buffer, consume characters
                // until the newline so the next fgets starts on a fresh line.
                //
                if (long_line) {
                    int ch;
                    while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
                }
                continue;
            }

            //
            // A blank line ends the current processor record.
            // Check that we don't have more records than procs.
            //
            if (num_avail == __kmp_xproc) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_TooManyEntries;
                return -1;
            }

            //
            // The osId field is mandatory; the physical id field is also
            // required.
            // NOTE(review): the physical-id check inspects threadInfo[0],
            // not threadInfo[num_avail] -- i.e. only the first record's
            // physical id is verified.  Confirm whether that is intended.
            //
            if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingProcField;
                return -1;
            }
            if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingPhysicalIDField;
                return -1;
            }

            //
            // Skip this proc if it is not in the process affinity mask.
            //
            if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
                INIT_PROC_INFO(threadInfo[num_avail]);
                continue;
            }

            //
            // Record successfully parsed: keep it and re-initialize the next
            // (always-valid spare) slot.
            //
            num_avail++;
            KMP_ASSERT(num_avail <= num_records);
            INIT_PROC_INFO(threadInfo[num_avail]);
        }
        continue;

        no_val:                 // field present but its value was unparsable
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingValCpuinfo;
        return -1;

        dup_field:              // same field seen twice in one record
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
        return -1;
    }
    *line = 0;

# if KMP_MIC && REDUCE_TEAM_SIZE
    unsigned teamSize = 0;      // accumulated default team size (MIC only)
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    KMP_ASSERT(num_avail > 0);
    KMP_ASSERT(num_avail <= num_records);

    //
    // Degenerate case: only one usable proc.  Report a flat 1x1x1 topology.
    //
    if (num_avail == 1) {
        __kmp_ncores = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            if (! KMP_AFFINITY_CAPABLE()) {
                KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                char buf[KMP_AFFIN_MASK_PRINT_LEN];
                __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                  fullMask);
                KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
                if (__kmp_affinity_respect_mask) {
                    KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
                } else {
                    KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
                }
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            int index;
            kmp_str_buf_t buf;
            __kmp_str_buf_init(&buf);
            __kmp_str_buf_print(&buf, "1");
            for (index = maxIndex - 1; index > pkgIdIndex; index--) {
                __kmp_str_buf_print(&buf, " x 1");
            }
            KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
            __kmp_str_buf_free(&buf);
        }

        if (__kmp_affinity_type == affinity_none) {
            CLEANUP_THREAD_INFO;
            return 0;
        }

        //
        // Build the single-entry map: depth 1, package label only.
        //
        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0][pkgIdIndex];
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        CLEANUP_THREAD_INFO;
        return 1;
    }

    //
    // Sort the parsed records by physical id so procs sharing a package /
    // core appear consecutively.
    //
    qsort(threadInfo, num_avail, sizeof(*threadInfo),
      __kmp_affinity_cmp_ProcCpuInfo_phys_id);

    //
    // Count the radix of each field.  Ids may be sparse and their ranges
    // unknown, so we keep, per field: counts[] (current run length),
    // maxCt[] (max run length), totals[] (number of distinct groups), and
    // lastId[] (previous record's value).
    //
    unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));

    bool assign_thread_ids = false;     // set if thread ids must be synthesized
    int threadIdCt;
    int index;

    restart_radix_check:
    threadIdCt = 0;

    //
    // Seed the counters from record 0, synthesizing its thread id if needed.
    //
    if (assign_thread_ids) {
        if (threadInfo[0][threadIdIndex] == UINT_MAX) {
            threadInfo[0][threadIdIndex] = threadIdCt++;
        }
        else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
            threadIdCt = threadInfo[0][threadIdIndex] + 1;
        }
    }
    for (index = 0; index <= maxIndex; index++) {
        counts[index] = 1;
        maxCt[index] = 1;
        totals[index] = 1;
        lastId[index] = threadInfo[0][index];;
    }

    //
    // Run through the remaining records, maintaining the radix counts.
    //
    for (i = 1; i < num_avail; i++) {
        //
        // Scan fields from most to least significant; stop at the first
        // field that changed relative to the previous record.
        //
        for (index = maxIndex; index >= threadIdIndex; index--) {
            if (assign_thread_ids && (index == threadIdIndex)) {
                //
                // Synthesize a thread id if the record didn't supply one;
                // otherwise keep threadIdCt ahead of the supplied ids.
                //
                if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                    threadInfo[i][threadIdIndex] = threadIdCt++;
                }
                else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                    threadIdCt = threadInfo[i][threadIdIndex] + 1;
                }
            }
            if (threadInfo[i][index] != lastId[index]) {
                //
                // All less-significant fields start a new group: bump their
                // totals, capture max run lengths, and reset run counts.
                // The changed field itself gets both a new run element and a
                // new group.
                //
                int index2;
                for (index2 = threadIdIndex; index2 < index; index2++) {
                    totals[index2]++;
                    if (counts[index2] > maxCt[index2]) {
                        maxCt[index2] = counts[index2];
                    }
                    counts[index2] = 1;
                    lastId[index2] = threadInfo[i][index2];
                }
                counts[index]++;
                totals[index]++;
                lastId[index] = threadInfo[i][index];

                if (assign_thread_ids && (index > threadIdIndex)) {

# if KMP_MIC && REDUCE_TEAM_SIZE
                    //
                    // Accumulate default team size: total threads minus one
                    // per core with 3+ threads.
                    //
                    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

                    //
                    // A more significant field changed, so we are on a new
                    // core: restart the synthesized thread-id counter.
                    //
                    threadIdCt = 0;

                    if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                        threadInfo[i][threadIdIndex] = threadIdCt++;
                    }
                    else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                        threadIdCt = threadInfo[i][threadIdIndex] + 1;
                    }
                }
                break;
            }
        }
        if (index < threadIdIndex) {
            //
            // Every field matched the previous record.  If thread ids were
            // explicitly specified (or we already restarted once), that's a
            // duplicate-id error; otherwise restart the pass, synthesizing
            // thread ids this time.
            //
            if ((threadInfo[i][threadIdIndex] != UINT_MAX)
              || assign_thread_ids) {
                __kmp_free(lastId);
                __kmp_free(totals);
                __kmp_free(maxCt);
                __kmp_free(counts);
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
                return -1;
            }

            assign_thread_ids = true;
            goto restart_radix_check;
        }
    }

# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // Account for the final core's threads in the default team size.
    //
    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    //
    // Fold the final (still-open) runs into the max counts.
    //
    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (counts[index] > maxCt[index]) {
            maxCt[index] = counts[index];
        }
    }

    __kmp_nThreadsPerCore = maxCt[threadIdIndex];
    nCoresPerPkg = maxCt[coreIdIndex];
    nPackages = totals[pkgIdIndex];

    //
    // The topology is uniform iff the product of the max per-level counts
    // equals the total number of leaf (thread-level) groups.
    //
    unsigned prod = totals[maxIndex];
    for (index = threadIdIndex; index < maxIndex; index++) {
        prod *= maxCt[index];
    }
    bool uniform = (prod == totals[threadIdIndex]);

    __kmp_ht_enabled = (maxCt[threadIdIndex] > 1);
    __kmp_ncores = totals[coreIdIndex];

    //
    // Verbose reporting of the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        if (! KMP_AFFINITY_CAPABLE()) {
            KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        else {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
            KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
        for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
          maxCt[threadIdIndex], __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // Apply the accumulated default team size if none was set explicitly.
    //
    if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
        __kmp_dflt_team_nth = teamSize;
        KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
          __kmp_dflt_team_nth));
    }
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        return 0;
    }

    //
    // Decide which fields appear in the final Address labels: a field is
    // kept iff it distinguishes some pair of procs (its totals differ from
    // the level above's).  The package level is always kept.
    //
    bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
    int level = 0;      // NOTE(review): appears unused below -- confirm
    for (index = threadIdIndex; index < maxIndex; index++) {
        KMP_ASSERT(totals[index] >= totals[index + 1]);
        inMap[index] = (totals[index] > totals[index + 1]);
    }
    inMap[maxIndex] = (totals[maxIndex] > 1);
    inMap[pkgIdIndex] = true;

    int depth = 0;
    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (inMap[index]) {
            depth++;
        }
    }
    KMP_ASSERT(depth > 0);

    //
    // Build the final Address table, recording which label position each of
    // the package / core / thread fields landed in.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
    int pkgLevel = -1;
    int coreLevel = -1;
    int threadLevel = -1;

    for (i = 0; i < num_avail; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i][osIdIndex];
        int src_index;
        int dst_index = 0;

        //
        // Copy the retained fields, most significant first.
        //
        for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
            if (! inMap[src_index]) {
                continue;
            }
            addr.labels[dst_index] = threadInfo[i][src_index];
            if (src_index == pkgIdIndex) {
                pkgLevel = dst_index;
            }
            else if (src_index == coreIdIndex) {
                coreLevel = dst_index;
            }
            else if (src_index == threadIdIndex) {
                threadLevel = dst_index;
            }
            dst_index++;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Derive the granularity setting: count how many of the retained
        // levels are finer than the requested granularity.
        //
        int src_index;
        __kmp_affinity_gran_levels = 0;
        for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
            if (! inMap[src_index]) {
                continue;
            }
            switch (src_index) {
            case threadIdIndex:
                if (__kmp_affinity_gran > affinity_gran_thread) {
                    __kmp_affinity_gran_levels++;
                }

                break;
            case coreIdIndex:
                if (__kmp_affinity_gran > affinity_gran_core) {
                    __kmp_affinity_gran_levels++;
                }
                break;

            case pkgIdIndex:
                if (__kmp_affinity_gran > affinity_gran_package) {
                    __kmp_affinity_gran_levels++;
                }
                break;
            }
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(inMap);
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return depth;
}
02464
02465
02466
02467
02468
02469
02470
/*
 * Create and return a table of affinity masks, indexed by OS thread id.
 * Also reports, via the out-parameters, the largest OS id seen (*maxIndex,
 * the table's top index) and the number of distinct groups formed at the
 * requested granularity (*numUnique).
 *
 * Side effects: address2os is sorted in place by topology labels, and each
 * Address gets its "leader" flag set true only for the first member of its
 * group.  The caller owns the returned __kmp_allocate'd table.
 */
static kmp_affin_mask_t *
__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
  AddrUnsPair *address2os, unsigned numAddrs)
{
    //
    // First find the maximum OS id, which bounds the table size.
    //
    unsigned depth;
    unsigned maxOsId;
    unsigned i;

    KMP_ASSERT(numAddrs > 0);
    depth = address2os[0].first.depth;

    maxOsId = 0;
    for (i = 0; i < numAddrs; i++) {
        unsigned osId = address2os[i].second;
        if (osId > maxOsId) {
            maxOsId = osId;
        }
    }
    kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
      (maxOsId + 1) * __kmp_affin_mask_size);

    //
    // Sort by topology labels so that all procs in the same package / core
    // / granularity group occupy consecutive entries.
    //
    qsort(address2os, numAddrs, sizeof(*address2os),
      __kmp_affinity_cmp_Address_labels);

    KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
    if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
        KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
    }
    //
    // If the granularity is coarser than the whole topology, every mask
    // covers everything and threads may migrate freely -- warn.
    //
    if (__kmp_affinity_gran_levels >= depth) {
        if (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffThreadsMayMigrate);
        }
    }

    //
    // Walk the sorted table accumulating a "sum" mask for the current
    // group.  j trails behind i: when a new group starts, entries [j, i)
    // all receive a copy of the finished sum mask.
    //
    unsigned unique = 0;
    unsigned j = 0;                     // index of first thread in the group
    unsigned leader = 0;                // index of the group's leader thread
    Address *leaderAddr = &(address2os[0].first);
    kmp_affin_mask_t *sum
      = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[0].second, sum);
    for (i = 1; i < numAddrs; i++) {
        //
        // Still within the leader's granularity group?  Then just add this
        // os proc's bit to the running sum mask.
        //
        if (leaderAddr->isClose(address2os[i].first,
          __kmp_affinity_gran_levels)) {
            KMP_CPU_SET(address2os[i].second, sum);
            continue;
        }

        //
        // Group boundary: distribute the finished sum mask to every member
        // of the group and mark its leader.
        //
        for (; j < i; j++) {
            unsigned osId = address2os[j].second;
            KMP_DEBUG_ASSERT(osId <= maxOsId);
            kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
            KMP_CPU_COPY(mask, sum);
            address2os[j].first.leader = (j == leader);
        }
        unique++;

        //
        // Start a new group with entry i as its leader.
        //
        leader = i;
        leaderAddr = &(address2os[i].first);
        KMP_CPU_ZERO(sum);
        KMP_CPU_SET(address2os[i].second, sum);
    }

    //
    // Flush the final group (the loop above only flushes on boundaries).
    //
    for (; j < i; j++) {
        unsigned osId = address2os[j].second;
        KMP_DEBUG_ASSERT(osId <= maxOsId);
        kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
        KMP_CPU_COPY(mask, sum);
        address2os[j].first.leader = (j == leader);
    }
    unique++;

    *maxIndex = maxOsId;
    *numUnique = unique;
    return osId2Mask;
}
02580
02581
02582
02583
02584
02585
02586
02587 static kmp_affin_mask_t *newMasks;
02588 static int numNewMasks;
02589 static int nextNewMask;
02590
02591 #define ADD_MASK(_mask) \
02592 { \
02593 if (nextNewMask >= numNewMasks) { \
02594 numNewMasks *= 2; \
02595 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
02596 numNewMasks * __kmp_affin_mask_size); \
02597 } \
02598 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
02599 nextNewMask++; \
02600 }
02601
02602 #define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
02603 { \
02604 if (((_osId) > _maxOsId) || \
02605 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX(_osId2Mask, (_osId))))) {\
02606 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
02607 && (__kmp_affinity_type != affinity_none))) { \
02608 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
02609 } \
02610 } \
02611 else { \
02612 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
02613 } \
02614 }
02615
02616
02617
02618
02619
02620
02621 static void
02622 __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
02623 unsigned int *out_numMasks, const char *proclist,
02624 kmp_affin_mask_t *osId2Mask, int maxOsId)
02625 {
02626 const char *scan = proclist;
02627 const char *next = proclist;
02628
02629
02630
02631
02632
02633 numNewMasks = 2;
02634 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
02635 * __kmp_affin_mask_size);
02636 nextNewMask = 0;
02637 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
02638 __kmp_affin_mask_size);
02639 int setSize = 0;
02640
02641 for (;;) {
02642 int start, end, stride;
02643
02644 SKIP_WS(scan);
02645 next = scan;
02646 if (*next == '\0') {
02647 break;
02648 }
02649
02650 if (*next == '{') {
02651 int num;
02652 setSize = 0;
02653 next++;
02654 SKIP_WS(next);
02655 scan = next;
02656
02657
02658
02659
02660 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
02661 "bad proclist");
02662 SKIP_DIGITS(next);
02663 num = __kmp_str_to_int(scan, *next);
02664 KMP_ASSERT2(num >= 0, "bad explicit proc list");
02665
02666
02667
02668
02669 if ((num > maxOsId) ||
02670 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
02671 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
02672 && (__kmp_affinity_type != affinity_none))) {
02673 KMP_WARNING(AffIgnoreInvalidProcID, num);
02674 }
02675 KMP_CPU_ZERO(sumMask);
02676 }
02677 else {
02678 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
02679 setSize = 1;
02680 }
02681
02682 for (;;) {
02683
02684
02685
02686 SKIP_WS(next);
02687 if (*next == '}') {
02688 next++;
02689 break;
02690 }
02691
02692
02693
02694
02695 if (*next == ',') {
02696 next++;
02697 }
02698 SKIP_WS(next);
02699
02700
02701
02702
02703 scan = next;
02704 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
02705 "bad explicit proc list");
02706
02707 SKIP_DIGITS(next);
02708 num = __kmp_str_to_int(scan, *next);
02709 KMP_ASSERT2(num >= 0, "bad explicit proc list");
02710
02711
02712
02713
02714 if ((num > maxOsId) ||
02715 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
02716 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
02717 && (__kmp_affinity_type != affinity_none))) {
02718 KMP_WARNING(AffIgnoreInvalidProcID, num);
02719 }
02720 }
02721 else {
02722 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
02723 setSize++;
02724 }
02725 }
02726 if (setSize > 0) {
02727 ADD_MASK(sumMask);
02728 }
02729
02730 SKIP_WS(next);
02731 if (*next == ',') {
02732 next++;
02733 }
02734 scan = next;
02735 continue;
02736 }
02737
02738
02739
02740
02741 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
02742 SKIP_DIGITS(next);
02743 start = __kmp_str_to_int(scan, *next);
02744 KMP_ASSERT2(start >= 0, "bad explicit proc list");
02745 SKIP_WS(next);
02746
02747
02748
02749
02750 if (*next != '-') {
02751 ADD_MASK_OSID(start, osId2Mask, maxOsId);
02752
02753
02754
02755
02756 if (*next == ',') {
02757 next++;
02758 }
02759 scan = next;
02760 continue;
02761 }
02762
02763
02764
02765
02766 next++;
02767 SKIP_WS(next);
02768 scan = next;
02769 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
02770 SKIP_DIGITS(next);
02771 end = __kmp_str_to_int(scan, *next);
02772 KMP_ASSERT2(end >= 0, "bad explicit proc list");
02773
02774
02775
02776
02777 stride = 1;
02778 SKIP_WS(next);
02779 if (*next == ':') {
02780
02781
02782
02783 int sign = +1;
02784 next++;
02785 SKIP_WS(next);
02786 scan = next;
02787 if (*next == '-') {
02788 sign = -1;
02789 next++;
02790 SKIP_WS(next);
02791 scan = next;
02792 }
02793 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
02794 "bad explicit proc list");
02795 SKIP_DIGITS(next);
02796 stride = __kmp_str_to_int(scan, *next);
02797 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
02798 stride *= sign;
02799 }
02800
02801
02802
02803
02804 KMP_ASSERT2(stride != 0, "bad explicit proc list");
02805 if (stride > 0) {
02806 KMP_ASSERT2(start <= end, "bad explicit proc list");
02807 }
02808 else {
02809 KMP_ASSERT2(start >= end, "bad explicit proc list");
02810 }
02811 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
02812
02813
02814
02815
02816 if (stride > 0) {
02817 do {
02818 ADD_MASK_OSID(start, osId2Mask, maxOsId);
02819 start += stride;
02820 } while (start <= end);
02821 }
02822 else {
02823 do {
02824 ADD_MASK_OSID(start, osId2Mask, maxOsId);
02825 start += stride;
02826 } while (start >= end);
02827 }
02828
02829
02830
02831
02832 SKIP_WS(next);
02833 if (*next == ',') {
02834 next++;
02835 }
02836 scan = next;
02837 }
02838
02839 *out_numMasks = nextNewMask;
02840 if (nextNewMask == 0) {
02841 *out_masks = NULL;
02842 KMP_INTERNAL_FREE(newMasks);
02843 return;
02844 }
02845 *out_masks
02846 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
02847 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
02848 __kmp_free(sumMask);
02849 KMP_INTERNAL_FREE(newMasks);
02850 }
02851
02852
02853 # if OMP_40_ENABLED
02854
02855
02856
02857
02858
02859
02860
02861
02862
02863
02864
02865
02866
02867
02868
02869
02870
02871
02872
02873
02874
02875
02876
02877
02878 static void
02879 __kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
02880 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
02881 {
02882 const char *next;
02883
02884 for (;;) {
02885 int start, count, stride, i;
02886
02887
02888
02889
02890 SKIP_WS(*scan);
02891 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
02892 "bad explicit places list");
02893 next = *scan;
02894 SKIP_DIGITS(next);
02895 start = __kmp_str_to_int(*scan, *next);
02896 KMP_ASSERT(start >= 0);
02897 *scan = next;
02898
02899
02900
02901
02902 SKIP_WS(*scan);
02903 if (**scan == '}' || **scan == ',') {
02904 if ((start > maxOsId) ||
02905 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
02906 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
02907 && (__kmp_affinity_type != affinity_none))) {
02908 KMP_WARNING(AffIgnoreInvalidProcID, start);
02909 }
02910 }
02911 else {
02912 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
02913 (*setSize)++;
02914 }
02915 if (**scan == '}') {
02916 break;
02917 }
02918 (*scan)++;
02919 continue;
02920 }
02921 KMP_ASSERT2(**scan == ':', "bad explicit places list");
02922 (*scan)++;
02923
02924
02925
02926
02927 SKIP_WS(*scan);
02928 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
02929 "bad explicit places list");
02930 next = *scan;
02931 SKIP_DIGITS(next);
02932 count = __kmp_str_to_int(*scan, *next);
02933 KMP_ASSERT(count >= 0);
02934 *scan = next;
02935
02936
02937
02938
02939 SKIP_WS(*scan);
02940 if (**scan == '}' || **scan == ',') {
02941 for (i = 0; i < count; i++) {
02942 if ((start > maxOsId) ||
02943 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
02944 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
02945 && (__kmp_affinity_type != affinity_none))) {
02946 KMP_WARNING(AffIgnoreInvalidProcID, start);
02947 }
02948 break;
02949 }
02950 else {
02951 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
02952 start++;
02953 (*setSize)++;
02954 }
02955 }
02956 if (**scan == '}') {
02957 break;
02958 }
02959 (*scan)++;
02960 continue;
02961 }
02962 KMP_ASSERT2(**scan == ':', "bad explicit places list");
02963 (*scan)++;
02964
02965
02966
02967
02968 int sign = +1;
02969 for (;;) {
02970 SKIP_WS(*scan);
02971 if (**scan == '+') {
02972 (*scan)++;
02973 continue;
02974 }
02975 if (**scan == '-') {
02976 sign *= -1;
02977 (*scan)++;
02978 continue;
02979 }
02980 break;
02981 }
02982 SKIP_WS(*scan);
02983 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
02984 "bad explicit places list");
02985 next = *scan;
02986 SKIP_DIGITS(next);
02987 stride = __kmp_str_to_int(*scan, *next);
02988 KMP_ASSERT(stride >= 0);
02989 *scan = next;
02990 stride *= sign;
02991
02992
02993
02994
02995 SKIP_WS(*scan);
02996 if (**scan == '}' || **scan == ',') {
02997 for (i = 0; i < count; i++) {
02998 if ((start > maxOsId) ||
02999 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
03000 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
03001 && (__kmp_affinity_type != affinity_none))) {
03002 KMP_WARNING(AffIgnoreInvalidProcID, start);
03003 }
03004 break;
03005 }
03006 else {
03007 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
03008 start += stride;
03009 (*setSize)++;
03010 }
03011 }
03012 if (**scan == '}') {
03013 break;
03014 }
03015 (*scan)++;
03016 continue;
03017 }
03018
03019 KMP_ASSERT2(0, "bad explicit places list");
03020 }
03021 }
03022
03023
03024 static void
03025 __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
03026 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
03027 {
03028 const char *next;
03029
03030
03031
03032
03033 SKIP_WS(*scan);
03034 if (**scan == '{') {
03035 (*scan)++;
03036 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
03037 setSize);
03038 KMP_ASSERT2(**scan == '}', "bad explicit places list");
03039 (*scan)++;
03040 }
03041 else if (**scan == '!') {
03042 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
03043 KMP_CPU_COMPLEMENT(tempMask);
03044 (*scan)++;
03045 }
03046 else if ((**scan >= '0') && (**scan <= '9')) {
03047 next = *scan;
03048 SKIP_DIGITS(next);
03049 int num = __kmp_str_to_int(*scan, *next);
03050 KMP_ASSERT(num >= 0);
03051 if ((num > maxOsId) ||
03052 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
03053 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
03054 && (__kmp_affinity_type != affinity_none))) {
03055 KMP_WARNING(AffIgnoreInvalidProcID, num);
03056 }
03057 }
03058 else {
03059 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
03060 (*setSize)++;
03061 }
03062 *scan = next;
03063 }
03064 else {
03065 KMP_ASSERT2(0, "bad explicit places list");
03066 }
03067 }
03068
03069
03070
03071 void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
03072 unsigned int *out_numMasks, const char *placelist,
03073 kmp_affin_mask_t *osId2Mask, int maxOsId)
03074 {
03075 const char *scan = placelist;
03076 const char *next = placelist;
03077
03078 numNewMasks = 2;
03079 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
03080 * __kmp_affin_mask_size);
03081 nextNewMask = 0;
03082
03083 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
03084 __kmp_affin_mask_size);
03085 KMP_CPU_ZERO(tempMask);
03086 int setSize = 0;
03087
03088 for (;;) {
03089 int start, count, stride;
03090
03091 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
03092
03093
03094
03095
03096 SKIP_WS(scan);
03097 if (*scan == '\0' || *scan == ',') {
03098 if (setSize > 0) {
03099 ADD_MASK(tempMask);
03100 }
03101 KMP_CPU_ZERO(tempMask);
03102 setSize = 0;
03103 if (*scan == '\0') {
03104 break;
03105 }
03106 scan++;
03107 continue;
03108 }
03109
03110 KMP_ASSERT2(*scan == ':', "bad explicit places list");
03111 scan++;
03112
03113
03114
03115
03116 SKIP_WS(scan);
03117 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
03118 "bad explicit places list");
03119 next = scan;
03120 SKIP_DIGITS(next);
03121 count = __kmp_str_to_int(scan, *next);
03122 KMP_ASSERT(count >= 0);
03123 scan = next;
03124
03125
03126
03127
03128 SKIP_WS(scan);
03129 if (*scan == '\0' || *scan == ',') {
03130 int i;
03131 for (i = 0; i < count; i++) {
03132 int j;
03133 if (setSize == 0) {
03134 break;
03135 }
03136 ADD_MASK(tempMask);
03137 setSize = 0;
03138 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j > 0; j--) {
03139
03140
03141
03142
03143 if (KMP_CPU_ISSET(j - stride, tempMask)) {
03144 KMP_CPU_SET(j, tempMask);
03145 setSize++;
03146 }
03147 else {
03148 KMP_CPU_CLR(j, tempMask);
03149 }
03150 }
03151 for (; j >= 0; j--) {
03152 KMP_CPU_CLR(j, tempMask);
03153 }
03154 }
03155 KMP_CPU_ZERO(tempMask);
03156 setSize = 0;
03157
03158 if (*scan == '\0') {
03159 break;
03160 }
03161 scan++;
03162 continue;
03163 }
03164
03165 KMP_ASSERT2(*scan == ':', "bad explicit places list");
03166 scan++;
03167
03168
03169
03170
03171 int sign = +1;
03172 for (;;) {
03173 SKIP_WS(scan);
03174 if (*scan == '+') {
03175 scan++;
03176 continue;
03177 }
03178 if (*scan == '-') {
03179 sign *= -1;
03180 scan++;
03181 continue;
03182 }
03183 break;
03184 }
03185 SKIP_WS(scan);
03186 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
03187 "bad explicit places list");
03188 next = scan;
03189 SKIP_DIGITS(next);
03190 stride = __kmp_str_to_int(scan, *next);
03191 KMP_DEBUG_ASSERT(stride >= 0);
03192 scan = next;
03193 stride *= sign;
03194
03195 if (stride > 0) {
03196 int i;
03197 for (i = 0; i < count; i++) {
03198 int j;
03199 if (setSize == 0) {
03200 break;
03201 }
03202 ADD_MASK(tempMask);
03203 setSize = 0;
03204 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
03205 if (KMP_CPU_ISSET(j - stride, tempMask)) {
03206 KMP_CPU_SET(j, tempMask);
03207 setSize++;
03208 }
03209 else {
03210 KMP_CPU_CLR(j, tempMask);
03211 }
03212 }
03213 for (; j >= 0; j--) {
03214 KMP_CPU_CLR(j, tempMask);
03215 }
03216 }
03217 }
03218 else {
03219 int i;
03220 for (i = 0; i < count; i++) {
03221 int j;
03222 if (setSize == 0) {
03223 break;
03224 }
03225 ADD_MASK(tempMask);
03226 setSize = 0;
03227 for (j = 0; j < (__kmp_affin_mask_size * CHAR_BIT) + stride;
03228 j++) {
03229 if (KMP_CPU_ISSET(j - stride, tempMask)) {
03230 KMP_CPU_SET(j, tempMask);
03231 setSize++;
03232 }
03233 else {
03234 KMP_CPU_CLR(j, tempMask);
03235 }
03236 }
03237 for (; j < __kmp_affin_mask_size * CHAR_BIT; j++) {
03238 KMP_CPU_CLR(j, tempMask);
03239 }
03240 }
03241 }
03242 KMP_CPU_ZERO(tempMask);
03243 setSize = 0;
03244
03245
03246
03247
03248 SKIP_WS(scan);
03249 if (*scan == '\0') {
03250 break;
03251 }
03252 if (*scan == ',') {
03253 scan++;
03254 continue;
03255 }
03256
03257 KMP_ASSERT2(0, "bad explicit places list");
03258 }
03259
03260 *out_numMasks = nextNewMask;
03261 if (nextNewMask == 0) {
03262 *out_masks = NULL;
03263 KMP_INTERNAL_FREE(newMasks);
03264 return;
03265 }
03266 *out_masks
03267 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
03268 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
03269 __kmp_free(tempMask);
03270 KMP_INTERNAL_FREE(newMasks);
03271 }
03272
03273
03274 # endif
03275
03276 # undef ADD_MASK
03277 # undef ADD_MASK_OSID
03278
03279
03280 # if KMP_MIC
03281
03282 static void
03283 __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
03284 {
03285 if ( __kmp_place_num_cores == 0 ) {
03286 if ( __kmp_place_num_threads_per_core == 0 ) {
03287 return;
03288 }
03289 __kmp_place_num_cores = nCoresPerPkg;
03290 }
03291 if ( !__kmp_affinity_uniform_topology() || depth != 3 ) {
03292 KMP_WARNING( AffThrPlaceUnsupported );
03293 return;
03294 }
03295 if ( __kmp_place_num_threads_per_core == 0 ) {
03296 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore;
03297 }
03298 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
03299 KMP_WARNING( AffThrPlaceManyCores );
03300 return;
03301 }
03302
03303 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
03304 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
03305 int i, j, k, n_old = 0, n_new = 0;
03306 for ( i = 0; i < nPackages; ++i ) {
03307 for ( j = 0; j < nCoresPerPkg; ++j ) {
03308 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
03309 n_old += __kmp_nThreadsPerCore;
03310 } else {
03311 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
03312 if ( k < __kmp_place_num_threads_per_core ) {
03313 newAddr[n_new] = (*pAddr)[n_old];
03314 n_new++;
03315 }
03316 n_old++;
03317 }
03318 }
03319 }
03320 }
03321 nCoresPerPkg = __kmp_place_num_cores;
03322 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core;
03323 __kmp_avail_proc = n_new;
03324 __kmp_ncores = nPackages * __kmp_place_num_cores;
03325
03326 __kmp_free( *pAddr );
03327 *pAddr = newAddr;
03328 }
03329 # endif
03330
03331 static AddrUnsPair *address2os = NULL;
03332 static int * procarr = NULL;
03333 static int __kmp_aff_depth = 0;
03334
//
// Internal affinity initializer: build the machine topology map
// (address2os), derive the array of affinity masks (__kmp_affinity_masks),
// and fall back gracefully (to affinity_none) when topology detection fails.
// Called only from __kmp_affinity_initialize().
//
static void
__kmp_aux_affinity_initialize(void)
{
    // Already initialized - nothing to do.
    if (__kmp_affinity_masks != NULL) {
        KMP_ASSERT(fullMask != NULL);
        return;
    }

    //
    // Create the "full" mask - the set of all processors in the machine
    // model.  If respect-mask is set, it is the process's startup affinity
    // mask; otherwise it covers the entire machine.
    //
    if (fullMask == NULL) {
        fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
    }
    if (KMP_AFFINITY_CAPABLE()) {
        if (__kmp_affinity_respect_mask) {
            __kmp_get_system_affinity(fullMask, TRUE);

            // Count the processors available to us.
            unsigned i;
            __kmp_avail_proc = 0;
            for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
                if (! KMP_CPU_ISSET(i, fullMask)) {
                    continue;
                }
                __kmp_avail_proc++;
            }
            if (__kmp_avail_proc > __kmp_xproc) {
                // More procs in the mask than the machine reports:
                // inconsistent state - disable affinity.
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(ErrorInitializeAffinity);
                }
                __kmp_affinity_type = affinity_none;
                __kmp_affin_mask_size = 0;
                return;
            }
        }
        else {
            __kmp_affinity_entire_machine_mask(fullMask);
            __kmp_avail_proc = __kmp_xproc;
        }
    }

    int depth = -1;                       // topology depth; < 0 means "not built yet"
    kmp_i18n_id_t msg_id = kmp_i18n_null; // diagnostic id from failed detection

    //
    // If KMP_CPUINFO_FILE was set and no explicit method was chosen, force
    // the cpuinfo-file topology method.
    //
    if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
        __kmp_affinity_top_method = affinity_top_method_cpuinfo;
    }

    if (__kmp_affinity_top_method == affinity_top_method_all) {
        //
        // "all": try detection methods in order of decreasing fidelity:
        // x2APIC ids, then legacy APIC ids (x86 only), then /proc/cpuinfo
        // (Linux only), then a flat OS-proc-id map as a last resort.
        //
        const char *file_name = NULL;   // for diagnostics about the source used
        int line = 0;                   // cpuinfo parse line, for diagnostics

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
        }

        file_name = NULL;
        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            // depth == 0 means "affinity deliberately off" - done.
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }

        if (depth < 0) {
            // x2APIC failed - report (warning, or info on MIC) and fall back
            // to the legacy APIC method.
            if ((msg_id != kmp_i18n_null)
              && (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none)))) {
# if KMP_MIC
                if (__kmp_affinity_verbose) {
                    KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
                      KMP_I18N_STR(DecodingLegacyAPIC));
                }
# else
                KMP_WARNING(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
                  KMP_I18N_STR(DecodingLegacyAPIC));
# endif
            }

            file_name = NULL;
            depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }
        }

# endif

# if KMP_OS_LINUX

        if (depth < 0) {
            // APIC methods failed (or unavailable) - try /proc/cpuinfo.
            if ((msg_id != kmp_i18n_null)
              && (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none)))) {
# if KMP_MIC
                if (__kmp_affinity_verbose) {
                    KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
                }
# else
                KMP_WARNING(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
# endif
            }
            else if (__kmp_affinity_verbose) {
                KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
            }

            FILE *f = fopen("/proc/cpuinfo", "r");
            if (f == NULL) {
                msg_id = kmp_i18n_str_CantOpenCpuinfo;
            }
            else {
                file_name = "/proc/cpuinfo";
                depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
                fclose(f);
                if (depth == 0) {
                    KMP_ASSERT(__kmp_affinity_type == affinity_none);
                    KMP_ASSERT(address2os == NULL);
                    return;
                }
            }
        }

# endif

        if (depth < 0) {
            // All real detection methods failed - warn with whatever
            // diagnostic we have and fall back to a flat OS-proc map.
            if (msg_id != kmp_i18n_null
              && (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none)))) {
                if (file_name == NULL) {
                    KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id));
                }
                else if (line == 0) {
                    KMP_WARNING(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
                }
                else {
                    KMP_WARNING(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
                }
            }

            file_name = "";
            depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }
            // The flat map cannot fail if affinity is capable at all.
            KMP_ASSERT(depth > 0);
            KMP_ASSERT(address2os != NULL);
        }
    }

    //
    // Explicitly requested topology methods: each is mandatory - a failure
    // is fatal rather than triggering a fallback.
    //
# if KMP_ARCH_X86 || KMP_ARCH_X86_64

    else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
              KMP_I18N_STR(Decodingx2APIC));
        }

        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }

        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }
    else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
              KMP_I18N_STR(DecodingLegacyAPIC));
        }

        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }

        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }

# endif

    else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
        // Parse a cpuinfo-format file: KMP_CPUINFO_FILE if set, otherwise
        // the real /proc/cpuinfo.
        const char *filename;
        if (__kmp_cpuinfo_file != NULL) {
            filename = __kmp_cpuinfo_file;
        }
        else {
            filename = "/proc/cpuinfo";
        }

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
        }

        FILE *f = fopen(filename, "r");
        if (f == NULL) {
            // Fatal: the user explicitly requested this method.
            // (__kmp_msg with kmp_ms_fatal does not return.)
            int code = errno;
            if (__kmp_cpuinfo_file != NULL) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG(CantOpenFileForReading, filename),
                    KMP_ERR(code),
                    KMP_HNT(NameComesFrom_CPUINFO_FILE),
                    __kmp_msg_null
                );
            }
            else {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG(CantOpenFileForReading, filename),
                    KMP_ERR(code),
                    __kmp_msg_null
                );
            }
        }
        int line = 0;
        depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            if (line > 0) {
                KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
            }
            else {
                KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
            }
        }
        if (__kmp_affinity_type == affinity_none) {
            KMP_ASSERT(depth == 0);
            KMP_ASSERT(address2os == NULL);
            return;
        }
    }

# if KMP_OS_WINDOWS && KMP_ARCH_X86_64

    else if (__kmp_affinity_top_method == affinity_top_method_group) {
        // Windows processor-group-based topology.
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
        KMP_ASSERT(depth != 0);

        if (depth < 0) {
            // Group map failed - fall back to the flat map with a warning.
            if ((msg_id != kmp_i18n_null)
              && (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none)))) {
                KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id));
            }

            depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }

            KMP_ASSERT(depth > 0);
            KMP_ASSERT(address2os != NULL);
        }
    }

# endif

    else if (__kmp_affinity_top_method == affinity_top_method_flat) {
        // Flat map: one topology level, OS proc ids only.
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }

        KMP_ASSERT(depth > 0);
        KMP_ASSERT(address2os != NULL);
    }

    if (address2os == NULL) {
        // No method produced a map - disable affinity.
        if (KMP_AFFINITY_CAPABLE()
          && (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none)))) {
            KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        __kmp_affin_mask_size = 0;
        return;
    }

# if KMP_MIC
    // Apply KMP_PLACE_THREADS restrictions to the topology map.
    __kmp_apply_thread_places(&address2os, depth);
# endif

    //
    // Create one mask per unique entry at the requested granularity level.
    //
    unsigned maxIndex;
    unsigned numUnique;
    kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
      address2os, __kmp_avail_proc);
    if (__kmp_affinity_gran_levels == 0) {
        // Finest granularity: every proc is its own place.
        KMP_DEBUG_ASSERT(numUnique == __kmp_avail_proc);
    }

    // Number the children at each topology level; required before sorting
    // by child number below.
    __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);

    switch (__kmp_affinity_type) {

    case affinity_explicit:
        // User supplied an explicit proc list (KMP_AFFINITY=...proclist) or
        // place list (OMP_PLACES / OMP_PROC_BIND).
        KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
# if OMP_40_ENABLED
        if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
        {
            __kmp_affinity_process_proclist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
# if OMP_40_ENABLED
        else {
            __kmp_affinity_process_placelist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
# endif
        if (__kmp_affinity_num_masks == 0) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffNoValidProcID);
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        break;

    //
    // The other affinity types map onto compact/scatter over the sorted
    // topology; they set __kmp_affinity_compact/offset and jump to the
    // shared mask-construction code at sortAddresses.
    //
    case affinity_logical:
        __kmp_affinity_compact = 0;
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;

    case affinity_physical:
        if (__kmp_nThreadsPerCore > 1) {
            __kmp_affinity_compact = 1;
            if (__kmp_affinity_compact >= depth) {
                __kmp_affinity_compact = 0;
            }
        } else {
            __kmp_affinity_compact = 0;
        }
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;

    case affinity_scatter:
        // Scatter is compact with the level order inverted.
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = 0;
        }
        else {
            __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
        }
        goto sortAddresses;

    case affinity_compact:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = depth - 1;
        }
        goto sortAddresses;

# if KMP_MIC
    case affinity_balanced:
        // Balanced affinity is single-package only on this code path.
        if( nPackages > 1 ) {
            if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
                KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
            }
            __kmp_affinity_type = affinity_none;
            return;
        } else if( __kmp_affinity_uniform_topology() ) {
            break;
        } else {
            // Non-uniform topology: build procarr, a (core, thread) ->
            // OS-proc lookup table (-1 for missing slots) used later by the
            // balanced placement code.
            __kmp_aff_depth = depth;

            int nth_per_core = __kmp_nThreadsPerCore;

            // Index of the "core" level in the topology labels.
            int core_level;
            if( nth_per_core > 1 ) {
                core_level = depth - 2;
            } else {
                core_level = depth - 1;
            }
            int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
            int nproc = nth_per_core * ncores;

            procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                procarr[ i ] = -1;
            }

            int k = 0;
            for( int i = 0; i < __kmp_avail_proc; i++ ) {
                int proc = address2os[ i ].second;
                // Innermost level is the thread level when SMT is on,
                // otherwise it is the core level.
                int level = depth - 1;

                int thread = 0;
                int core = address2os[ i ].first.labels[ level ];

                if( nth_per_core > 1 ) {
                    thread = address2os[ i ].first.labels[ level ] % nth_per_core;
                    core = address2os[ i ].first.labels[ level - 1 ];
                }
                k = core * nth_per_core + thread;
                procarr[ k ] = proc;
                k++;
            }

            break;
        }
# endif

    sortAddresses:
        //
        // Shared path for logical/physical/scatter/compact: decide how many
        // masks we need, sort the topology map, and copy out one mask per
        // (unique) place.
        //
        if (__kmp_affinity_dups) {
            // One mask per available proc (duplicates allowed).
            __kmp_affinity_num_masks = __kmp_avail_proc;
        }
        else {
            // One mask per unique place at the chosen granularity.
            __kmp_affinity_num_masks = numUnique;
        }

# if OMP_40_ENABLED
        // OMP_PLACES=...(count) may cap the number of places.
        if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
          && ( __kmp_affinity_num_places > 0 )
          && ( __kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
            __kmp_affinity_num_masks = __kmp_affinity_num_places;
        }
# endif

        __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
          __kmp_affinity_num_masks * __kmp_affin_mask_size);

        //
        // Sort by child number at the __kmp_affinity_compact level, then
        // copy the per-place masks in that order.
        //
        qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
        {
            unsigned i;
            unsigned j;
            for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
                // Without dups, keep only the leader of each place group.
                if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
                    continue;
                }
                unsigned osId = address2os[i].second;
                kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
                kmp_affin_mask_t *dest
                  = KMP_CPU_INDEX(__kmp_affinity_masks, j);
                KMP_ASSERT(KMP_CPU_ISSET(osId, src));
                KMP_CPU_COPY(dest, src);
                if (++j >= __kmp_affinity_num_masks) {
                    break;
                }
            }
            KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
        }
        break;

    default:
        KMP_ASSERT2(0, "Unexpected affinity setting");
    }

    __kmp_free(osId2Mask);
}
03875
03876
03877 void
03878 __kmp_affinity_initialize(void)
03879 {
03880
03881
03882
03883
03884
03885
03886
03887
03888
03889
03890
03891 int disabled = (__kmp_affinity_type == affinity_disabled);
03892 if (! KMP_AFFINITY_CAPABLE()) {
03893 KMP_ASSERT(disabled);
03894 }
03895 if (disabled) {
03896 __kmp_affinity_type = affinity_none;
03897 }
03898 __kmp_aux_affinity_initialize();
03899 if (disabled) {
03900 __kmp_affinity_type = affinity_disabled;
03901 }
03902 }
03903
03904
03905 void
03906 __kmp_affinity_uninitialize(void)
03907 {
03908 if (__kmp_affinity_masks != NULL) {
03909 __kmp_free(__kmp_affinity_masks);
03910 __kmp_affinity_masks = NULL;
03911 }
03912 if (fullMask != NULL) {
03913 KMP_CPU_FREE(fullMask);
03914 fullMask = NULL;
03915 }
03916 __kmp_affinity_num_masks = 0;
03917 # if OMP_40_ENABLED
03918 __kmp_affinity_num_places = 0;
03919 # endif
03920 if (__kmp_affinity_proclist != NULL) {
03921 __kmp_free(__kmp_affinity_proclist);
03922 __kmp_affinity_proclist = NULL;
03923 }
03924 if( address2os != NULL ) {
03925 __kmp_free( address2os );
03926 address2os = NULL;
03927 }
03928 if( procarr != NULL ) {
03929 __kmp_free( procarr );
03930 procarr = NULL;
03931 }
03932 }
03933
03934
//
// Compute and install the initial affinity mask for thread gtid: either the
// full mask (affinity none/balanced, or OMP 4.0 unbound threads) or the
// place selected by (gtid + offset) modulo the number of masks.  For a root
// thread (isa_root) the place-partition fields are initialized as well.
//
void
__kmp_affinity_set_init_mask(int gtid, int isa_root)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
    if (th->th.th_affin_mask == NULL) {
        KMP_CPU_ALLOC(th->th.th_affin_mask);
    }
    else {
        KMP_CPU_ZERO(th->th.th_affin_mask);
    }

    //
    // Select the mask to bind to.  i is the place index recorded on the
    // thread: -1 / KMP_PLACE_ALL means "bound to the full mask".
    //
    kmp_affin_mask_t *mask;
    int i;

# if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
    {
        // KMP_AFFINITY-style (Intel) binding.
        if ((__kmp_affinity_type == affinity_none)
# if KMP_MIC
          || (__kmp_affinity_type == affinity_balanced)
# endif
          ) {
# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
            // With multiple Windows processor groups there is no single
            // full mask to bind to - leave the OS binding alone.
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(fullMask != NULL);
            i = -1;
            mask = fullMask;
        }
        else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# if OMP_40_ENABLED
    else {
        // OMP 4.0 proc_bind-style binding.
        if ((! isa_root)
          || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        }
        else {
            // Root thread with a real bind type: assign it a place.
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# endif

# if OMP_40_ENABLED
    // Record the place and (for roots) the full place partition.
    th->th.th_current_place = i;
    if (isa_root) {
        th->th.th_new_place = i;
        th->th.th_first_place = 0;
        th->th.th_last_place = __kmp_affinity_num_masks - 1;
    }

    if (i == KMP_PLACE_ALL) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
          gtid, i));
    }
# else
    if (i == -1) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
          gtid, i));
    }
# endif

    KMP_CPU_COPY(th->th.th_affin_mask, mask);

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", gtid, buf);
    }

# if KMP_OS_WINDOWS
    //
    // On Windows with affinity_none the bind is best-effort (abort_on_error
    // = FALSE); otherwise failures are reported.
    //
    if ( __kmp_affinity_type == affinity_none ) {
        __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
    }
    else
# endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
04058
04059
04060 # if OMP_40_ENABLED
04061
04062 void
04063 __kmp_affinity_set_place(int gtid)
04064 {
04065 int retval;
04066
04067 if (! KMP_AFFINITY_CAPABLE()) {
04068 return;
04069 }
04070
04071 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
04072
04073 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
04074 gtid, th->th.th_new_place, th->th.th_current_place));
04075
04076
04077
04078
04079 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
04080 KMP_DEBUG_ASSERT(th->th.th_new_place >= 0);
04081 KMP_DEBUG_ASSERT(th->th.th_new_place <= __kmp_affinity_num_masks);
04082 if (th->th.th_first_place <= th->th.th_last_place) {
04083 KMP_DEBUG_ASSERT((th->th.th_new_place >= th->th.th_first_place)
04084 && (th->th.th_new_place <= th->th.th_last_place));
04085 }
04086 else {
04087 KMP_DEBUG_ASSERT((th->th.th_new_place <= th->th.th_first_place)
04088 || (th->th.th_new_place >= th->th.th_last_place));
04089 }
04090
04091
04092
04093
04094
04095 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
04096 th->th.th_new_place);
04097 KMP_CPU_COPY(th->th.th_affin_mask, mask);
04098 th->th.th_current_place = th->th.th_new_place;
04099
04100 if (__kmp_affinity_verbose) {
04101 char buf[KMP_AFFIN_MASK_PRINT_LEN];
04102 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04103 th->th.th_affin_mask);
04104 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", gtid, buf);
04105 }
04106 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
04107 }
04108
04109 # endif
04110
04111
//
// Implementation of kmp_set_affinity(): set the calling thread's system
// affinity to the user-supplied mask *mask.
//
// Returns -1 if affinity is not capable on this platform; otherwise the
// return code of __kmp_set_system_affinity() (0 on success).  On success
// the mask is also cached in th_affin_mask, and (OMP 4.0) the thread's
// place bookkeeping is invalidated, since the thread is no longer bound
// to a known place.
//
int
__kmp_aux_set_affinity(void **mask)
{
    int gtid;
    kmp_info_t *th;
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
          gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        else {
            int proc;
            int num_procs = 0;

            //
            // The user mask must be non-empty and a subset of the
            // machine's full mask; otherwise it is rejected fatally.
            //
            for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
                if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
                    continue;
                }
                num_procs++;
                if (! KMP_CPU_ISSET(proc, fullMask)) {
                    KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    break;
                }
            }
            if (num_procs == 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }

# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
            //
            // With Windows processor groups, a thread's mask cannot span
            // multiple groups; __kmp_get_proc_group() returns < 0 then.
            //
            if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
# endif

        }
    }

    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    if (retval == 0) {
        // Cache the mask we are now running on.
        KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
    }

# if OMP_40_ENABLED
    // The thread no longer sits on a known place; reset the partition to
    // cover all places.
    th->th.th_current_place = KMP_PLACE_UNDEFINED;
    th->th.th_new_place = KMP_PLACE_UNDEFINED;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;
# endif

    return retval;
}
04179
04180
//
// Implementation of kmp_get_affinity(): copy the calling thread's
// affinity mask into the user-supplied *mask.
//
// On non-Windows platforms the OS is queried directly and the system
// call's return code is returned.  On Windows the cached per-thread
// mask (th_affin_mask) is returned instead and the result is 0.
// Returns -1 when affinity is not capable.
//
int
__kmp_aux_get_affinity(void **mask)
{
    int gtid;
    int retval;
    kmp_info_t *th;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
        }
    }

# if ! KMP_OS_WINDOWS

    // Query the live system affinity and return it to the user.
    // NOTE(review): retval is only assigned on this branch; the Windows
    // branch below returns 0 unconditionally.
    retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
    });
    return retval;

# else

    // Windows: hand back the cached per-thread mask.
    KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
    return 0;

# endif

}
04228
04229
04230 int
04231 __kmp_aux_set_affinity_mask_proc(int proc, void **mask)
04232 {
04233 int retval;
04234
04235 if (! KMP_AFFINITY_CAPABLE()) {
04236 return -1;
04237 }
04238
04239 KA_TRACE(1000, ;{
04240 int gtid = __kmp_entry_gtid();
04241 char buf[KMP_AFFIN_MASK_PRINT_LEN];
04242 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04243 (kmp_affin_mask_t *)(*mask));
04244 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
04245 proc, gtid, buf);
04246 });
04247
04248 if (__kmp_env_consistency_check) {
04249 if ((mask == NULL) || (*mask == NULL)) {
04250 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
04251 }
04252 }
04253
04254 if ((proc < 0) || (proc >= KMP_CPU_SETSIZE)) {
04255 return -1;
04256 }
04257 if (! KMP_CPU_ISSET(proc, fullMask)) {
04258 return -2;
04259 }
04260
04261 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
04262 return 0;
04263 }
04264
04265
04266 int
04267 __kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
04268 {
04269 int retval;
04270
04271 if (! KMP_AFFINITY_CAPABLE()) {
04272 return -1;
04273 }
04274
04275 KA_TRACE(1000, ;{
04276 int gtid = __kmp_entry_gtid();
04277 char buf[KMP_AFFIN_MASK_PRINT_LEN];
04278 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04279 (kmp_affin_mask_t *)(*mask));
04280 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
04281 proc, gtid, buf);
04282 });
04283
04284 if (__kmp_env_consistency_check) {
04285 if ((mask == NULL) || (*mask == NULL)) {
04286 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
04287 }
04288 }
04289
04290 if ((proc < 0) || (proc >= KMP_CPU_SETSIZE)) {
04291 return -1;
04292 }
04293 if (! KMP_CPU_ISSET(proc, fullMask)) {
04294 return -2;
04295 }
04296
04297 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
04298 return 0;
04299 }
04300
04301
04302 int
04303 __kmp_aux_get_affinity_mask_proc(int proc, void **mask)
04304 {
04305 int retval;
04306
04307 if (! KMP_AFFINITY_CAPABLE()) {
04308 return -1;
04309 }
04310
04311 KA_TRACE(1000, ;{
04312 int gtid = __kmp_entry_gtid();
04313 char buf[KMP_AFFIN_MASK_PRINT_LEN];
04314 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
04315 (kmp_affin_mask_t *)(*mask));
04316 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
04317 proc, gtid, buf);
04318 });
04319
04320 if (__kmp_env_consistency_check) {
04321 if ((mask == NULL) || (*mask == NULL)) {
04322 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
04323 }
04324 }
04325
04326 if ((proc < 0) || (proc >= KMP_CPU_SETSIZE)) {
04327 return 0;
04328 }
04329 if (! KMP_CPU_ISSET(proc, fullMask)) {
04330 return 0;
04331 }
04332
04333 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
04334 }
04335
04336 # if KMP_MIC
04337
04338
//
// KMP_MIC "balanced" affinity: distribute 'nthreads' threads evenly over
// the physical cores, then bind thread 'tid' either to all HW contexts
// of its core (granularity "core") or to a single HW context on that
// core (granularity "fine"/"thread").
//
void __kmp_balanced_affinity( int tid, int nthreads )
{
    if( __kmp_affinity_uniform_topology() ) {
        int coreID;
        int threadID;
        // Number of HW contexts per core.
        int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;

        int ncores = __kmp_ncores;
        // Each core gets at least 'chunk' threads; the first 'big_cores'
        // cores get one extra thread each.
        int chunk = nthreads / ncores;

        int big_cores = nthreads % ncores;
        // Total number of threads placed on the "big" cores.
        int big_nth = ( chunk + 1 ) * big_cores;
        if( tid < big_nth ) {
            coreID = tid / (chunk + 1 );
            threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
        } else {
            coreID = ( tid - big_cores ) / chunk;
            threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
        }

        KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
          "Illegal set affinity operation when not capable");

        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
            // Fine granularity: bind to the single chosen HW context.
            int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
            KMP_CPU_SET( osID, mask);
        } else if( __kmp_affinity_gran == affinity_gran_core ) {
            // Core granularity: bind to every HW context of the core.
            for( int i = 0; i < __kmp_nth_per_core; i++ ) {
                int osID;
                osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
                KMP_CPU_SET( osID, mask);
            }
        }
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    } else {
        // Non-uniform topology.
        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        // Depth of the "core" level in the machine address labels: one
        // above the innermost level when cores are multi-threaded.
        int nth_per_core = __kmp_nThreadsPerCore;
        int core_level;
        if( nth_per_core > 1 ) {
            core_level = __kmp_aff_depth - 2;
        } else {
            core_level = __kmp_aff_depth - 1;
        }

        // Number of cores = core label of the last available proc + 1.
        int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;

        if( nthreads == __kmp_avail_proc ) {
            // Exactly one thread per HW context: tid indexes address2os
            // directly.
            if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                int osID = address2os[ tid ].second;
                KMP_CPU_SET( osID, mask);
            } else if( __kmp_affinity_gran == affinity_gran_core ) {
                int coreID = address2os[ tid ].first.labels[ core_level ];
                // Set every HW context that shares this thread's core
                // label (at most nth_per_core of them).
                int cnt = 0;
                for( int i = 0; i < __kmp_avail_proc; i++ ) {
                    int osID = address2os[ i ].second;
                    int core = address2os[ i ].first.labels[ core_level ];
                    if( core == coreID ) {
                        KMP_CPU_SET( osID, mask);
                        cnt++;
                        if( cnt == nth_per_core ) {
                            break;
                        }
                    }
                }
            }
        } else if( nthreads <= __kmp_ncores ) {
            // At most one thread per core: give thread 'tid' the tid-th
            // core that has at least one available HW context.
            int core = 0;
            for( int i = 0; i < ncores; i++ ) {
                // Does core i have any available contexts in procarr?
                int in_mask = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != - 1 ) {
                        in_mask = 1;
                        break;
                    }
                }
                if( in_mask ) {
                    if( tid == core ) {
                        for( int j = 0; j < nth_per_core; j++ ) {
                            int osID = procarr[ i * nth_per_core + j ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                                // Fine granularity: the first available
                                // context is enough.
                                if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                                    break;
                                }
                            }
                        }
                        break;
                    } else {
                        core++;
                    }
                }
            }

        } else {
            // More threads than cores: compute a balanced assignment.
            // Number of available HW contexts on each core.
            int nproc_at_core[ ncores ];
            // Number of cores with exactly x available contexts.
            int ncores_with_x_procs[ nth_per_core + 1 ];
            // Number of cores with at least x available contexts.
            int ncores_with_x_to_max_procs[ nth_per_core + 1 ];

            for( int i = 0; i <= nth_per_core; i++ ) {
                ncores_with_x_procs[ i ] = 0;
                ncores_with_x_to_max_procs[ i ] = 0;
            }

            for( int i = 0; i < ncores; i++ ) {
                int cnt = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        cnt++;
                    }
                }
                nproc_at_core[ i ] = cnt;
                ncores_with_x_procs[ cnt ]++;
            }

            // Suffix sums: cores with >= i available contexts.
            for( int i = 0; i <= nth_per_core; i++ ) {
                for( int j = i; j <= nth_per_core; j++ ) {
                    ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
                }
            }

            // Maximum number of HW contexts on the machine.
            int nproc = nth_per_core * ncores;
            // newarr[i] = number of threads assigned to HW context i.
            int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                newarr[ i ] = 0;
            }

            // Distribute the nthreads threads over the contexts.  On the
            // first pass (flag == 0) each available context receives at
            // most one thread; subsequent passes allow doubling up.
            int nth = nthreads;
            int flag = 0;
            while( nth > 0 ) {
                for( int j = 1; j <= nth_per_core; j++ ) {
                    int cnt = ncores_with_x_to_max_procs[ j ];
                    for( int i = 0; i < ncores; i++ ) {
                        // Skip cores with no available contexts.
                        if( nproc_at_core[ i ] == 0 ) {
                            continue;
                        }
                        for( int k = 0; k < nth_per_core; k++ ) {
                            if( procarr[ i * nth_per_core + k ] != -1 ) {
                                if( newarr[ i * nth_per_core + k ] == 0 ) {
                                    newarr[ i * nth_per_core + k ] = 1;
                                    cnt--;
                                    nth--;
                                    break;
                                } else {
                                    if( flag != 0 ) {
                                        newarr[ i * nth_per_core + k ] ++;
                                        cnt--;
                                        nth--;
                                        break;
                                    }
                                }
                            }
                        }
                        if( cnt == 0 || nth == 0 ) {
                            break;
                        }
                    }
                    if( nth == 0 ) {
                        break;
                    }
                }
                flag = 1;
            }
            // Locate the context thread 'tid' landed on: walk the
            // cumulative assignment counts until they exceed tid.
            int sum = 0;
            for( int i = 0; i < nproc; i++ ) {
                sum += newarr[ i ];
                if( sum > tid ) {
                    if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                        int osID = procarr[ i ];
                        KMP_CPU_SET( osID, mask);
                    } else if( __kmp_affinity_gran == affinity_gran_core ) {
                        int coreID = i / nth_per_core;
                        for( int ii = 0; ii < nth_per_core; ii++ ) {
                            int osID = procarr[ coreID * nth_per_core + ii ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask);
                            }
                        }
                    }
                    break;
                }
            }
            __kmp_free( newarr );
        }

        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    }
}
04562 # endif
04563
04564 #elif KMP_OS_DARWIN
04565
04566 #else
04567 #error "Unknown or unsupported OS"
04568 #endif // KMP_OS_WINDOWS || KMP_OS_LINUX