diff --git a/src/arch/ofi/machine.C b/src/arch/ofi/machine.C index baefce5736..031491b94d 100644 --- a/src/arch/ofi/machine.C +++ b/src/arch/ofi/machine.C @@ -696,6 +696,7 @@ void LrtsInit(int *argc, char ***argv, int *numNodes, int *myNodeID) * should not be considered predictive of proximity. That * relationship has to be detected by other means. + * 2. HWLOC doesn't have a hwloc_get_closest_nic because... NIC * doesn't even rate an object type in their ontology, let * alone get first class treatment. Given that PCI devices @@ -714,7 +715,7 @@ void LrtsInit(int *argc, char ***argv, int *numNodes, int *myNodeID) * do *not* have such convenient labeling as something special * needs to happen to get their linuxfs utilities to inject * that derived information into your topology object. As an - * interim solution we allow the user to map their cxi[0..3] + * interim solution we allow the user to map their cxi[0..7] * selection using command line arguments. * 2b. Likewise the 1:1 relationship we assume here between @@ -741,6 +742,8 @@ void LrtsInit(int *argc, char ***argv, int *numNodes, int *myNodeID) * CPU nodes. The user could easily be confused, so we can't * rely on them telling us. This has to be determined at * run time. + + * 6. Aurora can apparently go up to cxi7. 
*/ char *cximap=NULL; @@ -812,23 +815,34 @@ void LrtsInit(int *argc, char ***argv, int *numNodes, int *myNodeID) /// short hsnOrder[numcxi]={2,1,3,0}; - if(numcxi==4) + if(numcxi == 4) { short hsnOrder[4]= {1,3,0,2}; - if(myRank%quad>numcxi) + if(myRank % quad >= numcxi) + { + CmiPrintf("Error: myrank %d quad %d myrank/quad %d",myRank,quad, myRank/quad); + CmiAbort("cxi mapping failure"); + } + myNet = hsnOrder[myRank % quad]; + } + else if(numcxi == 8) + { + // this appears to be a good ordering on aurora + short hsnOrder[8]= {0,1,2,3,4,5,6,7}; + if(myRank % quad >= numcxi) { - CmiPrintf("Error: myrank %d quad %d myrank/quad %n",myRank,quad, myRank/quad); + CmiPrintf("Error: myrank %d quad %d myrank/quad %d",myRank,quad, myRank/quad); CmiAbort("cxi mapping failure"); } - myNet=hsnOrder[myRank%quad]; + myNet = hsnOrder[myRank % quad]; } else { - CmiAssert(numcxi==1); - //theoretically there are cases other than 4 and 1, but + CmiAssert(numcxi == 1); + //theoretically there are cases other than 8, 4 and 1, but //until someone sights such an incrayptid on a machine floor, //we're just going to assume they don't exist. - myNet=0; + myNet = 0; } } snprintf(myDomainName,5, "cxi%d", myNet);