Skip to content

Commit 626dc50

Browse files
authored Mar 21, 2025
Removing the experimental clique kernel files (#1610)
1 parent 90ad586 commit 626dc50

22 files changed

+9
-1804
lines changed
 

‎CMakeLists.txt

+1-17
Original file line number | Diff line number | Diff line change
@@ -424,22 +424,6 @@ set(SRC_FILES
424424
src/proxy.cc
425425
src/register.cc
426426
src/transport.cc
427-
# src/clique/AllReduceCliqueKernel.h
428-
# src/clique/CliqueCommon.h
429-
# src/clique/CliqueManager.cc
430-
# src/clique/CliqueManager.h
431-
# src/clique/CliqueShmNames.h
432-
# src/clique/HandleCache.cc
433-
# src/clique/HandleCache.h
434-
# src/clique/HandleShm.cc
435-
# src/clique/HandleShm.h
436-
# src/clique/Hash.cc
437-
# src/clique/Hash.h
438-
# src/clique/MsgQueue.cc
439-
# src/clique/MsgQueue.h
440-
# src/clique/SharedMemHelper.h
441-
# src/clique/ShmObject.cc
442-
# src/clique/ShmObject.h
443427
src/device/all_gather.h
444428
src/device/all_reduce.h
445429
src/device/alltoall_pivot.h
@@ -888,7 +872,7 @@ else()
888872
execute_process(
889873
COMMAND bash "-c" "free | grep -o '[[:digit:]]*' | head -1"
890874
OUTPUT_VARIABLE memory_max_string)
891-
## memory_max_string holds the free memory in KB
875+
## memory_max_string holds the free memory in KB
892876
if (${memory_max_string} MATCHES "^[0-9]+")
893877
math(EXPR memory_in_gb "${memory_max_string} / (1024 * 1024)") ## KB to GB conversion
894878
else()

‎src/clique/AllReduceCliqueKernel.h

-75
This file was deleted.

‎src/clique/CliqueCommon.h

-93
This file was deleted.

‎src/clique/CliqueManager.cc

-577
This file was deleted.

‎src/clique/CliqueManager.h

-127
This file was deleted.

‎src/clique/CliqueShmNames.h

-37
This file was deleted.

‎src/clique/HandleCache.cc

-31
This file was deleted.

‎src/clique/HandleCache.h

-142
This file was deleted.

‎src/clique/HandleShm.cc

-69
This file was deleted.

‎src/clique/HandleShm.h

-53
This file was deleted.

‎src/clique/Hash.cc

-34
This file was deleted.

‎src/clique/Hash.h

-28
This file was deleted.

‎src/clique/MsgQueue.cc

-101
This file was deleted.

‎src/clique/MsgQueue.h

-39
This file was deleted.

‎src/clique/SharedMemHelper.h

-43
This file was deleted.

‎src/clique/ShmObject.cc

-45
This file was deleted.

‎src/clique/ShmObject.h

-247
This file was deleted.

‎src/device/all_reduce.h

-2
Original file line number | Diff line number | Diff line change
@@ -1059,14 +1059,12 @@ template<typename T, typename RedOp>
10591059
struct RunWorkColl<ncclFuncAllReduce, T, RedOp, NCCL_ALGO_RING, NCCL_PROTO_LL128> {
10601060
__device__ __forceinline__ void run(int tid, int nthreads, struct ncclDevWorkColl* work) {
10611061
runRing<T, RedOp, ProtoLL128>(tid, nthreads, work);
1062-
//LAUNCH_CLIQUE_KERNEL(AllReduceCliqueSplitKernel, RedOp, T, work);
10631062
}
10641063
};
10651064

10661065
template<typename T, typename RedOp>
10671066
struct RunWorkColl<ncclFuncAllReduce, T, RedOp, NCCL_ALGO_TREE, NCCL_PROTO_LL128> {
10681067
__device__ __forceinline__ void run(int tid, int nthreads, struct ncclDevWorkColl* work) {
10691068
runTreeSplit<T, RedOp, ProtoLL128>(tid, nthreads, work);
1070-
//LAUNCH_CLIQUE_KERNEL(AllReduceCliqueSplitKernel, RedOp, T, worrk);
10711069
}
10721070
};

‎src/init.cc

+3-5
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,6 @@
4343
#include "git_version.h"
4444
#include "rccl_vars.h"
4545
#include "hip_rocm_version_info.h"
46-
//#include "clique/CliqueManager.h"
4746
//#include <hsa/hsa_ext_amd.h>
4847
#ifdef ENABLE_MSCCLPP
4948
#include "mscclpp/mscclpp_nccl.h"
@@ -485,7 +484,6 @@ static ncclResult_t commFree(ncclComm_t comm) {
485484
return ncclSuccess;
486485
}
487486

488-
RCCL_PARAM(CliqueIgnoreTopo, "CLIQUE_IGNORE_TOPO", 0);
489487
RCCL_PARAM(P2pNetDisable, "P2P_NET_DISABLE", 0);
490488
RCCL_PARAM(PivotAlltoallEnable, "PIVOT_ALLTOALL_ENABLE", 1);
491489
RCCL_PARAM(LL128ForceEnable, "LL128_FORCE_ENABLE", 0);
@@ -1578,7 +1576,7 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p
15781576
} else {
15791577
NCCLCHECKGOTO(ncclProxyCreate(comm), ret, fail);
15801578
}
1581-
1579+
15821580
timers[TIMER_INIT_CONNECT] = clockNano();
15831581
do { // Build p2p schedule
15841582
int node = comm->node;
@@ -1979,7 +1977,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) {
19791977
if (rcclParamMscclppForceEnabled()) {
19801978
comm->mscclppForceEnable = true;
19811979
} else {
1982-
comm->mscclppForceEnable = false;
1980+
comm->mscclppForceEnable = false;
19831981
}
19841982
} else {
19851983
WARN("MSCCL++: Cannot enable MSCCL++ on %s architecture", devProp.gcnArchName);
@@ -2410,7 +2408,7 @@ static ncclResult_t commDestroySync(struct ncclAsyncJob* job_) {
24102408
// And keep polling until all graphs referencing us die.
24112409
while (comm->persistentRefs != 0) {
24122410
NCCLCHECKGOTO(ncclCommPollCallbacks(comm, /*waitSome=*/true), ret, fail);
2413-
}
2411+
}
24142412
}
24152413

24162414
if ((ret = ncclProxyStop(comm)) != ncclSuccess) {

‎test/AllReduceTests.cpp

+3-27
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,7 @@ namespace RcclUnitTesting
118118
std::vector<bool> const managedMemList = {false};
119119
std::vector<bool> const useHipGraphList = {false, true};
120120
std::vector<const char *> const channelList = {"84", "112"};
121-
bool const enableSweep = false;
121+
bool const enableSweep = false;
122122
for (auto channel : channelList) {
123123
setenv("NCCL_MIN_NCHANNELS", channel, 1);
124124
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
@@ -149,30 +149,6 @@ namespace RcclUnitTesting
149149
testBed.Finalize();
150150
}
151151

152-
TEST(AllReduce, DISABLED_Clique)
153-
{
154-
// Set clique env var prior to TestBed
155-
setenv("RCCL_ENABLE_CLIQUE", "1", 1);
156-
157-
TestBed testBed;
158-
159-
// Configuration
160-
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
161-
std::vector<ncclDataType_t> const dataTypes = testBed.GetAllSupportedDataTypes();
162-
std::vector<ncclRedOp_t> const redOps = testBed.GetAllSupportedRedOps();
163-
std::vector<int> const roots = {0};
164-
std::vector<int> const numElements = {1048576, 1024};
165-
std::vector<bool> const inPlaceList = {false, true};
166-
std::vector<bool> const managedMemList = {false};
167-
std::vector<bool> const useHipGraphList = {false, true};
168-
169-
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
170-
inPlaceList, managedMemList, useHipGraphList);
171-
testBed.Finalize();
172-
173-
unsetenv("RCCL_ENABLE_CLIQUE");
174-
}
175-
176152
// This tests using custom pre-mult scalars reductions
177153
TEST(AllReduce, PreMultScalar)
178154
{
@@ -245,7 +221,7 @@ namespace RcclUnitTesting
245221
}
246222

247223
TEST(AllReduce, UserBufferRegistration)
248-
{
224+
{
249225
const int nranks = 8;
250226
size_t count = 2048;
251227
std::vector<int> sendBuff(count, 0);
@@ -260,7 +236,7 @@ namespace RcclUnitTesting
260236
}
261237

262238
TEST(AllReduce, ManagedMemUserBufferRegistration)
263-
{
239+
{
264240
const int nranks = 8;
265241
size_t count = 2048;
266242
std::vector<int> sendBuff(count, 0);

‎tools/HelloRccl/runTest.sh

+2-11
Original file line number | Diff line number | Diff line change
@@ -3,19 +3,10 @@ RCCL_INSTALL=../../build/release
33
EXE=$PWD/HelloRccl
44
LDPATH=$LD_LIBRARY_PATH:$RCCL_INSTALL
55

6-
echo "Single process - With clique-based kernels:"
7-
RCCL_CLIQUE_ALLREDUCE_BYTE_LIMIT=1073741824 RCCL_FORCE_ENABLE_CLIQUE=1 NCCL_DEBUG=INFO RCCL_ENABLE_CLIQUE=1 LD_LIBRARY_PATH=$LDPATH $EXE 4
8-
9-
echo "Single process - Without clique-based kernels:"
6+
echo "Single process:"
107
NCCL_DEBUG=INFO LD_LIBRARY_PATH=$LDPATH $EXE 4
118

12-
echo "With clique-based kernels:"
13-
RCCL_CLIQUE_ALLREDUCE_BYTE_LIMIT=1073741824 RCCL_FORCE_ENABLE_CLIQUE=1 NCCL_DEBUG=INFO RCCL_ENABLE_CLIQUE=1 NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 0 &
14-
RCCL_CLIQUE_ALLREDUCE_BYTE_LIMIT=1073741824 RCCL_FORCE_ENABLE_CLIQUE=1 NCCL_DEBUG=INFO RCCL_ENABLE_CLIQUE=1 NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 1 &
15-
RCCL_CLIQUE_ALLREDUCE_BYTE_LIMIT=1073741824 RCCL_FORCE_ENABLE_CLIQUE=1 NCCL_DEBUG=INFO RCCL_ENABLE_CLIQUE=1 NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 2 &
16-
RCCL_CLIQUE_ALLREDUCE_BYTE_LIMIT=1073741824 RCCL_FORCE_ENABLE_CLIQUE=1 NCCL_DEBUG=INFO RCCL_ENABLE_CLIQUE=1 NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 3
17-
18-
echo "Without clique-based kernels:"
9+
echo "Multi-process:"
1910
NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 0 &
2011
NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 1 &
2112
NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 2 &

‎tools/topo_expl/utils.cpp

-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,6 @@ const char* ncclProtoStr[NCCL_NUM_PROTOCOLS] = { "LL", "LL128", "Simple" };
3838

3939
extern NodeModel *node_model;
4040

41-
RCCL_PARAM(CliqueIgnoreTopo, "CLIQUE_IGNORE_TOPO", 0);
4241
RCCL_PARAM(P2pNetDisable, "P2P_NET_DISABLE", 0);
4342
RCCL_PARAM(PivotAlltoallEnable, "PIVOT_ALLTOALL_ENABLE", 1);
4443
RCCL_PARAM(LL128ForceEnable, "LL128_FORCE_ENABLE", 0);

0 commit comments

Comments
 (0)
Please sign in to comment.