Skip to content

Commit

Permalink
updating launch copper and adding the publication
Browse files Browse the repository at this point in the history
  • Loading branch information
kaushikvelusamy committed Oct 17, 2024
1 parent f61ec69 commit 4079b1d
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 34 deletions.
Binary file added 2024_PDSW_Copper_paper.pdf
Binary file not shown.
4 changes: 1 addition & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,14 @@ set(SOURCES
src/cache/path_status_cache.cpp
)


# Main executable, built from the SOURCES list assembled above.
add_executable(${PROJECT_NAME} ${SOURCES})

# Header search paths used only while compiling this target.
target_include_directories(
  ${PROJECT_NAME}
  PRIVATE
    "${FUSE3_INCLUDE_DIR}"
    "${MPI_INCLUDE_PATH}"
    "${MARGO_INCLUDE_DIRS}"
    "${THALLIUM_INCLUDE_DIRS}"
)

# Link dependencies: MPI, Margo, Thallium and libfuse3.
target_link_libraries(
  ${PROJECT_NAME}
  PRIVATE
    ${MPI_CXX_LIBRARIES}
    PkgConfig::MARGO
    PkgConfig::THALLIUM
    fuse3
    MPI::MPI_CXX
)


# Companion executable built from the RPC shutdown source; it links only
# against Margo and Thallium.
set(SHUTDOWN_PROJECT_NAME cu_fuse_shutdown)
set(SHUTDOWN_SOURCES src/copper/rpc_shutdown.cpp)

add_executable(${SHUTDOWN_PROJECT_NAME} ${SHUTDOWN_SOURCES})
target_link_libraries(
  ${SHUTDOWN_PROJECT_NAME}
  PRIVATE
    PkgConfig::MARGO
    PkgConfig::THALLIUM
)

# Install the main executable into <prefix>/bin.
install(TARGETS ${PROJECT_NAME} DESTINATION bin)

# Install the launcher script into <prefix>/bin. install(PROGRAMS) applies
# owner rwx / group rx / world rx by default, so no PERMISSIONS list is needed.
# The built executable must not be listed here: relative paths are resolved
# against the source directory, where no file named ${PROJECT_NAME} exists,
# which would make the install step fail.
install(PROGRAMS scripts/launch_copper.sh DESTINATION bin)
59 changes: 44 additions & 15 deletions examples/example3/launch_copper.sh
Original file line number Diff line number Diff line change
@@ -1,31 +1,60 @@
#!/bin/bash -x

module load copper
log_level=6
log_type="file"
trees=1
max_cacheable_byte_size=$((10*1024*1024))
sleeptime=20
LOGDIR=~/copper-logs/${PBS_JOBID%%.aurora-pbs-0001.hostmgmt.cm.aurora.alcf.anl.gov}
rm -rf ~/copper_logs*
CUPATH=$COPPER_ROOT/bin/cu_fuse
CU_FUSE_MNT_VIEWDIR=/tmp/${USER}/copper
physcpubind="48-51"

rm -rf ~/copper_logs*
LOGDIR=~/copper-logs/${PBS_JOBID%%.aurora-pbs-0001.hostmgmt.cm.aurora.alcf.anl.gov}
mkdir -p ${LOGDIR} #only on head node

CU_FUSE_MNT_VIEWDIR=/tmp/${USER}/copper
clush --hostfile ${PBS_NODEFILE} "fusermount3 -u ${CU_FUSE_MNT_VIEWDIR}"
clush --hostfile ${PBS_NODEFILE} "rm -rf ${CU_FUSE_MNT_VIEWDIR}"
clush --hostfile ${PBS_NODEFILE} "mkdir -p ${CU_FUSE_MNT_VIEWDIR}" # on all compute nodes
# Parse command-line overrides for the defaults assigned above.
#   -l  log level passed to cu_fuse (-log_level)
#   -t  log type passed to cu_fuse (-log_type)
#   -T  value passed to cu_fuse as -trees
#   -M  value passed to cu_fuse as -max_cacheable_byte_size
#   -s  seconds to sleep after launching
#   -b  CPU list handed to numactl --physcpubind
# getopts reports both an unknown option and a missing argument as '?'.
# In that case print the usage line to stderr and abort instead of silently
# launching with default values.
while getopts "l:t:T:M:s:b:" opt; do
    case ${opt} in
        l ) log_level=$OPTARG ;;
        t ) log_type=$OPTARG ;;
        T ) trees=$OPTARG ;;
        M ) max_cacheable_byte_size=$OPTARG ;;
        s ) sleeptime=$OPTARG ;;
        b ) physcpubind=$OPTARG ;;
        \? )
            echo "Usage: cmd [-l] [-t] [-T] [-M] [-s] [-b]" >&2
            # 'return' succeeds only when the script is sourced; otherwise exit.
            return 1 2>/dev/null || exit 1
            ;;
    esac
done

# Print the effective settings (defaults or command-line overrides) so they
# appear in the job output.
echo "log_level : ${log_level}"
echo "log_type : ${log_type}"
echo "trees : ${trees}"
echo "max_cacheable_byte_size : ${max_cacheable_byte_size}"
echo "sleeptime : ${sleeptime}"
echo "CU_FUSE_MNT_VIEWDIR : ${CU_FUSE_MNT_VIEWDIR}"
echo "LOGDIR : ${LOGDIR}"
echo "PBS_NODEFILE : ${PBS_NODEFILE}"
echo "physcpubind : ${physcpubind}"



# Create the log directory locally, then reset the view directory through
# clush on the hosts listed in PBS_NODEFILE.
mkdir -p "${LOGDIR}" #only on head node
# Unmount whatever is mounted at the view directory (presumably left over from
# an earlier run), remove the directory, and recreate it empty.
clush --hostfile "${PBS_NODEFILE}" "fusermount3 -u ${CU_FUSE_MNT_VIEWDIR}"
clush --hostfile "${PBS_NODEFILE}" "rm -rf ${CU_FUSE_MNT_VIEWDIR}"
clush --hostfile "${PBS_NODEFILE}" "mkdir -p ${CU_FUSE_MNT_VIEWDIR}" # on all compute nodes

read -r -d '' CMD << EOM
numactl --physcpubind="0-3"
numactl --physcpubind=${physcpubind}
$CUPATH
-tpath /
-vpath ${CU_FUSE_MNT_VIEWDIR}
-log_level 6
-log_type file
-log_level ${log_level}
-log_type ${log_type}
-log_output_dir ${LOGDIR}
-net_type cxi
-trees 1
-trees ${trees}
-nf ${PBS_NODEFILE}
-max_cacheable_byte_size $((10*1024*1024))
-max_cacheable_byte_size ${max_cacheable_byte_size}
-s ${CU_FUSE_MNT_VIEWDIR}
EOM

clush --hostfile ${PBS_NODEFILE} $CMD
sleep 20s # add 60s if you are running on more than 2k nodes
clush --hostfile "${PBS_NODEFILE}" "$CMD"
sleep "${sleeptime}"s # add 60s if you are running on more than 2k nodes
1 change: 1 addition & 0 deletions examples/example3/simple_with_copper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ cd $PBS_O_WORKDIR
echo Jobid: $PBS_JOBID
echo Running on nodes `cat $PBS_NODEFILE`

module load copper
launch_copper.sh
# Prepend /tmp/${USER}/copper/ to all your absolute paths if you want your I/O to go through copper (including PYTHONPATH, CONDA_PREFIX, CONDA_ROOT and PATH)

Expand Down
2 changes: 1 addition & 1 deletion scripts/build_helper/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ cp build/compile_commands.json . || { echo "Failed to copy compile commands"; ex

cd build || { echo "Failed to move to build dir"; exit 1; }
make || { echo "Failed to build cu_fuse"; exit 1; }
# make install
make install

59 changes: 44 additions & 15 deletions scripts/launch_copper.sh
Original file line number Diff line number Diff line change
@@ -1,31 +1,60 @@
#!/bin/bash -x

module load copper
log_level=6
log_type="file"
trees=1
max_cacheable_byte_size=$((10*1024*1024))
sleeptime=20
LOGDIR=~/copper-logs/${PBS_JOBID%%.aurora-pbs-0001.hostmgmt.cm.aurora.alcf.anl.gov}
rm -rf ~/copper_logs*
CUPATH=$COPPER_ROOT/bin/cu_fuse
CU_FUSE_MNT_VIEWDIR=/tmp/${USER}/copper
physcpubind="48-51"

rm -rf ~/copper_logs*
LOGDIR=~/copper-logs/${PBS_JOBID%%.aurora-pbs-0001.hostmgmt.cm.aurora.alcf.anl.gov}
mkdir -p ${LOGDIR} #only on head node

CU_FUSE_MNT_VIEWDIR=/tmp/${USER}/copper
clush --hostfile ${PBS_NODEFILE} "fusermount3 -u ${CU_FUSE_MNT_VIEWDIR}"
clush --hostfile ${PBS_NODEFILE} "rm -rf ${CU_FUSE_MNT_VIEWDIR}"
clush --hostfile ${PBS_NODEFILE} "mkdir -p ${CU_FUSE_MNT_VIEWDIR}" # on all compute nodes
# Parse command-line overrides for the defaults assigned above.
#   -l  log level passed to cu_fuse (-log_level)
#   -t  log type passed to cu_fuse (-log_type)
#   -T  value passed to cu_fuse as -trees
#   -M  value passed to cu_fuse as -max_cacheable_byte_size
#   -s  seconds to sleep after launching
#   -b  CPU list handed to numactl --physcpubind
# getopts reports both an unknown option and a missing argument as '?'.
# In that case print the usage line to stderr and abort instead of silently
# launching with default values.
while getopts "l:t:T:M:s:b:" opt; do
    case ${opt} in
        l ) log_level=$OPTARG ;;
        t ) log_type=$OPTARG ;;
        T ) trees=$OPTARG ;;
        M ) max_cacheable_byte_size=$OPTARG ;;
        s ) sleeptime=$OPTARG ;;
        b ) physcpubind=$OPTARG ;;
        \? )
            echo "Usage: cmd [-l] [-t] [-T] [-M] [-s] [-b]" >&2
            # 'return' succeeds only when the script is sourced; otherwise exit.
            return 1 2>/dev/null || exit 1
            ;;
    esac
done

# Print the effective settings (defaults or command-line overrides) so they
# appear in the job output.
echo "log_level : ${log_level}"
echo "log_type : ${log_type}"
echo "trees : ${trees}"
echo "max_cacheable_byte_size : ${max_cacheable_byte_size}"
echo "sleeptime : ${sleeptime}"
echo "CU_FUSE_MNT_VIEWDIR : ${CU_FUSE_MNT_VIEWDIR}"
echo "LOGDIR : ${LOGDIR}"
echo "PBS_NODEFILE : ${PBS_NODEFILE}"
echo "physcpubind : ${physcpubind}"



# Create the log directory locally, then reset the view directory through
# clush on the hosts listed in PBS_NODEFILE.
mkdir -p "${LOGDIR}" #only on head node
# Unmount whatever is mounted at the view directory (presumably left over from
# an earlier run), remove the directory, and recreate it empty.
clush --hostfile "${PBS_NODEFILE}" "fusermount3 -u ${CU_FUSE_MNT_VIEWDIR}"
clush --hostfile "${PBS_NODEFILE}" "rm -rf ${CU_FUSE_MNT_VIEWDIR}"
clush --hostfile "${PBS_NODEFILE}" "mkdir -p ${CU_FUSE_MNT_VIEWDIR}" # on all compute nodes

read -r -d '' CMD << EOM
numactl --physcpubind="0-3"
numactl --physcpubind=${physcpubind}
$CUPATH
-tpath /
-vpath ${CU_FUSE_MNT_VIEWDIR}
-log_level 6
-log_type file
-log_level ${log_level}
-log_type ${log_type}
-log_output_dir ${LOGDIR}
-net_type cxi
-trees 1
-trees ${trees}
-nf ${PBS_NODEFILE}
-max_cacheable_byte_size $((10*1024*1024))
-max_cacheable_byte_size ${max_cacheable_byte_size}
-s ${CU_FUSE_MNT_VIEWDIR}
EOM

clush --hostfile ${PBS_NODEFILE} $CMD
sleep 20s # add 60s if you are running on more than 2k nodes
clush --hostfile "${PBS_NODEFILE}" "$CMD"
sleep "${sleeptime}"s # add 60s if you are running on more than 2k nodes

0 comments on commit 4079b1d

Please sign in to comment.