Merge pull request #587 from nf-core/sanger_multifarm
Updated the Sanger config to list more queues and cover multiple internal clusters
muffato authored Nov 10, 2023
2 parents f1d5daa + 555b99c commit c3c9060
Showing 1 changed file with 59 additions and 11 deletions: conf/sanger.config
@@ -1,14 +1,50 @@
+// Extract the name of the cluster to tune the parameters below
+def clustername = "farm5"
+try {
+    clustername = ['/bin/bash', '-c', 'lsid | awk \'$0 ~ /^My cluster name is/ {print $5}\''].execute().text.trim()
+} catch (java.io.IOException e) {
+    System.err.println("WARNING: Could not run lsid to determine current cluster, defaulting to farm")
+}
+
 // Profile details
 params {
-    config_profile_description = 'The Wellcome Sanger Institute HPC cluster (farm5) profile'
+    config_profile_description = "The Wellcome Sanger Institute HPC cluster (${clustername}) profile"
     config_profile_contact = 'Priyanka Surana (@priyanka-surana)'
     config_profile_url = 'https://www.sanger.ac.uk'
 }
 
-// Queue and retry strategy
-process{
+// Queue and LSF submission options
+process {
     executor = 'lsf'
-    queue = { task.time < 20.m ? 'small' : task.time < 12.h ? 'normal' : task.time < 48.h ? 'long' : task.time < 168.h ? 'week' : 'basement' }
+
+    // Currently a single set of rules for all clusters, but we could use $clustername to apply
+    // different rules to different clusters.
+    queue = {
+        if ( task.time >= 15.day ) {
+            if ( task.memory > 680.GB ) {
+                error "There is no queue for jobs that need >680 GB and >15 days"
+            } else {
+                "basement"
+            }
+        } else if ( task.memory > 720.GB ) {
+            "teramem"
+        } else if ( task.memory > 350.GB ) {
+            "hugemem"
+        } else if ( task.time > 7.day ) {
+            "basement"
+        } else if ( task.time > 2.day ) {
+            "week"
+        } else if ( task.time > 12.hour ) {
+            "long"
+        } else if ( task.time > 1.min ) {
+            "normal"
+        } else {
+            "small"
+        }
+    }
 
     withLabel: gpu {
         clusterOptions = { "-M "+task.memory.toMega()+" -R 'select[ngpus>0 && mem>="+task.memory.toMega()+"] rusage[ngpus_physical=1.00,mem="+task.memory.toMega()+"] span[ptile=1]' -gpu 'mode=exclusive_process'" }
         queue = { task.time > 12.h ? 'gpu-huge' : task.time > 48.h ? 'gpu-basement' : 'gpu-normal' }
@@ -19,21 +55,33 @@ process{
     }
 }
 
 
 // Executor details
-executor{
+executor {
     name = 'lsf'
     perJobMemLimit = true
     poolSize = 4
     submitRateLimit = '5 sec'
     killBatchSize = 50
 }
 
 
 // Max resources
-params {
-    max_memory = 683.GB
-    max_cpus = 256
-    max_time = 720.h
+if (clustername.startsWith("tol")) {
+    // tol cluster
+    params.max_memory = 1.4.TB
+    params.max_cpus = 64
+    params.max_time = 89280.min // 62 days
+    // As opposed to the farm settings below, we don't mount any filesystem by default.
+    // Pipelines that need to see certain filesystems have to set singularity.runOptions themselves
+
+} else {
+    // defaults for the main farm
+    params.max_memory = 2.9.TB
+    params.max_cpus = 256
+    params.max_time = 43200.min // 30 days
+
+    // Mount all filesystems by default
+    singularity.runOptions = '--bind /lustre --bind /nfs --bind /data --bind /software'
 }
-
-// For singularity
-singularity.runOptions = '--bind /lustre --bind /nfs --bind /software'
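
The cluster probe added at the top of the file shells out to LSF's lsid, whose output (as the awk pattern expects) includes a line of the form "My cluster name is <name>"; the awk filter prints the fifth whitespace-separated field of that line. A minimal standalone Groovy sketch of the same probe, assuming lsid is on the PATH; the printed summary line is illustrative, not part of the config:

// Standalone sketch of the cluster probe used in conf/sanger.config.
def clustername = "farm5"   // fallback when lsid cannot be run
try {
    clustername = ['/bin/bash', '-c',
            'lsid | awk \'$0 ~ /^My cluster name is/ {print $5}\''].execute().text.trim()
} catch (java.io.IOException e) {
    System.err.println("WARNING: could not run lsid, keeping the default")
}
// "tol*" clusters get the tol limits; anything else gets the farm defaults.
println clustername.startsWith("tol") ? "tol limits apply" : "farm defaults apply"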

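The new queue closure checks the 15-day cap first, then memory (teramem, hugemem), then walltime bands. A minimal sketch of the same rules as a plain Groovy function, using bare numbers (GB for memory, hours for time) instead of Nextflow's MemoryUnit and Duration objects; the name pickQueue, the exception, and the sample jobs are illustrative, not part of the config:

// Same routing rules as the queue closure above, on plain numbers.
String pickQueue(double memGB, double hours) {
    if (hours >= 15 * 24) {
        // stands in for Nextflow's error(): no queue fits such a job
        if (memGB > 680) throw new IllegalArgumentException("No queue for >680 GB and >15 days")
        return "basement"
    }
    if (memGB > 720)     return "teramem"
    if (memGB > 350)     return "hugemem"
    if (hours > 7 * 24)  return "basement"
    if (hours > 2 * 24)  return "week"
    if (hours > 12)      return "long"
    if (hours > 1 / 60d) return "normal"
    return "small"
}

assert pickQueue(8, 1)       == "normal"    // a typical job
assert pickQueue(400, 24)    == "hugemem"   // large memory, short walltime
assert pickQueue(100, 10*24) == "basement"  // over 7 days
assert pickQueue(0.5, 0.005) == "small"     // sub-minute job

Note the asymmetry around the 15-day boundary: a 700 GB job just under 15 days still fits hugemem (the teramem cutoff is 720 GB), while the same job at 15 days or more is rejected, since the error message implies basement caps at 680 GB.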
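The gpu label's clusterOptions builds one long LSF option string by concatenation, which is hard to read in the diff. A sketch of what it renders for a hypothetical 16 GB request (task.memory.toMega() == 16384):

// Rendered LSF options for a 16 GB gpu-label job, mirroring the concatenation above.
def mega = 16384  // what task.memory.toMega() would return for 16.GB
def opts = "-M " + mega +
        " -R 'select[ngpus>0 && mem>=" + mega + "] rusage[ngpus_physical=1.00,mem=" + mega + "] span[ptile=1]'" +
        " -gpu 'mode=exclusive_process'"
println opts
// -M 16384 -R 'select[ngpus>0 && mem>=16384] rusage[ngpus_physical=1.00,mem=16384] span[ptile=1]' -gpu 'mode=exclusive_process'

This asks LSF for a host with a GPU and enough memory, reserves one physical GPU in exclusive-process mode, and keeps all slots on a single host (span[ptile=1]).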