Skip to content

Commit

Permalink
Merge branch 'dyn-resource'
Browse files Browse the repository at this point in the history
  • Loading branch information
alpae committed Dec 20, 2023
2 parents 69e5df1 + 6ab4ebb commit a0859da
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 2 deletions.
89 changes: 87 additions & 2 deletions FastOMA.nf
Original file line number Diff line number Diff line change
Expand Up @@ -230,12 +230,97 @@ process batch_roothogs{
"""
}


def getMaxFileSize = { Path folderPath ->
def maxFileSize = 0L // Initialize maximum file size

def traverseFolder = { Path currentPath ->
def currentFile = currentPath.toFile()

if (currentFile.isDirectory()) {
def files = currentFile.listFiles()
if (files) {
files.each { file ->
if (file.isDirectory()) {
traverseFolder(file.toPath()) // Recursively traverse subdirectories
} else {
def fileSize = file.length()
if (fileSize > maxFileSize) {
maxFileSize = fileSize // Update maximum file size if larger
}
}
}
}
} else {
def fileSize = currentFile.length()
if (fileSize > maxFileSize) {
maxFileSize = fileSize // Update maximum file size if larger
}
}
}

traverseFolder(folderPath)
return maxFileSize
}

// function to check maximum resources
def check_max(obj, type) {
if (type == 'memory') {
try {
if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
return params.max_memory as nextflow.util.MemoryUnit
else
return obj
} catch (all) {
println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
return obj
}
} else if (type == 'time') {
try {
if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
return params.max_time as nextflow.util.Duration
else
return obj
} catch (all) {
println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
return obj
}
} else if (type == 'cpus') {
try {
return Math.min( obj, params.max_cpus as int )
} catch (all) {
println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
return obj
}
}
}

def mem_cat(filesize, nr_genomes){
def fac = 1;
if (nr_genomes > 500) fac = 2;
if (filesize < 1000000) return 12.GB * fac;
else if (filesize < 2000000) return 20.GB * fac;
else if (filesize < 5000000) return 32.GB * fac;
else return 64.GB * fac;
}

def time_cat(filesize, nr_genomes){
def fac = 1;
if (nr_genomes > 500) fac = 2;
if (filesize < 1000000) return 4.h * fac;
else if (filesize < 20000000) return 24.h * fac;
else return 72.h * fac;
}

process hog_big{
label "process_high"
cpus { check_max( 4, "cpus") }
memory { check_max( mem_cat(getMaxFileSize(rhogsbig), nr_species as int) * task.attempt, "memory") }
time { check_max( time_cat(getMaxFileSize(rhogsbig), nr_species as int) * task.attempt, "time") }

input:
each rhogsbig
path species_tree
val nr_species
output:
path "pickle_hogs"
path "msa/*.fa" , optional: true // msa if write True
Expand Down Expand Up @@ -329,7 +414,7 @@ workflow {

(rhogs_rest_batches, rhogs_big_batches) = batch_roothogs(omamer_rhogs)

(pickle_big_rhog, msa_out_big, genetrees_out_rest) = hog_big(rhogs_big_batches.flatten(), species_tree_checked)
(pickle_big_rhog, msa_out_big, genetrees_out_rest) = hog_big(rhogs_big_batches.flatten(), species_tree_checked, nr_species)
(pickle_rest_rhog, msas_out_rest, genetrees_out_test) = hog_rest(rhogs_rest_batches.flatten(), species_tree_checked)
channel.empty().concat(pickle_big_rhog, pickle_rest_rhog).set{ all_rhog_pickle }

Expand Down
5 changes: 5 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ profiles {
process.executor = 'local'
}

slurm {
process.executor = "slurm"
time = 4.h
}

conda {
process.conda = "environment-conda.yml"
conda.enabled = true
Expand Down

0 comments on commit a0859da

Please sign in to comment.