From be14ac2e851382cadbb597c90731177e6e384fc7 Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 17 Dec 2024 14:29:39 +0100 Subject: [PATCH 01/16] Initial publish dir offloading implementation Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 22 +++ .../nextflow/executor/ExecutorFactory.groovy | 16 +++ .../nextflow/processor/PublishDir.groovy | 26 ++-- .../processor/PublishOffloadManager.groovy | 130 ++++++++++++++++++ .../nextflow/processor/TaskProcessor.groovy | 2 +- .../nextflow/script/ScriptRunner.groovy | 1 + .../PublishOffloadManagerTest.groovy | 38 +++++ 7 files changed, 226 insertions(+), 9 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index f394245259..56835a9948 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -16,6 +16,7 @@ package nextflow +import nextflow.processor.PublishOffloadManager import java.nio.file.Files import java.nio.file.Path @@ -293,6 +294,12 @@ class Session implements ISession { FilePorter getFilePorter() { filePorter } + boolean publishOffload + + private PublishOffloadManager publishOffloadManager + + PublishOffloadManager getPublishOffloadManager() {publishOffloadManager} + /** * Creates a new session with an 'empty' (default) configuration */ @@ -394,6 +401,21 @@ class Session implements ISession { // -- file porter config this.filePorter = new FilePorter(this) + this.publishOffload = config.publishOffload as boolean + + if ( this.publishOffload ) { + // -- publish offload manager config + log.warn("Publish offload flag enabled. Creating Offload Manager") + this.publishOffloadManager = new PublishOffloadManager(this) + } + + } + + void startPublishOffloadManager() { + if ( this.publishOffload ) { + log.debug("Starting Publish offload manager") + this.publishOffloadManager?.init() + } } protected Path cloudCachePath(Map cloudcache, Path workDir) { diff --git a/modules/nextflow/src/main/groovy/nextflow/executor/ExecutorFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/executor/ExecutorFactory.groovy index 57a1535b6c..61ec55087b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/executor/ExecutorFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/executor/ExecutorFactory.groovy @@ -234,6 +234,22 @@ class ExecutorFactory { return result } + @CompileDynamic + static String getDefaultExecutorName(Session session) { + def result = session.config.process?.executor?.toString() + if( !result ) { + if (session.config.executor instanceof String) { + return session.config.executor + } else if (session.config.executor?.name instanceof String) { + return session.config.executor.name + } else { + return DEFAULT_EXECUTOR + } + } + return result + } + + void signalExecutors() { for( Executor exec : executors.values() ) exec.signal() diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index 4ca5764519..c35d51dd9d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -409,9 +409,9 @@ class PublishDir { // create target dirs if required makeDirs(destination.parent) - + def offload = false try { - processFileImpl(source, destination) + offload = processFileImpl(source, destination) } catch( FileAlreadyExistsException e ) { // don't copy source path if target is identical, but still emit the publish event @@ -425,11 +425,12 @@ class PublishDir { if( !sameRealPath && shouldOverwrite(source, destination) ) { FileHelper.deletePath(destination) - processFileImpl(source, destination) + offload = processFileImpl(source, destination) } } - - notifyFilePublish(destination, source) + //Don't notify if file publication is offloaded. It will be notified after the offloaded job is finished. + if (!offload) + notifyFilePublish(destination, source) } private String real0(Path p) { @@ -495,7 +496,7 @@ class PublishDir { return sourceHash != targetHash } - protected void processFileImpl( Path source, Path destination ) { + protected boolean processFileImpl( Path source, Path destination ) { log.trace "publishing file: $source -[$mode]-> $destination" if( !mode || mode == Mode.SYMLINK ) { @@ -509,10 +510,18 @@ class PublishDir { FilesEx.mklink(source, [hard:true], destination) } else if( mode == Mode.MOVE ) { - FileHelper.movePath(source, destination) + if ( session.getPublishOffloadManager().tryMoveOffload(source, destination) ){ + return true + } else { + FileHelper.movePath(source, destination) + } } else if( mode == Mode.COPY ) { - FileHelper.copyPath(source, destination) + if ( session.getPublishOffloadManager().tryCopyOffload(source, destination) ){ + return true + } else { + FileHelper.copyPath(source, destination) + } } else if( mode == Mode.COPY_NO_FOLLOW ) { FileHelper.copyPath(source, destination, LinkOption.NOFOLLOW_LINKS) @@ -520,6 +529,7 @@ class PublishDir { else { throw new IllegalArgumentException("Unknown file publish mode: ${mode}") } + return false } protected void createPublishDir() { diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy new file mode 100644 index 0000000000..3cdc58e415 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy @@ -0,0 +1,130 @@ +package nextflow.processor + +import groovy.util.logging.Slf4j +import nextflow.Nextflow +import nextflow.Session +import nextflow.executor.Executor +import nextflow.executor.ExecutorFactory +import nextflow.extension.FilesEx +import nextflow.fusion.FusionHelper +import nextflow.script.BaseScript +import nextflow.script.BodyDef +import nextflow.script.ProcessConfig +import nextflow.script.TokenValCall +import nextflow.util.ArrayTuple + +import java.nio.file.Path + +@Slf4j +class PublishOffloadManager { + Map runningPublications= new HashMap(10) + static final Map SUPPORTED_SCHEMES = [awsbatch:['s3'], local:['file']] + static final String S5CMD_CONTAINER = 'jorgeejarquea/s5cmd_aws:0.0.1' + Session session + PublishTaskProcessor copyProcessor + PublishTaskProcessor moveProcessor + + PublishOffloadManager(Session session) { + this.session = session + } + + void init(){ + + if (useS5cmd()){ + this.copyProcessor = createProcessor( "publish_dir_copy_process", new BodyDef({"s5cmd cp $source $target"},'copy data process') ) + this.moveProcessor = createProcessor( "publish_dir_move_process", new BodyDef({"s5cmd mv $source $target"},'move data process') ) + } else { + this.copyProcessor = createProcessor( "publish_dir_copy_process", new BodyDef({"cp $source $target"},'copy data process') ) + this.moveProcessor = createProcessor( "publish_dir_move_process", new BodyDef({"mv $source $target"},'move data process') ) + } + + } + + private boolean checkOffload(Path source, Path destination, String executor){ + return session.publishOffload && source.scheme in SUPPORTED_SCHEMES[executor] && destination.scheme in SUPPORTED_SCHEMES[executor]; + } + + private synchronized boolean tryInvokeProcessor(TaskProcessor processor, Path origin, Path destination){ + if (checkOffload(origin, destination, processor.executor.name)) { + final params = new TaskStartParams(TaskId.next(), processor.indexCount.incrementAndGet()) + final values = new ArrayList(1) + log.debug("Creating task for file publication: ${origin.toUri().toString()} -> ${destination.toUri().toString()} " ) + values[0] = generateFileValues(origin, destination) + final args = new ArrayList(2) + args[0] = params + args[1] = values + assert args.size() == 2 + processor.invokeTask(args.toArray()) + runningPublications.put(processor.currentTask.get(), Nextflow.tuple(origin, destination)) + return true + } + return false + } + + private isFusionEnabled(){ + return FusionHelper.isFusionEnabled(session) + } + + private useS5cmd(){ + return ( (!isFusionEnabled()) && (ExecutorFactory.getDefaultExecutorName(session) == 'awsbatch') ) + } + + private ArrayTuple generateFileValues(Path origin, Path destination){ + if ( isFusionEnabled() ){ + Nextflow.tuple(FusionHelper.toContainerMount(origin), FusionHelper.toContainerMount(destination)) + } else { + Nextflow.tuple(FilesEx.toUriString(origin), FilesEx.toUriString(destination)) + } + } + + boolean tryMoveOffload(Path origin, Path destination) { + tryInvokeProcessor(moveProcessor, origin, destination) + } + + boolean tryCopyOffload(Path origin, Path destination) { + tryInvokeProcessor(copyProcessor, origin, destination) + } + + private PublishTaskProcessor createProcessor( String name, BodyDef body){ + assert body + assert session.script + log.debug("Creating processor $name") + // -- the config object + final processConfig = new ProcessConfig(session.script, name) + // Invoke the code block which will return the script closure to the executed. + // As side effect will set all the property declarations in the 'taskConfig' object. + if (useS5cmd()) { + processConfig.put('container', S5CMD_CONTAINER); + } + processConfig._in_tuple(new TokenValCall('source'), new TokenValCall('target')) + + if ( !body ) + throw new IllegalArgumentException("Missing script in the specified process block -- make sure it terminates with the script string to be executed") + + // -- apply settings from config file to process config + processConfig.applyConfig((Map)session.config.process, name, name, name) + + // -- get the executor for the given process config + final execObj = session.executorFactory.getExecutor(name, processConfig, body, session) + + // -- create processor class + new PublishTaskProcessor( name, execObj, session, session.script, processConfig, body, this ) + } + +} + +class PublishTaskProcessor extends TaskProcessor{ + + PublishOffloadManager manager + + PublishTaskProcessor(String name, Executor executor, Session session, BaseScript baseScript, ProcessConfig processConfig, BodyDef bodyDef, PublishOffloadManager manager) { + super(name, executor, session, baseScript, processConfig, bodyDef) + this.manager = manager + } + + @Override + void finalizeTask0(TaskRun task){ + final tuple = manager.runningPublications.remove(task) + session.notifyFilePublish((Path)tuple.get(0), (Path)tuple.get(1)) + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 4b5fbf2791..8c0a8cddc6 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -2422,7 +2422,7 @@ class TaskProcessor { * @param task The {@code TaskRun} instance to finalize * @param producedFiles The map of files to be bind the outputs */ - private void finalizeTask0( TaskRun task ) { + protected void finalizeTask0( TaskRun task ) { log.trace "Finalize process > ${safeTaskName(task)}" // -- bind output (files) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy b/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy index 498d50f41f..d9224ea3e0 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy @@ -242,6 +242,7 @@ class ScriptRunner { protected run() { log.debug "> Launching execution" assert scriptParser, "Missing script instance to run" + session.startPublishOffloadManager() // -- launch the script execution scriptParser.runScript() // -- normalise output diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy new file mode 100644 index 0000000000..92c93178d3 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy @@ -0,0 +1,38 @@ +package nextflow.processor + +import nextflow.Session +import nextflow.script.BaseScript +import nextflow.script.ScriptBinding +import nextflow.script.ScriptFile +import spock.lang.Specification + +import test.TestHelper + + +class PublishOffloadManagerTest extends Specification { + + def 'should create task processor'(){ + given: + def session = new Session(); + def scriptBinding = new ScriptBinding(session: session) + def script = Stub(BaseScript) + script.getBinding() >> scriptBinding + def folder = TestHelper.createInMemTempDir() + def file = folder.resolve('pipeline.nf'); file.text = 'println "hello"' + def scriptFile = new ScriptFile(file) + session.init(scriptFile) + //session.start() + session.script = script; + def poManager = new PublishOffloadManager(session); + when: + poManager.init() + then: + poManager.copyProcessor != null + poManager.moveProcessor != null + cleanup: + session.classesDir?.deleteDir() + + + } + +} From 1f0dfec401391981e77d685b89eaf04fa091469b Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 17 Dec 2024 15:00:27 +0100 Subject: [PATCH 02/16] fix unit test Signed-off-by: jorgee --- .../src/main/groovy/nextflow/processor/PublishDir.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index c35d51dd9d..5bdd88bc0c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -510,14 +510,14 @@ class PublishDir { FilesEx.mklink(source, [hard:true], destination) } else if( mode == Mode.MOVE ) { - if ( session.getPublishOffloadManager().tryMoveOffload(source, destination) ){ + if ( session.getPublishOffloadManager()?.tryMoveOffload(source, destination) ){ return true } else { FileHelper.movePath(source, destination) } } else if( mode == Mode.COPY ) { - if ( session.getPublishOffloadManager().tryCopyOffload(source, destination) ){ + if ( session.getPublishOffloadManager()?.tryCopyOffload(source, destination) ){ return true } else { FileHelper.copyPath(source, destination) From ed27414ca8be5062444ab42ab29c749f37e622ef Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 17 Dec 2024 16:02:10 +0100 Subject: [PATCH 03/16] add integration test Signed-off-by: jorgee --- tests/checks/publish-offload.nf/.checks | 7 +++++++ tests/checks/publish-offload.nf/.config | 1 + tests/publish-offload.nf | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+) create mode 100644 tests/checks/publish-offload.nf/.checks create mode 100644 tests/checks/publish-offload.nf/.config create mode 100644 tests/publish-offload.nf diff --git a/tests/checks/publish-offload.nf/.checks b/tests/checks/publish-offload.nf/.checks new file mode 100644 index 0000000000..e53b74e04b --- /dev/null +++ b/tests/checks/publish-offload.nf/.checks @@ -0,0 +1,7 @@ +set -e +cp .config nextflow.config + +$NXF_RUN > stdout +[[ `grep 'INFO' .nextflow.log | grep -c 'Submitted process > gen_data'` == 1 ]] || false +[[ `grep 'INFO' .nextflow.log | grep -c 'Submitted process > publish_dir_copy_process'` == 1 ]] || false +[[ -f publishDir/chunk_1 ]] || false diff --git a/tests/checks/publish-offload.nf/.config b/tests/checks/publish-offload.nf/.config new file mode 100644 index 0000000000..1840185eb4 --- /dev/null +++ b/tests/checks/publish-offload.nf/.config @@ -0,0 +1 @@ +publishOffload = true diff --git a/tests/publish-offload.nf b/tests/publish-offload.nf new file mode 100644 index 0000000000..f9e682392d --- /dev/null +++ b/tests/publish-offload.nf @@ -0,0 +1,18 @@ +params.outdir = 'publishDir' + +process gen_data { + publishDir "${params.outdir}", mode: 'copy' + input: + val(i) + output: + path 'chunk_*' + + script: + """ + dd if=/dev/urandom of=chunk_$i count=1 bs=10M + """ +} + +workflow { + Channel.of(1) | gen_data +} From 5b9fe688e7b38b868390d45f740cd04ce38375f3 Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 17 Dec 2024 18:22:47 +0100 Subject: [PATCH 04/16] add header Signed-off-by: jorgee --- .../processor/PublishOffloadManager.groovy | 16 ++++++++++++++++ .../processor/PublishOffloadManagerTest.groovy | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy index 3cdc58e415..9881e441b7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy @@ -1,3 +1,19 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package nextflow.processor import groovy.util.logging.Slf4j diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy index 92c93178d3..cde87e0ff3 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy @@ -1,3 +1,19 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package nextflow.processor import nextflow.Session From 0f394ec5513c42cdfc42e9b4d76a318d778cb61f Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Fri, 20 Dec 2024 11:48:14 +0100 Subject: [PATCH 05/16] Update modules/nextflow/src/main/groovy/nextflow/Session.groovy [skip ci] Co-authored-by: Paolo Di Tommaso Signed-off-by: Jorge Ejarque --- modules/nextflow/src/main/groovy/nextflow/Session.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 56835a9948..f2e93d6133 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -298,7 +298,7 @@ class Session implements ISession { private PublishOffloadManager publishOffloadManager - PublishOffloadManager getPublishOffloadManager() {publishOffloadManager} + PublishOffloadManager getPublishOffloadManager() { publishOffloadManager } /** * Creates a new session with an 'empty' (default) configuration From 832834f6ffdcdef2af201f9f0d6a6f4a263d5fc2 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 20 Dec 2024 20:13:27 +0100 Subject: [PATCH 06/16] group copies and retries Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 20 ++- .../nextflow/processor/PublishDir.groovy | 4 +- .../processor/PublishOffloadManager.groovy | 159 +++++++++++++----- .../nextflow/processor/copy-group-template.sh | 72 ++++++++ .../PublishOffloadManagerTest.groovy | 3 +- tests/checks/publish-offload.nf/.config | 2 +- 6 files changed, 205 insertions(+), 55 deletions(-) create mode 100644 modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index f2e93d6133..6c5485f6a0 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -294,7 +294,7 @@ class Session implements ISession { FilePorter getFilePorter() { filePorter } - boolean publishOffload + int publishOffloadBatchSize private PublishOffloadManager publishOffloadManager @@ -401,9 +401,9 @@ class Session implements ISession { // -- file porter config this.filePorter = new FilePorter(this) - this.publishOffload = config.publishOffload as boolean + this.publishOffloadBatchSize = config.publishOffloadBatchSize ? config.publishOffloadBatchSize as int : 0 - if ( this.publishOffload ) { + if ( this.publishOffloadBatchSize ) { // -- publish offload manager config log.warn("Publish offload flag enabled. Creating Offload Manager") this.publishOffloadManager = new PublishOffloadManager(this) @@ -412,7 +412,7 @@ class Session implements ISession { } void startPublishOffloadManager() { - if ( this.publishOffload ) { + if ( this.publishOffloadBatchSize ) { log.debug("Starting Publish offload manager") this.publishOffloadManager?.init() } @@ -698,13 +698,19 @@ class Session implements ISession { log.debug "Session await" processesBarrier.awaitCompletion() log.debug "Session await > all processes finished" - terminated = true - monitorsBarrier.awaitCompletion() - log.debug "Session await > all barriers passed" + + if( !aborted ) { joinAllOperators() log.trace "Session > all operators finished" } + // shutdown t + publishPoolManager?.shutdown(false) + publishOffloadManager?.close() + log.debug "Session await > Publish phase finished" + terminated = true + monitorsBarrier.awaitCompletion() + log.debug "Session await > all barriers passed" } void destroy() { diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index 5bdd88bc0c..24284a7806 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -510,14 +510,14 @@ class PublishDir { FilesEx.mklink(source, [hard:true], destination) } else if( mode == Mode.MOVE ) { - if ( session.getPublishOffloadManager()?.tryMoveOffload(source, destination) ){ + if ( session.getPublishOffloadManager()?.tryMoveOffload(source, destination, retryConfig, failOnError) ){ return true } else { FileHelper.movePath(source, destination) } } else if( mode == Mode.COPY ) { - if ( session.getPublishOffloadManager()?.tryCopyOffload(source, destination) ){ + if ( session.getPublishOffloadManager()?.tryCopyOffload(source, destination, retryConfig, failOnError) ){ return true } else { FileHelper.copyPath(source, destination) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy index 9881e441b7..01aed46d6c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy @@ -16,6 +16,7 @@ package nextflow.processor +import groovy.transform.PackageScope import groovy.util.logging.Slf4j import nextflow.Nextflow import nextflow.Session @@ -26,52 +27,65 @@ import nextflow.fusion.FusionHelper import nextflow.script.BaseScript import nextflow.script.BodyDef import nextflow.script.ProcessConfig -import nextflow.script.TokenValCall import nextflow.util.ArrayTuple import java.nio.file.Path +import java.util.concurrent.atomic.AtomicInteger @Slf4j class PublishOffloadManager { - Map runningPublications= new HashMap(10) + Map runningPublications= new HashMap(10) static final Map SUPPORTED_SCHEMES = [awsbatch:['s3'], local:['file']] static final String S5CMD_CONTAINER = 'jorgeejarquea/s5cmd_aws:0.0.1' - Session session - PublishTaskProcessor copyProcessor - PublishTaskProcessor moveProcessor + static final String PUBLISH_FUNCTION = 'nxf_publish' + private Session session + private PublishTaskProcessor publishProcessor + private List commands = new LinkedList(); + private boolean closed = false; + /** + * Unique offloaded index number + */ + final protected AtomicInteger indexCount = new AtomicInteger() PublishOffloadManager(Session session) { - this.session = session + this.session = session; } + @PackageScope + TaskProcessor getPublishProcessor(){ publishProcessor } void init(){ - - if (useS5cmd()){ - this.copyProcessor = createProcessor( "publish_dir_copy_process", new BodyDef({"s5cmd cp $source $target"},'copy data process') ) - this.moveProcessor = createProcessor( "publish_dir_move_process", new BodyDef({"s5cmd mv $source $target"},'move data process') ) - } else { - this.copyProcessor = createProcessor( "publish_dir_copy_process", new BodyDef({"cp $source $target"},'copy data process') ) - this.moveProcessor = createProcessor( "publish_dir_move_process", new BodyDef({"mv $source $target"},'move data process') ) - } + //Try with template + BodyDef body = new BodyDef({ + def file = PublishOffloadManager.class.getResource('copy-group-template.sh') + return file.text + },'publish file process', 'shell') + this.publishProcessor = createProcessor( "publish_process", body ) } private boolean checkOffload(Path source, Path destination, String executor){ - return session.publishOffload && source.scheme in SUPPORTED_SCHEMES[executor] && destination.scheme in SUPPORTED_SCHEMES[executor]; - } - - private synchronized boolean tryInvokeProcessor(TaskProcessor processor, Path origin, Path destination){ - if (checkOffload(origin, destination, processor.executor.name)) { - final params = new TaskStartParams(TaskId.next(), processor.indexCount.incrementAndGet()) - final values = new ArrayList(1) - log.debug("Creating task for file publication: ${origin.toUri().toString()} -> ${destination.toUri().toString()} " ) - values[0] = generateFileValues(origin, destination) - final args = new ArrayList(2) - args[0] = params - args[1] = values - assert args.size() == 2 - processor.invokeTask(args.toArray()) - runningPublications.put(processor.currentTask.get(), Nextflow.tuple(origin, destination)) + return session.publishOffloadBatchSize > 0 && source.scheme in SUPPORTED_SCHEMES[executor] && destination.scheme in SUPPORTED_SCHEMES[executor]; + } + + private void invokeProcessor(inputValue) { + final params = new TaskStartParams(TaskId.next(), publishProcessor.indexCount.incrementAndGet()) + final values = new ArrayList(1) + values[0] = inputValue + final args = new ArrayList(2) + args[0] = params + args[1] = values + publishProcessor.invokeTask(args.toArray()) + } + + private synchronized boolean tryOffload(String command, Path origin, Path destination, PublishRetryConfig retryConfig, boolean failonError){ + if (checkOffload(origin, destination, publishProcessor.executor.name)) { + final id = indexCount.incrementAndGet() + runningPublications.put(id, Nextflow.tuple(origin, destination, failonError)) + commands.add(generateExecutionCommand(id, command, origin, destination, retryConfig)) + if (commands.size() == session.publishOffloadBatchSize){ + invokeProcessor(commands.join(";")) + commands.clear() + } return true } return false @@ -85,20 +99,33 @@ class PublishOffloadManager { return ( (!isFusionEnabled()) && (ExecutorFactory.getDefaultExecutorName(session) == 'awsbatch') ) } - private ArrayTuple generateFileValues(Path origin, Path destination){ - if ( isFusionEnabled() ){ - Nextflow.tuple(FusionHelper.toContainerMount(origin), FusionHelper.toContainerMount(destination)) + private String generateExecutionCommand(Integer id, String command, Path origin, Path destination, PublishRetryConfig retryConfig){ + return "$PUBLISH_FUNCTION ${retryConfig.maxAttempts} ${retryConfig.delay.toMillis()} ${retryConfig.jitter} ${retryConfig.maxDelay.toMillis()} " + + "$id $command ${convertFilePath(origin)} ${convertFilePath(destination)}" + } + + private String convertFilePath(Path path) { + if (isFusionEnabled()) { + return FusionHelper.toContainerMount(path) } else { - Nextflow.tuple(FilesEx.toUriString(origin), FilesEx.toUriString(destination)) + return FilesEx.toUriString(path) } } - boolean tryMoveOffload(Path origin, Path destination) { - tryInvokeProcessor(moveProcessor, origin, destination) + boolean tryMoveOffload(Path origin, Path destination, PublishRetryConfig retryConfig, boolean failonError) { + String command = 'mv' + if ( useS5cmd() ) { + command = 's5cmd mv' + } + tryOffload(command, origin, destination, retryConfig, failonError) } - boolean tryCopyOffload(Path origin, Path destination) { - tryInvokeProcessor(copyProcessor, origin, destination) + boolean tryCopyOffload(Path origin, Path destination, PublishRetryConfig retryConfig, boolean failonError) { + String command = 'cp' + if ( useS5cmd() ) { + command = 's5cmd cp' + } + tryOffload(command, origin, destination, retryConfig, failonError) } private PublishTaskProcessor createProcessor( String name, BodyDef body){ @@ -112,8 +139,8 @@ class PublishOffloadManager { if (useS5cmd()) { processConfig.put('container', S5CMD_CONTAINER); } - processConfig._in_tuple(new TokenValCall('source'), new TokenValCall('target')) - + processConfig._in_val('executions') + processConfig._out_stdout() if ( !body ) throw new IllegalArgumentException("Missing script in the specified process block -- make sure it terminates with the script string to be executed") @@ -127,10 +154,18 @@ class PublishOffloadManager { new PublishTaskProcessor( name, execObj, session, session.script, processConfig, body, this ) } + synchronized void close() { + closed=true + if ( commands.size() ){ + invokeProcessor(commands.join(";")) + commands.clear() + } + + } } +@Slf4j class PublishTaskProcessor extends TaskProcessor{ - PublishOffloadManager manager PublishTaskProcessor(String name, Executor executor, Session session, BaseScript baseScript, ProcessConfig processConfig, BodyDef bodyDef, PublishOffloadManager manager) { @@ -139,8 +174,46 @@ class PublishTaskProcessor extends TaskProcessor{ } @Override - void finalizeTask0(TaskRun task){ - final tuple = manager.runningPublications.remove(task) - session.notifyFilePublish((Path)tuple.get(0), (Path)tuple.get(1)) + void finalizeTask0(TaskRun task) { + if( task.outputs.size() == 1 ){ + def value = task.outputs.values().first() + value = value instanceof Path ? value.text : value?.toString() + for ( String finishedCopy : value.split('\n') ){ + final result = finishedCopy.split(":") + if (result.size() == 2) { + final id = result[0] as Integer + final tuple = manager.runningPublications.remove(id) + final exitcode = result[1] as Integer + if( exitcode == 0 ){ + session.notifyFilePublish((Path) tuple.get(0), (Path) tuple.get(1)) + } else { + if (tuple.get(2) as Boolean) { + log.error("Publication of file ${tuple.get(0)} -> ${tuple.get(1)} failed.") + } else { + log.warn("Publication of file ${tuple.get(0)} -> ${tuple.get(1)} failed.") + } + printPublishTaskError(task) + } + } + } + } else { + log.error("Incorrect number of outputs in the publish task"); + } + } + + private void printPublishTaskError(TaskRun task){ + final List message = [] + final max = 50 + final lines = task.dumpStderr(max) + message << "Executed publish task:" + if( lines ) { + message << "\nCommand error:" + for( String it : lines ) { + message << " ${stripWorkDir(it, task.workDir)}" + } + } + if( task?.workDir ) + message << "\nWork dir:\n ${task.workDirStr}" + log.debug(message.join('\n')) } } diff --git a/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh b/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh new file mode 100644 index 0000000000..e9ba4d45cb --- /dev/null +++ b/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh @@ -0,0 +1,72 @@ +## +## Copyright 2013-2024, Seqera Labs +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +set +e +export LC_NUMERIC=C + +nxf_publish() { + local max_retries=$1 + local delay_ms=$2 + local jitter_factor=$3 + local max_delay_ms=$4 + local id=$5 + shift 5 + local attempt=1 + + while true; do + "$@" && echo $id:0 && return # Run the command; if it succeeds, exit the function + + if (( attempt >= max_retries )); then + echo "Command failed after $attempt attempts." >&2 + echo $id:1 + return + fi + + # Calculate jitter as a factor of the delay + local min_factor=$(bc <<< "1 - $jitter_factor") + local max_factor=$(bc <<< "1 + $jitter_factor") + local random_factor=$(awk -v min="$min_factor" -v max="$max_factor" 'BEGIN{srand(); print min + (max-min)*rand()}') + + # Apply the jitter factor to the delay + local jittered_delay_ms=$(bc <<< "$delay_ms * $random_factor") + + # Ensure the delay does not exceed the maximum delay + if (( $(bc <<< "$jittered_delay_ms > $max_delay_ms") == 1 )); then + jittered_delay_ms=$max_delay_ms + fi + + # Convert milliseconds to seconds for sleep + local total_delay_sec=$(bc <<< "scale=3; $jittered_delay_ms / 1000") + + echo "Command failed. Retrying in $total_delay_sec seconds..." >&2 + sleep "$total_delay_sec" + + # Increment the delay with exponential backoff, clamped to max_delay_ms + delay_ms=$((delay_ms * 2)) + if (( delay_ms > max_delay_ms )); then + delay_ms=$max_delay_ms + fi + + ((attempt++)) + done +} + +IFS=$';' +publications=('!{executions}') +for publication in "${publications[@]}"; do + eval $publication & +done +unset IFS +wait \ No newline at end of file diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy index cde87e0ff3..2083df8389 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy @@ -43,8 +43,7 @@ class PublishOffloadManagerTest extends Specification { when: poManager.init() then: - poManager.copyProcessor != null - poManager.moveProcessor != null + poManager.publishProcessor != null cleanup: session.classesDir?.deleteDir() diff --git a/tests/checks/publish-offload.nf/.config b/tests/checks/publish-offload.nf/.config index 1840185eb4..44d46055e9 100644 --- a/tests/checks/publish-offload.nf/.config +++ b/tests/checks/publish-offload.nf/.config @@ -1 +1 @@ -publishOffload = true +publishOffloadBatchSize = 2 From ac04d04d615dc4518c69b39bdd1dc9fd5eaa9cc2 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 20 Dec 2024 20:24:02 +0100 Subject: [PATCH 07/16] fix minor issue Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 4 +-- .../processor/PublishOffloadManager.groovy | 28 ++++++++++--------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 6c5485f6a0..908faf9591 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -294,7 +294,7 @@ class Session implements ISession { FilePorter getFilePorter() { filePorter } - int publishOffloadBatchSize + private int publishOffloadBatchSize private PublishOffloadManager publishOffloadManager @@ -406,7 +406,7 @@ class Session implements ISession { if ( this.publishOffloadBatchSize ) { // -- publish offload manager config log.warn("Publish offload flag enabled. Creating Offload Manager") - this.publishOffloadManager = new PublishOffloadManager(this) + this.publishOffloadManager = new PublishOffloadManager(this, publishOffloadBatchSize) } } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy index 01aed46d6c..ff9cdf5e25 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy @@ -40,15 +40,17 @@ class PublishOffloadManager { static final String PUBLISH_FUNCTION = 'nxf_publish' private Session session private PublishTaskProcessor publishProcessor - private List commands = new LinkedList(); - private boolean closed = false; + private List commands = new LinkedList() + private boolean closed = false + private int batchSize /** * Unique offloaded index number */ final protected AtomicInteger indexCount = new AtomicInteger() - PublishOffloadManager(Session session) { + PublishOffloadManager(Session session, int batchSize) { this.session = session; + this.batchSize = batchSize; } @PackageScope TaskProcessor getPublishProcessor(){ publishProcessor } @@ -64,7 +66,7 @@ class PublishOffloadManager { } private boolean checkOffload(Path source, Path destination, String executor){ - return session.publishOffloadBatchSize > 0 && source.scheme in SUPPORTED_SCHEMES[executor] && destination.scheme in SUPPORTED_SCHEMES[executor]; + return this.batchSize > 0 && source.scheme in SUPPORTED_SCHEMES[executor] && destination.scheme in SUPPORTED_SCHEMES[executor]; } private void invokeProcessor(inputValue) { @@ -77,12 +79,12 @@ class PublishOffloadManager { publishProcessor.invokeTask(args.toArray()) } - private synchronized boolean tryOffload(String command, Path origin, Path destination, PublishRetryConfig retryConfig, boolean failonError){ + private synchronized boolean tryOffload(String command, Path origin, Path destination, PublishRetryConfig retryConfig, boolean failOnError){ if (checkOffload(origin, destination, publishProcessor.executor.name)) { final id = indexCount.incrementAndGet() - runningPublications.put(id, Nextflow.tuple(origin, destination, failonError)) + runningPublications.put(id, Nextflow.tuple(origin, destination, failOnError)) commands.add(generateExecutionCommand(id, command, origin, destination, retryConfig)) - if (commands.size() == session.publishOffloadBatchSize){ + if (commands.size() == this.batchSize){ invokeProcessor(commands.join(";")) commands.clear() } @@ -112,20 +114,20 @@ class PublishOffloadManager { } } - boolean tryMoveOffload(Path origin, Path destination, PublishRetryConfig retryConfig, boolean failonError) { + boolean tryMoveOffload(Path origin, Path destination, PublishRetryConfig retryConfig, boolean failOnError) { String command = 'mv' if ( useS5cmd() ) { command = 's5cmd mv' } - tryOffload(command, origin, destination, retryConfig, failonError) + tryOffload(command, origin, destination, retryConfig, failOnError) } - boolean tryCopyOffload(Path origin, Path destination, PublishRetryConfig retryConfig, boolean failonError) { + boolean tryCopyOffload(Path origin, Path destination, PublishRetryConfig retryConfig, boolean failOnError) { String command = 'cp' if ( useS5cmd() ) { command = 's5cmd cp' } - tryOffload(command, origin, destination, retryConfig, failonError) + tryOffload(command, origin, destination, retryConfig, failOnError) } private PublishTaskProcessor createProcessor( String name, BodyDef body){ @@ -183,8 +185,8 @@ class PublishTaskProcessor extends TaskProcessor{ if (result.size() == 2) { final id = result[0] as Integer final tuple = manager.runningPublications.remove(id) - final exitcode = result[1] as Integer - if( exitcode == 0 ){ + final exitCode = result[1] as Integer + if( exitCode == 0 ){ session.notifyFilePublish((Path) tuple.get(0), (Path) tuple.get(1)) } else { if (tuple.get(2) as Boolean) { From 9ac038fd65981878d300fabf71e21c9def029c94 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 20 Dec 2024 20:42:38 +0100 Subject: [PATCH 08/16] fix unit test Signed-off-by: jorgee --- .../groovy/nextflow/processor/PublishOffloadManagerTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy index 2083df8389..7af426409d 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy @@ -39,7 +39,7 @@ class PublishOffloadManagerTest extends Specification { session.init(scriptFile) //session.start() session.script = script; - def poManager = new PublishOffloadManager(session); + def poManager = new PublishOffloadManager(session, 2); when: poManager.init() then: From 2c5c185481ae669c6f9615f0f219993bcba9506e Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 20 Dec 2024 22:36:30 +0100 Subject: [PATCH 09/16] fix integration test Signed-off-by: jorgee --- tests/checks/publish-offload.nf/.checks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/checks/publish-offload.nf/.checks b/tests/checks/publish-offload.nf/.checks index e53b74e04b..c40a2ea993 100644 --- a/tests/checks/publish-offload.nf/.checks +++ b/tests/checks/publish-offload.nf/.checks @@ -3,5 +3,5 @@ cp .config nextflow.config $NXF_RUN > stdout [[ `grep 'INFO' .nextflow.log | grep -c 'Submitted process > gen_data'` == 1 ]] || false -[[ `grep 'INFO' .nextflow.log | grep -c 'Submitted process > publish_dir_copy_process'` == 1 ]] || false +[[ `grep 'INFO' .nextflow.log | grep -c 'Submitted process > publish_process'` == 1 ]] || false [[ -f publishDir/chunk_1 ]] || false From 06e61d02a73f2dd488584d194282abc29da2e3aa Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 7 Jan 2025 12:51:48 +0100 Subject: [PATCH 10/16] change bc by awk in copy process Signed-off-by: jorgee --- .../nextflow/processor/copy-group-template.sh | 24 +++++-------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh b/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh index e9ba4d45cb..234781579d 100644 --- a/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh +++ b/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh @@ -34,22 +34,11 @@ nxf_publish() { return fi - # Calculate jitter as a factor of the delay - local min_factor=$(bc <<< "1 - $jitter_factor") - local max_factor=$(bc <<< "1 + $jitter_factor") - local random_factor=$(awk -v min="$min_factor" -v max="$max_factor" 'BEGIN{srand(); print min + (max-min)*rand()}') - - # Apply the jitter factor to the delay - local jittered_delay_ms=$(bc <<< "$delay_ms * $random_factor") - - # Ensure the delay does not exceed the maximum delay - if (( $(bc <<< "$jittered_delay_ms > $max_delay_ms") == 1 )); then - jittered_delay_ms=$max_delay_ms - fi - - # Convert milliseconds to seconds for sleep - local total_delay_sec=$(bc <<< "scale=3; $jittered_delay_ms / 1000") + # Calculate jitter delay + local jittered_delay_ms=$(awk -v delay="$delay_ms" -v jitter="$jitter_factor" 'BEGIN { srand(); print ( 1 - jitter + (2 * jitter) * rand() ) * delay }') + # Ensure the delay does not exceed the maximum delay and convert to seconds for sleep + local total_delay_sec=$(awk -v delay="$jittered_delay_ms" -v max="$max_delay_ms" 'BEGIN { if (delay > max) delay = max; print delay / 1000 }') echo "Command failed. Retrying in $total_delay_sec seconds..." >&2 sleep "$total_delay_sec" @@ -63,10 +52,9 @@ nxf_publish() { done } -IFS=$';' -publications=('!{executions}') +commands="!{executions}" +IFS=';' read -r -a publications <<< "$commands" for publication in "${publications[@]}"; do eval $publication & done -unset IFS wait \ No newline at end of file From 8467947b3f5756ce82caf0ccaca2c9c62d168fba Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 14 Jan 2025 11:14:26 +0100 Subject: [PATCH 11/16] include publish offload config Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 19 +++++---- .../processor/PublishOffloadManager.groovy | 39 ++++++++++++------- .../nextflow/processor/copy-group-template.sh | 34 ++++++++-------- 3 files changed, 54 insertions(+), 38 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 4314eb50c0..5c45ce15a5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -16,6 +16,7 @@ package nextflow +import nextflow.processor.PublishOffloadConfig import nextflow.processor.PublishOffloadManager import java.nio.file.Files @@ -294,7 +295,7 @@ class Session implements ISession { FilePorter getFilePorter() { filePorter } - private int publishOffloadBatchSize + private boolean publishOffload private PublishOffloadManager publishOffloadManager @@ -401,18 +402,22 @@ class Session implements ISession { // -- file porter config this.filePorter = new FilePorter(this) - this.publishOffloadBatchSize = config.publishOffloadBatchSize ? config.publishOffloadBatchSize as int : 0 + // -- Configuration for offloading workflow output publication + this.publishOffload = config.publishOffload ? loadPublishOffload(config.publishOffload as Map) : false - if ( this.publishOffloadBatchSize ) { - // -- publish offload manager config + } + + private boolean loadPublishOffload(Map config){ + final poConfig = new PublishOffloadConfig(config) + if (poConfig.enable){ log.warn("Publish offload flag enabled. Creating Offload Manager") - this.publishOffloadManager = new PublishOffloadManager(this, publishOffloadBatchSize) + this.publishOffloadManager = new PublishOffloadManager(this, poConfig) } - + return poConfig.enable } void startPublishOffloadManager() { - if ( this.publishOffloadBatchSize ) { + if ( this.publishOffload ) { log.debug("Starting Publish offload manager") this.publishOffloadManager?.init() } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy index ff9cdf5e25..b081a0695e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy @@ -31,7 +31,11 @@ import nextflow.util.ArrayTuple import java.nio.file.Path import java.util.concurrent.atomic.AtomicInteger - +/** + * Manages the offload of publishing outputs to Nextflow tasks + * + * @author Jorge Ejarque + */ @Slf4j class PublishOffloadManager { Map runningPublications= new HashMap(10) @@ -42,37 +46,41 @@ class PublishOffloadManager { private PublishTaskProcessor publishProcessor private List commands = new LinkedList() private boolean closed = false - private int batchSize + private PublishOffloadConfig config /** * Unique offloaded index number */ final protected AtomicInteger indexCount = new AtomicInteger() - PublishOffloadManager(Session session, int batchSize) { - this.session = session; - this.batchSize = batchSize; + PublishOffloadManager(Session session, PublishOffloadConfig config) { + this.session = session + this.config = config } @PackageScope TaskProcessor getPublishProcessor(){ publishProcessor } - void init(){ + synchronized void init(){ //Try with template BodyDef body = new BodyDef({ def file = PublishOffloadManager.class.getResource('copy-group-template.sh') - return file.text - },'publish file process', 'shell') + final engine = new TaskTemplateEngine() + engine.setPlaceholder('!' as char) + engine.eval(file.text, delegate) + return engine.result + },'publish file process', 'script') this.publishProcessor = createProcessor( "publish_process", body ) - + log.debug("Publish Offload Manager initialized.") } private boolean checkOffload(Path source, Path destination, String executor){ - return this.batchSize > 0 && source.scheme in SUPPORTED_SCHEMES[executor] && destination.scheme in SUPPORTED_SCHEMES[executor]; + return this.config.enable && source.scheme in SUPPORTED_SCHEMES[executor] && destination.scheme in SUPPORTED_SCHEMES[executor] } private void invokeProcessor(inputValue) { final params = new TaskStartParams(TaskId.next(), publishProcessor.indexCount.incrementAndGet()) - final values = new ArrayList(1) + final values = new ArrayList(2) values[0] = inputValue + values[1] = this.config.maxParallel final args = new ArrayList(2) args[0] = params args[1] = values @@ -84,7 +92,7 @@ class PublishOffloadManager { final id = indexCount.incrementAndGet() runningPublications.put(id, Nextflow.tuple(origin, destination, failOnError)) commands.add(generateExecutionCommand(id, command, origin, destination, retryConfig)) - if (commands.size() == this.batchSize){ + if (commands.size() == this.config.batchSize){ invokeProcessor(commands.join(";")) commands.clear() } @@ -94,7 +102,7 @@ class PublishOffloadManager { } private isFusionEnabled(){ - return FusionHelper.isFusionEnabled(session) + return FusionHelper.isFusionEnabled(session) && config.useFusion } private useS5cmd(){ @@ -139,9 +147,10 @@ class PublishOffloadManager { // Invoke the code block which will return the script closure to the executed. // As side effect will set all the property declarations in the 'taskConfig' object. if (useS5cmd()) { - processConfig.put('container', S5CMD_CONTAINER); + processConfig.put('container', S5CMD_CONTAINER) } processConfig._in_val('executions') + processConfig._in_val('max_parallel') processConfig._out_stdout() if ( !body ) throw new IllegalArgumentException("Missing script in the specified process block -- make sure it terminates with the script string to be executed") @@ -199,7 +208,7 @@ class PublishTaskProcessor extends TaskProcessor{ } } } else { - log.error("Incorrect number of outputs in the publish task"); + log.error("Incorrect number of outputs in the publish task") } } diff --git a/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh b/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh index 234781579d..0d31be496b 100644 --- a/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh +++ b/modules/nextflow/src/main/resources/nextflow/processor/copy-group-template.sh @@ -1,18 +1,4 @@ -## -## Copyright 2013-2024, Seqera Labs -## -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. -## You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. -## + set +e export LC_NUMERIC=C @@ -52,9 +38,25 @@ nxf_publish() { done } +check_max_parallel_copies() { + while ((${#pid[@]}>=${max_parallel_copies})); do + sleep 0.2 2>/dev/null + local copy=() + for x in "${pid[@]}"; do + # if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code + [[ -e /proc/$x ]] && copy+=($x) || wait $x + done + pid=("${copy[@]}") + done +} + commands="!{executions}" +max_parallel_copies="!{max_parallel}" +pid=() IFS=';' read -r -a publications <<< "$commands" for publication in "${publications[@]}"; do - eval $publication & + check_max_parallel_copies + eval $publication & + pid+=($!) done wait \ No newline at end of file From 818d168fa0eb65872bc4fc03ef45d440f5caec28 Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 14 Jan 2025 15:49:42 +0100 Subject: [PATCH 12/16] add publish offload config classs Signed-off-by: jorgee --- .../processor/PublishOffloadConfig.groovy | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadConfig.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadConfig.groovy new file mode 100644 index 0000000000..a0465acc30 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadConfig.groovy @@ -0,0 +1,66 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.processor + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.util.MemoryUnit + +/** + * Models the offload configuration for publishing outputs + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +class PublishOffloadConfig { + boolean enable = false + boolean useFusion = false + int batchSize = 10 + int maxParallel = 10 + + PublishOffloadConfig( Map config){ + if( config.enable ){ + enable = config.enable as boolean + } + if( config.useFusion ){ + useFusion = config.useFusion as boolean + } + if( config.batchSize ){ + batchSize = config.batchSize as int + if (batchSize < 0){ + log.warn("Publish offload 'batchSize' property must be bigger than 0. Setting 'batchSize' to 1") + batchSize = 1 + } + } + if( config.maxParallel ){ + maxParallel = config.maxParallel as int + if (maxParallel < 0){ + log.warn("Publish offload 'maxParallel' property must be bigger than 0. Setting 'maxParallel' to 1") + maxParallel = 1 + } + }else{ + // if no maxParallel specified set equal to batchSize + maxParallel = batchSize + } + } + + + + +} From 8d078901c65dc6e69350114607c48e6952a9b5b3 Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 15 Jan 2025 10:11:35 +0100 Subject: [PATCH 13/16] Adding tests for configuration, updating publish offload validation test configuration Signed-off-by: jorgee --- .../processor/PublishOffloadConfigTest.groovy | 85 +++++++++++++++++++ .../PublishOffloadManagerTest.groovy | 3 +- tests/checks/publish-offload.nf/.config | 6 +- 3 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadConfigTest.groovy diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadConfigTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadConfigTest.groovy new file mode 100644 index 0000000000..63b3359640 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadConfigTest.groovy @@ -0,0 +1,85 @@ +package nextflow.processor + +import spock.lang.Specification + +class PublishOffloadConfigTest extends Specification { + + def "should initialize with default values when config is empty"() { + given: + Map config = [:] + + when: + def publishOffloadConfig = new PublishOffloadConfig(config) + + then: + publishOffloadConfig.enable == false + publishOffloadConfig.useFusion == false + publishOffloadConfig.batchSize == 10 + publishOffloadConfig.maxParallel == 10 + } + + def "should set values correctly based on the config map"() { + given: + Map config = [ + enable : true, + useFusion : true, + batchSize : 20, + maxParallel: 15 + ] + + when: + def publishOffloadConfig = new PublishOffloadConfig(config) + + then: + publishOffloadConfig.enable == true + publishOffloadConfig.useFusion == true + publishOffloadConfig.batchSize == 20 + publishOffloadConfig.maxParallel == 15 + } + + def "should enforce batchSize and maxParallel minimum value of 1"() { + given: + Map config = [ + batchSize : -10, + maxParallel: -5 + ] + + when: + def publishOffloadConfig = new PublishOffloadConfig(config) + + then: + publishOffloadConfig.batchSize == 1 + publishOffloadConfig.maxParallel == 1 + } + + def "should set maxParallel to batchSize if maxParallel is not specified"() { + given: + Map config = [ + batchSize: 25 + ] + + when: + def publishOffloadConfig = new PublishOffloadConfig(config) + + then: "maxParallel is set equal to batchSize" + publishOffloadConfig.batchSize == 25 + publishOffloadConfig.maxParallel == 25 + } + + def "should respect values for optional parameters when explicitly set in the config map"() { + given: + Map config = [ + enable : false, + useFusion: true + ] + + when: + def publishOffloadConfig = new PublishOffloadConfig(config) + + then: + publishOffloadConfig.enable == false + publishOffloadConfig.useFusion == true + publishOffloadConfig.batchSize == 10 // default value + publishOffloadConfig.maxParallel == 10 // default value + } +} diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy index 7af426409d..e7a851efe1 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/PublishOffloadManagerTest.groovy @@ -39,7 +39,8 @@ class PublishOffloadManagerTest extends Specification { session.init(scriptFile) //session.start() session.script = script; - def poManager = new PublishOffloadManager(session, 2); + def poConfig = new PublishOffloadConfig([enable: true, batchSize: 2]) + def poManager = new PublishOffloadManager(session, poConfig); when: poManager.init() then: diff --git a/tests/checks/publish-offload.nf/.config b/tests/checks/publish-offload.nf/.config index 44d46055e9..005be38509 100644 --- a/tests/checks/publish-offload.nf/.config +++ b/tests/checks/publish-offload.nf/.config @@ -1 +1,5 @@ -publishOffloadBatchSize = 2 +publishOffload { + enable = true + batchSize = 2 + max_parallel = 2 +} From 9e600041c65d1483b95066f5680bc5489a860069 Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 15 Jan 2025 14:24:07 +0100 Subject: [PATCH 14/16] Update documentation with publishOffload options Signed-off-by: jorgee --- docs/reference/config.md | 31 +++++++++++++++++++ .../processor/PublishOffloadManager.groovy | 4 +-- tests/checks/publish-offload.nf/.checks | 2 +- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/docs/reference/config.md b/docs/reference/config.md index 0efb1ce6ae..b0398cba51 100644 --- a/docs/reference/config.md +++ b/docs/reference/config.md @@ -1300,6 +1300,37 @@ The following settings are available: Read the {ref}`container-podman` page to learn more about how to use Podman containers with Nextflow. +(config-publishOffload)= + +## `publishOffload` + +The `publishOffload` scope allows you to configure the offload of the publication of workflow output as remote tasks. An internal `offload:publish_process` is created to offload the file operation (copies and moves) required to publish the output files. This functionality is currently available for 's3' files with the 'awsbatch' executor. File operations can be maneged with `s5cmd` (default option) of Fusion. + +The following settings are available: + +`publishOffload.enable` +: If `true` enables the offload of workflow outputs publication. (Default: `false`) + +`publishOffload.batchSize` +: Indicates the number of files operations in each `publish_process` execution. (Default: 10) + +`publishOffload.maxParallel` +: Indicates the maximum number of concurrent files operations in each `publish_process` execution. If `maxParallel` is not specified, it gets the value of `batchSize`. + +`publishOffload.useFusion` +: Enables the use of Fusion to manage the file operations. (Default: `false`) + +You can also control the resources assigned to the `publish_process` defining a process selector such as the following: + +```groovy +process { + withName: publish_process { + cpus = 4 + memory = 8.GB + } +} +``` + (config-report)= ## `report` diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy index b081a0695e..4b43449a48 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishOffloadManager.groovy @@ -68,7 +68,7 @@ class PublishOffloadManager { engine.eval(file.text, delegate) return engine.result },'publish file process', 'script') - this.publishProcessor = createProcessor( "publish_process", body ) + this.publishProcessor = createProcessor( "offload:publish_process", body ) log.debug("Publish Offload Manager initialized.") } @@ -156,7 +156,7 @@ class PublishOffloadManager { throw new IllegalArgumentException("Missing script in the specified process block -- make sure it terminates with the script string to be executed") // -- apply settings from config file to process config - processConfig.applyConfig((Map)session.config.process, name, name, name) + processConfig.applyConfig((Map)session.config.process, 'publish_process', name, name) // -- get the executor for the given process config final execObj = session.executorFactory.getExecutor(name, processConfig, body, session) diff --git a/tests/checks/publish-offload.nf/.checks b/tests/checks/publish-offload.nf/.checks index c40a2ea993..62a11e5d2a 100644 --- a/tests/checks/publish-offload.nf/.checks +++ b/tests/checks/publish-offload.nf/.checks @@ -3,5 +3,5 @@ cp .config nextflow.config $NXF_RUN > stdout [[ `grep 'INFO' .nextflow.log | grep -c 'Submitted process > gen_data'` == 1 ]] || false -[[ `grep 'INFO' .nextflow.log | grep -c 'Submitted process > publish_process'` == 1 ]] || false +[[ `grep 'INFO' .nextflow.log | grep -c 'Submitted process > offload:publish_process'` == 1 ]] || false [[ -f publishDir/chunk_1 ]] || false From 17e23e9d0c9576138293f1081dab214507cc121c Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 15 Jan 2025 16:13:18 +0100 Subject: [PATCH 15/16] update documentation Signed-off-by: jorgee --- docs/reference/config.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/reference/config.md b/docs/reference/config.md index b0398cba51..a871e54786 100644 --- a/docs/reference/config.md +++ b/docs/reference/config.md @@ -1303,9 +1303,14 @@ Read the {ref}`container-podman` page to learn more about how to use Podman cont (config-publishOffload)= ## `publishOffload` - +:::{versionadded} 24.11.0-edge +::: The `publishOffload` scope allows you to configure the offload of the publication of workflow output as remote tasks. An internal `offload:publish_process` is created to offload the file operation (copies and moves) required to publish the output files. This functionality is currently available for 's3' files with the 'awsbatch' executor. File operations can be maneged with `s5cmd` (default option) of Fusion. +:::{note} +To avoid error in the offloaded tasks, be sure that the `InstanceRole` of your AWS Batch Compute Environment has read and write permissions in the bucket used to publish the files. +::: + The following settings are available: `publishOffload.enable` @@ -1325,7 +1330,7 @@ You can also control the resources assigned to the `publish_process` defining a ```groovy process { withName: publish_process { - cpus = 4 + cpus = 4: memory = 8.GB } } From 3d50df02df73809463766eabb9a227e04277c0ea Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 15 Jan 2025 16:14:15 +0100 Subject: [PATCH 16/16] update documentation Signed-off-by: jorgee --- docs/reference/config.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/config.md b/docs/reference/config.md index a871e54786..59cc32884f 100644 --- a/docs/reference/config.md +++ b/docs/reference/config.md @@ -1330,7 +1330,7 @@ You can also control the resources assigned to the `publish_process` defining a ```groovy process { withName: publish_process { - cpus = 4: + cpus = 4 memory = 8.GB } }