Skip to content

Commit

Permalink
Merge pull request #1 from Aratz/main
Browse files Browse the repository at this point in the history
Implement first version of pipeline
  • Loading branch information
Aratz authored Sep 10, 2024
2 parents 323b984 + 2fbb3a5 commit 1776530
Show file tree
Hide file tree
Showing 23 changed files with 246 additions and 16 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/nf-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Adapted from https://github.com/nf-core/demultiplex/blob/17cde5d8f22f5327beac9637e941e4775ada1b3f/.github/workflows/ci.yml
#
# CI workflow: installs Nextflow and nf-test, runs the nf-test suite, then runs
# the pipeline end-to-end with the `test` profile.
name: Run nf-test

# Run on every push and on every pull request.
on:
  - push
  - pull_request

# Environment values are always handed to steps as strings, so they are quoted
# here to avoid any implicit YAML typing (booleans, version-like numbers).
env:
  NXF_VER: "24.04.1"
  NXF_ANSI_LOG: "false"
  NFT_VER: "0.9.0"
  NFT_WORKDIR: "~"
  NFT_DIFF: "pdiff"
  NFT_DIFF_ARGS: "--line-numbers --expand-tabs=2"

jobs:
  test:
    name: Run nf-tests and pipeline
    runs-on: "ubuntu-24.04"
    steps:
      - name: Check out pipeline code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Install pdiff to see diff between nf-test snapshots
        run: |
          python -m pip install --upgrade pip
          pip install pdiff

      - name: Install Nextflow
        uses: nf-core/setup-nextflow@v2
        with:
          version: "${{ env.NXF_VER }}"

      - name: Install nf-test
        uses: nf-core/setup-nf-test@v1
        with:
          # Quoted so the expanded version is always read as a string.
          version: "${{ env.NFT_VER }}"

      - name: Run Tests
        run: |
          nf-test test --ci

      - name: Run pipeline with test data
        run: |
          # Quote the workspace path so it survives shell word splitting.
          nextflow run "${GITHUB_WORKSPACE}" -profile test
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Nextflow task work directory
work/

# Nextflow log files and cache/metadata directory
.nextflow.log*
.nextflow/

# nf-test log files and working directory (see `workDir` in nf-test.config)
.nf-test.log*
.nf-test/
32 changes: 32 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Sisyphus-style checksums

This pipeline takes a runfolder and an include file and generates a checksum
file compatible with [Sisyphus](https://github.com/Molmed/sisyphus).

## Usage

```nextflow
nextflow run main.nf --input_folder <path_to_runfolder> --include_file <path_to_include_file> [--ignore_cache]
```

### Running on Uppmax
When running on Uppmax, use `-profile uppmax` and specify the project with `--project <uppmax project>`.

## Input
- `--input_folder <path_to_runfolder>`: path to the runfolder for which the
checksums need to be generated
- `--include_file <path_to_include_file>`: determines which files should appear
in the checksums (same format as an `rsync` `--include-from` file)
- `--ignore_cache`: whether to ignore previously computed checksums. When
this flag is set, the pipeline ignores previously computed checksums and
recomputes them all. Otherwise, it keeps the checksums available in
`MD5/checksums.md5` in the input runfolder.

## Output

The pipeline will write the results to `MD5/checksums.md5` in the input runfolder.

## Running the tests
```
nf-test test
```
28 changes: 12 additions & 16 deletions bin/create_sis_style_checksums.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,27 @@ set -o errexit

export FOLDER="$1"
export INCLUDE_FILE="$2"
export OUTPUT_FILE="$3"
export IGNORE_CACHE="$4"
export FOLDER_NAME="$(basename "$FOLDER")"
export OUTPUT_FILE="checksums.md5"
export IGNORE_CACHE="$3"
export FOLDER_NAME=$(basename $FOLDER)
export OUTPATH="$FOLDER/MD5/$OUTPUT_FILE"
export TMPPATH="${OUTPATH}.tmp"

pushd "$FOLDER/.." > /dev/null
export TMPPATH="${OUTPUT_FILE}.tmp"

mkdir -p "$(dirname "${OUTPATH}")"
if [ "$(echo "$IGNORE_CACHE" |tr '[:upper:]' '[:lower:]')" = true ]
if [ "$(echo "$IGNORE_CACHE" |tr '[:upper:]' '[:lower:]')" = false ]
then
rm -f "${OUTPATH}"
cp $OUTPATH $OUTPUT_FILE
fi
touch "${OUTPATH}"
touch $OUTPUT_FILE

rsync \
-vrktp \
--list-only \
--relative \
--chmod=Dg+sx,ug+w,o-rwx \
--prune-empty-dirs \
--exclude="${OUTPATH}" \
--exclude="${TMPPATH}" \
--exclude="$OUTPUT_FILE" \
--exclude="$TMPPATH" \
--include-from="$INCLUDE_FILE" \
--exclude="*" \
"$(dirname "$FOLDER")/./${FOLDER_NAME}/" |\
Expand All @@ -42,15 +40,13 @@ comm \
-2 \
-3 \
- \
<(sed -re 's/^\S+\s+//' "$OUTPATH" |sort) |\
<(sed -re 's/^\S+\s+//' "$OUTPUT_FILE" |sort) |\
xargs \
-0 \
-d '\n' \
-r \
md5sum > "$TMPPATH"

cat <(grep -v '^$' "$OUTPATH") <(grep -v '^$' "$TMPPATH") > "${OUTPATH}.intermediate"
mv "${OUTPATH}.intermediate" "${OUTPATH}"
cat <(grep -v '^$' "$OUTPUT_FILE") <(grep -v '^$' "$TMPPATH") > "${OUTPUT_FILE}.intermediate"
mv "${OUTPUT_FILE}.intermediate" "${OUTPUT_FILE}"
rm -f "$TMPPATH"

popd > /dev/null
24 changes: 24 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
 * Generate a Sisyphus-style checksum file for a runfolder.
 *
 * Inputs:
 *   input_folder  - runfolder to checksum (staged into the task directory)
 *   include_file  - include file handed to create_sis_style_checksums.sh
 *   ignore_cache  - value forwarded as the script's third argument
 *
 * Output:
 *   checksums.md5 - produced in the task directory and copied (overwriting any
 *                   existing file) to <params.input_folder>/MD5/
 *
 * NOTE: the publish location is derived from `params.input_folder`, not from
 * the `input_folder` input, so callers must set that parameter as well.
 */
process CREATE_SIS_STYLE_CHECKSUMS {
    publishDir "${params.input_folder}/MD5/", mode: 'copy', overwrite: true

    input:
    path input_folder
    path include_file
    val ignore_cache

    output:
    path "checksums.md5"

    script:
    // Arguments are double-quoted so names containing spaces are passed to the
    // script as single words.
    """
    create_sis_style_checksums.sh "${input_folder}" "${include_file}" "${ignore_cache}"
    """
}

/*
 * Entry workflow: checksum the runfolder given by `--input_folder`, using the
 * include file given by `--include_file`.
 */
workflow {
    // Fail early with a clear message instead of an obscure error further down.
    if (!params.input_folder || !params.include_file) {
        error("Both --input_folder and --include_file must be provided")
    }

    // `path` inputs only accept strings that are absolute paths; going through
    // file() also resolves relative paths and verifies that the inputs exist.
    CREATE_SIS_STYLE_CHECKSUMS(
        file(params.input_folder, checkIfExists: true),
        file(params.include_file, checkIfExists: true),
        params.ignore_cache
    )
}
25 changes: 25 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Pipeline defaults: keep previously computed checksums unless told otherwise.
params {
    ignore_cache = false
}

profiles {
    // Cluster profile: every process is submitted through SLURM.
    uppmax {
        process {
            executor       = 'slurm'
            // Account the job to the project given with --project.
            clusterOptions = { "-A ${params.project}" }
            cpus           = 1
            memory         = '8G'
            time           = '3h'
            scratch        = true
            shell          = ['/bin/bash', '-euo', 'pipefail']
            // Retry (at most twice) only when the task exits with status 255;
            // any other failure terminates the run.
            errorStrategy  = { task.exitStatus == 255 ? 'retry' : 'terminate' }
            maxRetries     = 2
        }
    }

    // Test profile: run against the bundled test runfolder and include file.
    test {
        params {
            input_folder = "${baseDir}/tests/test_resources/runfolder_nocache/"
            include_file = "${baseDir}/tests/test_resources/include.rsync"
            ignore_cache = false
        }
    }
}
8 changes: 8 additions & 0 deletions nf-test.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// nf-test settings for this repository.
config {
    // Directory holding the test files.
    testsDir "tests"

    // nf-test working directory.
    workDir ".nf-test"

    // Nextflow configuration file used when running the tests.
    configFile "tests/nextflow.config"

    // No profile is selected for the tests.
    profile ""
}
43 changes: 43 additions & 0 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// nf-test suite for the CREATE_SIS_STYLE_CHECKSUMS process in main.nf.
// In both tests, input[0] is the runfolder, input[1] the include file and
// input[2] the ignore_cache value. params.input_folder is set as well.
nextflow_process {
    name "Test Process CREATE_SIS_STYLE_CHECKSUMS"
    script "main.nf"
    process "CREATE_SIS_STYLE_CHECKSUMS"

    // ignore_cache = true, on the runfolder_nocache fixture.
    test("Generate checksums from scratch") {
        when {
            params {
                input_folder = "${projectDir}/tests/test_resources/runfolder_nocache"
            }
            process {
                """
                input[0] = "${projectDir}/tests/test_resources/runfolder_nocache"
                input[1] = "${projectDir}/tests/test_resources/include.rsync"
                input[2] = true
                """
            }
        }
        then {
            assert process.success
            assert snapshot(process.out).match()
        }
    }

    // ignore_cache = false, on the runfolder_cache fixture.
    test("Generate checksums with cache") {
        when {
            params {
                input_folder = "${projectDir}/tests/test_resources/runfolder_cache"
            }
            process {
                """
                input[0] = "${projectDir}/tests/test_resources/runfolder_cache"
                input[1] = "${projectDir}/tests/test_resources/include.rsync"
                input[2] = false
                """
            }
        }
        then {
            assert process.success
            assert snapshot(process.out).match()
        }
    }
}
30 changes: 30 additions & 0 deletions tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"Generate checksums from scratch": {
"content": [
{
"0": [
"checksums.md5:md5,75a433b1e14782f06d44c93142a2d3fb"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-09T13:40:49.058439"
},
"Generate checksums with cache": {
"content": [
{
"0": [
"checksums.md5:md5,d6a4e7c087ac3895562df860e16a9c10"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-09T13:40:53.342759"
}
}
5 changes: 5 additions & 0 deletions tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/*
========================================================================================
Nextflow config file for running tests
========================================================================================
*/
7 changes: 7 additions & 0 deletions tests/test_resources/include.rsync
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Include every directory.
+ */

# Include these three file names.
+ file1
+ file2
+ file3

# Exclude the MD5 directory.
# NOTE(review): rsync applies the first matching rule, so `+ */` above
# presumably already matches MD5/ before this rule is reached - confirm whether
# this exclusion should be listed first.
- MD5/
3 changes: 3 additions & 0 deletions tests/test_resources/runfolder_cache/MD5/checksums.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
cd06fdede6fc5f34dc50aef355f01071 runfolder_cache/file1
124e298567318b923507fe84c94f0615 runfolder_cache/file2
e944799aedc034b248f8e2c0a6e3384d runfolder_cache/file3
Binary file added tests/test_resources/runfolder_cache/file1
Binary file not shown.
Binary file added tests/test_resources/runfolder_cache/file2
Binary file not shown.
Binary file added tests/test_resources/runfolder_cache/file3
Binary file not shown.
Binary file added tests/test_resources/runfolder_cache/file4
Binary file not shown.
Binary file added tests/test_resources/runfolder_cache/file5
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/test_resources/runfolder_nocache/MD5/checksums.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
cd06fdede6fc5f34dc50aef355f01071 runfolder_nocache/file1
124e298567318b923507fe84c94f0615 runfolder_nocache/file2
Binary file added tests/test_resources/runfolder_nocache/file1
Binary file not shown.
Binary file added tests/test_resources/runfolder_nocache/file2
Binary file not shown.
Binary file added tests/test_resources/runfolder_nocache/file3
Binary file not shown.
Binary file added tests/test_resources/runfolder_nocache/file4
Binary file not shown.
Binary file added tests/test_resources/runfolder_nocache/file5
Binary file not shown.

0 comments on commit 1776530

Please sign in to comment.