Skip to content

Commit

Permalink
Merge in from dev
Browse files Browse the repository at this point in the history
  • Loading branch information
Ksenia Krasheninnikova committed Aug 15, 2024
1 parent 9a62d24 commit 2462a90
Show file tree
Hide file tree
Showing 19 changed files with 38 additions and 313 deletions.
6 changes: 1 addition & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,6 @@ jobs:
run: |
nextflow secrets set NCBI_API_KEY ${{ secrets.NCBI_API_KEY }}
- name: Download test data
run: |
curl https://tolit.cog.sanger.ac.uk/test-data/resources/genomeassembly/genomeassembly_test_data.tar.gz | tar xzf -
- name: Setup apptainer
uses: eWaterCycle/setup-apptainer@main

Expand All @@ -62,4 +58,4 @@ jobs:
- name: Run pipeline with test data
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_github,singularity --outdir ./results
nextflow run ${GITHUB_WORKSPACE} -profile test,singularity --outdir ./results
13 changes: 8 additions & 5 deletions assets/test.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
dataset:
id: baUndUnlc1
illumina_10X:
reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/
reads:
- https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_R1_001.fastq.gz
- https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_R2_001.fastq.gz
- https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_I1_001.fastq.gz
pacbio:
reads:
- reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/test/test/HiFi.reads.BIG.fasta
- reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta
HiC:
reads:
- reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2#7.sub.cram
- reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram
hic_motif: GATC,GANTC,CTNAG,TTAA
hic_aligner: bwamem2
busco:
Expand All @@ -16,6 +19,6 @@ mito:
species: Caradrina clavipalpis
min_length: 15000
code: 5
fam: /lustre/scratch124/tol/projects/darwin/users/cz3/organelle_asm/hmm_db/insecta_mito.fam
fam: https://github.com/c-zhou/OatkDB/raw/main/v20230921/insecta_mito.fam
plastid:
fam: /lustre/scratch124/tol/projects/darwin/users/cz3/organelle_asm/hmm_db/acrogymnospermae_pltd.fam
fam: https://github.com/c-zhou/OatkDB/raw/main/v20230921/acrogymnospermae_pltd.fam
19 changes: 0 additions & 19 deletions assets/test_github.yaml

This file was deleted.

5 changes: 4 additions & 1 deletion assets/test_gsMetZobe1.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
dataset:
id: gsMetZobe1
illumina_10X:
reads: /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/10x/
reads:
- /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/10x/gsMetZobe1_S6_L008_R1_001.fastq.gz
- /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/10x/gsMetZobe1_S6_L008_R2_001.fastq.gz
- /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/10x/gsMetZobe1_S6_L008_I1_001.fastq.gz
pacbio:
reads:
- reads: /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/pacbio/m64125_200823_145825.ccs.bc1019_BAK8B_OA--bc1019_BAK8B_OA.filtered.fasta.gz
Expand Down
7 changes: 4 additions & 3 deletions bin/generate_cram_csv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ for cram in "$@"; do
rgline=$(samtools view -H $cram|grep "RG"|sed 's/\t/\\t/g'|sed "s/'//g")

crampath=$(readlink -f ${cram})
craipath=$(readlink -f ${cram}.crai)

ncontainers=$(zcat ${crampath}.crai|wc -l)
ncontainers=$(zcat ${craipath} | wc -l)
base=$(basename $cram .cram)

from=0
Expand All @@ -22,15 +23,15 @@ for cram in "$@"; do

while [ $to -lt $ncontainers ]
do
echo $crampath,${crampath}.crai,${from},${to},${base},${chunkn},${rgline}
echo $crampath,${craipath},${from},${to},${base},${chunkn},${rgline}
from=$((to+1))
((to+=10000))
((chunkn++))
done

if [ $from -le $ncontainers ]
then
echo $crampath,${crampath}.crai,${from},${ncontainers},${base},${chunkn},${rgline}
echo $crampath,${craipath},${from},${ncontainers},${base},${chunkn},${rgline}
((chunkn++))
fi
done
Expand Down
35 changes: 6 additions & 29 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,9 @@ process {
// Set up of the polishing pipeline
if (params.polishing_on) {
withName: LONGRANGER_MKREF {
if(System.getenv('GITHUB_ACTION') != null ) {
container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
}
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" },
mode: params.publish_dir_mode,
Expand All @@ -382,6 +385,9 @@ process {
withName: LONGRANGER_ALIGN {
// Keep in sync with `longranger_lsf_sanger.config`
ext.args = "--disable-ui --nopreflight"
if(System.getenv('GITHUB_ACTION') != null ) {
container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
}
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -843,33 +849,4 @@ profiles {
}
}
}

test_github {
process {
// Set up of the scaffolding pipeline
withName: 'YAHS' {
// Skip the initial assembly error correction step
ext.args = '-r 1000,2000,5000'
}

withName: '.*HIFIASM.*' {
// Skip bloom filter
ext.args = '--primary -f0'
}

withName: '.*OATK' {
// Set kmer size and minimal coverage
ext.args = "-k1001 -c5 -Ttmp"
}

if (params.polishing_on) {
withName: LONGRANGER_MKREF {
container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
}
withName: LONGRANGER_ALIGN {
container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
}
}
}
}
}
7 changes: 3 additions & 4 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,14 @@ params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
// Match resource limits with the ubuntu2204-4c runner
max_cpus = 4
max_memory = '15.GB'
max_time = '6.h'

// Input data
input = "${projectDir}/assets/test.yaml"
bed_chunks_polishing = 2
organelles_on = true
polishing_on = false
hifiasm_hic_on = true
}
1 change: 0 additions & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,5 @@ params {

// Input data for full size test
input = "${projectDir}/assets/test_gsMetZobe1.yaml"
polishing_on = true
hifiasm_hic_on = true
}
28 changes: 0 additions & 28 deletions conf/test_github.config

This file was deleted.

9 changes: 2 additions & 7 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,12 @@ mito:
The pipeline can be tested locally using a provided small test dataset:
```
cd ${GENOMEASSEMBLY_TEST_DATA}
curl https://darwin.cog.sanger.ac.uk/genomeassembly_test_data.tar.gz | tar xzf -

git clone git@github.com:sanger-tol/genomeassembly.git
cd genomeassembly/
sed -i "s|/home/runner/work/genomeassembly/genomeassembly|${GENOMEASSEMBLY_TEST_DATA}|" assets/test_github.yaml
nextflow run main.nf -profile test_github,singularity --outdir ${OUTDIR} {OTHER ARGUMENTS}
nextflow run main.nf -profile test,singularity --outdir ${OUTDIR} {OTHER ARGUMENTS}
```

These command line steps will download and decompress the test data first, then download the pipeline and modify YAML so that it matches dataset location in your file system.
The last command line runs the test.
These command line steps will download the pipeline and run the test.

You should now be able to run the pipeline as you see fit.

Expand Down
4 changes: 2 additions & 2 deletions modules/local/generate_cram_csv.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ process GENERATE_CRAM_CSV {
'biocontainers/samtools:1.17--h00cdaf9_0' }"

input:
tuple val(meta), path(crampaths, stageAs: "?/*")
tuple val(meta), path(crampaths, stageAs: "?/*"), path(craipaths, stageAs: "?/*")


output:
Expand All @@ -23,7 +23,7 @@ process GENERATE_CRAM_CSV {
script:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
generate_cram_csv.sh $crampaths >> ${prefix}_cram.csv
generate_cram_csv.sh $crampaths > ${prefix}_cram.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
4 changes: 2 additions & 2 deletions modules/local/longranger/align/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ process LONGRANGER_ALIGN {

input:
tuple val(meta), path(reference)
path(fastqs)
path(fastqs, stageAs: "10X_inputs/*")

output:
tuple val(meta), path("${meta.id}/outs/possorted_bam.bam"), emit: bam
Expand All @@ -26,7 +26,7 @@ process LONGRANGER_ALIGN {
def args = task.ext.args ?: ''
def sample = "${meta.id}"
"""
longranger align --id=$sample --fastqs=$fastqs \
longranger align --id=$sample --fastqs=10X_inputs \
--sample=$sample --reference=$reference \
${args}
Expand Down

This file was deleted.

107 changes: 0 additions & 107 deletions modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco/logs/busco.log

This file was deleted.

Loading

0 comments on commit 2462a90

Please sign in to comment.