#!/usr/bin/env nextflow
/*
* Given 25mer hit counts, interpret KIR genotypes and
* haplotypes.
*
* Input files end with kmcNameSuffix (default '_hits.txt') and are
* located in kmcDir.
*
* Output files end with '_prediction.txt' in resultDir.
*
* @author Dave Roe
* @todo remove the '_hits.txt' files.
*/
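/*
 * Example invocation (a sketch; the paths are illustrative, and the params.*
 * values below can be overridden on the command line):
 *   nextflow run kpi_local.nf --input /path/to/hits/ --output /path/to/predictions/
 */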
nfForks = 8 // run this many input text files in parallel
// input: kmc probe txt files
kmcNameSuffix = '_hits.txt' // extension on the file name
params.input = '/Users/daver/gonl/results_v7/hits_test/'
bin1Suffix = 'bin1'
params.output = '/Users/daver/gonl/results_v7/predictions_test/'
probeFile = '/Users/daver/git/kpi/input/markers.fasta'
haps = '/Users/daver/git/kpi/input/haps.txt'
params.allOut = "0"
// things that probably won't change per run
kmcDir = params.input
resultDir = params.output
allOut = params.allOut
if(!kmcDir.trim().endsWith("/")) {
    kmcDir += "/"
}
if(!resultDir.trim().endsWith("/")) {
    resultDir += "/"
}
kmcPath = kmcDir + '*' + kmcNameSuffix
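// e.g., with the defaults above, kmcPath == '/Users/daver/gonl/results_v7/hits_test/*_hits.txt'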
// each channel emits (sample ID, hit file) tuples; kmcs2 is not consumed by any process in this file
kmcs1 = Channel.fromPath(kmcPath).
    ifEmpty { error "cannot find any ${kmcNameSuffix} files in ${kmcPath}" }.
    map { path -> tuple(sample(path), path) }
kmcs2 = Channel.fromPath(kmcPath).
    ifEmpty { error "cannot find any ${kmcNameSuffix} files in ${kmcPath}" }.
    map { path -> tuple(sample(path), path) }
/*
* kmc2locusBin
*
* Given a kmc output file, bin the hit reads into separate files based on locus.
*
* e.g., kmc2LocusAvg2.groovy -j 100a_hits.txt -p markers.fasta -e bin1 -i 100a -o .
*
* Input files: e.g., 100a_hits.txt
* Output files have an extension of 'bin1'.
*/
process kmc2locusBin {
    // publishDir resultDir, mode: 'copy', overwrite: true
    // maxForks nfForks
    input:
      set s, file(kmc) from kmcs1
    output:
      set s, file{"${s}*.bin1"} into bin1Fastqs
    script:
    // derive the sample ID by stripping the file-name suffix (e.g., 100a_hits.txt -> 100a)
    String id = kmc.name.replaceFirst(kmcNameSuffix, "")
    """
    kmc2LocusAvg2.groovy -j ${kmc} -p ${probeFile} -e ${bin1Suffix} -i ${id} -o .
    if ls *.bin1 1> /dev/null 2>&1; then
        : # at least one locus bin was produced; nothing more to do
    else
        # no loci were binned: write an empty prediction and a placeholder bin file
        echo "snp: " > "${id}_prediction.txt"
        touch "${id}_uninterpretable.bin1"
    fi
    """
} // kmc2locusBin
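// bin1Fastqs emits (sample ID, per-locus .bin1 files) tuples consumed by the next process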
/*
* locusBin2ExtendedLocusBin
*
* Makes haplotype predictions from the PA (presence/absence) probe bins.
*
* @todo document
*/
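/*
 * A sketch of the underlying call (file names are illustrative):
 *   pa2Haps.groovy -a 0 -h haps.txt -q 100a_2DL4.bin1,100a_3DL3.bin1 -o 100a_prediction.txt
 */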
process locusBin2ExtendedLocusBin {
    publishDir resultDir, mode: 'copy', overwrite: true
    input:
      set s, file(b1List) from bin1Fastqs
    output:
      set s, file{"*_prediction.txt"} into predictionChannel
    script:
    """
    FILES="${s}*.bin1"
    outFile="${s}"
    outFile+="_prediction.txt"
    fileList=""
    id=""
    ext="*bin1*"
    # build a comma-separated list of this sample's non-empty .bin1 files
    for bFile in \$FILES; do
        if [ -s \$bFile ]; then
            if [[ \$bFile == \$ext ]]; then
                id=\$(basename "\$bFile")
                # '\${id%%_*}' strips everything from the first '_' onward, leaving the sample ID
                # todo: change this to kmcNameSuffix
                id="\${id%%_*}"
                if [ "\$id" == "" ]; then
                    id=\$(basename "\$bFile")
                fi
                # echo \$bFile
                # separate list entries with commas
                if [ "\$fileList" != "" ]; then
                    fileList+=","
                fi
                fileList+=\$bFile
            fi
        fi
    done
    pa2Haps.groovy -a ${allOut} -h ${haps} -q "\$fileList" -o "\$outFile"
    """
} // locusBin2ExtendedLocusBin
// get the per-sample name
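// e.g., sample() on '.../100a_hits.txt' returns '100a' (illustrative file name)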
def sample(Path path) {
    def name = path.getFileName().toString()
    // start just past any path separator (getFileName() normally leaves none)
    int start = Math.max(0, name.lastIndexOf('/') + 1)
    int end = name.indexOf(kmcNameSuffix)
    if ( end <= 0 ) {
        throw new Exception( "Expected file " + name + " to end in '" + kmcNameSuffix + "'" )
    }
    return name.substring(start, end)
} // sample
workflow.onComplete {
    println "DONE: ${ workflow.success ? 'OK' : 'FAILED' }"
}

workflow.onError {
    println "ERROR: ${workflow.errorReport.toString()}"
}