-
Notifications
You must be signed in to change notification settings - Fork 2
/
kmer.chpl
52 lines (40 loc) · 1.56 KB
/
kmer.chpl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
// kmer.chpl
/*
usage on puma/ocelote:
chpl kmer.chpl
./kmer
usage on laptop with podman (or docker):
podman run --rm -v "$PWD":/myapp -w /myapp chapel/chapel chpl kmer.chpl
podman run --rm -v "$PWD":/myapp -w /myapp chapel/chapel ./kmer
# can change the infilename on command line because it is a configuration const
podman run --rm -v "$PWD":/myapp -w /myapp chapel/chapel ./kmer --infilename="kmer.chpl"
For docker usage, see https://chapel-lang.org/install-docker.html
Original version of kmer counting algorithm provided by
Aryamaan Jain (github strikeraryu) on Chapel Discourse April 7, 2021.
kmer_large_input.txt is from
https://www.ncbi.nlm.nih.gov/nuccore/NC_001422.1?report=fasta
*/
use Map, IO;
// Name of the input file to scan. Because this is a `config const`, it can be
// overridden at launch without recompiling:
//   ./kmer --infilename="anotherFileName"
config const infilename = "kmer_large_input.txt";

// Length of each k-mer (substring) to count. Override on the command line with
//   ./kmer --k=7
config const k = 4;

// A k-mer must contain at least one character; a zero or negative k would
// otherwise produce meaningless counts of empty substrings.
if k < 1 then
  halt("k must be at least 1, got ", k);

// Read the input file line by line and concatenate the lines into a single
// sequence string. strip() removes the surrounding whitespace of each line,
// including the trailing newline.
// NOTE(review): every line is appended as-is, so a FASTA header line (if the
// input still has one) becomes part of the sequence -- confirm the input file
// contains only sequence data.
var sequence, line : string;
var f = open(infilename, iomode.r);
var infile = f.reader();
while infile.readLine(line) {
  sequence += line.strip();
}
// Release the reader and the underlying file now that the whole input is in
// memory.
infile.close();
f.close();

// Map from each distinct k-mer to the number of times it occurs.
var nkmerCounts : map(string, int);

// Slide a window of width k over the sequence. A sequence of length n has
// n-k+1 windows, starting at indices 0 through n-k inclusive, so the upper
// bound must include n-k or the final k-mer is missed. When the sequence is
// shorter than k the range is empty and nothing is counted.
for ind in 0..(sequence.size - k) {
  // ind..#k is the k-element range starting at ind.
  nkmerCounts[sequence[ind..#k]] += 1;
}

// Report the number of distinct k-mers, then dump the full count table.
writeln("Number of unique k-mers in ", infilename, " is ", nkmerCounts.size);
writeln();
writeln("nkmerCounts = ");
writeln(nkmerCounts);