-
Notifications
You must be signed in to change notification settings - Fork 0
/
merprocess.h
113 lines (83 loc) · 2.26 KB
/
merprocess.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#include <string>
#include <iostream>
#include <vector>
#include <unordered_map>
#include <cstring>
#include "BloomFilter.h"
#define BUFFER 1000
#define KMER_SIZE 20
#define NUMBEROFBITS 718879379
#define NUMBEROFHASHES 17
#define MAX_FREQ_KMER_COUNT 25
using namespace std;
static string ReverseComplement(string str) {
string newStr = str;
for (size_t i = 0; i < str.length(); i++) {
if (str[i] == 'A') {
newStr.at(str.length() - i - 1) = 'T';
}
else if (str[i] == 'T') {
newStr.at(str.length() - i - 1) = 'A';
}
else if (str[i] == 'G') {
newStr.at(str.length() - i - 1) = 'C';
}
else if (str[i] == 'C') {
newStr.at(str.length() - i - 1) = 'G';
}
}
return newStr;
}
static string LexicographicalComprison(string str_1, string str_2) {
return str_1.compare(str_2) >= 0 ? str_2 : str_1;
}
static void ProcessInfo(string seq,
unordered_map<string,
size_t> &hashMap,
BloomFilter &bloom_filter,
size_t MerSize) {
for (size_t i = 0; i < seq.length() - MerSize + 1; i++) {
string substr = seq.substr(i, MerSize);
substr = LexicographicalComprison(substr, ReverseComplement(substr));
if (bloom_filter.possiblyContains(substr.c_str(), substr.length())) {
if (!hashMap.count(substr)) {
hashMap[substr] = 0;
}
}
else {
bloom_filter.add(substr.c_str(), substr.length());
}
}
}
static void CountReadKmers(string seq,
unordered_map<string,
size_t> &hashMap,
BloomFilter &bloom_filter,
size_t MerSize) {
for (size_t i = 0; i < seq.length() - MerSize + 1; i++) {
string substr = seq.substr(i, MerSize);
char *substrChar = new char[substr.size() + 1];
strcpy(substrChar, LexicographicalComprison(substr, ReverseComplement(substr)).c_str());
if (hashMap.count(substrChar)) {
hashMap[substrChar]++;
}
}
}
static void FindMaxFrequencyKMersInHash(vector<string> &mostFrequentMers,
vector<size_t> &frequencies,
unordered_map<string, size_t> &hashMap,
size_t MostFreqMerCount) {
for (size_t i = 0; i < MostFreqMerCount; i++) {
size_t max = 0;
string maxFreqMer;
for (auto keyVal : hashMap) {
if (keyVal.second > max && keyVal.second > 1) {
max = keyVal.second;
maxFreqMer = keyVal.first;
}
}
hashMap[maxFreqMer] = 0;
mostFrequentMers.push_back(maxFreqMer);
frequencies.push_back(max);
}
}