-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathparse_alignments.h
159 lines (133 loc) · 6.06 KB
/
parse_alignments.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// ======================================================================
// PureCLIP: capturing target-specific protein-RNA interaction footprints
// ======================================================================
// Copyright (C) 2017 Sabrina Krakau, Max Planck Institute for Molecular
// Genetics
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// =======================================================================
// Author: Sabrina Krakau <[email protected]>
// =======================================================================
#ifndef APPS_HMMS_PARSE_ALIGNMENTS_H_
#define APPS_HMMS_PARSE_ALIGNMENTS_H_
#include <iostream>
#include <fstream>
using namespace seqan;
template <typename TContigObservations, typename TBamIn, typename TBai, typename TOptions>
bool parse_bamRegion(TContigObservations &contigObservationsF, TContigObservations &contigObservationsR, TBamIn &inFile, TBai &baiIndex, int const& rID, TOptions &options)
{
if (options.verbosity >= 2)
std::cout << "Parse BAM region " << std::endl;
int jump_beginPos = 0;
// Jump the BGZF stream to this position.
bool hasAlignments = false;
if (!jumpToRegion(inFile, hasAlignments, rID, jump_beginPos, length(contigObservationsF.truncCounts)-1, baiIndex))
{
std::cerr << "ERROR: Could not jump to " << jump_beginPos << ":" << (length(contigObservationsF.truncCounts)-1) << "\n";
return false;
}
if (!hasAlignments)
{
if (options.verbosity >= 2)
std::cout << "WARNING: no alignments here " << jump_beginPos << ":" << (length(contigObservationsF.truncCounts)-1) << "\n";
return false;
}
// Seek linearly to the selected position
BamAlignmentRecord bamRecord;
BamAlignmentRecord newBamRecord;
SEQAN_OMP_PRAGMA(critical)
while (!atEnd(inFile))
{
readRecord(bamRecord, inFile);
// If we are on the next reference
if (bamRecord.rID == -1 || bamRecord.rID > rID)
break;
// check if read corresponds to 3' cDNA end corresponding to user parameter
if (options.selectRead == 1 && !hasFlagFirst(bamRecord))
continue;
else if (options.selectRead == 2 && !hasFlagLast(bamRecord))
continue;
if (!hasFlagRC(bamRecord)) // Forward
{
if (contigObservationsF.truncCounts[bamRecord.beginPos] < options.maxTruncCount2) // uint8, discard interval if > anyway ...
++contigObservationsF.truncCounts[bamRecord.beginPos];
}
else // Reverse
{
if (contigObservationsR.truncCounts[bamRecord.beginPos + getAlignmentLengthInRef(bamRecord) - 1] < options.maxTruncCount2)
++contigObservationsR.truncCounts[bamRecord.beginPos + getAlignmentLengthInRef(bamRecord) - 1];
}
}
return true;
}
template <typename TTruncCounts, typename TBamIn, typename TBai, typename TOptions>
bool parse_bamRegion(TTruncCounts &truncCounts, TBamIn &inFile, TBai &baiIndex, int const& rID, unsigned beginPos, unsigned endPos, bool isForward, TOptions &options)
{
// Jump the BGZF stream to this position.
bool hasAlignments = false;
int jump_beginPos;
if (isForward)
jump_beginPos = beginPos;
else
jump_beginPos = beginPos - 100;
if (!jumpToRegion(inFile, hasAlignments, rID, jump_beginPos, (endPos+1000), baiIndex))
{
std::cerr << "ERROR: Could not jump to " << beginPos << ":" << endPos << "\n";
return false;
}
/*if (!hasAlignments)
{
std::cout << "WARNING: no input alignments here " << beginPos << ":" << endPos << " , " << rID << "\n";
return false;
}*/
// Seek linearly to the selected position
BamAlignmentRecord bamRecord;
BamAlignmentRecord newBamRecord;
SEQAN_OMP_PRAGMA(critical)
while (!atEnd(inFile))
{
readRecord(bamRecord, inFile);
// If we are on the next reference
if (bamRecord.rID == -1 || bamRecord.rID > rID)
break;
if (bamRecord.beginPos >= (endPos + 100))
break;
// check if read corresponds to 3' cDNA end corresponding to user parameter
if (options.selectRead == 1 && !hasFlagFirst(bamRecord))
continue;
else if (options.selectRead == 2 && !hasFlagLast(bamRecord))
continue;
if (isForward && !hasFlagRC(bamRecord)) // Forward
{
if (bamRecord.beginPos >= beginPos && // already there ?
bamRecord.beginPos < endPos && // before end of interval ?
truncCounts[bamRecord.beginPos - beginPos] < options.maxTruncCount2)
{
++truncCounts[bamRecord.beginPos - beginPos];
}
}
else if (!isForward && hasFlagRC(bamRecord)) // Reverse
{
if ((bamRecord.beginPos + getAlignmentLengthInRef(bamRecord) - 1) >= beginPos && // already there ?
(bamRecord.beginPos + getAlignmentLengthInRef(bamRecord) - 1) < endPos && // before end of interval ?
truncCounts[bamRecord.beginPos - beginPos + getAlignmentLengthInRef(bamRecord) - 1] < options.maxTruncCount2)
{
++truncCounts[bamRecord.beginPos - beginPos + getAlignmentLengthInRef(bamRecord) - 1];
}
}
}
return true;
}
#endif