-
Notifications
You must be signed in to change notification settings - Fork 0
/
pcrdFilter.pl
108 lines (95 loc) · 2.02 KB
/
pcrdFilter.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/perl
#writed by zhao 2010/5/20
#filter the PCR duplication reads
use strict;
use warnings;
#get input information
use Getopt::Long;
my %opts ;
GetOptions(\%opts,"a:s","b:s","w:i","o:s","help");
if((!$opts{a}) or (!$opts{b}) or (!$opts{w}) or (!$opts{o}) ){
Usage(),exit(1);
}
my $fq1=$opts{a};
my $fq2=$opts{b};
my $alen=$opts{w};
my $outname=$opts{o};
my $seq;
my %stra;
my %strb;
my %qu;
my $rqu;
my $outstr1='';
my $outstr2='';
my $temp1;
my $temp2;
open(IN1,'<',$fq1) or die("not open the $fq1 file\n");
open(IN2,'<',$fq2) or die("not open the $fq2 file\n");
while($_=<IN1>){
$temp1=$_;
$_=<IN1>;
$temp1.=$_;
$seq=substr($_,0,$alen);
$_=<IN2>;
$temp2=$_;
$_=<IN2>;
$temp2.=$_;
$seq.='-'.substr($_,0,$alen);
$_=<IN1>;
$temp1.=$_;
$_=<IN1>;
$temp1.=$_;
$rqu=qu($_);
$_=<IN2>;
$temp2.=$_;
$_=<IN2>;
$temp2.=$_;
$rqu+=qu($_);
#print "$temp1$rqu\n";
if(!$qu{$seq}){
$qu{$seq}=$rqu;
$stra{$seq}=$temp1;
$strb{$seq}=$temp2;
#print "$temp1$rqu\n";
}else{
if($rqu>=$qu{$seq}){
$qu{$seq}=$rqu;
$stra{$seq}=$temp1;
$strb{$seq}=$temp2;
# print "$seq\n".$stra{$seq}.$strb{$seq}.$qu{$seq}."\n";
}
}
# print "$seq\n".$stra{$seq}.$strb{$seq}.$qu{$seq}."\n";
}
close(IN1);
close(IN2);
open(OUT1,'>',$outname."_1.fq");
open(OUT2,'>',$outname."_2.fq");
foreach $seq (keys(%stra)){
#print "$seq\n".$stra{$seq}. $strb{$seq};
print OUT1 $stra{$seq};
print OUT2 $strb{$seq};
}
close(OUT1);
close(OUT2);
sub qu{
my ($str)=@_;
my $qusum=0;
my $i;
for($i=0;$i<length($str);$i++){
$qusum+=ord(substr($str,$i,1))-64;
}
return $qusum;
}
sub Usage{
print qq(
Usage:
perl $0 [options] ...
Options:
-a <str> the 1th fastq file;
-b <int> the 2nd fastq file;
-w <int> the alignmengt length of reads;
-o <str> the outfile name;
-help < - > show this help information
\n);
}