-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgtfcount
executable file
·57 lines (54 loc) · 1.84 KB
/
gtfcount
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/bash
if [[ "x"$1 == "x-h" ]]; then
echo "Usage: gtfcount [-g][-c][-l] <gtfile>"
echo " Show the count of distinct transcript names.\n Options:"
echo " -l : list transcripts (unique transcript_ids)"
echo " -g : report distinct gene names count (gene_id attribute)"
echo " -gl : list distinct gene names (gene_id attribute)"
echo " -C : list coding transcripts (=having CDS features)"
echo " -r : report transcript counts per reference sequence"
echo " -rg : report transcript counts per reference sequence"
echo " -f : report transcript counts per gene_id"
echo " -T : list StringTie transcripts and their TPM"
exit 1
fi
a=""
if [[ $1 =~ ^-.+ ]]; then
a="${1:1}" #remove first character (the dash)
shift
fi
if [[ -z "$a" ]]; then
perl -ne 'if(m/transcript_id "([^"]+)/){ $_=$1;chomp;print "$_\n" if (++$n{$_})==1}' $1 | wc -l
exit 0
fi
case $a in
l)
perl -ne 'print "$1\n" if m/transcript_id "([^"]+)/ && (++$n{$1})==1' $1
;;
T)
perl -ne 'next unless m/\ttranscript\t/;($t,$m)=((m/transcript_id\s+"([^"]+)/),(m/TPM\s+"([^"]+)/));print "$t\t$m\n"' $1
;;
g)
perl -ne 'print "$1\n" if m/gene_id "([^"]+)/ && (++$n{$1})==1' $1 | wc -l
;;
gl)
perl -ne 'print "$1\n" if m/gene_id "([^"]+)/ && (++$n{$1})==1' $1
;;
C)
perl -aF'\t' -ne 'if (($F[2] eq "CDS") && $F[8]=~m/\btranscript_id "([^"]+)/){$_=$1;chomp;print "$_\n" if(++$n{$_}==1) }' $1
;;
r)
perl -aF'\t' -ne 'print "$F[0]\t$1\n" if $F[8]=~m/transcript_id\s+"([^"]+)/' $1 | sort -u | cut -f1 | uniq -c
;;
rg)
perl -aF'\t' -ne 'print "$F[0]\t$1\n" if $F[8]=~m/gene_id\s+"([^"]+)/' $1 | sort -u | cut -f1 | uniq -c
;;
f)
perl -ne '($g)=(m/gene_id\s+"([^"]+)/); ($t)=(m/transcript_id\s+"([^"]+)/); print "$g\t$t\n" if $g && $t' $1 | sort -u | cut -f1 | uniq -c
;;
*)
show_usage
echo "Error: -$a option not recognized!"
exit 1
;;
esac