-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_complete_wbbt_dictionary.pl
executable file
·110 lines (88 loc) · 4.92 KB
/
generate_complete_wbbt_dictionary.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/perl
# generate dictionary of genes to anatomy terms based on
# http://wormbase.caltech.edu:8080/wormbase/manual/geneenrichment/davidahypergeometrictest
# includes any anatomy term that has any annotation
use CGI;
use strict;
use LWP::Simple;
use JSON;
# Base URL of the Solr core that is queried for anatomy (WBbt) data.
my $solr_url = 'http://wobr.caltech.edu:8082/solr/anatomy/';
my $json = JSON->new->allow_nonref;

# Only referenced by the commented-out single-term debugging query below.
my $focusTermId = 'WBbt:0005237';
# my $url = $solr_url . "select?qt=standard&fl=*&version=2.2&wt=json&indent=on&rows=1&q=id:%22" . $focusTermId . "%22&fq=document_category:%22ontology_class%22";

# Step 1: a single facet query (rows=0, so no documents are returned) over
# bioentity documents with source "WB" and without the "not" qualifier.
# The regulates_closure facet lists every term that has at least one such
# document (facet.mincount=1).  %wbbt maps term id => facet count.
my %wbbt;
{
    my $facet_url = $solr_url . "select?qt=standard&indent=on&wt=json&version=2.2&fl=id&start=0&rows=0&q=document_category:bioentity&facet=true&facet.field=regulates_closure&facet.limit=-1&facet.mincount=1&facet.sort=count&fq=source:%22WB%22&fq=-qualifier:%22not%22";

    # LWP::Simple::get returns undef on any failure; stop with a clear
    # message instead of letting the JSON decoder choke on undef.
    my $facet_page = get $facet_url;
    die "Could not fetch term facets from $facet_url" unless defined $facet_page;

    my $facet_data = $json->decode($facet_page);
    die "Unexpected (non-object) JSON from $facet_url"
        unless ref($facet_data) eq 'HASH';

    my $facet_list = $facet_data->{"facet_counts"}{"facet_fields"}{"regulates_closure"};
    die "No regulates_closure facet in response from $facet_url"
        unless ref($facet_list) eq 'ARRAY';

    # The facet list is consumed as alternating (term, count) entries.
    my @pairs = @$facet_list;
    while (@pairs) {
        my ($term, $count) = splice(@pairs, 0, 2);
        $wbbt{$term} = $count;
    }
}
# Optional pruning pass over %wbbt, visiting terms from the smallest gene
# count to the largest.  For each term still in the list it looks up the
# term's own regulates_closure and removes every OTHER listed term found
# there.
#
# The pass is disabled: the removal step had been commented out, yet the
# script still issued one HTTP request per term and threw the result away.
# With the flag off no request is made and %wbbt is left untouched, so the
# generated dictionary is the same as before.  Set the flag to 1 to
# re-enable the pruning.
my $prune_closure_terms = 0;
if ($prune_closure_terms) {
    foreach my $wbbt (sort { $wbbt{$a} <=> $wbbt{$b} } keys %wbbt) {
        next unless ($wbbt{$wbbt});    # terms removed while looping, so skip those already removed
        my $url = $solr_url . "select?qt=standard&fl=regulates_closure&version=2.2&wt=json&indent=on&rows=1&q=id:%22" . $wbbt . "%22&fq=document_category:%22ontology_class%22";
        my $page_data = get $url;
        die "Could not fetch regulates_closure for $wbbt from $url" unless defined $page_data;
        my $result = $json->decode($page_data);
        # A term without an ontology_class document simply has nothing to prune.
        my $closure = (ref($result) eq 'HASH')
            ? $result->{'response'}{'docs'}[0]{'regulates_closure'}
            : undef;
        next unless ref($closure) eq 'ARRAY';
        foreach my $other (@$closure) {
            # if other term is in list, remove from list
            if ( ($other ne $wbbt) && ($wbbt{$other}) ) { delete $wbbt{$other}; }
        }
    }
}
# Step 2: for every term in %wbbt, fetch the ids of the bioentity documents
# (source "WB", no "not" qualifier) whose regulates_closure contains that
# term.  %genes maps gene id (leading "WB:" stripped) => term id => hit count.
my %genes;
foreach my $wbbt (sort keys %wbbt) {
    # A fixed rows=10000 silently dropped genes for any term with more hits.
    # %wbbt holds the facet count for the same query filters, so request at
    # least that many rows (never fewer than the previous 10000).
    my $rows = ($wbbt{$wbbt} && $wbbt{$wbbt} > 10000) ? $wbbt{$wbbt} : 10000;
    my $url = $solr_url . "select?qt=standard&indent=on&wt=json&version=2.2&fl=id&start=0&rows=" . $rows . "&q=document_category:bioentity&fq=source:%22WB%22&fq=-qualifier:%22not%22&fq=regulates_closure:%22" . $wbbt . "%22";

    # LWP::Simple::get returns undef on failure; report which term failed.
    my $page_data = get $url;
    die "Could not fetch genes for $wbbt from $url" unless defined $page_data;

    my $result = $json->decode($page_data);
    my $response = (ref($result) eq 'HASH' && ref($result->{'response'}) eq 'HASH')
        ? $result->{'response'}
        : {};
    my $docs = (ref($response->{'docs'}) eq 'ARRAY') ? $response->{'docs'} : [];

    # Make any remaining truncation visible instead of silently losing genes.
    if (defined $response->{'numFound'} && $response->{'numFound'} > scalar(@$docs)) {
        warn "Term $wbbt: Solr reports $response->{'numFound'} genes but returned only " . scalar(@$docs) . "\n";
    }

    foreach my $doc (@$docs) {
        my $gene = $doc->{'id'};
        next unless defined $gene;
        $gene =~ s/^WB://;
        $genes{$gene}{$wbbt}++;
    }
}
# Step 3: load the label of every non-obsolete ontology_class document.
# %wbbtName maps term id => label, with commas replaced so the label can be
# used inside a comma-separated header line.
my %wbbtName;
{
    my $names_url = $solr_url . "select?qt=standard&fl=id,annotation_class_label&version=2.2&wt=json&indent=on&rows=100000&q=id:*&fq=document_category:ontology_class&fq=-is_obsolete:true";

    # LWP::Simple::get returns undef on failure.
    my $names_page = get $names_url;
    die "Could not fetch term names from $names_url" unless defined $names_page;

    my $names_data = $json->decode($names_page);
    my $docs = (ref($names_data) eq 'HASH')
        ? $names_data->{'response'}{'docs'}
        : undef;
    die "No documents in term-name response from $names_url"
        unless ref($docs) eq 'ARRAY';

    foreach my $doc (@$docs) {
        my $id = $doc->{'id'};
        next unless defined $id;
        my $name = $doc->{'annotation_class_label'};
        $name = '' unless defined $name;    # a missing label is written as an empty name
        # tr/,/comma/ maps character-for-character, so it turned every ','
        # into 'c'.  A substitution is needed to insert the word itself.
        $name =~ s/,/comma/g;
        $wbbtName{$id} = $name;
    }
}
# Step 4: write the gene-by-term matrix to STDOUT as comma-separated text.
# Header row: "wbid" followed by one "name(term id)" column per term in
# %wbbt, in sorted term-id order.  Each following row is a gene id and then
# 1 or 0 per term, depending on whether %genes recorded that gene for it.
my @term_ids = sort keys %wbbt;

my $header_columns = join ",", map { qq($wbbtName{$_}($_)) } @term_ids;
print qq(wbid,$header_columns\n);

for my $gene_id (sort keys %genes) {
    my $flags = join ",", map { $genes{$gene_id}{$_} ? '1' : '0' } @term_ids;
    print qq($gene_id,$flags\n);
}
__END__
http://131.215.12.204:8080/solr/anatomy/select?qt=standard&indent=on&wt=json&version=2.2&fl=id&start=0&rows=0&q=document_category:bioentity&facet=true&facet.field=regulates_closure&facet.limit=-1&facet.mincount=100&facet.sort=count&fq=source:%22WB%22&fq=-qualifier:%22not%22
http://131.215.12.204:8080/solr/anatomy/select?qt=standard&fl=regulates_closure&version=2.2&wt=json&indent=on&rows=1&q=id:%22WBbt:0005373%22&fq=document_category:%22ontology_class%22
http://131.215.12.204:8080/solr/anatomy/select?qt=standard&indent=on&wt=json&version=2.2&fl=id&start=0&rows=10000&q=document_category:bioentity&fq=source:%22WB%22&fq=-qualifier:%22not%22&fq=regulates_closure:%22WBbt:0005373%22