Skip to content

Commit

Permalink
ZFIN-9282: Phenotype-Statistics-Report_m (ZFIN#1151)
Browse files Browse the repository at this point in the history
* ZFIN-9282:

- Use environment variables (e.g. DB_NAME, ROOT_PATH) instead of build-time <!--|...|--> template placeholders
- Fix join logic:
Previously, the SQL for "Number of Genes with TALEN phenotype in a Tg background (TALEN used transiently)" omitted the join condition between the fish_str and fish tables (fishstr_fish_zdb_id = fish_zdb_id), which resulted in a cross join. I refactored the query to use explicit JOIN operations, which adds the missing condition and makes the logic easier to read. The equivalent CRISPR query is rewritten the same way.

* ZFIN-9282:

- Update DB connection strings to read the database host from the environment (PGHOST) instead of hard-coding localhost
  • Loading branch information
rtaylorzfin authored Sep 13, 2024
1 parent 8303b21 commit 9e5ae88
Show file tree
Hide file tree
Showing 12 changed files with 162 additions and 113 deletions.
8 changes: 0 additions & 8 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,6 @@ def whitelistTemplateFiles = [
'**/check_undefined_environment.pl',
'**/pushDataToPublic.sh',
'**/backupBlastDbsAndRsyncAlmostBlastDbs.sh',
'**/generatePostMergeSQLs.pl', //server_apps/DB_maintenance/merge
'**/generateSQLsForMergingRecords.pl',
'**/merge_markers_cmdline.pl',
'**/getAllRelatedData.pl',
'**/compareTables.groovy', //server_apps/DB_maintenance/postgres
'**/dailyExtentCheck.sh', //server_apps/DB_maintenance/extentMonitoring
'**/dailyQueryCostCheck.sh', //server_apps/DB_maintenance/queryMonitoring
Expand All @@ -327,13 +323,9 @@ def whitelistTemplateFiles = [
'**/inc-redirect', //server_apps/apache
'**/crontab.production', //server_apps/cron
'**/elsevier_report.pl', //server_apps/Reports
'**/reportFeatureData.pl',
'**/reportPubsForGeneAndFeature.pl',
'**/reportZfinGenesMissingEnsdarts.pl',
'**/Count-phenotype.pl',
'**/runStats.sh', //server_apps/Reports/AnnualStats
'**/betterFish.sql', //server_apps/Reports/BetterFish
'**/FinCount.pl', //server_apps/Reports/PATO
'**/runAverageTimeInBinsCumulative.sh', //server_apps/Reports/PubTracking
'**/runLongestBinResidents.sh',
'**/runPaperlessPubTrackingDailyIndexedMetrics.sh',
Expand Down
17 changes: 12 additions & 5 deletions server_apps/DB_maintenance/merge/generatePostMergeSQLs.pl
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,27 @@

use strict;
use DBI;
use FindBin;
use lib "$FindBin::Bin/../../perl_lib/";
use ZFINPerlModules qw(assertEnvironment);

## check commandline parameters
die "Usage: generatePostMergeSQLs.pl ZDBID1 ZDBID2\n" if @ARGV != 2;

my $recordToBeDeleted = $ARGV[0];
my $recordToBeMergedInto = $ARGV[1];
assertEnvironment('PGHOST', 'DB_NAME');

my $dbname = "<!--|DB_NAME|-->";
my $dbhost = $ENV{'PGHOST'};
my $dbname = $ENV{'DB_NAME'};
my $username = "";
my $password = "";

### open a handle on the db
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=localhost", $username, $password)
or die "\n\nCannot connect to PostgreSQL database: $DBI::errstr\n\n";
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=$dbhost", $username, $password)
or die "Cannot connect to Informix database: $DBI::errstr\n";

my $recordToBeDeleted = $ARGV[0];
my $recordToBeMergedInto = $ARGV[1];


my $type1;
if ($recordToBeDeleted =~ m/^ZDB\-([A-Z]+)\-\d{6}\-\d+$/) {
Expand Down
16 changes: 11 additions & 5 deletions server_apps/DB_maintenance/merge/generateSQLsForMergingRecords.pl
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,26 @@

use strict;
use DBI;
use FindBin;
use lib "$FindBin::Bin/../../perl_lib/";
use ZFINPerlModules qw(assertEnvironment);

## check commandline parameters
die "Usage: generateSQLsForMergingPubs.pl ZDBID1 ZDBID2\n" if @ARGV != 2;

my $recordToBeDeleted = $ARGV[0];
my $recordToBeMergedInto = $ARGV[1];
assertEnvironment('PGHOST', 'DB_NAME');

my $dbname = "<!--|DB_NAME|-->";
my $dbname = $ENV{'DB_NAME'};
my $dbhost = $ENV{'PGHOST'};
my $username = "";
my $password = "";

### open a handle on the db
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=localhost", $username, $password)
or die "\n\nCannot connect to PostgreSQL database: $DBI::errstr\n\n";
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=$dbhost", $username, $password)
or die "Cannot connect to Informix database: $DBI::errstr\n";

my $recordToBeDeleted = $ARGV[0];
my $recordToBeMergedInto = $ARGV[1];

my $type1;
if ($recordToBeDeleted =~ m/^ZDB\-([A-Z]+)\-\d{6}\-\d+$/) {
Expand Down
20 changes: 13 additions & 7 deletions server_apps/DB_maintenance/merge/getAllRelatedData.pl
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,24 @@
use strict;
use DBI;

## check commandline parameters
die "Usage: getAllRelatedData.pl ZDBID\n" if @ARGV != 1;
use FindBin;
use lib "$FindBin::Bin/../../perl_lib/";
use ZFINPerlModules qw(assertEnvironment);
assertEnvironment('PGHOST', 'DB_NAME');

my $record = $ARGV[0];

my $dbname = "<!--|DB_NAME|-->";
my $dbname = $ENV{'DB_NAME'};
my $dbhost = $ENV{'PGHOST'};
my $username = "";
my $password = "";

### open a handle on the db
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=localhost", $username, $password)
or die "\n\nCannot connect to PostgreSQL database: $DBI::errstr\n\n";
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=$dbhost", $username, $password)
or die "Cannot connect to Informix database: $DBI::errstr\n";

## check commandline parameters
die "Usage: getAllRelatedData.pl ZDBID\n" if @ARGV != 1;

my $record = $ARGV[0];

my $type;
if ($record =~ m/^ZDB\-([A-Z]+)\-\d{6}\-\d+$/) {
Expand Down
16 changes: 11 additions & 5 deletions server_apps/DB_maintenance/merge/merge_markers_cmdline.pl
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,28 @@

use strict;
use DBI;
use FindBin;
use lib "$FindBin::Bin/../../perl_lib/";
use ZFINPerlModules qw(assertEnvironment);

## merge_markers_cmdline.pl Marker_ZDB_ID1 Marker_ZDB_ID2

## check commandline parameters
die "Usage: merge_markers_cmdline.pl Marker_ZDB_ID1 Marker_ZDB_ID2\n" if @ARGV != 2;

my $recordToBeDeleted = $ARGV[0];
my $recordToBeMergedInto = $ARGV[1];
assertEnvironment('PGHOST', 'DB_NAME');

my $dbname = "<!--|DB_NAME|-->";
my $dbname = $ENV{'DB_NAME'};
my $dbhost = $ENV{'PGHOST'};
my $username = "";
my $password = "";

### open a handle on the db
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=localhost", $username, $password)
or die "\n\nCannot connect to PostgreSQL database: $DBI::errstr\n\n";
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=$dbhost", $username, $password)
or die "Cannot connect to Informix database: $DBI::errstr\n";

my $recordToBeDeleted = $ARGV[0];
my $recordToBeMergedInto = $ARGV[1];

my $type1;
if ($recordToBeDeleted =~ m/^ZDB\-([A-Z]+)\-\d{6}\-\d+$/) {
Expand Down
19 changes: 14 additions & 5 deletions server_apps/DB_maintenance/pub_check_and_addback_volpg.pl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@
use MIME::Lite;
use DBI;
use XML::Twig;
use FindBin;
use lib "$FindBin::Bin/../perl_lib/";
use ZFINPerlModules qw(assertEnvironment);

assertEnvironment('PGHOST', 'DB_NAME');

my $dbname = $ENV{'DB_NAME'};
my $dbhost = $ENV{'PGHOST'};
my $username = "";
my $password = "";

### open a handle on the db
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=$dbhost", $username, $password)
or die "Cannot connect to Informix database: $DBI::errstr\n";


#=======================================================
Expand All @@ -24,12 +38,7 @@
system("/bin/rm -rf Update-Publication-Volume-And-Pages_w");
system("/bin/mkdir Update-Publication-Volume-And-Pages_w");

my $dbname = $ENV{'DB_NAME'};
my $username = "";
my $password = "";

### open a handle on the db
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=localhost", $username, $password) or die "Cannot connect to database: $DBI::errstr\n";

my $sql = "select distinct zdb_id, accession_no, title
from publication
Expand Down
3 changes: 2 additions & 1 deletion server_apps/DB_maintenance/scrubscan.pl
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@
# ain't got none, everything in main. (put another way - they are all globals!)

my $dbname = $ENV{'DBNAME'};
my $dbhost = $ENV{'PGHOST'};
my $username = "";
my $password = "";

### open a handle on the db
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=localhost", $username, $password) or die "Cannot connect to database: $DBI::errstr\n";
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=$dbhost", $username, $password) or die "Cannot connect to database: $DBI::errstr\n";


# Define tests:
Expand Down
94 changes: 48 additions & 46 deletions server_apps/Reports/Count-phenotype.pl
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,23 @@
# to the ZGC project. The result is mailed to curators.
#
use strict;
use lib "<!--|ROOT_PATH|-->/server_apps/perl_lib/";
use ZFINPerlModules;
use FindBin;
use lib "$FindBin::Bin/../perl_lib/";
use ZFINPerlModules qw(assertEnvironment);
assertEnvironment('ROOT_PATH', 'PGHOST', 'DB_NAME');

# set environment variables

my $dbname = "<!--|DB_NAME|-->";
my $dbname = $ENV{'DB_NAME'};
my $rootpath = $ENV{'ROOT_PATH'};

print "\nStart running counting SQLs ...\n";

#--------------------------------------------------------------------------
chdir "<!--|ROOT_PATH|-->/server_apps/Reports/PATO";
chdir "$rootpath/server_apps/Reports/PATO";

system("/bin/rm -f PhenotypeStatistics.txt");

system("psql -v ON_ERROR_STOP=1 -d <!--|DB_NAME|--> -f count_phenotype.sql > PhenotypeStatistics.txt 2> err.txt");
system("psql -v ON_ERROR_STOP=1 -d $dbname -f count_phenotype.sql > PhenotypeStatistics.txt 2> err.txt");

###-------- New section for case 10249, STR additions to Monthly phenotype statistics --------------------------------

Expand Down Expand Up @@ -53,12 +55,13 @@
and fx1.genox_fish_zdb_id = f1.fish_zdb_id
and f1.fish_genotype_zdb_id = geno_zdb_id
and geno_is_wildtype = 't'
and not exists(select 'x' from phenotype_experiment px2, fish f2, fish_experiment fx2, genotype_feature, feature_marker_relationship
where fx2.genox_zdb_id = px2.phenox_genox_zdb_id
and fx2.genox_fish_zdb_id = f2.fish_zdb_id
and f2.fish_genotype_zdb_id = genofeat_geno_zdb_id
and genofeat_feature_zdb_id = fmrel_ftr_zdb_id
and fmrel_mrkr_zdb_id = mrel_mrkr_2_zdb_id);";
and not exists (select 'x'
from phenotype_experiment px2
join fish_experiment fx2 on fx2.genox_zdb_id = px2.phenox_genox_zdb_id
join fish f2 on fx2.genox_fish_zdb_id = f2.fish_zdb_id
join genotype_feature on f2.fish_genotype_zdb_id = genofeat_geno_zdb_id
join feature_marker_relationship on genofeat_feature_zdb_id = fmrel_ftr_zdb_id
where fmrel_mrkr_zdb_id = mrel_mrkr_2_zdb_id);";


my $ct23 = ZFINPerlModules->countData($sql);
Expand All @@ -72,46 +75,45 @@
and fx1.genox_fish_zdb_id = f1.fish_zdb_id
and f1.fish_genotype_zdb_id = geno_zdb_id
and geno_is_wildtype = 't'
and not exists(select 'x' from phenotype_experiment px2, fish f2, fish_experiment fx2, genotype_feature, feature_marker_relationship
where fx2.genox_zdb_id = px2.phenox_genox_zdb_id
and fx2.genox_fish_zdb_id = f2.fish_zdb_id
and f2.fish_genotype_zdb_id = genofeat_geno_zdb_id
and genofeat_feature_zdb_id = fmrel_ftr_zdb_id
and fmrel_mrkr_zdb_id = mrel_mrkr_2_zdb_id);";
and not exists (select 'x'
from phenotype_experiment px2
join fish_experiment fx2 on fx2.genox_zdb_id = px2.phenox_genox_zdb_id
join fish f2 on fx2.genox_fish_zdb_id = f2.fish_zdb_id
join genotype_feature on f2.fish_genotype_zdb_id = genofeat_geno_zdb_id
join feature_marker_relationship on genofeat_feature_zdb_id = fmrel_ftr_zdb_id
where fmrel_mrkr_zdb_id = mrel_mrkr_2_zdb_id);";

my $ct24 = ZFINPerlModules->countData($sql);

$sql = "select distinct mrel_mrkr_2_zdb_id
from marker_relationship, fish_str, fish, fish_experiment, genotype_feature, feature, genotype, phenotype_experiment
where mrel_type = 'knockdown reagent targets gene'
and mrel_mrkr_1_zdb_id like 'ZDB-TALEN%'
and mrel_mrkr_1_zdb_id = fishstr_str_zdb_id
and genox_fish_zdb_id = fish_zdb_id
and fish_genotype_zdb_id = genofeat_geno_zdb_id
and geno_zdb_id = fish_genotype_zdb_id
and geno_zdb_id = genofeat_geno_zdb_id
and genofeat_feature_zdb_id = feature_zdb_id
and feature_type = 'TRANSGENIC_INSERTION'
and geno_zdb_id = genofeat_geno_zdb_id
and geno_is_wildtype = 'f'
and phenox_genox_zdb_id = genox_zdb_id;";
from marker_relationship
join fish_str on mrel_mrkr_1_zdb_id = fishstr_str_zdb_id
join fish on fishstr_fish_zdb_id = fish_zdb_id
join fish_experiment on genox_fish_zdb_id = fish_zdb_id
join genotype_feature on fish_genotype_zdb_id = genofeat_geno_zdb_id
join feature on genofeat_feature_zdb_id = feature_zdb_id
join genotype on ( geno_zdb_id = fish_genotype_zdb_id and geno_zdb_id = genofeat_geno_zdb_id )
join phenotype_experiment on phenox_genox_zdb_id = genox_zdb_id
where mrel_type = 'knockdown reagent targets gene'
and mrel_mrkr_1_zdb_id like 'ZDB-TALEN%'
and feature_type = 'TRANSGENIC_INSERTION'
and geno_is_wildtype = 'f'";

my $ct25 = ZFINPerlModules->countData($sql);

$sql = "select distinct mrel_mrkr_2_zdb_id
from marker_relationship, fish_str, fish, fish_experiment, genotype_feature, feature, genotype, phenotype_experiment
where mrel_type = 'knockdown reagent targets gene'
and mrel_mrkr_1_zdb_id like 'ZDB-CRISPR%'
and mrel_mrkr_1_zdb_id = fishstr_str_zdb_id
and genox_fish_zdb_id = fish_zdb_id
and fish_genotype_zdb_id = genofeat_geno_zdb_id
and geno_zdb_id = fish_genotype_zdb_id
and geno_zdb_id = genofeat_geno_zdb_id
and genofeat_feature_zdb_id = feature_zdb_id
and feature_type = 'TRANSGENIC_INSERTION'
and geno_zdb_id = genofeat_geno_zdb_id
and geno_is_wildtype = 'f'
and phenox_genox_zdb_id = genox_zdb_id;";
from marker_relationship
join fish_str on mrel_mrkr_1_zdb_id = fishstr_str_zdb_id
join fish on fishstr_fish_zdb_id = fish_zdb_id
join fish_experiment on genox_fish_zdb_id = fish_zdb_id
join genotype_feature on fish_genotype_zdb_id = genofeat_geno_zdb_id
join feature on genofeat_feature_zdb_id = feature_zdb_id
join genotype on ( geno_zdb_id = fish_genotype_zdb_id and geno_zdb_id = genofeat_geno_zdb_id )
join phenotype_experiment on phenox_genox_zdb_id = genox_zdb_id
where mrel_type = 'knockdown reagent targets gene'
and mrel_mrkr_1_zdb_id like 'ZDB-CRISPR%'
and feature_type = 'TRANSGENIC_INSERTION'
and geno_is_wildtype = 'f'";

my $ct26 = ZFINPerlModules->countData($sql);

Expand All @@ -128,11 +130,11 @@
close PHENO;

print "\n call FinCount.pl to get monthly fin phenotype count\n";
system ("<!--|ROOT_PATH|-->/server_apps/Reports/PATO/FinCount.pl");
system ("$rootpath/server_apps/Reports/PATO/FinCount.pl");


#--------------------------------------------------------------------------
# send Ken counts of various gene name types with & without orthology
system("<!--|ROOT_PATH|-->/server_apps/Reports/Nomenclature/get_uninformative.sh");
system("$rootpath/server_apps/Reports/Nomenclature/get_uninformative.sh");

exit;
13 changes: 8 additions & 5 deletions server_apps/Reports/PATO/FinCount.pl
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,20 @@
use DBI;

# set environment variables

chdir "<!--|ROOT_PATH|-->/server_apps/Reports/PATO";
use FindBin;
use lib "$FindBin::Bin/../../perl_lib/";
use ZFINPerlModules qw(assertEnvironment);
assertEnvironment('ROOT_PATH', 'PGHOST', 'DB_NAME');

print "\nStart counting Fin phenotypes\n\n";

my $dbname = "<!--|DB_NAME|-->";
my $dbname = $ENV{'DB_NAME'};
my $dbhost = $ENV{'PGHOST'};
my $username = "";
my $password = "";

### open a handle on the db
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=localhost", $username, $password)
my $dbh = DBI->connect ("DBI:Pg:dbname=$dbname;host=$dbhost", $username, $password)
or die "Cannot connect to Informix database: $DBI::errstr\n";

my $cur = $dbh->prepare("select to_char(now(), 'MM/DD/YYYY') from organism where organism_common_name = ?;");
Expand Down Expand Up @@ -151,6 +154,6 @@

close(SQLFILE);

system("psql -v ON_ERROR_STOP=1 -d <!--|DB_NAME|--> -f FinPhenoCount.sql > FinPhenotypeStatistics.txt 2> errFin.txt");
system("psql -v ON_ERROR_STOP=1 -d $dbname -f FinPhenoCount.sql > FinPhenotypeStatistics.txt 2> errFin.txt");

exit;
Loading

0 comments on commit 9e5ae88

Please sign in to comment.