Skip to content

Commit 46cc30e

Browse files
committed
Remove re-mapped organisms from strains/genotypes
We now check the strain and genotype tables for organisms that need to be re-mapped. Then strains are de-duplicated. Refs #2831
1 parent 40f4af1 commit 46cc30e

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

etc/reapply_species_strain_map.pl

+69
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@ BEGIN
5858
# the CursDBs
5959
my %genes_to_update = ();
6060

61+
# Build the lookup key for a Strain row: "<organism_id>-<strain_name>".
# Two rows that produce the same key share an organism and a strain name,
# which is how callers in this script match up strains after re-mapping.
#
# Args:    $strain - an object providing organism_id() and strain_name()
# Returns: the key as a string
sub make_strain_key
{
  my ($strain) = @_;

  my $organism_id = $strain->organism_id();
  my $strain_name = $strain->strain_name();

  return "${organism_id}-${strain_name}";
}
6167

6268
my $proc = sub {
6369
my $curs = shift;
@@ -103,6 +109,69 @@ BEGIN
103109
}
104110
}
105111

112+
my $strain_rs = $curs_schema->resultset('Strain')
113+
->search({}, { prefetch => 'organism' });
114+
115+
my %updated_strains = ();
116+
my %non_updated_strains = ();
117+
118+
while (defined (my $strain = $strain_rs->next())) {
119+
my $strain_taxonid = $strain->organism()->taxonid();
120+
121+
my $new_org = $taxon_map{$strain_taxonid};
122+
123+
if (defined $new_org) {
124+
$strain->organism($new_org);
125+
$strain->update();
126+
127+
$updated_strains{make_strain_key($strain)} = $strain;
128+
} else {
129+
$non_updated_strains{make_strain_key($strain)} = $strain;
130+
}
131+
}
132+
133+
my %strain_id_update_map = ();
134+
135+
# After mapping, if we now have multiple Strain rows with the
136+
# same strain_name and organism, make a map of the strains to
137+
# remove/replace if they are duplicates
138+
while (my ($updated_key, $updated_strain) = each %updated_strains) {
139+
my $non_updated_strain = $non_updated_strains{$updated_key};
140+
141+
if (defined $non_updated_strain) {
142+
$strain_id_update_map{$updated_strain->strain_id()} =
143+
$non_updated_strain->strain_id();
144+
}
145+
}
146+
147+
my $genotype_rs = $curs_schema->resultset('Genotype')
148+
->search({}, { prefetch => 'organism' });
149+
150+
while (defined (my $genotype = $genotype_rs->next())) {
151+
my $genotype_taxonid = $genotype->organism()->taxonid();
152+
153+
my $new_org = $taxon_map{$genotype_taxonid};
154+
155+
if (defined $new_org) {
156+
$genotype->organism($new_org);
157+
$genotype->update();
158+
}
159+
160+
my $updated_strain_id =
161+
$strain_id_update_map{$genotype->strain_id()};
162+
163+
if (defined $updated_strain_id) {
164+
$genotype->strain_id($updated_strain_id);
165+
$genotype->update();
166+
}
167+
}
168+
169+
# delete one copy of strains that are now duplicated
170+
for my $strain_to_remove_id (keys %strain_id_update_map) {
171+
$curs_schema->resultset('Strain')->find($strain_to_remove_id)
172+
->delete();
173+
}
174+
106175
# Remove unused organisms
107176
$organism_rs = $curs_schema->resultset('Organism');
108177

0 commit comments

Comments
 (0)