|
58 | 58 | # the CursDBs
|
59 | 59 | my %genes_to_update = ();
|
60 | 60 |
|
# Build the string used to recognise Strain rows that describe the same
# strain: "<organism_id>-<strain_name>".
#
# Args:    $strain - an object providing organism_id() and strain_name()
# Returns: the key string
sub make_strain_key
{
  my ($strain) = @_;

  return $strain->organism_id() . '-' . $strain->strain_name();
}
61 | 67 |
|
62 | 68 | my $proc = sub {
|
63 | 69 | my $curs = shift;
|
@@ -103,6 +109,69 @@ BEGIN
|
103 | 109 | }
|
104 | 110 | }
|
105 | 111 |
|
# Re-point each Strain whose organism taxon ID has an entry in %taxon_map at
# the mapped organism.  Strains are recorded by key (see make_strain_key())
# in one of two hashes depending on whether they were changed, so that
# duplicates created by the remapping can be detected afterwards.
my $strain_rs = $curs_schema->resultset('Strain')
  ->search({}, { prefetch => 'organism' });

my %updated_strains = ();
my %non_updated_strains = ();

while (defined (my $strain = $strain_rs->next())) {
  my $new_org = $taxon_map{$strain->organism()->taxonid()};

  if (defined $new_org) {
    $strain->organism($new_org);
    $strain->update();

    # The key is computed only after the organism has been reassigned.
    # NOTE(review): if two updated strains end up with the same key, the
    # later one overwrites the earlier one here and only one of them is
    # considered for de-duplication below - confirm this cannot happen.
    $updated_strains{make_strain_key($strain)} = $strain;
  } else {
    $non_updated_strains{make_strain_key($strain)} = $strain;
  }
}

# After mapping, if we now have multiple Strain rows with the same
# strain_name and organism, make a map of the strains to remove/replace:
#   strain_id of the updated (now redundant) row
#     => strain_id of the non-updated row with the same key
my %strain_id_update_map = ();

for my $strain_key (keys %updated_strains) {
  my $surviving_strain = $non_updated_strains{$strain_key};

  next unless defined $surviving_strain;

  $strain_id_update_map{$updated_strains{$strain_key}->strain_id()} =
    $surviving_strain->strain_id();
}

# Apply the organism mapping to each Genotype and move genotypes off any
# strain that is about to be removed as a duplicate.
my $genotype_rs = $curs_schema->resultset('Genotype')
  ->search({}, { prefetch => 'organism' });

while (defined (my $genotype = $genotype_rs->next())) {
  my $needs_update = 0;

  my $new_org = $taxon_map{$genotype->organism()->taxonid()};

  if (defined $new_org) {
    $genotype->organism($new_org);
    $needs_update = 1;
  }

  # Guard against an undefined strain_id so it is never used as a hash key
  # (which would trigger an "uninitialized value" warning).
  my $current_strain_id = $genotype->strain_id();

  if (defined $current_strain_id) {
    my $replacement_strain_id = $strain_id_update_map{$current_strain_id};

    if (defined $replacement_strain_id) {
      $genotype->strain_id($replacement_strain_id);
      $needs_update = 1;
    }
  }

  # write both changes with a single update()
  $genotype->update() if $needs_update;
}

# delete one copy of strains that are now duplicated
my $strain_delete_rs = $curs_schema->resultset('Strain');

for my $strain_to_remove_id (keys %strain_id_update_map) {
  my $strain_to_remove = $strain_delete_rs->find($strain_to_remove_id);

  # find() returns undef when no row matches; skip instead of dying on an
  # undefined invocant
  next unless defined $strain_to_remove;

  $strain_to_remove->delete();
}
| 174 | + |
106 | 175 | # Remove unused organisms
|
107 | 176 | $organism_rs = $curs_schema->resultset('Organism');
|
108 | 177 |
|
|
0 commit comments