-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MAG <- Sample containment mapping (#11)
* [wip] adds containment relationship between MAGs and Samples * adds tests for genome-sample mapping
- Loading branch information
1 parent
326d2db
commit a98c3ce
Showing
19 changed files
with
589 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
103 changes: 103 additions & 0 deletions
103
holofood/management/commands/import_mag_sample_mapping.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import argparse | ||
import logging | ||
from csv import DictReader | ||
|
||
from django.core.management.base import BaseCommand, CommandError | ||
|
||
from holofood.models import GenomeSampleContainment, Genome, Sample | ||
|
||
|
||
class Command(BaseCommand):
    """Import MAG-to-sample containment mappings from a TSV file.

    Each row names a cluster-representative accession (`mgyg`), a sample
    accession (`sample_accession`) and a `containment` value. For every
    `Genome` whose `cluster_representative` equals the row's `mgyg`, a
    `GenomeSampleContainment` linking it to the sample is created. If the
    link already exists, its containment is raised to the row's value when
    that value is larger.
    """

    help = (
        "Import mappings between MAGs and samples, from a TSV file. "
        "Needs columns of at least `mgyg`, `sample_accession`, `containment`."
    )

    def add_arguments(self, parser):
        """Register the positional TSV file and the optional pre-clear option."""
        parser.add_argument(
            "mapping_file",
            type=argparse.FileType("r"),
            help="Path to the TSV file listing MAG – Sample pairs.",
        )
        parser.add_argument(
            "--catalogue_id_to_preclear",
            type=str,
            help="(Optional) ID of a MAG catalogue, in slug form, "
            "to clear all sample maps from prior to inserting new ones.",
            default=None,
        )

    def handle(self, *args, **options):
        """Validate the TSV header, optionally pre-clear, then import each row.

        Raises:
            CommandError: if the TSV lacks any of the required columns
                (including the case of a completely empty file).
        """
        tsv_file = options["mapping_file"]
        catalogue_id_to_preclear = options["catalogue_id_to_preclear"]

        # The file was opened by argparse; make sure it is closed even when
        # validation or the import itself raises.
        try:
            reader = DictReader(tsv_file, delimiter="\t")

            # TSV column name -> model field it feeds.
            column_mapping = {
                "mgyg": "genome_id",
                "containment": "containment",
                "sample_accession": "sample_id",
            }

            # `fieldnames` is None for an empty file; treat that as "no columns"
            # so the user gets the CommandError rather than a TypeError.
            missing = set(column_mapping.keys()).difference(reader.fieldnames or [])
            if missing:
                raise CommandError(
                    f"Not all expected columns were found in the TSV. {missing=}"
                )

            # Only delete existing data once the input is known to be usable,
            # so a malformed file cannot wipe a catalogue's mappings.
            if catalogue_id_to_preclear:
                self._preclear_catalogue(catalogue_id_to_preclear)

            for mapping in reader:
                self._import_mapping(mapping)
        finally:
            tsv_file.close()

        self.stdout.write(self.style.SUCCESS("Done"))

    def _preclear_catalogue(self, catalogue_id):
        """Delete every containment whose genome belongs to `catalogue_id`."""
        existing_containments = GenomeSampleContainment.objects.filter(
            genome__catalogue_id=catalogue_id
        )
        logging.info(
            f"Deleting {existing_containments.count()} existing containments from genomes in {catalogue_id}"
        )
        existing_containments.delete()

    def _import_mapping(self, mapping):
        """Create or update the containments described by one TSV row.

        Rows whose sample or genomes are unknown are reported as warnings on
        stdout and skipped.
        """
        logging.info(
            f"Importing mapping for {mapping['mgyg']} to sample {mapping['sample_accession']}"
        )
        try:
            sample = Sample.objects.get(accession=mapping["sample_accession"])
        except Sample.DoesNotExist:
            self.stdout.write(
                self.style.WARNING(
                    f"Sample {mapping['sample_accession']} does not exist."
                )
            )
            return

        # Evaluate the queryset once instead of exists() + count() + iteration.
        genomes = list(Genome.objects.filter(cluster_representative=mapping["mgyg"]))
        if not genomes:
            self.stdout.write(
                self.style.WARNING(
                    f"Genomes with cluster rep {mapping['mgyg']} do not exist."
                )
            )
            return

        logging.debug(f"Found {len(genomes)} Genomes")
        # Parse once per row; used both for creation and for the comparison.
        containment = float(mapping["containment"])

        for genome in genomes:
            (
                genome_sample_containment,
                created,
            ) = GenomeSampleContainment.objects.get_or_create(
                genome=genome,
                sample=sample,
                defaults={"containment": containment},
            )
            if created:
                logging.debug(
                    f"Created genome-sample-containment {genome_sample_containment}"
                )
            elif containment > genome_sample_containment.containment:
                logging.info(
                    f"Genome-sample-containment {genome_sample_containment} already exists, but updating "
                )
                # Actually persist the larger value; previously this branch
                # only logged and the update was lost.
                genome_sample_containment.containment = containment
                genome_sample_containment.save(update_fields=["containment"])
                logging.debug(
                    f"Updated genome-sample-containment {genome_sample_containment}"
                )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Generated by Django 4.2 on 2024-06-20 10:54 | ||
|
||
from django.db import migrations, models | ||
import django.db.models.deletion | ||
|
||
|
||
class Migration(migrations.Migration):
    """Create the `GenomeSampleContainment` model linking genomes to samples."""

    dependencies = [
        ("holofood", "0036_alter_animalstructureddatum_measurement_and_more"),
    ]

    operations = [
        migrations.CreateModel(
            name="GenomeSampleContainment",
            fields=[
                # Surrogate primary key.
                (
                    "id",
                    models.BigAutoField(
                        verbose_name="ID",
                        primary_key=True,
                        auto_created=True,
                        serialize=False,
                    ),
                ),
                ("containment", models.FloatField(default=0)),
                # Both links cascade: removing a genome or a sample removes
                # the containment rows that reference it.
                (
                    "genome",
                    models.ForeignKey(
                        to="holofood.genome",
                        related_name="samples_containing",
                        on_delete=models.CASCADE,
                    ),
                ),
                (
                    "sample",
                    models.ForeignKey(
                        to="holofood.sample",
                        related_name="genomes_contained",
                        on_delete=models.CASCADE,
                    ),
                ),
            ],
            # Grouped by genome, highest containment first.
            options={"ordering": ("genome", "-containment")},
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.