Skip to content

Commit

Permalink
Merge pull request #243 from andersen-lab/demix_hierarchy_output_path
Browse files Browse the repository at this point in the history
Demix hierarchy output path
  • Loading branch information
joshuailevy authored Jul 18, 2024
2 parents 68dff29 + 7cad3ac commit 5edf2a0
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 3,389 deletions.
144 changes: 10 additions & 134 deletions freyja.egg-info/PKG-INFO

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions freyja.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ freyja/data/example_covariants_plot.png
freyja/data/example_query.csv
freyja/data/introContent.txt
freyja/data/last_barcode_update.txt
freyja/data/lineage_mutations.json
freyja/data/lineages.yml
freyja/data/mixture.depth
freyja/data/mixture.tsv
freyja/data/plot_config.yml
freyja/data/rel_growth_rates.csv
freyja/data/sc2_regions_of_interest.json
freyja/data/sc2_spike.csv
freyja/data/sweep_metadata.csv
freyja/data/sweep_metadata_missing.csv
freyja/data/test.bam
Expand All @@ -55,9 +58,11 @@ freyja/data/times_metadata.csv
freyja/data/times_metadata0.csv
freyja/data/title.txt
freyja/data/usher_barcodes.csv
freyja/data/usher_barcodes.feather
freyja/data/usher_barcodes_with_gisaid.csv
freyja/tests/__init__.py
freyja/tests/test_barcoding.py
freyja/tests/test_cli.py
freyja/tests/test_deconv.py
freyja/tests/test_read_analysis.py
freyja/tests/test_utils.py
Expand Down
3 changes: 3 additions & 0 deletions freyja.egg-info/requires.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ cvxpy
seaborn
pysam
biopython
sphinx
sphinx_rtd_theme
sphinx-click@ git+https://github.com/dylanpilz/sphinx-click.git
3 changes: 2 additions & 1 deletion freyja/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def print_barcode_version(ctx, param, value):
help='Path to custom barcode file')
@click.option('--meta', default='',
help='custom lineage to variant metadata file')
@click.option('--output', default='output name',
@click.option('--output', default='demixing_result.tsv',
help='Output file',
type=click.Path(exists=False), show_default=True)
@click.option('--covcut', default=10,
Expand Down Expand Up @@ -100,6 +100,7 @@ def demix(variants, depths, output, eps, barcodes, meta,
indexSimplified = [dfi.split('_')[0] for dfi in df_barcodes.index]
df_barcodes = df_barcodes.loc[indexSimplified, :]
df_depth = pd.read_csv(depths, sep='\t', header=None, index_col=1)

if depthcutoff != 0:
df_barcodes = collapse_barcodes(df_barcodes, df_depth, depthcutoff,
lineageyml, locDir, output,
Expand Down
3,241 changes: 0 additions & 3,241 deletions freyja/data/usher_barcodes_with_gisaid.csv

This file was deleted.

85 changes: 85 additions & 0 deletions freyja/read_analysis_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,88 @@ def get_gene(locus, gene_positions):
start, end = gene_positions[gene]
if locus in range(start, end+1):
return gene, start


def translate_snvs(snvs, ref, gene_positions):
# Load reference genome
ref = MutableSeq(next(SeqIO.parse(ref, 'fasta')).seq)
output = {snv: None for snv in snvs}
for snv in snvs:
output[snv] = snv
if '+' in snv:
ins = snv.split('+')
locus = int(ins[0][1:])
gene_info = get_gene(locus, gene_positions)
ins_string = str(snv).replace(' ', '')
if gene_info is None or 'N' in ins_string:
print(gene_info, ins_string, locus)
continue
gene, start_site = gene_info
aa_locus = ((locus - start_site) // 3) + 2

if len(ins[1]) % 3 == 0:
insertion_seq = MutableSeq(ins[1]).translate()
else:
insertion_seq = ''

aa_mut = (f'{ins_string}({gene}:INS{aa_locus}'
f'{insertion_seq})')
output[snv] = aa_mut

elif '-' in snv:
deletion = snv.split('-')
locus = int(deletion[0][1:])
gene_info = get_gene(locus, gene_positions)
deletion_string = str(snv).replace(' ', '')
if gene_info is None:
print(gene_info, deletion_string, locus)
continue

gene, start_site = gene_info
aa_locus = ((locus - start_site) // 3) + 2

del_length = len(deletion[1]) // 3
if (len(deletion[1]) % 3) == 0:
aa_mut = (
f'{deletion_string}({gene}:DEL{aa_locus}/'
f'{aa_locus+del_length-1})'
)
else:
continue
output[snv] = aa_mut
else:
snp = snv
locus = int(snp[1:-1])

# Find the key in gene_positions that corresponds to the gene
# containing the SNP
gene_info = None
for gene in gene_positions:
start, end = gene_positions[gene]
if start <= locus <= end:
gene_info = gene, start
break
if gene_info is None:
continue

codon_position = (locus - start) % 3
aa_locus = ((locus - codon_position - start) // 3) + 1

ref_codon = ref[locus - codon_position - 1:
locus - codon_position + 2]
ref_aa = ref_codon.translate()

alt_codon = MutableSeq(str(ref_codon))
alt_codon[codon_position] = snp[-1]

if len(alt_codon) % 3 != 0 or len(alt_codon) == 0 \
or 'N' in alt_codon:
continue
alt_aa = alt_codon.translate()

if ref_aa == alt_aa or '*' in alt_aa:
continue # Synonymous/stop codon mutation

aa_mut = f'{gene}:{ref_aa}{aa_locus}{alt_aa}'
output[snp] = aa_mut
return output
2 changes: 1 addition & 1 deletion freyja/sample_deconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def read_snv_frequencies_vcf(fn, depthFn, muts):
break
file.close()

df = pd.read_csv(fn, comment='#', delim_whitespace=True,
df = pd.read_csv(fn, comment='#', sep=r'\s+',
header=None,
names=vcfnames)
vcf_info = df['INFO'].str.split(';', expand=True)
Expand Down
5 changes: 3 additions & 2 deletions freyja/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ def test_demix(self):

def test_demix_with_cutoff(self):
os.system('freyja demix freyja/data/test.tsv freyja/data/test.depth \
--output test.demixed.tsv --depthcutoff 100 --lineageyml \
--output test_demixed.tsv --depthcutoff 100 --lineageyml \
freyja/data/lineages.yml')
self.assertTrue(file_exists('.', "test_collapsed_lineages.yml"))
self.assertTrue(file_exists('.',
"test_demixed_collapsed_lineages.yml"))

def test_plot(self):
os.system('freyja plot freyja/data/aggregated_result.tsv \
Expand Down
22 changes: 12 additions & 10 deletions freyja/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -946,17 +946,18 @@ def collapse_barcodes(df_barcodes, df_depth, depthcutoff,
low_cov_muts = [mut for mut in df_barcodes.columns
if mut[1:-1] in low_cov_sites]
df_barcodes = df_barcodes.drop(low_cov_muts, axis=1)

max_depth = df_depth[3].astype(int).max()

min_depth = df_depth[3].astype(int).min()
# find lineages with identical barcodes
try:
duplicates = df_barcodes.groupby(df_barcodes.columns.tolist()).apply(
lambda x: tuple(x.index) if len(x.index) > 1 else None
).dropna()
except ValueError:
raise ValueError(f'Error: --depthcutoff {depthcutoff} too high'
f'for data with max depth {max_depth}')
raise ValueError(f'Error: --depthcutoff {depthcutoff} threshold'
f' too high for data with max depth {max_depth},'
f' min depth {min_depth},'
f' Not enough coverage depth at all sites')

if len(duplicates) == 0:
return df_barcodes
Expand Down Expand Up @@ -1092,12 +1093,13 @@ def collapse_barcodes(df_barcodes, df_depth, depthcutoff,
collapsed_lineages[mrca] = list(tup)
df_barcodes = df_barcodes.rename({lin: mrca for lin in tup})
df_barcodes = df_barcodes.drop_duplicates()

# defaults from demix and boot
if output == 'demixing_result.csv' or output == 'test':
output = 'collapsed_lineages.yml'
else:
output = f'{output.split(".")[0]}_collapsed_lineages.yml'
baseName = output
if '.' in baseName:
baseName = os.path.basename(baseName).split(".")
baseName = '.'.join(baseName[0:(len(baseName)-1)])
output = os.path.join(os.path.dirname(output),
baseName +
'_collapsed_lineages.yml')

with open(output, 'w') as f:
yaml.dump(collapsed_lineages, f, default_flow_style=False)
Expand Down

0 comments on commit 5edf2a0

Please sign in to comment.