Skip to content

Commit

Permalink
version bump
Browse files Browse the repository at this point in the history
  • Loading branch information
drewjbeh committed Sep 7, 2022
1 parent 9f81252 commit 4d56f38
Show file tree
Hide file tree
Showing 13 changed files with 153 additions and 130 deletions.
1 change: 1 addition & 0 deletions build/lib/mimseq/data/hg38-eColitK/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Homo_sapiens_tRNA-Asn-GTT-16-5 does not share exact sequence with other GTT-16 g
Homo_sapiens_tRNA-Asn-GTT-9-2 does not share sequence with GTT-9-1. Rename to Homo_sapiens_tRNA-Asn-GTT-14-1
Homo_sapiens_tRX-Asn-GTT-3-1 shares sequence with Homo_sapiens_tRX-Asn-GTT-2-1. Rename to Homo_sapiens_tRX-Asn-GTT-2-2
Homo_sapiens_tRX-Phe-GAA-1-1 shares sequence with Homo_sapiens_tRNA-Phe-GAA-10-1. Rename to Homo_sapiens_tRNA-Phe-GAA-10-2
Homo_sapiens_tRNA-Ala-AGC-7-1 shares sequence with Homo_sapiens_tRNA-Ala-AGC-2. Renamed to Homo_sapiens_tRNA-Ala-AGC-2-3

### Manually removed sequences due to strange sequence, poor score and issues with infernal alignment with mimseq
### See hg38-tRNAs-filtered.fa
Expand Down
52 changes: 27 additions & 25 deletions build/lib/mimseq/deseq.R
Original file line number Diff line number Diff line change
Expand Up @@ -433,29 +433,31 @@ for (type in c("cyto", "mito")) {

# Annotation for baseMean counts
if (nrow(comb_isodecoder) != 0) {
baseMeanAnno_iso = rowAnnotation(base_mean = anno_lines(comb_isodecoder$baseMean,
gp = gpar(lwd = 1.5, col = "#084081")),
annotation_name_rot = 90,
width = unit("0.8", "cm"),
height = unit(20, "cm"))
anno_type = ifelse(nrow(comb_isodecoder) > 1, anno_lines, anno_barplot)
baseMeanAnno_iso = rowAnnotation(base_mean = anno_type(comb_isodecoder$baseMean,
gp = gpar(lwd = 1.5, col = "#084081", fill = "#084081")),
annotation_name_rot = 90,
width = unit("0.8", "cm"),
height = unit(20, "cm"))

hm_iso = Heatmap(scaled_counts_isodecoder,
col = col_fun,
row_title = paste('n = ', nrow(scaled_counts_isodecoder), sep = ""),
show_row_names = FALSE,
width = unit((length(ordered_levels)*0.7) + 2, "cm"),
height = unit(20, "cm"),
border = "gray20",
cluster_columns = TRUE,
heatmap_legend_param = list(
title = paste("Scaled expression\nlog2 fold-change (adj-p <= ", p_adj,")", sep = "")
)
col = col_fun,
row_title = paste('n = ', nrow(scaled_counts_isodecoder), sep = ""),
show_row_names = FALSE,
width = unit((length(ordered_levels)*0.7) + 2, "cm"),
height = unit(20, "cm"),
border = "gray20",
cluster_columns = TRUE,
heatmap_legend_param = list(
title = paste("Scaled expression\nlog2 fold-change (adj-p <= ", p_adj,")", sep = "")
)
)
}

if (nrow(comb_anticodon) != 0) {
baseMeanAnno_anti = rowAnnotation(base_mean = anno_lines(comb_anticodon$baseMean,
gp = gpar(lwd = 1.5, col = "#084081")),
anno_type = ifelse(nrow(comb_isodecoder) > 1, anno_lines, anno_barplot)
baseMeanAnno_anti = rowAnnotation(base_mean = anno_type(comb_anticodon$baseMean,
gp = gpar(lwd = 1.5, col = "#084081", fill = "#084081")),
annotation_name_rot = 90,
width = unit("0.8", "cm"),
height = unit(20, "cm"))
Expand Down Expand Up @@ -490,17 +492,17 @@ for (type in c("cyto", "mito")) {
}
if (nrow(comb_anticodon) != 0) {
lfc_anti = Heatmap(comb_anticodon[lfc],
col = col_fun,
width = unit(0.5, "cm"),
height = unit(20, "cm"),
na_col = "white",
border = "gray20",
show_heatmap_legend = FALSE)
hm_anti = hm_anti + lfc_anti
col = col_fun,
width = unit(0.5, "cm"),
height = unit(20, "cm"),
na_col = "white",
border = "gray20",
show_heatmap_legend = FALSE)
hm_anti = hm_anti + lfc_anti
}
}
}

# calculate plot width based on sample numbers
plot_width = (length(ordered_levels)*0.8 + 4) + (length(ordered_levels)-1)*1.3 + 0.8
plot_width = plot_width/2.54
Expand Down
48 changes: 24 additions & 24 deletions build/lib/mimseq/mimseq.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#! /usr/bin/env python3

# +-------------+
# | mim-tRNAseq |
# | mim-tRNAseq |
# +-------------+

####################################
# Main backbone and wrapper script #
####################################
#
#
# author: Drew Behrens
# contact: [email protected]
# github: https://github.com/nedialkova-lab/mim-tRNAseq
Expand All @@ -18,7 +18,7 @@
from .tRNAmap import mainAlign
from .getCoverage import getCoverage, plotCoverage
from .mmQuant import generateModsTable, plotCCA
from .ssAlign import structureParser, modContext
from .ssAlign import structureParser, modContext
from .splitClusters import splitIsodecoder, unsplitClustersCov, getIsodecoderSizes, getDeconvSizes, writeDeconvTranscripts, writeIsodecoderTranscripts, writeSplitInfo, writeIsodecoderInfo
import sys, os, subprocess, logging, datetime, copy
import argparse
Expand Down Expand Up @@ -49,8 +49,8 @@ def restrictedFloat2(x):
raise argparse.ArgumentTypeError('{} not a real number'.format(x))

def mimseq(trnas, trnaout, name, species, out, cluster, cluster_id, cov_diff, posttrans, control_cond, threads, max_multi, snp_tolerance, \
keep_temp, cca, double_cca, min_cov, mismatches, remap, remap_mismatches, misinc_thresh, mito_trnas, pretrnas, local_mod, p_adj, sample_data):
keep_temp, cca, double_cca, min_cov, mismatches, remap, remap_mismatches, misinc_thresh, mito_trnas, plastid_trnas, pretrnas, local_mod, p_adj, sample_data):

# Main wrapper
# Integrity check for output folder argument...
try:
Expand Down Expand Up @@ -98,7 +98,7 @@ def mimseq(trnas, trnaout, name, species, out, cluster, cluster_id, cov_diff, po
modifications = os.path.dirname(os.path.realpath(__file__))
modifications += "/modifications"
coverage_bed, snp_tolerance, mismatch_dict, insert_dict, del_dict, mod_lists, Inosine_lists, Inosine_clusters, tRNA_dict, cluster_dict, cluster_perPos_mismatchMembers \
= modsToSNPIndex(trnas, trnaout, mito_trnas, modifications, name, out, double_cca, threads, snp_tolerance, cluster, cluster_id, posttrans, pretrnas, local_mod)
= modsToSNPIndex(trnas, trnaout, mito_trnas, plastid_trnas, modifications, name, out, double_cca, threads, snp_tolerance, cluster, cluster_id, posttrans, pretrnas, local_mod)
structureParser()
# Generate GSNAP indices
genome_index_path, genome_index_name, snp_index_path, snp_index_name = generateGSNAPIndices(species, name, out, map_round, snp_tolerance, cluster)
Expand Down Expand Up @@ -139,7 +139,7 @@ def mimseq(trnas, trnaout, name, species, out, cluster, cluster_id, cov_diff, po
#else:
# log.info("\n*** New modifications not discovered as remap is not enabled ***\n")

# redo checks for unsplit isodecoders based on coverage
# redo checks for unsplit isodecoders based on coverage
# use original splitBool and unique_isodecoderMMs in case coverage changes in 2nd alignment round, regenerate splitBool_new and unique_isodecoderMMs_new
# rewrite deconv transcripts
if map_round == 2 and cluster and cluster_id != 1:
Expand All @@ -166,7 +166,7 @@ def mimseq(trnas, trnaout, name, species, out, cluster, cluster_id, cov_diff, po
mod_sites, cons_pos_list = modContext(out, unsplitCluster_lookup)

script_path = os.path.dirname(os.path.realpath(__file__))

if snp_tolerance or not mismatches == 0.0:
# plot mods and stops, catch exception with command call and print log error if many clusters are filtered (known to cause issues with R code handling mods table)
log.info("Plotting modification and RT stop data...")
Expand Down Expand Up @@ -217,25 +217,25 @@ def mimseq(trnas, trnaout, name, species, out, cluster, cluster_id, cov_diff, po

def main():

###################
###################
# Parse arguments #
###################
###################

parser = argparse.ArgumentParser(description = 'Custom high-throughput tRNA sequencing alignment and quantification pipeline\
based on modification induced misincorporation cDNA synthesis.', add_help = True, usage = "%(prog)s [options] sample data")

inputs = parser.add_argument_group("Input files")
inputs.add_argument('-s','--species', metavar='species', required = not ('-t' in sys.argv), dest = 'species', help = \
inputs.add_argument('-s','--species', metavar='species', required = not ('-t' in sys.argv or '--trnas' in sys.argv), dest = 'species', help = \
'Species being analyzed for which to load pre-packaged data files (prioritized over -t, -o and -m). Options are: Hsap, Hsap19, Mmus, Rnor, Scer, Spom, Dmel, Drer, Ecol, Atha', \
choices = ['Hsap', 'Hsap19','Ggor','Mmus','Rnor','Scer', 'Spom','Dmel', 'Drer', 'Ecol', 'Atha', 'HsapTCC', 'ScerMut'])
inputs.add_argument('-t', '--trnas', metavar='genomic tRNAs', required = False, dest = 'trnas', help = \
'Genomic tRNA fasta file, e.g. from gtRNAdb or tRNAscan-SE. Already avalable in data folder for a few model organisms.')
inputs.add_argument('-o', '--trnaout', metavar = 'tRNA out file', required = (not '--species' or '-s' in sys.argv) or ('-t' in sys.argv),
inputs.add_argument('-o', '--trnaout', metavar = 'tRNA out file', required = (not '--species' or '-s' in sys.argv) or ('-t' in sys.argv),
dest = 'trnaout', help = 'tRNA.out file generated by tRNAscan-SE (also may be available on gtRNAdb). Contains information about tRNA features, including introns.')
inputs.add_argument('-m', '--mito-trnas', metavar = 'mitochondrial/plastid tRNAs', required = False, nargs = "*", dest = 'mito', \
help = 'Mitochondrial/plastid tRNA fasta file(s). Can be multiple space-separated file names to specify both mitochondrial and plastid seqences.\
Ensure "plastid" and "mito" are in the file names in this case. Should be downloaded from mitotRNAdb or PtRNAdb for species of interest. Already available in data folder for a few model organisms.')

inputs.add_argument('-m', '--mito-trnas', metavar = 'mitochondrial/plastid tRNAs', required = False, dest = 'mito', \
help = 'Mitochondrial tRNA fasta file(s). Should be downloaded from mitotRNAdb for species of interest. Already available in data folder for a few model organisms.')
inputs.add_argument('-p', '--plastid-trnas', metavar = 'mitochondrial/plastid tRNAs', required = False, dest = 'plastid', \
help = 'Plastid tRNA fasta file(s). Should be downloaded from PtRNAdb for species of interest. Already available in data folder for a few model organisms.')
options = parser.add_argument_group("Program options")
options.add_argument('--pretRNAs', required = False, dest = 'pretrnas', action = 'store_true',\
help = "Input reference sequences are pretRNAs. Enabling this option will disable the removal of intron sequences and addition of 3'-CCA to generate \
Expand Down Expand Up @@ -308,7 +308,7 @@ def main():

parser.add_argument('--version', action='version', version='%(prog)s {}'.format(version.__version__), help = 'Show version number and exit')
parser.add_argument('sampledata', help = 'Sample data sheet in text format, tab-separated. Column 1: full path to fastq (or fastq.gz). Column 2: condition/group.')

parser.set_defaults(threads=1, out="./", max_multi = 3, mito = '', cov_diff = 0.5)

#########################################
Expand Down Expand Up @@ -346,7 +346,7 @@ def main():
if args.control_cond not in conditions:
raise argparse.ArgumentTypeError('{} not a valid condition in {}'.format(args.control_cond, args.sampledata))
if not args.species and not (args.trnas or args.trnaout):
parser.error('Must specify valid --species argument or supply -t (tRNA sequences) and -o (tRNAscan out file)!')
parser.error('Must specify valid --species argument or supply -t (tRNA sequences) and -o (tRNAscan out file)!')
else:
if args.species:
if args.species == 'Ggor':
Expand Down Expand Up @@ -380,7 +380,7 @@ def main():
if args.species == 'Rnor':
args.trnas = os.path.dirname(os.path.realpath(__file__)) + "/data/rn7-eColitK/rn7-tRNAs.fa"
args.trnaout = os.path.dirname(os.path.realpath(__file__)) + "/data/rn7-eColitK/rn7_eColitK-tRNAs-confidence-set.out"
args.mito = os.path.dirname(os.path.realpath(__file__)) + "/data/rn7-eColitK/rn7-mitotRNAs.fa"
args.mito = os.path.dirname(os.path.realpath(__file__)) + "/data/rn7-eColitK/rn7-mitotRNAs.fa"
if args.species == 'Spom':
args.trnas = os.path.dirname(os.path.realpath(__file__)) + "/data/schiPomb-eColitK/schiPomb_972H-tRNAs.fa"
args.trnaout = os.path.dirname(os.path.realpath(__file__)) + "/data/schiPomb-eColitK/schiPomb_eschColi-tRNAs.out"
Expand All @@ -400,15 +400,15 @@ def main():
if args.species == 'Atha':
args.trnas = os.path.dirname(os.path.realpath(__file__)) + "/data/araTha1-eColitK/araTha1-tRNAs.fa"
args.trnaout = os.path.dirname(os.path.realpath(__file__)) + "/data/araTha1-eColitK/araTha1-tRNAs-detailed.out"
# two mito files here for plastid and mito references
args.mito = os.path.dirname(os.path.realpath(__file__)) + "/data/araTha1-eColitK/araTha1-plastidtRNAs.fa " + os.path.dirname(os.path.realpath(__file__)) + "/data/araTha1-eColitK/araTha1-mitotRNAs.fa"
args.mito = os.path.dirname(os.path.realpath(__file__)) + "/data/araTha1-eColitK/araTha1-mitotRNAs.fa"
# plastid reference needed for A.thaliana
args.plastid = os.path.dirname(os.path.realpath(__file__)) + "/data/araTha1-eColitK/araTha1-plastidtRNAs.fa "
else:
args.species = args.trnas.split("/")[-1].split(".")[0]
mimseq(args.trnas, args.trnaout, args.name, args.species, args.out, args.cluster, args.cluster_id, args.cov_diff, \
args.posttrans, args.control_cond, args.threads, args.max_multi, args.snp_tolerance, \
args.keep_temp, args.cca, args.double_cca, args.min_cov, args.mismatches, args.remap, args.remap_mismatches, \
args.misinc_thresh, args.mito, args.pretrnas, args.local_mod, args.p_adj, args.sampledata)
args.misinc_thresh, args.mito, args.plastid, args.pretrnas, args.local_mod, args.p_adj, args.sampledata)

if __name__ == '__main__':
main()

5 changes: 4 additions & 1 deletion build/lib/mimseq/mmQuant.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,10 @@ def generateModsTable(sampleGroups, out_dir, name, threads, min_cov, mismatch_di
splitBool[cluster].update(set(filtMembers))

# generate new unsplit cluster names and add to the lookup dictionary (keep tRX naming for these isodecoders)
member_IsoNums = tuple(int(iso.split("-")[-2]) if "tRNA" in iso else "tRX" + iso.split("-")[-2] for iso in filtMembers)
if "tRNA" in cluster:
member_IsoNums = tuple(int(iso.split("-")[-2]) if "tRNA" in iso else "tRX" + iso.split("-")[-2] for iso in filtMembers)
elif "tRX" in cluster:
member_IsoNums = tuple(int(iso.split("-")[-2]) if "tRX" in iso else "tRNA" + iso.split("-")[-2] for iso in filtMembers)
member_IsoNums = sorted(tuple(x for x in member_IsoNums if isinstance(x, int))) + [x for x in member_IsoNums if isinstance(x, str)]
member_IsoString = "/" + "/".join([str(iso) for iso in member_IsoNums])
newClusterName = "-".join(cluster.split("-")[:-1]) + member_IsoString
Expand Down
24 changes: 17 additions & 7 deletions build/lib/mimseq/splitClusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,8 +411,11 @@ def getDeconvSizes(splitBool, tRNA_dict, cluster_dict, unique_isodecoderMMs):
# generate new names for unsplit clusters (tRNA-AA-Anti-IsoX/Y) - write to lookup for later
unsplitCluster_lookup = defaultdict()
for cluster, isos in splitBool.items():
member_IsoNums = tuple(int(iso.split("-")[-2]) for iso in isos)
member_IsoNums = sorted(member_IsoNums)
if "tRNA" in cluster:
member_IsoNums = tuple(int(iso.split("-")[-2]) if "tRNA" in iso else "tRX" + iso.split("-")[-2] for iso in isos)
elif "tRX" in cluster:
member_IsoNums = tuple(int(iso.split("-")[-2]) if "tRX" in iso else "tRNA" + iso.split("-")[-2] for iso in isos)
member_IsoNums = sorted(tuple(x for x in member_IsoNums if isinstance(x, int))) + [x for x in member_IsoNums if isinstance(x, str)]
member_IsoString = "/" + "/".join([str(iso) for iso in member_IsoNums])
newClusterName = "-".join(cluster.split("-")[:-1]) + member_IsoString
shortClusterName = "-".join(cluster.split("-")[:-1])
Expand Down Expand Up @@ -466,11 +469,18 @@ def writeIsodecoderInfo(out_dir, experiment_name, isodecoder_sizes, readRef_unsp
count = 0
for num in name.split("-")[-1].split("/"):
count += 1
if not "tRX" in num:
iso = base + "-" + num
else:
num = num.replace("tRX", "")
iso = base.replace("tRNA", "tRX") + "-" + num
if "tRNA" in base:
if not "tRX" in num:
iso = base + "-" + num
else:
num = num.replace("tRX", "")
iso = base.replace("tRNA", "tRX") + "-" + num
elif "tRX" in base:
if not "tRNA" in num:
iso = base + "-" + num
else:
num = num.replace("tRX", "")
iso = base.replace("tRNA", "tRX") + "-" + num
isodecoder_list = [x for x in tRNA_dict.keys() if iso == "-".join(x.split("-")[:-1]) and not "chr" in x]
iso_min = min([x.split("-")[-1] for x in isodecoder_list])
gene = iso + "-" + str(iso_min)
Expand Down
Loading

0 comments on commit 4d56f38

Please sign in to comment.