From f4450c4d2bc47b996c9a2b53c36090612bef9465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ester=20Mu=C3=B1oz=20del=20Campo?= Date: Mon, 17 Jun 2024 12:26:54 +0200 Subject: [PATCH] Fix colormap and thick_cds bugs. --- src/pyranges_plot/data_preparation.py | 15 +++-- src/pyranges_plot/example_data.py | 67 ------------------- .../matplotlib_base/data2plot.py | 6 +- src/pyranges_plot/plot_main.py | 2 +- src/pyranges_plot/plotly_base/data2plot.py | 6 +- 5 files changed, 19 insertions(+), 77 deletions(-) delete mode 100644 src/pyranges_plot/example_data.py diff --git a/src/pyranges_plot/data_preparation.py b/src/pyranges_plot/data_preparation.py index a624dd8..f57622b 100644 --- a/src/pyranges_plot/data_preparation.py +++ b/src/pyranges_plot/data_preparation.py @@ -339,17 +339,17 @@ def chrmd_limits(chrmd_df, limits): # pyranges object elif type(limits) is pr.PyRanges: # create dict to map limits - limits_df = limits.df - limits_chrmd_df = limits_df.groupby( + limits_chrmd_df = limits.groupby( CHROM_COL, group_keys=False, observed=True ).agg({START_COL: "min", END_COL: "max"}) - limits_chrmd_dict = limits_chrmd_df.to_dict(orient="index") + # limits_chrmd_dict = limits_chrmd_df.to_dict(orient="index") # function to get matching values from limits_chrmd_df def make_min_max(row): - chromosome = str(row.name) - limits = limits_chrmd_dict.get(chromosome) - if limits: + chromosome = row.name[0] + if chromosome in limits_chrmd_df.index: + limits = limits_chrmd_df.loc[chromosome] + return ( limits[START_COL], limits[END_COL], @@ -363,7 +363,8 @@ def make_min_max(row): # dictionary as limits else: chrmd_df["min_max"] = [ - limits.get(index) for index in chrmd_df.index + limits.get(index) + for index in list(chrmd_df.index.get_level_values(CHROM_COL)) ] # fills with None the chromosomes not specified diff --git a/src/pyranges_plot/example_data.py b/src/pyranges_plot/example_data.py deleted file mode 100644 index 5bd9247..0000000 --- a/src/pyranges_plot/example_data.py +++ /dev/null 
@@ -1,67 +0,0 @@ -import pyranges as pr - -data1 = pr.PyRanges( - { - "Chromosome": ["1"] * 9, - "Strand": ["+", "+", "-", "-", "-", "+", "+", "+", "-"], - "Start": [i * 100 for i in [5, 35, 3, 13, 35, 45, 49, 56, 60]], - "End": [i * 100 for i in [15, 37, 6, 17, 39, 47, 51, 57, 67]], - "transcript_id": ["t1", "t1", "t2", "t2", "t2", "t3", "t3", "t3", "t4"], - "second_id": ["a"] * 4 + ["b"] * 5, - } -) - -data2 = pr.PyRanges( - { - "Chromosome": ["1"] * 10 + ["2"] * 10 + ["4"], - "Strand": ["+", "+", "+", "+", "-", "-", "-", "-", "+", "+"] - + ["+", "+", "+", "+", "-", "-", "-", "-", "+", "+"] - + ["+"], - "Start": [90, 61, 104, 228, 9, 142, 52, 149, 218, 151] - + [5, 27, 37, 47, 1, 7, 42, 37, 60, 80] - + [20], - "End": [92, 64, 113, 229, 12, 147, 57, 155, 224, 153] - + [8, 32, 40, 50, 5, 10, 46, 40, 70, 90] - + [50], - "transcript_id": ["t1", "t1", "t1", "t1", "t2", "t2", "t2", "t2", "t3", "t3"] - + ["t4", "t4", "t4", "t4", "t5", "t5", "t5", "t5", "t6", "t6", "t7"], - "Feature": [ - "CDS", - "exon", - "exon", - "exon", - "exon", - "exon", - "exon", - "exon", - "exon", - "exon", - ] - + ["exon"] * 8 - + ["exon"] * 3, - } -) - -data3 = pr.PyRanges( - { - "Chromosome": ["1", "1", "2", "2", "2", "2", "2", "3", "4", "4", "4", "5"], - "Strand": ["+", "+", "-", "-", "+", "+", "+", "+", "-", "-", "-", "+"], - "Start": [1, 40, 10, 70, 85, 110, 150, 140, 5, 170, 240, 100], - "End": [11, 60, 25, 80, 100, 115, 180, 152, 150, 200, 260, 200], - "transcript_id": [ - "t1", - "t1", - "T2", - "T2", - "T3", - "T3", - "T3", - "T4", - "T5", - "T5", - "T5", - "T6", - ], - "Feature": ["exon"] * 12, - } -) diff --git a/src/pyranges_plot/matplotlib_base/data2plot.py b/src/pyranges_plot/matplotlib_base/data2plot.py index 267fdc6..fa2e82e 100644 --- a/src/pyranges_plot/matplotlib_base/data2plot.py +++ b/src/pyranges_plot/matplotlib_base/data2plot.py @@ -118,7 +118,11 @@ def apply_gene_bridge( # If transcript structure subtract exons if transcript_str: cds = df[df["Feature"] == "CDS"] - exons 
= df[df["Feature"] == "exon"].subtract_ranges(cds) + exons = df[df["Feature"] == "exon"] + + # if there are exons and cds, subtract + if not cds.empty and not exons.empty: + exons = exons.subtract_ranges(cds) df = pr.concat([cds, exons]) # Define depth order diff --git a/src/pyranges_plot/plot_main.py b/src/pyranges_plot/plot_main.py index f438eb4..7fc4ad9 100644 --- a/src/pyranges_plot/plot_main.py +++ b/src/pyranges_plot/plot_main.py @@ -88,7 +88,7 @@ def plot( they do not overlap) and False for unpacked (one row per gene). color_col: str, default None - Name of the column used to color the genes. + Name of the column used to color the genes. If not specified, id_col will be used. thickness_col: str, default None Name of the data column with max 2 different values to plot the intervals correspondig to one value to diff --git a/src/pyranges_plot/plotly_base/data2plot.py b/src/pyranges_plot/plotly_base/data2plot.py index d686ab6..0aca204 100644 --- a/src/pyranges_plot/plotly_base/data2plot.py +++ b/src/pyranges_plot/plotly_base/data2plot.py @@ -127,7 +127,11 @@ def apply_gene_bridge( # If transcript structure subtract exons if transcript_str: cds = df[df["Feature"] == "CDS"] - exons = df[df["Feature"] == "exon"].subtract_ranges(cds) + exons = df[df["Feature"] == "exon"] + + # if there are exons and cds, subtract + if not cds.empty and not exons.empty: + exons = exons.subtract_ranges(cds) df = pr.concat([cds, exons]) # Define depth order