Skip to content

Commit

Permalink
update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
KenjiKamimoto-ac committed Aug 25, 2021
1 parent 3db5d4a commit 65809f2
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 30 deletions.
85 changes: 56 additions & 29 deletions celloracle/applications/development_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import numpy as np
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
#import h5py

from scipy.stats import wilcoxon
Expand Down Expand Up @@ -171,21 +172,73 @@ def calculate_inner_product(self):

# Calculate inner product between the pseudotime-gradient and the perturb-gradient
self.inner_product = np.array([np.dot(i, j) for i, j in zip(self.flow, self.ref_flow)])
self.inner_product_random = np.array([np.dot(i, j) for i, j in zip(self.flow_rndm, self.ref_flow)])


def calculate_digitized_ip(self, n_bins=10):

inner_product_df = pd.DataFrame({"score": self.inner_product[~self.mass_filter_simulation],
"pseudotime": self.pseudotime_on_grid[~self.mass_filter_simulation]})
"score_randomized_GRN": self.inner_product_random[~self.mass_filter_simulation],
"pseudotime": self.pseudotime_on_grid[~self.mass_filter_simulation]})


bins = _get_bins(inner_product_df.pseudotime, n_bins)
inner_product_df["pseudotime_id"] = np.digitize(inner_product_df.pseudotime, bins)

self.inner_product_df = inner_product_df

def get_p_inner_product(self, method="wilcoxon"):
return get_p_inner_product(inner_product_df=self.inner_product_df, method=method)
def get_negative_PS_p_value(self, pseudotime=None, return_ps_sum=False, plot=False):

df = self.inner_product_df.copy()

if pseudotime is not None:
pseudotime = [i for i in pseudotime if i in list("0123456789")]
df = df[df.pseudotime_id.isin(pseudotime)]

x = df["score"]
y = df["score_randomized_GRN"]

# Clipping positive value to focus on negative IP
x = np.clip(x, -np.inf, 0)
y = np.clip(y, -np.inf, 0)

if plot:
sns.distplot(x)
sns.distplot(y)

# Paired non-parametric test with Wilcoxon's runk sum test
s, p = wilcoxon(x, y, alternative="less")

if return_ps_sum:
return p, -x.sum(), -y.sum()

return p

def get_positive_PS_p_value(self, pseudotime=None, return_ps_sum=False, plot=False):
df = self.inner_product_df.copy()

if pseudotime is not None:
pseudotime = [i for i in pseudotime if i in list("0123456789")]
df = df[df.pseudotime_id.isin(pseudotime)]

x = df["score"]
y = df["score_randomized_GRN"]

# Clipping negative value to focus on positive IP
x = np.clip(x, 0, np.inf)
y = np.clip(y, 0, np.inf)

if plot:
sns.distplot(x)
sns.distplot(y)

# Paired non-parametric test with Wilcoxon's runk sum test
s, p = wilcoxon(x, y, alternative="greater")

if return_ps_sum:
return p, -x.sum(), -y.sum()

return p

def get_sum_of_negative_ips(self):
return get_sum_of_negative_ips(inner_product_df=self.inner_product_df)
Expand Down Expand Up @@ -302,32 +355,6 @@ def _get_bins(array, n_bins):

from scipy.stats import wilcoxon

def get_p_inner_product(inner_product_df, method="wilcoxon"):
"""
"""
dizitized_pseudotimes = np.sort(inner_product_df["pseudotime_id"].unique())

li = []
for i in dizitized_pseudotimes:
pseudotime_ = inner_product_df[inner_product_df["pseudotime_id"]==i].score.values

if method == "wilcoxon":
_, p_ts = wilcoxon(x=pseudotime_, alternative="two-sided")
_, p_greater = wilcoxon(x=pseudotime_, alternative="greater")
_, p_less = wilcoxon(x=pseudotime_, alternative="less")
mean_ = pseudotime_.mean()
median_ = np.median(pseudotime_)
#print(i, p)
li.append([mean_, median_, p_ts, p_greater, p_less])

inner_product_summary = \
pd.DataFrame(np.array(li),
columns=["ip_mean", "ip_median", "p_twosided", "p_less", "p_greater"],
index=dizitized_pseudotimes)

return inner_product_summary


def get_sum_of_positive_ips(inner_product_df):
Expand Down
88 changes: 88 additions & 0 deletions celloracle/applications/systematic_analysis_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,94 @@ def wrapper(misc, pseudotime, n_TFs=20):

return interactive_table

def _interactive_calculate_negative_ps_p_value(self):
"""
8/21/2021. This function is under development
Get p-values for negative PS sum score by comparing nPS to randomized GRN nPS.
"""

if self.hdf5_info is None:
self.get_hdf5_info()

def wrapper(misc, pseudotime, n_TFs=20):
df = self._calculate_negative_ps_p_value(misc=misc, pseudotime=pseudotime)


print(f"Top {n_TFs} in {misc}")
display(HTML(df.iloc[:min(n_TFs, df.shape[0])].to_html()))

interactive_table = interactive(wrapper,
#{'manual': True},
misc=self.hdf5_info["misc_list"],
pseudotime="0,1,2,3,4,5,6,7,8,9",
n_TFs=(5, 50, 1))

return interactive_table

def _update_inner_product_df(self, n_bins=10, verbose=True):
self.del_attrs()

gene_misc_lists = self.hdf5_info["gene_misc_lists"]

li = []
if verbose:
loop = tqdm(gene_misc_lists)
else:
loop = gene_misc_lists

self.del_attrs()

for gene, misc in loop:
self.load_hdf5(gene=gene, misc=misc)
self.calculate_inner_product()
self.calculate_digitized_ip(n_bins=n_bins)
self.dump_hdf5(gene=gene, misc=misc)
# Clear memory
self.del_attrs()


def _calculate_negative_ps_p_value(self, misc, pseudotime="0,1,2,3,4,5,6,7,8,9", verbose=True):

"""
8/21/2021. This function is under development
Get p-values for negative PS sum score by comparing nPS to randomized GRN nPS.
"""

self.del_attrs()

gene_lists = self.hdf5_info["gene_list"]

p_list = []
ps_sums = []
ps_sum_randoms = []
if verbose:
loop = tqdm(gene_lists)
else:
loop = gene_lists

for gene in loop:

self.load_hdf5(gene=gene, misc=misc, specify_attributes=["inner_product_df"])
if "score_randomized_GRN" not in self.inner_product_df.columns:
raise ValueError("please update inner_product_df first")
p, ps_sum, ps_sum_random = self.get_negative_PS_p_value(pseudotime=pseudotime, return_ps_sum=True, plot=False)
p_list.append(p)
ps_sum_randoms.append(ps_sum_random)
ps_sums.append(ps_sum)

# Clear memory
self.del_attrs()

# p-value correction
p_corrected = np.clip(np.array(p_list)*len(gene_lists), 0, 1)

result = pd.DataFrame({"gene": gene_lists, "ps_sum":ps_sums, "ps_sum_random": ps_sum_randoms,
"p": p_list, "p_adj": p_corrected})

result = result.sort_values("ps_sum", ascending=False).reset_index(drop=True)

return result


def estimate_scale_for_visualization(self, return_result=True):

Expand Down
Binary file modified docs/celloracle.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/tutorials/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Prerequisites

- This tutorial assumes that you have some Python programming experience. In particular, we assume you are familiar with Python data science libraries: jupyter, pandas, and matplotlib.

- Also, this tutorial assume that you are familiar with basic scRNA-seq data analysis. In particular, we assume you have some experience of scRNA-seq analysis using `Scanpy and Anndata <https://scanpy.readthedocs.io/en/stable/>`_ , which is a python toolkit for single-cell analysis. You can use scRNA-seq data processed with `Seurat <https://satijalab.org/seurat/>`_ . But the Seurat data need to be converted into Anndata format in advance to CellOracle analysis. See this `tutorial <https://xxx.com>`_ for detail.
- Also, this tutorial assume that you are familiar with basic scRNA-seq data analysis. In particular, we assume you have some experience of scRNA-seq analysis using `Scanpy and Anndata <https://scanpy.readthedocs.io/en/stable/>`_ , which is a python toolkit for single-cell analysis. You can use scRNA-seq data processed with `Seurat <https://satijalab.org/seurat/>`_ . But the Seurat data need to be converted into Anndata format in advance to CellOracle analysis. See this `page <https://morris-lab.github.io/CellOracle.documentation/modules/index.html#command-line-api>`_ for detail.

- CellOracle provides pre-build base-GRN, and it is not necessary to construct custom base-GRN. But if you want to construct custom base-GRN from your scATAC-seq data, we recommend using `Cicero <https://cole-trapnell-lab.github.io/cicero-release/>`_ . In this case, please get used to Cicero, basic scATAC-seq data analysis, and TF motif analysis in advance to start constructing base-GRN.

Expand Down

0 comments on commit 65809f2

Please sign in to comment.