Commit

interactive visualization
KenjiKamimoto-ac committed Dec 1, 2020
1 parent 7066dfe commit 1e89faf
Showing 8 changed files with 1,418 additions and 40 deletions.
325 changes: 316 additions & 9 deletions celloracle/oracle_utility/development_analysis.py
@@ -45,12 +45,20 @@ def subset_oracle_for_development_analysiis(oracle_object, cluster_column_name,
def get_stat_for_inner_product(oracle_object, n_bins=10):

# Prepare data
-inner_product_stats = pd.DataFrame({"score": oracle_object.inner_product,
-                                    "pseudotime": oracle_object.new_pseudotime})
+inner_product_stats = pd.DataFrame({"score": oracle_object.inner_product[~oracle_object.mass_filter],
+                                    "pseudotime": oracle_object.new_pseudotime[~oracle_object.mass_filter]})


bins = _get_bins(inner_product_stats.pseudotime, n_bins)
inner_product_stats["pseudotime_id"] = np.digitize(inner_product_stats.pseudotime, bins) - 1

try:
inner_product_stats["stage"] = oracle_object.stage_grid[~oracle_object.mass_filter]

except:
print("stage_grid not calculated")


# stat test
ps = []
for i in np.sort(inner_product_stats["pseudotime_id"].unique()):
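The binning above assigns each grid point to one of n_bins pseudotime bins before the per-bin statistics are collected. A minimal standalone sketch of the np.digitize step (illustrative values; evenly spaced edges stand in for whatever _get_bins returns):

import numpy as np
pseudotime = np.array([0.05, 0.45, 0.95])
bins = np.linspace(0, 1, 11)
bin_id = np.digitize(pseudotime, bins) - 1   # -> array([0, 4, 9])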
@@ -88,11 +96,17 @@ def extract_data_from_oracle(self, oracle_object, min_mass):
setattr(self.oracle_dev, i, getattr(oracle_object, i))

# 1.2. mass_filter for grid matrix
-self.oracle_dev.mass_filter = mass_filter = oracle_object.total_p_mass < min_mass
+self.oracle_dev.mass_filter = (oracle_object.total_p_mass < min_mass)

## 2. Extract pseudotime data
self.oracle_dev.pseudotime = oracle_object.adata.obs["pseudotime"].values

try:
self.oracle_dev.stage = np.array(oracle_object.adata.obs["Stage"].values)
except:
print("Stage not in data")




def transfer_data_into_grid(self, args={}):
@@ -105,15 +119,29 @@ def transfer_data_into_grid(self, args={}):
grid=self.oracle_dev.flow_grid,
value=self.oracle_dev.pseudotime,
**args)
try:
self.oracle_dev.stage_grid = scatter_value_to_grid_value(embedding=self.oracle_dev.embedding,
grid=self.oracle_dev.flow_grid,
value=self.oracle_dev.stage,
**{"method": "knn_class",
"n_knn": 30})
except:
print("Stage not in data")

-def calculate_gradient_and_inner_product(self, scale_factor=60):
+def calculate_gradient_and_inner_product(self, scale_factor="l2_norm_mean", normalization=None):

# Gradient calculation
-new_pseudotime = self.oracle_dev.new_pseudotime
-n = int(np.sqrt(new_pseudotime.shape[0]))
-new_pseudotime_as_grid = new_pseudotime.reshape(n, n)
-dy, dx = np.gradient(new_pseudotime_as_grid)
-self.oracle_dev.gradient = np.stack([dx.flatten(), dy.flatten()], axis=1) * scale_factor
+gradient = get_gradient(value_on_grid=self.oracle_dev.new_pseudotime.copy())
+
+if normalization == "sqrt":
+    gradient = normalize_gradient(gradient, method="sqrt")
+
+if scale_factor == "l2_norm_mean":
+    # divide gradient by the mean of the l2 norm.
+    l2_norm = np.linalg.norm(gradient, ord=2, axis=1)
+    scale_factor = 1 / l2_norm.mean()
+
+self.oracle_dev.gradient = gradient * scale_factor

# Calculate inner product between the pseudotime-gradient and the perturb-gradient
self.oracle_dev.inner_product = np.array([np.dot(i, j) for i, j in zip(self.oracle_dev.flow, self.oracle_dev.gradient)])
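At each grid point, the score above is the dot product between the simulated perturbation flow vector and the pseudotime gradient, so positive values indicate a shift that points toward larger pseudotime. A vectorized equivalent of the list comprehension (a sketch, assuming both arrays have shape (n_grid_points, 2)):

inner_product = np.einsum("ij,ij->i", self.oracle_dev.flow, self.oracle_dev.gradient)
# same result as np.sum(flow * gradient, axis=1)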
@@ -125,3 +153,282 @@ def calculate_stats(self, n_bins=10):

self.oracle_dev.inner_product_stats = inner_product_stats
self.oracle_dev.inner_product_stats_grouped = inner_product_stats_grouped



class Gradient_based_trajecory():
def __init__(self, adata=None, obsm_key=None, pseudotime_key="pseudotime", cluster_column_name=None, cluster=None, gt=None):

if adata is not None:
self.load_adata(adata=adata, obsm_key=obsm_key,
pseudotime_key=pseudotime_key,cluster_column_name=cluster_column_name,
cluster=cluster)
elif gt is not None:
self.embedding = gt.embedding_whole.copy()
self.embedding_whole = gt.embedding_whole.copy()
self.mass_filter = gt.mass_filter_whole.copy()
self.mass_filter_whole = gt.mass_filter_whole.copy()
self.gridpoints_coordinates = gt.gridpoints_coordinates.copy()
self.pseudotime = gt.pseudotime_whole.copy()

def load_adata(self, adata, obsm_key, pseudotime_key, cluster_column_name=None, cluster=None):

self.embedding = adata.obsm[obsm_key]
self.pseudotime = adata.obs[pseudotime_key].values
self.embedding_whole = self.embedding.copy()
self.pseudotime_whole = self.pseudotime.copy()

if (cluster_column_name is not None) & (cluster is not None):
cells_ix = np.where(adata.obs[cluster_column_name] == cluster)[0]
self.embedding = self.embedding[cells_ix, :]
self.pseudotime = self.pseudotime[cells_ix]





def calculate_mass_filter(self, min_mass=0.01, smooth=0.8, steps=(40, 40), n_neighbors=200, n_jobs=4):

x_min, y_min = self.embedding_whole.min(axis=0)
x_max, y_max = self.embedding_whole.max(axis=0)
xylim = ((x_min, x_max), (y_min, y_max))

total_p_mass, gridpoints_coordinates = calculate_p_mass(self.embedding, smooth=smooth, steps=steps,
n_neighbors=n_neighbors, n_jobs=n_jobs, xylim=xylim)

total_p_mass_whole, _ = calculate_p_mass(self.embedding_whole, smooth=smooth, steps=steps,
n_neighbors=n_neighbors, n_jobs=n_jobs, xylim=xylim)

self.total_p_mass = total_p_mass
self.mass_filter = (total_p_mass < min_mass)
self.mass_filter_whole = (total_p_mass_whole < min_mass)
self.gridpoints_coordinates = gridpoints_coordinates

def transfer_data_into_grid(self, args={}):

if not args:
args = {"method": "knn",
"n_knn": 30}

self.pseudotime_on_grid = scatter_value_to_grid_value(embedding=self.embedding,
grid=self.gridpoints_coordinates,
value=self.pseudotime,
**args)
def calculate_gradient(self, scale_factor=60, normalization=None):

# Gradient calculation
gradient = get_gradient(value_on_grid=self.pseudotime_on_grid.copy())

if normalization == "sqrt":
gradient = normalize_gradient(gradient, method="sqrt")

if scale_factor == "l2_norm_mean":
# divide gradient by the mean of l2 norm.
l2_norm = np.linalg.norm(gradient, ord=2, axis=1)
scale_factor = 1 / l2_norm.mean()

self.gradient = gradient * scale_factor

def visualize_dev_flow(self, scale_for_pseudotime=30, s=10, s_grid=30):
visualize_dev_flow(self, scale_for_pseudotime=scale_for_pseudotime, s=s, s_grid=s_grid)
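Taken together, the methods of Gradient_based_trajecory suggest a workflow along the following lines (a sketch only; adata, "X_umap", and "pseudotime" are placeholder inputs, not fixed by this commit):

gt = Gradient_based_trajecory(adata=adata, obsm_key="X_umap", pseudotime_key="pseudotime")
gt.calculate_mass_filter(min_mass=0.01, smooth=0.8, steps=(40, 40))
gt.transfer_data_into_grid()          # defaults to {"method": "knn", "n_knn": 30}
gt.calculate_gradient(scale_factor="l2_norm_mean", normalization="sqrt")
gt.visualize_dev_flow(scale_for_pseudotime=30)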

def aggregate_GT_object(list_GT_object, base_gt=None):

pseudotime_stack = [i.pseudotime_on_grid for i in list_GT_object]
gradient_stack = [i.gradient for i in list_GT_object]
mass_filter_stack = [i.mass_filter for i in list_GT_object]

new_pseudotime, new_gradient, new_mass_filter = _aggregate_gradients(pseudotime_stack=pseudotime_stack,
gradient_stack=gradient_stack,
mass_filter_stack=mass_filter_stack)

if base_gt is None:
gt = Gradient_based_trajecory(gt=list_GT_object[0])
gt.pseudotime_on_grid = new_pseudotime
gt.gradient = new_gradient

else:
gt = base_gt
gt.pseudotime_on_grid[~new_mass_filter] = new_pseudotime[~new_mass_filter]
gt.gradient[~new_mass_filter, :] = new_gradient[~new_mass_filter, :]

return gt
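Assuming several per-lineage objects have been prepared as above, they could be combined roughly like this (gt_lineage1, gt_lineage2, and gt_whole are hypothetical names):

gt_all = aggregate_GT_object([gt_lineage1, gt_lineage2])
# or, to overwrite only the covered grid points of a whole-embedding object:
gt_all = aggregate_GT_object([gt_lineage1, gt_lineage2], base_gt=gt_whole)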

def _aggregate_gradients(pseudotime_stack, gradient_stack, mass_filter_stack):

new_pseudotime = np.zeros_like(pseudotime_stack[0])
new_pseudotime_count = np.zeros_like(pseudotime_stack[0])
new_gradient = np.zeros_like(gradient_stack[0])
gradient_count = np.zeros_like(gradient_stack[0])
for fil, pt, gra in zip(mass_filter_stack, pseudotime_stack, gradient_stack):
new_pseudotime[~fil] += pt[~fil]
new_pseudotime_count[~fil] +=1
new_gradient[~fil, :] += gra[~fil, :]
gradient_count[~fil, :] += 1

new_pseudotime[new_pseudotime_count != 0] /= new_pseudotime_count[new_pseudotime_count != 0]
new_gradient[gradient_count != 0] /= gradient_count[gradient_count != 0]
new_mass_filter = (gradient_count.sum(axis=1) == 0)

return new_pseudotime, new_gradient, new_mass_filter


def normalize_gradient(gradient, method="sqrt"):
"""
Normalize length of 2D vector
"""

if method == "sqrt":

size = np.sqrt(np.power(gradient, 2).sum(axis=1))
size_sq = np.sqrt(size)
size_sq[size_sq == 0] = 1
factor = np.repeat(np.expand_dims(size_sq, axis=1), 2, axis=1)

return gradient / factor
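With method="sqrt", a gradient vector of length L is rescaled to length sqrt(L) (zero vectors are left untouched), which compresses long vectors more strongly than short ones. A quick check with illustrative numbers:

g = np.array([[3.0, 4.0],    # length 5.0
              [0.0, 0.0]])   # length 0.0
normalize_gradient(g, method="sqrt")   # first row rescaled to length sqrt(5) ≈ 2.24; second row unchanged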

from scipy.stats import norm as normal
from sklearn.neighbors import NearestNeighbors


def calculate_p_mass(embedding, smooth=0.5, steps=(40, 40),
n_neighbors=100, n_jobs=4, xylim=((None, None), (None, None))):
"""Calculate the velocity using a points on a regular grid and a gaussian kernel
Note: the function should work also for n-dimensional grid
Arguments
---------
embedding:
smooth: float, smooth=0.5
Higher value correspond to taking in consideration further points
the standard deviation of the gaussian kernel is smooth * stepsize
steps: tuple, default
the number of steps in the grid for each axis
n_neighbors:
number of neighbors to use in the calculation, bigger number should not change too much the results..
...as soon as smooth is small
Higher value correspond to slower execution time
n_jobs:
number of processes for parallel computing
xymin:
((xmin, xmax), (ymin, ymax))
Returns
-------
total_p_mass: np.ndarray
density at each point of the grid
"""

# Prepare the grid
grs = []
for dim_i in range(embedding.shape[1]):
m, M = np.min(embedding[:, dim_i]), np.max(embedding[:, dim_i])

if xylim[dim_i][0] is not None:
m = xylim[dim_i][0]
if xylim[dim_i][1] is not None:
M = xylim[dim_i][1]

m = m - 0.025 * np.abs(M - m)
M = M + 0.025 * np.abs(M - m)
gr = np.linspace(m, M, steps[dim_i])
grs.append(gr)

meshes_tuple = np.meshgrid(*grs)
gridpoints_coordinates = np.vstack([i.flat for i in meshes_tuple]).T

nn = NearestNeighbors(n_neighbors=n_neighbors, n_jobs=n_jobs)
nn.fit(embedding)
dists, neighs = nn.kneighbors(gridpoints_coordinates)

std = np.mean([(g[1] - g[0]) for g in grs])
# isotropic gaussian kernel
gaussian_w = normal.pdf(loc=0, scale=smooth * std, x=dists)
total_p_mass = gaussian_w.sum(1)
gridpoints_coordinates

return total_p_mass, gridpoints_coordinates
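A minimal call pattern, assuming embedding is an (n_cells, 2) array (the variable names below are illustrative):

total_p_mass, grid_coords = calculate_p_mass(embedding, smooth=0.8, steps=(40, 40), n_neighbors=200)
mass_filter = (total_p_mass < 0.01)   # True for grid points with too little surrounding cell density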

def get_gradient(value_on_grid):
# Gradient calculation
n = int(np.sqrt(value_on_grid.shape[0]))
value_on_grid_as_matrix = value_on_grid.reshape(n, n)
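# note: np.gradient on a 2D array returns the derivative along axis 0 (rows, i.e. y) first, then axis 1 (columns, i.e. x)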
dy, dx = np.gradient(value_on_grid_as_matrix)
gradient = np.stack([dx.flatten(), dy.flatten()], axis=1)

return gradient


def visualize_dev_flow(self, scale_for_pseudotime=30, s=10, s_grid=30):

embedding_whole = self.embedding_whole
embedding_of_interest= self.embedding
mass_filter = self.mass_filter
mass_filter_whole = self.mass_filter_whole
gridpoints_coordinates=self.gridpoints_coordinates

pseudotime_raw = self.pseudotime
pseudotime_on_grid=self.pseudotime_on_grid

gradient_pseudotime=self.gradient


fig, ax = plt.subplots(1, 5, figsize=[25,5])

##
ax_ = ax[0]
ax_.scatter(embedding_whole[:, 0], embedding_whole[:, 1], c="lightgray", s=s)
ax_.scatter(embedding_of_interest[:, 0], embedding_of_interest[:, 1], c=pseudotime_raw, cmap="rainbow", s=s)
ax_.set_title("Pseudotime")
ax_.axis("off")

####
ax_ = ax[1]
ax_.scatter(gridpoints_coordinates[mass_filter, 0], gridpoints_coordinates[mass_filter, 1], s=0)
ax_.scatter(gridpoints_coordinates[~mass_filter_whole, 0], gridpoints_coordinates[~mass_filter_whole, 1],
c="lightgray", s=s_grid)
ax_.scatter(gridpoints_coordinates[~mass_filter, 0], gridpoints_coordinates[~mass_filter, 1],
c=pseudotime_on_grid[~mass_filter], cmap="rainbow", s=s_grid)
ax_.set_title("Pseudotime on grid")
ax_.axis("off")



###
ax_ = ax[2]
#ax_.scatter(gridpoints_coordinates[mass_filter, 0], gridpoints_coordinates[mass_filter, 1], s=0)
ax_.scatter(gridpoints_coordinates[mass_filter, 0], gridpoints_coordinates[mass_filter, 1], s=0)
ax_.scatter(gridpoints_coordinates[~mass_filter_whole, 0], gridpoints_coordinates[~mass_filter_whole, 1],
c="lightgray", s=s_grid)
ax_.scatter(gridpoints_coordinates[~mass_filter, 0], gridpoints_coordinates[~mass_filter, 1],
c=pseudotime_on_grid[~mass_filter], cmap="rainbow", s=s_grid)

ax_.quiver(gridpoints_coordinates[~mass_filter, 0], gridpoints_coordinates[~mass_filter, 1],
gradient_pseudotime[~mass_filter, 0], gradient_pseudotime[~mass_filter, 1],
scale=scale_for_pseudotime)
ax_.set_title("Gradient of pseudotime \n(=Development flow)")
ax_.axis("off")

###
ax_ = ax[3]
#ax_.scatter(gridpoints_coordinates[mass_filter, 0], gridpoints_coordinates[mass_filter, 1], s=0)
ax_.scatter(embedding_whole[:, 0], embedding_whole[:, 1], c="lightgray", s=s)
ax_.quiver(gridpoints_coordinates[~mass_filter, 0], gridpoints_coordinates[~mass_filter, 1],
gradient_pseudotime[~mass_filter, 0], gradient_pseudotime[~mass_filter, 1],
scale=scale_for_pseudotime)
ax_.set_title("Gradient of pseudotime \n(=Development flow)")
ax_.axis("off")

####
ax_ = ax[4]
ax_.scatter(embedding_whole[:, 0], embedding_whole[:, 1], c="lightgray", s=s)
ax_.scatter(embedding_of_interest[:, 0], embedding_of_interest[:, 1], c=pseudotime_raw, cmap="rainbow", s=s)

ax_.quiver(gridpoints_coordinates[~mass_filter, 0], gridpoints_coordinates[~mass_filter, 1],
gradient_pseudotime[~mass_filter, 0], gradient_pseudotime[~mass_filter, 1],
scale=scale_for_pseudotime)
ax_.set_title("Pseudotime + \nDevelopment flow")
ax_.axis("off")
