Skip to content

Commit

Permalink
Merge pull request #13 from DendrouLab/CRG_compat_with_panpipes_dev
Browse files Browse the repository at this point in the history
updating with sc_pipeline_muon_dev
  • Loading branch information
crichgriffin authored Mar 9, 2023
2 parents 0397275 + b4fe99e commit 49f7393
Show file tree
Hide file tree
Showing 24 changed files with 630 additions and 338 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Available pipelines:
See [installation instructions here](https://github.com/DendrouLab/panpipes/blob/main/docs/install.md)
Review this issue before installation: https://github.com/DendrouLab/panpipes/issues/11

<!-- Oxford BMRC Rescomp users find additional advice in [docs/installation_rescomp](https://github.com/DendrouLab/panpipes/blob/main/docs/installation_rescomp.md) -->
Oxford BMRC Rescomp users can find additional advice in [docs/installation_rescomp](https://github.com/DendrouLab/panpipes/blob/main/docs/installation_rescomp.md)

# General principles for running pipelines

Expand Down
2 changes: 1 addition & 1 deletion docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ conda activate pipeline_env
we include an environment.yml for a conda environment tested on all the pipelines packaged in this version of Panpipes.

##### Step 2 Download and install this repo

If you have not already set up SSH keys for GitHub, first follow these [instructions](https://github.com/DendrouLab/panpipes/blob/main/docs/set_up_ssh_keys_for_github.md):

```
git clone https://github.com/DendrouLab/panpipes
Expand Down
25 changes: 25 additions & 0 deletions docs/set_up_ssh_keys_for_github.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

## Set up SSH key for GitHub
For more advice: https://docs.github.com/en/authentication/connecting-to-github-with-ssh/about-ssh

First check for existing keys. If you receive an error that ~/.ssh doesn't exist, then you don't have a key yet. If one already exists (e.g. id_rsa.pub, id_ed25519.pub), you can either connect it to GitHub or generate a new one.
```
ls -al ~/.ssh #check for existing keys
ssh-keygen -t ed25519 -C "your_email@example.com" #use your GitHub email address
#Enter a file in which to save the key (/c/Users/you/.ssh/id_algorithm):[Press enter]
#Enter passphrase (empty for no passphrase): [Type a passphrase]
eval "$(ssh-agent -s)" #start ssh-agent
ssh-add ~/.ssh/id_ed25519 #add your SSH private key to ssh-agent
clip < ~/.ssh/id_ed25519.pub #copy SSH public key
```
After copying your SSH public key, go to GitHub --> Settings --> SSH and GPG keys (under Access) --> Add new public SSH key

To test connection
```
ssh -T git@github.com
```
A successful connection should result in
> Hi username! You've successfully authenticated, but GitHub does not provide shell access.
Activate the environment
```
60 changes: 52 additions & 8 deletions panpipes/funcs/scmethods.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from scipy.sparse import issparse
from scanpy.get import obs_df as get_obs_df
from scanpy.pp import normalize_total
import scanpy as sc
import warnings
import logging
from typing import Optional, Literal
Expand Down Expand Up @@ -38,9 +39,53 @@ def exp_mean_dense(x):
# convert out of compressed sparse matrix
return np.log((np.sum(np.exp(x)-1)/x.shape[1]) + 1)



def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfcdiff=0.25, use_dense=False):
def find_all_markers_pseudo_seurat(
        adata,
        groups,
        groupby,
        layer=None,
        method=None,
        n_genes=float("inf"),
        corr_method="bonferroni",
        arg_minpct=0.1,
        arg_mindiffpct=-float("inf"),
        arg_logfcdiff=0.25):
    """Find marker genes for each group: filter genes first, then rank them.

    For every group a one-vs-rest comparison is run: genes are pre-filtered
    with ``pseudo_seurat`` and the surviving genes are ranked with
    ``sc.tl.rank_genes_groups``.

    NOTE: this function modifies ``adata`` in place. When ``layer`` is given,
    ``adata.X`` is overwritten with ``adata.layers[layer]``, and an ``idents``
    column is written to ``adata.obs`` (it holds the labels of the last group
    processed when the function returns).

    Parameters
    ----------
    adata
        Annotated data object providing ``X``, ``layers`` and ``obs``.
    groups
        Iterable of group labels to test, or the string ``'all'`` to test
        every unique value found in ``adata.obs[groupby]``.
    groupby
        Name of the ``adata.obs`` column holding the group labels.
    layer
        Optional layer name; if given it replaces ``adata.X`` and is also
        forwarded to ``sc.tl.rank_genes_groups``.
    method
        Forwarded to ``sc.tl.rank_genes_groups`` as ``method``.
    n_genes
        Forwarded to ``sc.tl.rank_genes_groups`` as ``n_genes``.
    corr_method
        Forwarded to ``sc.tl.rank_genes_groups`` as ``corr_method``.
    arg_minpct, arg_mindiffpct, arg_logfcdiff
        Filtering thresholds forwarded unchanged to ``pseudo_seurat``.

    Returns
    -------
    markers
        Concatenation of the per-group ``sc.get.rank_genes_groups_df``
        results, keyed by group label.
    filter_stats
        Concatenation of the per-group ``pseudo_seurat`` results, keyed by
        group label.
    """
    # Replace X with the requested layer (in place, see NOTE above).
    if layer is not None:
        adata.X = adata.layers[layer]
    # pseudo_seurat needs to know whether the matrix is sparse or not.
    use_dense = not issparse(adata.X)
    # Only treat the literal string 'all' as the wildcard; comparing an
    # array-like of labels against a string would not yield a plain bool.
    if isinstance(groups, str) and groups == 'all':
        groups = adata.obs[groupby].unique().tolist()
    markers_dict = {}
    filter_dict = {}
    for cv in groups:
        # Set up idents: '1' for cells in the current group, '0' otherwise.
        adata.obs['idents'] = ['1' if x == cv else '0' for x in adata.obs[groupby]]
        filter_dict[cv] = pseudo_seurat(adata,
                                        use_dense=use_dense,
                                        arg_minpct=arg_minpct,
                                        arg_mindiffpct=arg_mindiffpct,
                                        arg_logfcdiff=arg_logfcdiff)
        # The 'background' column of the filter result selects the genes kept.
        keep_genes = filter_dict[cv]['background']
        logging.info("number of genes remaining after filtering: %i\n", keep_genes.sum())
        adata_rg = adata[:, keep_genes.tolist()].copy()
        # Honour the caller's n_genes / corr_method instead of hard-coding them.
        sc.tl.rank_genes_groups(adata_rg,
                                layer=layer,
                                groupby="idents",
                                groups=["1"],
                                reference="0",
                                method=method,
                                n_genes=n_genes,
                                corr_method=corr_method)
        markers_dict[cv] = sc.get.rank_genes_groups_df(adata_rg, group="1")
        # Drop the per-group copy before the next iteration to limit memory use.
        del adata_rg
    markers = pd.concat(markers_dict.values(), keys=markers_dict.keys())
    filter_stats = pd.concat(filter_dict.values(), keys=filter_dict.keys())
    return markers, filter_stats

def pseudo_seurat(adata,
arg_minpct=0.1,
arg_mindiffpct=-float("inf"),
arg_logfcdiff=0.25,
use_dense=False):
"""
alternative method that"s more like seurat (pseudo seurat if you will)
In that you filter genes before running rank genes
Expand Down Expand Up @@ -79,7 +124,6 @@ def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfc
min_pct = pcts.min(axis=1)
diff_pct = max_pct - min_pct
take_diff_pct = diff_pct > arg_mindiffpct

# remove genes that are not expressed higher than 0.1 in one of the groups
take_min_pct = max_pct > arg_minpct

Expand All @@ -88,7 +132,7 @@ def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfc
# this has the potential to be very slow. Transposing it speeds it up a bit.
# I need to understand sparse matrices better to make it work
if use_dense:
print("using dense matrix")
logging.info("using dense matrix")
# extract the counts for cluster cells and calculate exp means on each row
nct = adata.X.T[:, cluster_cells_ind]
cluster_mean = np.apply_along_axis(exp_mean_dense, 1, nct.todense())
Expand All @@ -98,7 +142,7 @@ def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfc
other_mean = np.apply_along_axis(exp_mean_dense, 1, nct.todense())
diff_mean = abs(cluster_mean - other_mean)
else:
print("using sparse matrix")
logging.info("using sparse matrix")
cluster_mean = exp_mean_sparse(adata.X.T[:, cluster_cells_ind])
other_mean = exp_mean_sparse(adata.X.T[:, other_cells_ind])
diff_mean = abs(cluster_mean - other_mean).A1
Expand All @@ -122,7 +166,7 @@ def run_neighbors_method_choice(adata, method, n_neighbors, n_pcs, metric, use_r
# useful if we are dealing with a MuData object but we want to use single rep, e.g.
# calculating neighbors on a totalVI latent rep
if method == "scanpy":
print("Computing neighbors using scanpy")
logging.info("Computing neighbors using scanpy")
from scanpy.pp import neighbors
neighbors(adata,
n_pcs=n_pcs,
Expand All @@ -131,7 +175,7 @@ def run_neighbors_method_choice(adata, method, n_neighbors, n_pcs, metric, use_r
use_rep=use_rep)
elif method == "hnsw":
from scvelo.pp import neighbors
print("Computing neighbors using hnswlib (with scvelo a la pegasus!)")
logging.info("Computing neighbors using hnswlib (with scvelo a la pegasus!)")
# we use the neighbors function from scvelo (thanks!)
# with parameters from pegasus (for a more exact result).
# code snippet from Steve Sansom, via COMBAT project
Expand Down
Loading

0 comments on commit 49f7393

Please sign in to comment.