Skip to content

Commit

Permalink
Merge pull request #76 from aertslab/genome_object
Browse files Browse the repository at this point in the history
Genome class to handle fasta files and chromsizes throughout package
  • Loading branch information
LukasMahieu authored Dec 10, 2024
2 parents d730793 + 01a46e7 commit 955bb1d
Show file tree
Hide file tree
Showing 23 changed files with 1,438 additions and 1,101 deletions.
2 changes: 2 additions & 0 deletions docs/api/datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ Downloading of use case datasets which are explored in the example analyses.
get_dataset
get_motif_db
Genome
register_genome
```
22 changes: 13 additions & 9 deletions docs/references.bib
Original file line number Diff line number Diff line change
@@ -1,12 +1,3 @@
@article{hu2023single,
title={Single-cell multi-scale footprinting reveals the modular organization of DNA regulatory elements},
author={Hu, Yan and Ma, Sai and Kartha, Vinay K and Duarte, Fabiana M and Horlbeck, Max and Zhang, Ruochi and Shrestha, Rojesh and Labade, Ajay and Kletzien, Heidi and Meliki, Alia and others},
journal={bioRxiv},
pages={2023--03},
year={2023},
publisher={Cold Spring Harbor Laboratory}
}

@misc{shrikumar2021tfmodisco,
author = {Av Shrikumar and Katherine Tian and annashcherbina and Žiga Avsec and Amr and Charles McAnany and pgreenside and Surag Nair and mhfzsharmin and Stefan Holderbach and Rosa Ma},
title = {{kundajelab/tfmodisco: Nicer API for density-adaptive hit scoring (v0.5.14.1)}},
Expand All @@ -26,3 +17,16 @@ @article{Virshup_2023
title = {The scverse project provides a computational ecosystem for single-cell omics data analysis},
journal = {Nature Biotechnology}
}

@article{Zhang_2024,
doi = {10.1038/s41592-023-02139-9},
url = {https://doi.org/10.1038/s41592-023-02139-9},
year = 2024,
month = {mar},
publisher = {Springer Nature},
author = {Kai Zhang and Nicholas R. Zemke and Evan J. Armand and others},
title = {A fast, scalable and versatile tool for analysis of single-cell omics data},
journal = {Nature Methods},
volume = {21},
pages = {217--227}
}
47 changes: 32 additions & 15 deletions docs/tutorials/enhancer_code_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand All @@ -54,9 +54,12 @@
"adata = anndata.read_h5ad(\"mouse_biccn_data_filtered.h5ad\")\n",
"\n",
"genome_file = \"/home/VIB.LOCAL/niklas.kempynck/nkemp/software/dev_DeepPeak/DeepPeak/data/raw_mm/genome.fa\"\n",
"\n",
"genome = crested.Genome(genome_file)\n",
"\n",
"datamodule = crested.tl.data.AnnDataModule(\n",
" adata,\n",
" genome_file=genome_file,\n",
" genome,\n",
")"
]
},
Expand Down Expand Up @@ -315,7 +318,7 @@
],
"source": [
"%matplotlib inline\n",
"top_k=1000\n",
"top_k = 1000\n",
"crested.pl.patterns.modisco_results(\n",
" classes=[\"Astro\", \"L5ET\", \"Vip\", \"Oligo\"],\n",
" contribution=\"positive\",\n",
Expand Down Expand Up @@ -691,7 +694,10 @@
" verbose=True, # Useful for doing sanity checks on matching patterns\n",
")\n",
"pattern_matrix = crested.tl.modisco.create_pattern_matrix(\n",
" classes=list(adata.obs_names), all_patterns=all_patterns, normalize=False, pattern_parameter='seqlet_count_log'\n",
" classes=list(adata.obs_names),\n",
" all_patterns=all_patterns,\n",
" normalize=False,\n",
" pattern_parameter=\"seqlet_count_log\",\n",
")\n",
"pattern_matrix.shape"
]
Expand Down Expand Up @@ -722,13 +728,21 @@
],
"source": [
"import matplotlib\n",
"\n",
"%matplotlib inline\n",
"matplotlib.rcParams['pdf.fonttype'] = 42\n",
"matplotlib.rcParams['ps.fonttype'] = 42\n",
"matplotlib.rcParams[\"pdf.fonttype\"] = 42\n",
"matplotlib.rcParams[\"ps.fonttype\"] = 42\n",
"\n",
"pat_seqs = crested.tl.modisco.generate_nucleotide_sequences(all_patterns)\n",
"crested.pl.patterns.clustermap(\n",
" pattern_matrix, list(adata.obs_names), figsize=(16, 4.2), pat_seqs=pat_seqs, grid=True, fig_path='paperfigs/motif_clustering.pdf', dendrogram_ratio=(0.03,0.15), importance_threshold=4.5\n",
" pattern_matrix,\n",
" list(adata.obs_names),\n",
" figsize=(16, 4.2),\n",
" pat_seqs=pat_seqs,\n",
" grid=True,\n",
" fig_path=\"paperfigs/motif_clustering.pdf\",\n",
" dendrogram_ratio=(0.03, 0.15),\n",
" importance_threshold=4.5,\n",
")"
]
},
Expand Down Expand Up @@ -791,12 +805,12 @@
"crested.pl.patterns.clustermap(\n",
" pattern_matrix,\n",
" classes=list(adata.obs_names),\n",
" subset=['L2_3IT', 'L5ET', 'L5IT', 'L5_6NP', 'L6CT', 'L6IT','L6b'],\n",
" subset=[\"L2_3IT\", \"L5ET\", \"L5IT\", \"L5_6NP\", \"L6CT\", \"L6IT\", \"L6b\"],\n",
" figsize=(10, 2),\n",
" pat_seqs=pat_seqs,\n",
" grid=True,\n",
" dy=0.0025,\n",
" importance_threshold=4.5\n",
" importance_threshold=4.5,\n",
")"
]
},
Expand Down Expand Up @@ -1022,6 +1036,7 @@
],
"source": [
"import crested\n",
"\n",
"file_path = \"/home/VIB.LOCAL/niklas.kempynck/nkemp/mouse/biccn/Mouse_rna.h5ad\" # Locate h5 file containing scRNAseq data\n",
"cell_type_column = \"subclass_Bakken_2022\"\n",
"mean_expression_df = crested.tl.modisco.calculate_mean_expression_per_cell_type(\n",
Expand All @@ -1046,7 +1061,9 @@
}
],
"source": [
"crested.pl.patterns.tf_expression_per_cell_type(mean_expression_df, ['Nfia', 'Spi1', 'Mef2c'])"
"crested.pl.patterns.tf_expression_per_cell_type(\n",
" mean_expression_df, [\"Nfia\", \"Spi1\", \"Mef2c\"]\n",
")"
]
},
{
Expand Down Expand Up @@ -1486,7 +1503,7 @@
" normalize_gex=True,\n",
" min_tf_gex=0.95,\n",
" importance_threshold=5,\n",
" pattern_parameter='seqlet_count_log',\n",
" pattern_parameter=\"seqlet_count_log\",\n",
" filter_correlation=True,\n",
" verbose=True,\n",
" zscore_threshold=1,\n",
Expand Down Expand Up @@ -1528,13 +1545,13 @@
"source": [
"crested.pl.patterns.clustermap_tf_motif(\n",
" tf_ct_matrix,\n",
" heatmap_dim='contrib',\n",
" dot_dim='gex',\n",
" heatmap_dim=\"contrib\",\n",
" dot_dim=\"gex\",\n",
" class_labels=classes,\n",
" pattern_labels=tf_pattern_annots,\n",
" fig_size=(35,6),\n",
" fig_size=(35, 6),\n",
" cluster_rows=True,\n",
" cluster_columns=False\n",
" cluster_columns=False,\n",
")"
]
},
Expand Down
Loading

0 comments on commit 955bb1d

Please sign in to comment.