Skip to content

Commit

Permalink
Add cxg_immune_cell_atlas dataset to test resources
Browse files Browse the repository at this point in the history
  • Loading branch information
lazappi committed Oct 1, 2024
1 parent bf274d8 commit 1c5907e
Showing 1 changed file with 48 additions and 0 deletions.
48 changes: 48 additions & 0 deletions src/datasets/resource_test_scripts/cxg_immune_cell_atlas.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash

DATASET_DIR=resources_test/common


mkdir -p $DATASET_DIR

wget https://raw.githubusercontent.com/theislab/scib/c993ffd9ccc84ae0b1681928722ed21985fb91d1/scib/resources/g2m_genes_tirosh.txt -O $DATASET_DIR/temp_g2m_genes_tirosh_mm.txt
wget https://raw.githubusercontent.com/theislab/scib/c993ffd9ccc84ae0b1681928722ed21985fb91d1/scib/resources/s_genes_tirosh.txt -O $DATASET_DIR/temp_s_genes_tirosh_mm.txt
KEEP_FEATURES=`cat $DATASET_DIR/temp_g2m_genes_tirosh_mm.txt $DATASET_DIR/temp_s_genes_tirosh_mm.txt | paste -sd ":" -`

cat > "/tmp/params.yaml" << HERE
param_list:
- id: cxg_immune_cell_atlas
species: homo_sapiens
census_version: "2023-07-25"
obs_value_filter: "dataset_id == '1b9d8702-5af8-4142-85ed-020eb06ec4f6' and donor_id in ['A29', 'A31', 'A35']"
obs_batch: donor_id
dataset_name: Immune Cell Atlas
dataset_summary: Cross-tissue immune cell analysis reveals tissue-specific features in humans
dataset_description: Despite their crucial role in health and disease, our knowledge of immune cells within human tissues remains limited. We surveyed the immune compartment of 16 tissues from 12 adult donors by single-cell RNA sequencing and VDJ sequencing generating a dataset of ~360,000 cells. To systematically resolve immune cell heterogeneity across tissues, we developed CellTypist, a machine learning tool for rapid and precise cell type annotation. Using this approach, combined with detailed curation, we determined the tissue distribution of finely phenotyped immune cell types, revealing hitherto unappreciated tissue-specific features and clonal architecture of T and B cells. Our multitissue approach lays the foundation for identifying highly resolved immune cell types by leveraging a common reference dataset, tissue-integrated expression analysis, and antigen receptor sequencing.
dataset_url: https://cellxgene.cziscience.com/collections/62ef75e4-cbea-454e-a0ce-998ec40223d3
dataset_reference: dominguez2022crosstissue
dataset_organism: homo_sapiens
normalization_methods: [log_cp10k]
n_obs: 600
n_vars: 1500
output_dataset: '\$id/dataset.h5ad'
output_meta: '\$id/dataset_metadata.yaml'
output_state: '\$id/state.yaml'
output_raw: force_null
output_normalized: force_null
output_pca: force_null
output_hvg: force_null
output_knn: force_null
publish_dir: $DATASET_DIR
do_subsample: true
keep_features: '$KEEP_FEATURES'
HERE

nextflow run . \
-main-script target/nextflow/datasets/workflows/process_cellxgene_census/main.nf \
-c src/wf_utils/labels_ci.config \
-profile docker \
-params-file "/tmp/params.yaml"

rm -r $DATASET_DIR/temp_*

0 comments on commit 1c5907e

Please sign in to comment.