diff --git a/g4x_io/README.md b/g4x_io/README.md
new file mode 100644
index 0000000..c3e4e3c
--- /dev/null
+++ b/g4x_io/README.md
@@ -0,0 +1,24 @@
+# G4X Sample Data
+
+Download and convert a sample dataset from the Singular Genomics G4X Platform to SpatialData. The data will include:
+
+- images:
+  - `h_and_e`: H&E image in RGB
+  - `nuclear`: nuclear stain
+  - `eosin`: eosin stain
+  - `protein`: multi-channel stack of 20 protein images
+- labels:
+  - `nuclei`: Nuclei segmentation instance labels
+  - `nuclei_exp`: Cell segmentation instance labels
+- points:
+  - `transcripts`: Single-molecule transcript locations, gene identities, and metadata
+- shapes:
+  - `nuclei_shapes`: Nuclei segmentation polygons
+  - `nuclei_exp`: Cell segmentation polygons
+- table:
+  - `table`: the cell by gene expression count matrix with cell metadata
+
+### Download
+1. Make a folder `g4x_io` and cd into it.
+2. Download sample dataset with `download.py`
+3. Convert the data into the SpatialData format with `to_zarr.py`
diff --git a/g4x_io/download.py b/g4x_io/download.py
new file mode 100644
index 0000000..e249770
--- /dev/null
+++ b/g4x_io/download.py
@@ -0,0 +1,30 @@
+##
+import os
+from pathlib import Path
+import subprocess
+
+# Singular Genomics sample data
+
+urls = [
+    "https://singular-public-repo.s3.us-west-1.amazonaws.com/g4x_tutorial_dataset.zip"
+]
+
+# download the data into ./data (relative to the current working directory)
+os.makedirs("data", exist_ok=True)
+for url in urls:
+    filename = Path(url).name
+    # Arguments are passed as a list (no shell), so the URL and the output path
+    # are never re-parsed by a shell. --fail makes curl exit with an error on
+    # an HTTP failure instead of saving the error page as the archive, and
+    # --location follows redirects.
+    subprocess.run(
+        ["curl", "--fail", "--location", "-o", f"data/{filename}", url],
+        check=True,
+    )
+
+##
+# unzip the data (-o overwrites existing files without prompting)
+subprocess.run(
+    ["unzip", "-o", "data/g4x_tutorial_dataset.zip", "-d", "data/"],
+    check=True,
+)
diff --git a/g4x_io/to_zarr.py b/g4x_io/to_zarr.py
new file mode 100644
index 0000000..265e104
--- /dev/null
+++ b/g4x_io/to_zarr.py
@@ -0,0 +1,39 @@
+##
+from spatialdata_io import g4x
+import spatialdata as sd
+
+##
+from pathlib import Path
+import shutil
+
+##
+path = Path().resolve()
+# luca's workaround for pycharm: when the working directory is not g4x_io,
+# fall back to the g4x_io subfolder of the working directory
+if not str(path).endswith("g4x_io"):
+    path /= "g4x_io"
+    assert path.exists()
+
+path_read = path / "data"
+path_write = path / "data.zarr"
+
+##
+print("parsing the data... ", end="")
+sdata = g4x(
+    input_path=str(path_read),
+    output_path=str(path_write),
+    include_he=True,
+    include_segmentation=True,
+    include_protein=True,
+    include_transcripts=True,
+    include_tables=True,
+    mode="append",
+)
+print("done")
+
+##
+# read back from the location that was just written; a path relative to the
+# current working directory would miss it when the pycharm workaround applies
+sdata = sd.SpatialData.read(str(path_write))
+print(sdata)
+##