Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adds a way to see available borzoi tracks #16

Merged
merged 1 commit into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion examples/promoter_activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)

response = client.send_request(query)
print(response)
print("Single-query response:", response)


# In this next example we pull the promoter files from a fasta file and send them
Expand All @@ -39,6 +39,8 @@
},
)

print("Now sending 100 requests, by batches of 10")
print("Writing results to promoter_activity.jsonl...")
output_file = Path(__file__).parent / "outputs" / "promoter_activity.jsonl"
for batch_result in client.send_requests_by_batches(queries, batch_size=10):
for query_result in batch_result:
Expand Down
30 changes: 30 additions & 0 deletions ginkgo_ai_client/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
from typing import Dict, Optional, Any, List, Literal, Union
from abc import ABC, abstractmethod
from pathlib import Path
from functools import lru_cache
import json
import yaml
import tempfile

import pydantic
import requests
import pandas

from ginkgo_ai_client.utils import (
fasta_sequence_iterator,
Expand Down Expand Up @@ -405,6 +407,34 @@ def list_with_promoter_from_fasta(
)
return list(iterator)

@classmethod
@lru_cache(maxsize=1)
def _get_full_tissue_dataframe(cls):
    """Download the full tracks table and return it as a pandas DataFrame.

    The table is read as CSV straight from a Google Drive download URL.
    The result is memoized with ``lru_cache`` (a single entry), so repeated
    calls on the same class reuse the frame that was already loaded instead
    of fetching the file again.
    """
    drive_file_id = "13eQTxjqW3KMCzbaRYUSbZiyzXCaNYTIg"
    download_url = f"https://drive.google.com/uc?export=download&id={drive_file_id}"
    return pandas.read_csv(download_url)

@classmethod
def get_tissue_track_dataframe(
    cls, tissue: Optional[str] = None, assay: Optional[str] = None
) -> pandas.DataFrame:
    """Return a pandas DataFrame with the tissues and their corresponding tracks.

    The full table is obtained from ``cls._get_full_tissue_dataframe()`` and
    optionally narrowed down on its ``sample`` and ``assay`` columns.

    Parameters
    ----------
    tissue: str, optional
        If provided, only rows whose ``sample`` value contains this pattern
        are returned. The match is case-insensitive.
    assay: str, optional
        If provided, only rows whose ``assay`` value contains this pattern
        are returned. The match is case-sensitive.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame holding the matching rows. It is always a copy, so
        callers may modify it freely.

    Notes
    -----
    Both patterns are handed to ``Series.str.contains`` with its default
    ``regex=True``, so they are interpreted as regular expressions.
    """
    df = cls._get_full_tissue_dataframe()
    if tissue is not None:
        # na=False: rows with a missing sample never match, instead of
        # producing a NaN mask that boolean indexing would reject.
        df = df[df["sample"].str.contains(tissue, case=False, na=False)]
    if assay is not None:
        df = df[df["assay"].str.contains(assay, na=False)]
    # Without any filter `df` is still the shared frame returned by the
    # loader; copying keeps caller-side mutations from leaking back into it.
    return df.copy()


## ---- DIFFUSION QUERIES ---------------------------------------------------------

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies = [
"requests",
"tqdm",
"pyyaml",
"pandas",
]

[project.optional-dependencies]
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ requests==2.32.3
biopython==1.82.0
pytest-xdist==3.6.1
pytest-cov==4.0.0
pandas==2.1.2

sphinx==8.1.3
docutils==0.21.2
Expand Down
5 changes: 5 additions & 0 deletions test/test_query_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def test_promoter_activity_iteration():
assert len(queries) == 50


def test_get_tissue_tracks():
    """Filtering the tracks on tissue "heart" and assay "DNASE" gives 22 rows."""
    heart_dnase_tracks = PromoterActivityQuery.get_tissue_track_dataframe(
        tissue="heart",
        assay="DNASE",
    )
    assert len(heart_dnase_tracks) == 22


@pytest.mark.parametrize(
"filename, expected_sequences",
[
Expand Down
Loading