Skip to content

Commit

Permalink
Merge pull request #138 from bioimage-io/fix-hpa-link
Browse files (browse the repository at this point in the history)
Fix cell image links in HPA extension
  • Loading branch information
oeway authored Jun 5, 2024
2 parents 382fa0b + 28ffead commit 1066057
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
16 changes: 9 additions & 7 deletions bioimageio_chatbot/chatbot_extensions/hpa_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,23 @@ async def read_protein_info(self,


async def get_cell_image(self,
gen: str = Field(..., description="Gene name of the protein."),
gene: str = Field(..., description="Gene name of the protein."),
ensembl: str = Field(..., description="Ensembl ID of the protein."),
section: str = Field("subcellular", description="Section of the Human Protein Atlas to search for the protein. Valid options are 'subcellular', 'tissue',")
) -> List[str]:
"""Retrieve a list of cell image links from the Human Protein Atlas, where a specific protein is tagged in the green channel.
The results should be rendered as a horizatal table of images and create link (format: `[![](http://..._thumb.jpg)](http://....jpg)`) to the full-size image without the '_thumb' suffix."""
link_name = f"{ensembl}-{gen}"
ALWAYS render the result thumbnail images as a horizatal table and create link (format: `[![](http://..._thumb.jpg)](http://....jpg)`) to the full-size image without the '_thumb' suffix."""
link_name = f"{ensembl}-{gene}"
http_link = f"https://www.proteinatlas.org/{link_name}/{section}"
# read the source code of the page
response = requests.get(http_link)
if '<p>Not available</p>' in response.text:
return 'No cell image available.'
# Search for image links, capturing the part after 'src="'
pattern = r'src="(?P<url>//images\.proteinatlas\.org/.*?_red_green_thumb\.jpg)"'
image_links = re.findall(pattern, response.text)
# replace the 'red_green' with 'blue_red_green_yellow'
image_links = [link.replace('red_green', 'blue_red_green_yellow') for link in image_links]
# replace the 'red_green' with 'blue_red_green_yellow' if 'blue' not in the link, otherwise replace 'blue_red_green' with 'blue_red_green_yellow'
image_links = [link.replace('red_green', 'blue_red_green_yellow') if 'blue' not in link else link.replace('blue_red_green', 'blue_red_green_yellow') for link in image_links]
# Remove '_thumb' from each link and print or process them
final_image_links = []
for link in image_links:
Expand All @@ -93,7 +95,7 @@ def get_extension():
return ChatbotExtension(
id="hpa",
name="Human Protein Atlas",
description="Search the Human Protein Atlas to find human protein-related information, including gene expressions, functions, locations, disease associations, and cell images etc.",
description="Search the Human Protein Atlas to find human protein-related information, including gene expressions, functions, locations, disease associations, and cell images etc. When searching for cell images, always search for the gene name and Ensembl ID of the protein.",
tools=dict(
search=search_tool,
read=read_tool,
Expand All @@ -111,4 +113,4 @@ async def main():
# test only one image
# print(await extension.tools["inspect"](images=[ImageInfo(url="https://bioimage.io/static/img/bioimage-io-icon.png", title="BioImage.io Icon")], query="What is this?", context_description="Inspect the BioImage.io icon."))
# Run the async function
asyncio.run(main())
asyncio.run(main())
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

[project]
name = "bioimageio-chatbot"
version = "0.2.4"
version = "0.2.5"
readme = "README.md"
description = "Your Personal Assistant in Computational BioImaging."
dependencies = [
Expand Down

0 comments on commit 1066057

Please sign in to comment.