Merge pull request #138 from bioimage-io/fix-hpa-link

Fix cell image links in HPA extension
bioimage-io · Jun 5, 2024 · 1066057 · 1066057
2 parents 382fa0b + 28ffead
commit 1066057
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 8 deletions.
diff --git a/bioimageio_chatbot/chatbot_extensions/hpa_extension.py b/bioimageio_chatbot/chatbot_extensions/hpa_extension.py
@@ -62,21 +62,23 @@ async def read_protein_info(self,
 
 
     async def get_cell_image(self,
-        gen: str = Field(..., description="Gene name of the protein."),
+        gene: str = Field(..., description="Gene name of the protein."),
         ensembl: str = Field(..., description="Ensembl ID of the protein."),
         section: str = Field("subcellular", description="Section of the Human Protein Atlas to search for the protein. Valid options are 'subcellular', 'tissue',")
         ) -> List[str]:
         """Retrieve a list of cell image links from the Human Protein Atlas, where a specific protein is tagged in the green channel. 
-        The results should be rendered as a horizatal table of images and create link (format: `[![](http://..._thumb.jpg)](http://....jpg)`) to the full-size image without the '_thumb' suffix."""
-        link_name = f"{ensembl}-{gen}"
+        ALWAYS render the result thumbnail images as a horizontal table and create a link (format: `[![](http://..._thumb.jpg)](http://....jpg)`) to the full-size image without the '_thumb' suffix."""
+        link_name = f"{ensembl}-{gene}"
         http_link = f"https://www.proteinatlas.org/{link_name}/{section}"
         # read the source code of the page
         response = requests.get(http_link)
+        if '<p>Not available</p>' in response.text:
+            return 'No cell image available.'
         # Search for image links, capturing the part after 'src="'
         pattern = r'src="(?P<url>//images\.proteinatlas\.org/.*?_red_green_thumb\.jpg)"'
         image_links = re.findall(pattern, response.text)
-        # replace the 'red_green' with 'blue_red_green_yellow' 
-        image_links = [link.replace('red_green', 'blue_red_green_yellow') for link in image_links]
+        # replace the 'red_green' with 'blue_red_green_yellow' if 'blue' not in the link, otherwise replace 'blue_red_green' with 'blue_red_green_yellow'
+        image_links = [link.replace('red_green', 'blue_red_green_yellow') if 'blue' not in link else link.replace('blue_red_green', 'blue_red_green_yellow') for link in image_links]
         # Remove '_thumb' from each link and print or process them
         final_image_links = []
         for link in image_links:
@@ -93,7 +95,7 @@ def get_extension():
     return ChatbotExtension(
         id="hpa",
         name="Human Protein Atlas",
-        description="Search the Human Protein Atlas to find human protein-related information, including gene expressions, functions, locations, disease associations, and cell images etc.",
+        description="Search the Human Protein Atlas to find human protein-related information, including gene expressions, functions, locations, disease associations, and cell images etc. When searching for cell images, always search for the gene name and Ensembl ID of the protein.",
         tools=dict(
             search=search_tool,
             read=read_tool,
@@ -111,4 +113,4 @@ async def main():
         # test only one image
         # print(await extension.tools["inspect"](images=[ImageInfo(url="https://bioimage.io/static/img/bioimage-io-icon.png", title="BioImage.io Icon")], query="What is this?", context_description="Inspect the BioImage.io icon."))
     # Run the async function
-    asyncio.run(main())
+    asyncio.run(main())
diff --git a/pyproject.toml b/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "bioimageio-chatbot"
-version = "0.2.4"
+version = "0.2.5"
 readme = "README.md"
 description = "Your Personal Assistant in Computational BioImaging."
 dependencies = [