Added vision examples: a basic single-image prompt and a multiple-image prompt

JakeWalker23 · Sep 13, 2024 · c57f705 · c57f705
1 parent c8c7ee6
commit c57f705
Show file tree

Hide file tree

Showing 10 changed files with 133 additions and 25 deletions.
diff --git a/Image/flip.png b/Image/flip.png
diff --git a/Image/shot.png b/Image/shot.png
diff --git a/README.md b/README.md
@@ -23,5 +23,5 @@ It covers enough to get started with llm prompting. In upcoming additions we wil
 
 ### To run:
 ```
-python {file}.py
+python examples/{file}.py
 ```
diff --git a/stream.py → examples/stream.py b/stream.py → examples/stream.py
diff --git a/examples/vision/vision_basic.py b/examples/vision/vision_basic.py
@@ -0,0 +1,43 @@
+from anthropic import Anthropic
+from dotenv import load_dotenv
+import base64
+import os
+
+# Load variables from a .env file, then read the API key from the environment.
+load_dotenv()
+anthropic_key = os.getenv('ANTHROPIC_API_KEY')
+
+client = Anthropic()
+
+# Images are sent as base64 text: read the raw bytes, then encode them.
+with open('./Image/flip.png', "rb") as png:
+    raw_bytes = png.read()
+base64_string = base64.b64encode(raw_bytes).decode('utf-8')
+
+image_block = {
+    "type": "image",
+    "source": {
+        "type": "base64",
+        "media_type": "image/png",
+        "data": base64_string,
+    },
+}
+
+question_block = {
+    "type": "text",
+    "text": "Do you know what move she is performing?",
+}
+
+# A single user turn: the image first, then the question about it.
+messages = [
+    {"role": "user", "content": [image_block, question_block]},
+]
+
+response = client.messages.create(
+    max_tokens=2048,
+    messages=messages,
+    model="claude-3-5-sonnet-20240620",
+)
+
+# Print the text of the first content block in the reply.
+print(response.content[0].text)
diff --git a/examples/vision/vision_multiple.py b/examples/vision/vision_multiple.py
@@ -0,0 +1,33 @@
+"""Ask Claude about two labelled images in one request; run from the repo root (image paths are CWD-relative)."""
+import os
+import sys
+
+from anthropic import Anthropic
+from dotenv import load_dotenv
+
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".."))  # repo root holds helpers/
+from helpers.image_source import create_image_message  # noqa: E402  (needs the sys.path entry above)
+
+load_dotenv()  # Anthropic() picks up ANTHROPIC_API_KEY from the environment
+client = Anthropic()
+
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "Image 1:"},  # When working with less capable models, labelling images can be very helpful.
+            create_image_message('./Image/flip.png'),
+            {"type": "text", "text": "Image 2:"},
+            create_image_message('./Image/shot.png'),
+            {"type": "text", "text": "You have perfect vision and pay great attention to detail in images. How many people flags are in this picture? Some of the flags may be partially obscured or cut off in the image or may be visible. Please count flags even if you can only see a single aspect. Before providing the answer in <answer> tags, think step by step in <thinking> tags and analyze every part of the image."},
+        ],
+    },
+]
+
+response = client.messages.create(
+    model="claude-3-haiku-20240307",  # Different models have different capabilities.
+    max_tokens=2048,
+    messages=messages,
+)
+
+print(response.content[0].text)
diff --git a/helpers/__init__.py b/helpers/__init__.py
diff --git a/helpers/image_source.py b/helpers/image_source.py
@@ -0,0 +1,23 @@
+import base64
+import mimetypes
+
+def create_image_message(image_path):
+    """Return a base64 image content block for the file at image_path.
+
+    Raises ValueError if the extension does not map to an image MIME type.
+    """
+    with open(image_path, "rb") as image_file:
+        binary_data = image_file.read()
+
+    # guess_type returns None for unknown extensions; fail here, not at the API.
+    mime_type, _ = mimetypes.guess_type(image_path)
+    if mime_type is None or not mime_type.startswith("image/"):
+        raise ValueError(f"Cannot determine an image MIME type for {image_path!r}")
+    return {
+        "type": "image",
+        "source": {
+            "type": "base64",
+            "media_type": mime_type,
+            "data": base64.b64encode(binary_data).decode("utf-8"),
+        },
+    }
diff --git a/vision.py b/vision.py
diff --git a/vision_json.py b/vision_json.py
@@ -0,0 +1,33 @@
+from helpers.image_source import create_image_message
+
+from anthropic import Anthropic
+from dotenv import load_dotenv
+import os
+
+# Load variables from .env; the key is read here, but the client below is built without arguments.
+load_dotenv()
+anthropic_key = os.getenv('ANTHROPIC_API_KEY')
+client = Anthropic()
+
+# Build the image blocks first (same order as they appear in the message), then the question.
+first_image = create_image_message('./Image/flip.png')
+second_image = create_image_message('./Image/shot.png')
+question = {"type": "text", "text": "You have perfect vision and pay great attention to detail in images. How many people flags are in this picture? Some of the flags may be partially obscured or cut off in the image or may be visible. Please count flags even if you can only see a single aspect. Before providing the answer in <answer> tags, think step by step in <thinking> tags and analyze every part of the image."}
+
+content = [
+    {"type": "text", "text": "Image 1:"},  # Labelling images can be very helpful with less capable models.
+    first_image,
+    {"type": "text", "text": "Image 2:"},
+    second_image,
+    question,
+]
+messages = [{"role": "user", "content": content}]
+
+response = client.messages.create(
+    messages=messages,
+    max_tokens=2048,
+    model="claude-3-haiku-20240307",  # Different models have different capabilities.
+)
+
+# Show the text of the reply's first content block.
+print(response.content[0].text)