From 454eeaa991a1b504438fd72855aa63ceb3caf6fc Mon Sep 17 00:00:00 2001 From: Sarma Date: Thu, 21 Nov 2024 16:04:03 -0800 Subject: [PATCH] Update and Verify tweetsgenerator Example (#1058) * Add new images * Remove param?? * Fixes * Add 4o instead * Readme --- .../publish_executor_containers.yaml | 4 ++ examples/tweetsgenerator/README.md | 3 + examples/tweetsgenerator/docker-compose.yml | 4 +- examples/tweetsgenerator/workflow.py | 69 +++++++++++++------ 4 files changed, 58 insertions(+), 22 deletions(-) diff --git a/.github/workflows/publish_executor_containers.yaml b/.github/workflows/publish_executor_containers.yaml index 387b0ca88..603dad50f 100644 --- a/.github/workflows/publish_executor_containers.yaml +++ b/.github/workflows/publish_executor_containers.yaml @@ -44,3 +44,7 @@ jobs: docker push tensorlake/pdf-structured-extraction-inkwell-example docker push tensorlake/openai-image docker push tensorlake/base-image + docker push tensorlake/base-image:3.10 + docker push tensorlake/base-image:3.11 + docker push tensorlake/openai-image:3.10 + docker push tensorlake/openai-image:3.11 diff --git a/examples/tweetsgenerator/README.md b/examples/tweetsgenerator/README.md index d590dbdff..5eba21a6b 100644 --- a/examples/tweetsgenerator/README.md +++ b/examples/tweetsgenerator/README.md @@ -15,6 +15,9 @@ This project demonstrates how to build a tweet generation and ranking pipeline u - Docker and Docker Compose (for containerized setup) - OpenAI API key +NOTE: The example is set up to work with Python 3.10. See the `image` param in the workflow and the image name used in +the Docker Compose file. Please update them as needed while following the instructions below for Docker deployment. 
+ ## Installation and Usage ### Option 1: Local Installation - In Process diff --git a/examples/tweetsgenerator/docker-compose.yml b/examples/tweetsgenerator/docker-compose.yml index 0ba3aa019..74205e051 100644 --- a/examples/tweetsgenerator/docker-compose.yml +++ b/examples/tweetsgenerator/docker-compose.yml @@ -15,7 +15,7 @@ services: - data:/tmp/indexify-blob-storage openai: - image: tensorlake/openai-image:latest + image: tensorlake/openai-image:3.10 environment: - OPENAI_API_KEY=${OPENAI_API_KEY} command: ["indexify-cli", "executor", "--server-addr", "indexify:8900"] @@ -25,7 +25,7 @@ services: - data:/tmp/indexify-blob-storage base-executor: - image: tensorlake/base-image:latest + image: tensorlake/base-image:3.10 command: [ "indexify-cli", diff --git a/examples/tweetsgenerator/workflow.py b/examples/tweetsgenerator/workflow.py index bc0292519..2b2171c5e 100644 --- a/examples/tweetsgenerator/workflow.py +++ b/examples/tweetsgenerator/workflow.py @@ -1,6 +1,7 @@ import logging import os -from typing import Dict, List +from typing import List + from pydantic import BaseModel, Field from indexify import RemoteGraph from indexify.functions_sdk.graph import Graph @@ -17,82 +18,106 @@ class Tweets(BaseModel): class RankedTweets(BaseModel): scores: List[float] = Field(description="List of scores for the tweets. 
Higher score means better tweet.") + # Define custom image +base_image_3_10 = ( + Image() + .name("tensorlake/base-image") + .base_image("python:3.10-slim-bookworm") + .tag("3.10") +) -base_image = ( +base_image_3_11 = ( Image() .name("tensorlake/base-image") + .base_image("python:3.11-slim-bookworm") + .tag("3.11") +) + +openai_image_3_10 = ( + Image() + .name("tensorlake/openai-image") + .base_image("python:3.10-slim-bookworm") + .tag("3.10") + .run("pip install openai") ) -openai_image = ( + +openai_image_3_11 = ( Image() .name("tensorlake/openai-image") + .base_image("python:3.11-slim-bookworm") + .tag("3.11") .run("pip install openai") ) -@indexify_function(image=openai_image) + +@indexify_function(image=openai_image_3_10) def generate_tweet_topics(subject: str) -> List[str]: """Generate topics for tweets about a given subject.""" import openai from pydantic import BaseModel, Field from typing import List + client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) class Topics(BaseModel): topics: List[str] = Field(default_factory=list) - response = client.chat.completions.create( - model="gpt-4", + response = client.beta.chat.completions.parse( + model="gpt-4o-mini-2024-07-18", messages=[ {"role": "system", "content": "You are a helpful assistant that generates topics for a tweet about a given subject."}, {"role": "user", "content": f"Generate 5 topics for a tweet about {subject}"}, ], - response_model=Topics + response_format=Topics ) - topics = response.choices[0].message.content + topics = response.choices[0].message.parsed return topics.topics -@indexify_function(image=openai_image) +@indexify_function(image=openai_image_3_10) def generate_tweet(topic: str) -> str: """Generate a tweet about a given topic.""" import openai from pydantic import BaseModel, Field + client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) class Tweet(BaseModel): tweet: str = Field(description="a tweet about the given topic") - response = 
client.chat.completions.create( - model="gpt-4", + response = client.beta.chat.completions.parse( + model="gpt-4o-mini-2024-07-18", messages=[ {"role": "system", "content": "You are a helpful assistant that generates a tweet about a given topic."}, {"role": "user", "content": f"Generate a tweet about {topic}"}, ], - response_model=Tweet + response_format=Tweet ) - tweet = response.choices[0].message.content + tweet = response.choices[0].message.parsed return tweet.tweet -@indexify_function(image=base_image,accumulate=Tweets) +@indexify_function(image=base_image_3_10, accumulate=Tweets) def accumulate_tweets(acc: Tweets, tweet: str) -> Tweets: """Accumulate generated tweets.""" acc.tweets.append(tweet) return acc -@indexify_function(image=openai_image) +@indexify_function(image=openai_image_3_10) def score_and_rank_tweets(tweets: Tweets) -> RankedTweets: """Score and rank the accumulated tweets.""" import openai client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) tweet_contents = "\n".join(tweets.tweets) - response = client.chat.completions.create( - model="gpt-4", + + response = client.beta.chat.completions.parse( + model="gpt-4o-mini-2024-07-18", messages=[ {"role": "system", "content": "You are a helpful assistant that scores and ranks tweets based on their relevance to a given topic."}, {"role": "user", "content": f"Score and rank the following tweets, separated by new lines: {tweet_contents}"}, ], - response_model=RankedTweets + response_format=RankedTweets ) - ranked_tweets = response.choices[0].message.content + ranked_tweets = response.choices[0].message.parsed return ranked_tweets def create_tweets_graph(): @@ -116,7 +141,11 @@ def run_workflow(mode: str, server_url: str = 'http://localhost:8900'): raise ValueError("Invalid mode. Choose 'in-process-run' or 'remote-run'.") import httpx - subject = httpx.get("https://discord.com/blog/how-discord-reduced-websocket-traffic-by-40-percent").text + # NOTE contact Tensorlake to get a key! 
+ subject = httpx.get( + url="https://discord.com/blog/how-discord-reduced-websocket-traffic-by-40-percent", + ).text + logging.info(f"Generating tweets for subject: {subject[:100]}...") invocation_id = graph.run(block_until_done=True, subject=subject)