
Commit

upgrade openAI api, add gpt4 support
jrodal98 committed Apr 8, 2024
1 parent 16f6715 commit 86fbece
Showing 10 changed files with 62 additions and 92 deletions.
2 changes: 0 additions & 2 deletions Makefile
@@ -11,6 +11,4 @@ upload:
 clean:
 	rm -rf dist build
 test:
-	ls tests/manual | xargs -I {} python "tests/manual/{}" cheap
-expensive_test:
 	ls tests/manual | xargs -I {} python "tests/manual/{}"
5 changes: 2 additions & 3 deletions README.md
@@ -68,9 +68,8 @@ Estimated cost (usd): $0.0016
 - Slow
   - Small articles take ~20 seconds, longer articles and videos take a few minutes. There is probably some more optimization to be had.
 - Cost
-  - Can be expensive - by default, this uses the "gpt-3.5-turbo" model, which costs $0.002 / 1000 tokens.
-  - To minimize cost, the library currently limits the number of input characters to 120,000. This limits the cost of a request to a maximum of ~$0.062. In the future, I may add a flag to modify or remove this limit.
-  - If you pass `--cheap` to the command line, the ada model will be used, which is 5x cheaper, but not very good.
+  - Can be expensive - by default, this uses the "gpt-3.5-turbo" model, which is much cheaper than gpt4
+  - To minimize cost, the library currently limits the number of input characters to 120,000.
 
 To illustrate speed and cost: [This wikipedia page](https://en.wikipedia.org/wiki/List_of_common_misconceptions) hits the maximum limit:
 
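For a rough sense of what the 120,000-character cap means under the per-token prices introduced in this commit (prompt side only, using the common ~4 characters-per-token heuristic; the numbers below are illustrative, not from the repository):

```python
# Back-of-the-envelope only: real token counts depend on the tokenizer, and
# completion tokens (plus chunk re-summarization) are charged on top.
MAX_INPUT_CHARS = 120_000
approx_prompt_tokens = MAX_INPUT_CHARS / 4  # ~30,000 tokens

gpt35_prompt_cost = approx_prompt_tokens / 1000 * 0.0005  # ~$0.015
gpt4_prompt_cost = approx_prompt_tokens / 1000 * 0.03     # ~$0.90
print(f"{gpt35_prompt_cost=:.3f} {gpt4_prompt_cost=:.2f}")
```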
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-openai>=0.27.0,<0.28.0
+openai>=1.16.2,<2.0.0
 youtube-transcript-api>=0.6.0,<0.7.0
 beautifulsoup4>=4.12.0,<5.0.0
 requests>=2.30.0,<3.0.0
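The new pin is a breaking upgrade: openai 1.x drops the module-level `openai.ChatCompletion.acreate` call in favor of an explicit client object with attribute-style responses. A minimal sketch of the shape difference (illustrative; the repository's actual usage is in `chat_completions_text_summarizer.py` below):

```python
# openai < 0.28 (old pin): module-level call, dict-style response
#   response = await openai.ChatCompletion.acreate(model="gpt-3.5-turbo", messages=msgs)
#   text = response["choices"][0]["message"]["content"]

# openai >= 1.0 (new pin): explicit client, typed response objects
from openai import AsyncOpenAI

client = AsyncOpenAI()  # picks up OPENAI_API_KEY from the environment

async def ask(msgs):
    completion = await client.chat.completions.create(
        model="gpt-3.5-turbo", messages=msgs
    )
    return completion.choices[0].message.content
```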
15 changes: 5 additions & 10 deletions tests/manual/test_helper.py
@@ -2,20 +2,18 @@
 # www.jrodal.com
 
 import time
-import sys
 import asyncio
 from tldrwl.logger import init_logging
 from tldrwl.summarize import Summarizer
-from tldrwl.summarizers.text_ada_001_text_summarizer import TextAda001TextSummarizer
+from tldrwl.summarizers.chat_completions_text_summarizer import (
+    ChatCompletionsTextSummarizer,
+)
 
 
 def main_sync(text: str) -> None:
     print("Sync test")
     start = time.time()
-    if "cheap" in sys.argv:
-        text_summarizer = TextAda001TextSummarizer()
-    else:
-        text_summarizer = None
+    text_summarizer = ChatCompletionsTextSummarizer()
 
     summary = Summarizer(text_summarizer=text_summarizer).summarize_sync(text)
     end = time.time()
@@ -28,10 +26,7 @@ def main_sync(text: str) -> None:
 async def main_async(text: str) -> None:
     print("Async test")
     start = time.time()
-    if "cheap" in sys.argv:
-        text_summarizer = TextAda001TextSummarizer()
-    else:
-        text_summarizer = None
+    text_summarizer = ChatCompletionsTextSummarizer()
     summary = await Summarizer(text_summarizer=text_summarizer).summarize_async(text)
     end = time.time()
 
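With the `cheap` argument gone, the manual test scripts no longer need to forward `sys.argv`. A hypothetical module under `tests/manual/` (not part of this commit; the filename and call pattern are assumptions) might look like:

```python
# tests/manual/example_test.py -- hypothetical. Python adds the script's own
# directory to sys.path when run directly, so test_helper is importable when
# the Makefile invokes python "tests/manual/{}".
import asyncio

from test_helper import main_sync, main_async

SAMPLE = "https://en.wikipedia.org/wiki/List_of_common_misconceptions"

if __name__ == "__main__":
    main_sync(SAMPLE)
    asyncio.run(main_async(SAMPLE))
```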
18 changes: 9 additions & 9 deletions tldrwl/__main__.py
@@ -6,9 +6,10 @@
 
 from tldrwl.summarize import Summarizer
 from tldrwl.logger import init_logging
-from tldrwl.summarizers.text_ada_001_text_summarizer import (
-    TextAda001TextSummarizer,
+from tldrwl.summarizers.chat_completions_text_summarizer import (
+    ChatCompletionsTextSummarizer,
 )
+from tldrwl.ai_interface import Model
 
 
 async def main():
@@ -26,9 +27,11 @@ async def main():
         help="very verbose logging (include third party logs)",
     )
     parser.add_argument(
-        "--cheap",
-        action="store_true",
-        help="Try to make the run cheaper (e.g. using less powerful models like Ada)",
+        "--model",
+        type=Model,
+        choices=list(Model),
+        default=None,
+        help="Model to use for summarization",
     )
     args = parser.parse_args()
 
@@ -38,10 +41,7 @@ async def main():
         very_verbose_logging=args.very_verbose_logging,
     )
 
-    if args.cheap:
-        text_summarizer = TextAda001TextSummarizer()
-    else:
-        text_summarizer = None
+    text_summarizer = ChatCompletionsTextSummarizer(model=args.model)
 
     summary = await Summarizer(text_summarizer=text_summarizer).summarize_async(
         args.input
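With the `--cheap` flag replaced by `--model`, gpt-4 can now be selected from the command line (the flag takes the enum's value, e.g. `--model gpt-4`). The equivalent selection through the library API might look like this sketch (assumes `OPENAI_API_KEY` is set; the URL is just an example):

```python
import asyncio

from tldrwl.ai_interface import Model
from tldrwl.summarize import Summarizer
from tldrwl.summarizers.chat_completions_text_summarizer import (
    ChatCompletionsTextSummarizer,
)

async def main() -> None:
    # Pass an explicit model; omitting it falls back to Model.default_model().
    summarizer = Summarizer(
        text_summarizer=ChatCompletionsTextSummarizer(model=Model.GPT4)
    )
    summary = await summarizer.summarize_async(
        "https://en.wikipedia.org/wiki/List_of_common_misconceptions"
    )
    print(summary.text)
    print(f"Estimated cost (usd): ${summary.estimated_cost_usd:.4f}")

asyncio.run(main())
```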
31 changes: 24 additions & 7 deletions tldrwl/ai_interface.py
@@ -12,27 +12,44 @@
 
 class Model(Enum):
     GPT35TURBO = "gpt-3.5-turbo"
-    TEXTADA001 = "text-ada-001"
+    GPT4 = "gpt-4"
 
     @property
-    def cost_per_1000_tokens(self) -> float:
+    def cost_per_1000_prompt_tokens(self) -> float:
         if self is self.GPT35TURBO:
-            return 0.002
-        if self is self.TEXTADA001:
-            return 0.0004
+            return 0.0005
+        elif self is self.GPT4:
+            return 0.03
         else:
             return 0
+
+    @property
+    def cost_per_1000_completion_tokens(self) -> float:
+        if self is self.GPT35TURBO:
+            return 0.0015
+        elif self is self.GPT4:
+            return 0.06
+        else:
+            return 0
+
+    @classmethod
+    def default_model(cls) -> "Model":
+        return cls.GPT35TURBO
 
 
 @dataclass
 class Summary:
     text: str
-    num_tokens: int
+    prompt_tokens: int
+    completion_tokens: int
     model: Model
 
     @property
     def estimated_cost_usd(self) -> float:
-        return self.num_tokens * self.model.cost_per_1000_tokens * (1 / 1000)
+        return (
+            (self.prompt_tokens * self.model.cost_per_1000_prompt_tokens)
+            + (self.completion_tokens * self.model.cost_per_1000_completion_tokens)
+        ) * (1 / 1000)
 
 
 class AiInterface(ABC):
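A worked example of the new two-rate cost estimate (the token counts are made up for illustration):

```python
from tldrwl.ai_interface import Model, Summary

# Illustrative numbers only: 12,000 prompt tokens and 800 completion tokens on gpt-4.
s = Summary(text="...", prompt_tokens=12_000, completion_tokens=800, model=Model.GPT4)

# (12_000 * 0.03 + 800 * 0.06) / 1000 = 0.408
print(round(s.estimated_cost_usd, 3))  # 0.408
```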
4 changes: 2 additions & 2 deletions tldrwl/summarize.py
@@ -8,7 +8,7 @@
 
 from tldrwl.ai_interface import AiInterface, Summary
 from tldrwl.summarizers.text_summarizer import TextSummarizer
-from tldrwl.summarizers.gpt_35_turbo_text_summarizer import Gpt35TurboTextSummarizer
+from tldrwl.summarizers.chat_completions_text_summarizer import ChatCompletionsTextSummarizer
 from tldrwl.transformers.webpage_transformer import WebpageTransformer
 from tldrwl.transformers.youtube_transformer import YoutubeTransformer
 
@@ -17,7 +17,7 @@ class Summarizer(AiInterface):
     def __init__(self, *, text_summarizer: Optional[TextSummarizer] = None) -> None:
         super().__init__()
         self._logger = logging.getLogger(__name__)
-        self._summarizer = text_summarizer or Gpt35TurboTextSummarizer()
+        self._summarizer = text_summarizer or ChatCompletionsTextSummarizer()
 
     async def _transform_text(self, text: str) -> str:
         if YoutubeTransformer.is_youtube_url(text):
20 changes: 12 additions & 8 deletions tldrwl/summarizers/{gpt_35_turbo_text_summarizer.py → chat_completions_text_summarizer.py}
@@ -3,30 +3,34 @@
 
 import re
 
-import openai
-
+from openai import AsyncOpenAI
 from tldrwl.summarizers.text_summarizer import TextSummarizer
 
 from tldrwl.ai_interface import Summary
 
+aclient = AsyncOpenAI()
+
 
-class Gpt35TurboTextSummarizer(TextSummarizer):
+class ChatCompletionsTextSummarizer(TextSummarizer):
     MAX_TOKEN_RESPONSE = 1500
 
     async def _query_openai(self, text: str, max_tokens: int) -> Summary:
         prompt = self._prompt_string.format(text)
-        response = await openai.ChatCompletion.acreate(  # type: ignore
+        completion = await aclient.chat.completions.create(
             model=self._model.value,
             messages=[{"role": "user", "content": prompt}],
             max_tokens=max_tokens,
         )
-        output_text = response["choices"][0]["message"]["content"]  # type: ignore
-        num_tokens = response["usage"]["total_tokens"]  # type: ignore
-        self._logger.debug(f"{num_tokens=}")
+        output_text = completion.choices[0].message.content
+        prompt_tokens = completion.usage.prompt_tokens  # type: ignore
+        completion_tokens = completion.usage.completion_tokens  # type: ignore
+
+        self._logger.debug(f"{prompt_tokens=}, {completion_tokens=}")
 
         summary = re.sub(r"\s+", " ", output_text.strip())  # type: ignore
         return Summary(
            text=summary,
-            num_tokens=num_tokens,  # type: ignore
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
            model=self._model,
        )
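The remaining `# type: ignore` comments exist because `completion.usage` is typed as optional in the openai SDK. A hypothetical defensive variant (not what the commit does) would guard it explicitly:

```python
# Hypothetical alternative to the "# type: ignore" comments above.
usage = completion.usage
prompt_tokens = usage.prompt_tokens if usage is not None else 0
completion_tokens = usage.completion_tokens if usage is not None else 0
```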
45 changes: 0 additions & 45 deletions tldrwl/summarizers/text_ada_001_text_summarizer.py

This file was deleted.

12 changes: 7 additions & 5 deletions tldrwl/summarizers/text_summarizer.py
@@ -20,13 +20,13 @@ class TextSummarizer(AiInterface):
     def __init__(
         self,
         *,
-        model: Model = Model.GPT35TURBO,
+        model: Model | None = None,
         prompt_string: str = "Write a detailed summary of the following:\n\n{}\n",
         chunk_size: int = 12000,
         max_num_chunks: int = 10,
     ) -> None:
         super().__init__()
-        self._model = model
+        self._model = model or Model.default_model()
         self._prompt_string = prompt_string
         self._chunk_size = chunk_size
         self._max_num_chunks = max_num_chunks
@@ -40,7 +40,7 @@ async def _summarize_chunk_async(self, chunk: str, max_tokens: int) -> Summary:
         for _ in range(0, 3):
             try:
                 return await self._query_openai(chunk, max_tokens)
-            except openai.error.RateLimitError:  # pyright: ignore
+            except openai.RateLimitError:  # pyright: ignore
                 retry_interval = 3
                 self._logger.debug(
                     f"Rate limited by openai - resting for {retry_interval}s"
@@ -85,7 +85,9 @@ async def _summarize_async(self, text: str) -> Summary:
         )
         return Summary(
             text=final_summary.text,
-            num_tokens=final_summary.num_tokens
-            + sum(s.num_tokens for s in summaries),
+            prompt_tokens=final_summary.prompt_tokens
+            + sum(s.prompt_tokens for s in summaries),
+            completion_tokens=final_summary.completion_tokens
+            + sum(s.completion_tokens for s in summaries),
             model=self._model,
         )
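The switch from `openai.error.RateLimitError` to `openai.RateLimitError` is another part of the 1.x migration. In isolation, the retry pattern used by `_summarize_chunk_async` looks roughly like this sketch (the helper name and hard-coded limits are illustrative, not from the repository):

```python
import asyncio
import openai

async def call_with_retry(make_call, attempts: int = 3, retry_interval: float = 3.0):
    # Retry a coroutine-returning callable when OpenAI rate-limits the request.
    for _ in range(attempts):
        try:
            return await make_call()
        except openai.RateLimitError:
            await asyncio.sleep(retry_interval)
    raise RuntimeError("Still rate limited after retries")
```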
