diff --git a/Makefile b/Makefile index b4c54c7..b88c235 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,4 @@ upload: clean: rm -rf dist build test: - ls tests/manual | xargs -I {} python "tests/manual/{}" cheap -expensive_test: ls tests/manual | xargs -I {} python "tests/manual/{}" diff --git a/README.md b/README.md index d627250..a979c95 100644 --- a/README.md +++ b/README.md @@ -68,9 +68,8 @@ Estimated cost (usd): $0.0016 - Slow - Small articles take ~20 seconds, longer articles and videos take a few minutes. There is probably some more optimization to be had. - Cost - - Can be expensive - by default, this uses the "gpt-3.5-turbo" model, which costs $0.002 / 1000 tokens. - - To minimize cost, the library currently limits the number of input characters to 120,000. This limits the cost of a request to a maximum of ~$0.062. In the future, I may add a flag to modify or remove this limit. - - If you pass `--cheap` to the command line, the ada model will be used, which is 5x cheaper, but not very good. + - Can be expensive - by default, this uses the "gpt-3.5-turbo" model, which is much cheaper than gpt4 + - To minimize cost, the library currently limits the number of input characters to 120,000. To illustrate speed and cost: [This wikipedia page](https://en.wikipedia.org/wiki/List_of_common_misconceptions) hits the maximum limit: diff --git a/requirements.txt b/requirements.txt index 60956d6..7b76975 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -openai>=0.27.0,<0.28.0 +openai>=1.16.2,<2.0.0 youtube-transcript-api>=0.6.0,<0.7.0 beautifulsoup4>=4.12.0,<5.0.0 requests>=2.30.0,<3.0.0 diff --git a/tests/manual/test_helper.py b/tests/manual/test_helper.py index d5ca894..a28d262 100644 --- a/tests/manual/test_helper.py +++ b/tests/manual/test_helper.py @@ -2,20 +2,18 @@ # www.jrodal.com import time -import sys import asyncio from tldrwl.logger import init_logging from tldrwl.summarize import Summarizer -from tldrwl.summarizers.text_ada_001_text_summarizer import TextAda001TextSummarizer +from tldrwl.summarizers.chat_completions_text_summarizer import ( + ChatCompletionsTextSummarizer, +) def main_sync(text: str) -> None: print("Sync test") start = time.time() - if "cheap" in sys.argv: - text_summarizer = TextAda001TextSummarizer() - else: - text_summarizer = None + text_summarizer = ChatCompletionsTextSummarizer() summary = Summarizer(text_summarizer=text_summarizer).summarize_sync(text) end = time.time() @@ -28,10 +26,7 @@ def main_sync(text: str) -> None: async def main_async(text: str) -> None: print("Async test") start = time.time() - if "cheap" in sys.argv: - text_summarizer = TextAda001TextSummarizer() - else: - text_summarizer = None + text_summarizer = ChatCompletionsTextSummarizer() summary = await Summarizer(text_summarizer=text_summarizer).summarize_async(text) end = time.time() diff --git a/tldrwl/__main__.py b/tldrwl/__main__.py index 84de8ad..367a6d9 100644 --- a/tldrwl/__main__.py +++ b/tldrwl/__main__.py @@ -6,9 +6,10 @@ from tldrwl.summarize import Summarizer from tldrwl.logger import init_logging -from tldrwl.summarizers.text_ada_001_text_summarizer import ( - TextAda001TextSummarizer, +from tldrwl.summarizers.chat_completions_text_summarizer import ( + ChatCompletionsTextSummarizer, ) +from tldrwl.ai_interface import Model async def main(): @@ -26,9 +27,11 @@ async def main(): help="very verbose logging (include third party logs)", ) parser.add_argument( - "--cheap", - action="store_true", - help="Try to make the run cheaper (e.g. using less powerful models like Ada)", + "--model", + type=Model, + choices=list(Model), + default=None, + help="Model to use for summarization", ) args = parser.parse_args() @@ -38,10 +41,7 @@ async def main(): very_verbose_logging=args.very_verbose_logging, ) - if args.cheap: - text_summarizer = TextAda001TextSummarizer() - else: - text_summarizer = None + text_summarizer = ChatCompletionsTextSummarizer(model=args.model) summary = await Summarizer(text_summarizer=text_summarizer).summarize_async( args.input diff --git a/tldrwl/ai_interface.py b/tldrwl/ai_interface.py index 1a12ad8..3d89b4b 100644 --- a/tldrwl/ai_interface.py +++ b/tldrwl/ai_interface.py @@ -12,27 +12,44 @@ class Model(Enum): GPT35TURBO = "gpt-3.5-turbo" - TEXTADA001 = "text-ada-001" + GPT4 = "gpt-4" @property - def cost_per_1000_tokens(self) -> float: + def cost_per_1000_prompt_tokens(self) -> float: if self is self.GPT35TURBO: - return 0.002 - if self is self.TEXTADA001: - return 0.0004 + return 0.0005 + elif self is self.GPT4: + return 0.03 else: return 0 + @property + def cost_per_1000_completion_tokens(self) -> float: + if self is self.GPT35TURBO: + return 0.0015 + elif self is self.GPT4: + return 0.06 + else: + return 0 + + @classmethod + def default_model(cls) -> "Model": + return cls.GPT35TURBO + @dataclass class Summary: text: str - num_tokens: int + prompt_tokens: int + completion_tokens: int model: Model @property def estimated_cost_usd(self) -> float: - return self.num_tokens * self.model.cost_per_1000_tokens * (1 / 1000) + return ( + (self.prompt_tokens * self.model.cost_per_1000_prompt_tokens) + + (self.completion_tokens * self.model.cost_per_1000_completion_tokens) + ) * (1 / 1000) class AiInterface(ABC): diff --git a/tldrwl/summarize.py b/tldrwl/summarize.py index 58ff990..000498e 100644 --- a/tldrwl/summarize.py +++ b/tldrwl/summarize.py @@ -8,7 +8,7 @@ from tldrwl.ai_interface import AiInterface, Summary from tldrwl.summarizers.text_summarizer import TextSummarizer -from tldrwl.summarizers.gpt_35_turbo_text_summarizer import Gpt35TurboTextSummarizer +from tldrwl.summarizers.chat_completions_text_summarizer import ChatCompletionsTextSummarizer from tldrwl.transformers.webpage_transformer import WebpageTransformer from tldrwl.transformers.youtube_transformer import YoutubeTransformer @@ -17,7 +17,7 @@ class Summarizer(AiInterface): def __init__(self, *, text_summarizer: Optional[TextSummarizer] = None) -> None: super().__init__() self._logger = logging.getLogger(__name__) - self._summarizer = text_summarizer or Gpt35TurboTextSummarizer() + self._summarizer = text_summarizer or ChatCompletionsTextSummarizer() async def _transform_text(self, text: str) -> str: if YoutubeTransformer.is_youtube_url(text): diff --git a/tldrwl/summarizers/gpt_35_turbo_text_summarizer.py b/tldrwl/summarizers/chat_completions_text_summarizer.py similarity index 54% rename from tldrwl/summarizers/gpt_35_turbo_text_summarizer.py rename to tldrwl/summarizers/chat_completions_text_summarizer.py index 8978535..9eec54d 100644 --- a/tldrwl/summarizers/gpt_35_turbo_text_summarizer.py +++ b/tldrwl/summarizers/chat_completions_text_summarizer.py @@ -3,30 +3,34 @@ import re -import openai - +from openai import AsyncOpenAI from tldrwl.summarizers.text_summarizer import TextSummarizer from tldrwl.ai_interface import Summary +aclient = AsyncOpenAI() + -class Gpt35TurboTextSummarizer(TextSummarizer): +class ChatCompletionsTextSummarizer(TextSummarizer): MAX_TOKEN_RESPONSE = 1500 async def _query_openai(self, text: str, max_tokens: int) -> Summary: prompt = self._prompt_string.format(text) - response = await openai.ChatCompletion.acreate( # type: ignore + completion = await aclient.chat.completions.create( model=self._model.value, messages=[{"role": "user", "content": prompt}], max_tokens=max_tokens, ) - output_text = response["choices"][0]["message"]["content"] # type: ignore - num_tokens = response["usage"]["total_tokens"] # type: ignore - self._logger.debug(f"{num_tokens=}") + output_text = completion.choices[0].message.content + prompt_tokens = completion.usage.prompt_tokens # type: ignore + completion_tokens = completion.usage.completion_tokens # type: ignore + + self._logger.debug(f"{prompt_tokens=}, {completion_tokens=}") summary = re.sub(r"\s+", " ", output_text.strip()) # type: ignore return Summary( text=summary, - num_tokens=num_tokens, # type: ignore + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, model=self._model, ) diff --git a/tldrwl/summarizers/text_ada_001_text_summarizer.py b/tldrwl/summarizers/text_ada_001_text_summarizer.py deleted file mode 100644 index 482c269..0000000 --- a/tldrwl/summarizers/text_ada_001_text_summarizer.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -# www.jrodal.com - -import re - -import openai - -from tldrwl.ai_interface import Model, Summary -from tldrwl.summarizers.text_summarizer import TextSummarizer - - -class TextAda001TextSummarizer(TextSummarizer): - MAX_TOKEN_RESPONSE = 750 - MAX_TOKEN_INPUT = 125 - - def __init__( - self, - *, - model: Model = Model.TEXTADA001, - prompt_string: str = "Write a detailed summary of the following:\n\n{}\n", - chunk_size: int = 4000, - max_num_chunks: int = 10, - ) -> None: - super().__init__( - model=model, - prompt_string=prompt_string, - chunk_size=chunk_size, - max_num_chunks=max_num_chunks, - ) - - async def _query_openai(self, text: str, max_tokens: int) -> Summary: - prompt = self._prompt_string.format(text) - response = openai.Completion.create( # type: ignore - model=self._model.value, prompt=prompt, max_tokens=max_tokens - ) - output_text = response.choices[0].text.strip() # type: ignore - num_tokens = response["usage"]["total_tokens"] # type: ignore - self._logger.debug(f"{num_tokens=}") - - summary = re.sub(r"\s+", " ", output_text.strip()) # type: ignore - return Summary( - text=summary, - num_tokens=num_tokens, # type: ignore - model=self._model, - ) diff --git a/tldrwl/summarizers/text_summarizer.py b/tldrwl/summarizers/text_summarizer.py index 4be9f84..46f3ec5 100644 --- a/tldrwl/summarizers/text_summarizer.py +++ b/tldrwl/summarizers/text_summarizer.py @@ -20,13 +20,13 @@ class TextSummarizer(AiInterface): def __init__( self, *, - model: Model = Model.GPT35TURBO, + model: Model | None = None, prompt_string: str = "Write a detailed summary of the following:\n\n{}\n", chunk_size: int = 12000, max_num_chunks: int = 10, ) -> None: super().__init__() - self._model = model + self._model = model or Model.default_model() self._prompt_string = prompt_string self._chunk_size = chunk_size self._max_num_chunks = max_num_chunks @@ -40,7 +40,7 @@ async def _summarize_chunk_async(self, chunk: str, max_tokens: int) -> Summary: for _ in range(0, 3): try: return await self._query_openai(chunk, max_tokens) - except openai.error.RateLimitError: # pyright: ignore + except openai.RateLimitError: # pyright: ignore retry_interval = 3 self._logger.debug( f"Rate limited by openai - resting for {retry_interval}s" @@ -85,7 +85,9 @@ async def _summarize_async(self, text: str) -> Summary: ) return Summary( text=final_summary.text, - num_tokens=final_summary.num_tokens - + sum(s.num_tokens for s in summaries), + prompt_tokens=final_summary.prompt_tokens + + sum(s.prompt_tokens for s in summaries), + completion_tokens=final_summary.completion_tokens + + sum(s.completion_tokens for s in summaries), model=self._model, )