Litellm dev 11 07 2024 (#6649)
* fix(streaming_handler.py): save finish_reasons which might show up mid-stream (store last received one)

Fixes #6104

* refactor: add readme to litellm_core_utils/

make it easier to navigate

* fix(team_endpoints.py): return team id + object for invalid team in `/team/list`

* fix(streaming_handler.py): remove import

* fix(pattern_match_deployments.py): default to user input if unable to map based on wildcards (#6646)

* fix(pattern_match_deployments.py): default to user input if unable to… (#6632)

* fix(pattern_match_deployments.py): default to user input if unable to map based on wildcards

* test: fix test

* test: reset test name

* test: update conftest to reload proxy server module between tests

* ci(config.yml): move langfuse out of local_testing

reduce ci/cd time

* ci(config.yml): cleanup langfuse ci/cd tests

* fix: update test to not use global proxy_server app module

* ci: move caching to a separate test pipeline

speed up ci pipeline

* test: update conftest to check if proxy_server attr exists before reloading

* build(conftest.py): don't block on inability to reload proxy_server

* ci(config.yml): update caching unit test filter to work on 'cache' keyword as well

* fix(encrypt_decrypt_utils.py): use function to get salt key

* test: mark flaky test

* test: handle anthropic overloaded errors

* refactor: create separate ci/cd pipeline for proxy unit tests

make ci/cd faster

* ci(config.yml): add litellm_proxy_unit_testing to build_and_test jobs

* ci(config.yml): generate prisma binaries for proxy unit tests

* test: readd vertex_key.json

* ci(config.yml): remove `-s` from proxy_unit_test cmd

speed up test

* ci: remove any 'debug' logging flag

speed up ci pipeline

* test: fix test

* test(test_braintrust.py): rerun

* test: add delay for braintrust test

* chore: comment for maritalk (#6607)

* Update gpt-4o-2024-08-06, o1-preview, and o1-mini models in model cost map (#6654)

* Adding supports_response_schema to gpt-4o-2024-08-06 models

* o1 models do not support vision

---------

Co-authored-by: Emerson Gomes <[email protected]>

* (QOL improvement) add unit testing for all static_methods in litellm_logging.py (#6640)

* add unit testing for standard logging payload

* unit testing for static methods in litellm_logging

* add code coverage check for litellm_logging

* litellm_logging_code_coverage

* test_get_final_response_obj

* fix validate_redacted_message_span_attributes

* test validate_redacted_message_span_attributes

* (feat) log error class, function_name on prometheus service failure hook + only log DB-related failures on DB service hook (#6650)

* log error on prometheus service failure hook

* use a more accurate function name for wrapper that handles logging db metrics

* fix log_db_metrics

* test_log_db_metrics_failure_error_types

* fix linting

* fix auth checks

* Update several Azure AI models in model cost map (#6655)

* Adding Azure Phi 3/3.5 models to model cost map

* Update gpt-4o-mini models

* Adding missing Azure Mistral models to model cost map

* Adding Azure Llama3.2 models to model cost map

* Fix Gemini-1.5-flash pricing

* Fix Gemini-1.5-flash output pricing

* Fix Gemini-1.5-pro prices

* Fix Gemini-1.5-flash output prices

* Correct gemini-1.5-pro prices

* Correction on Vertex Llama3.2 entry

---------

Co-authored-by: Emerson Gomes <[email protected]>

* fix(streaming_handler.py): fix linting error

* test: remove duplicate test

causes gemini ratelimit error

---------

Co-authored-by: nobuo kawasaki <[email protected]>
Co-authored-by: Emerson Gomes <[email protected]>
Co-authored-by: Emerson Gomes <[email protected]>
Co-authored-by: Ishaan Jaff <[email protected]>
5 people authored Nov 8, 2024
1 parent 9f2053e commit 1bef645
Showing 10 changed files with 4,251 additions and 2,149 deletions.
11 changes: 11 additions & 0 deletions litellm/litellm_core_utils/README.md
@@ -0,0 +1,11 @@
## Folder Contents

This folder contains general-purpose utilities that are used in multiple places in the codebase.

Core files:
- `streaming_handler.py`: the core streaming logic, plus streaming-related helper utils.
- `core_helpers.py`: code used in `types/` - e.g. `map_finish_reason`.
- `exception_mapping_utils.py`: utils for mapping exceptions to openai-compatible error types.
- `default_encoding.py`: code for loading the default encoding (tiktoken).
- `get_llm_provider_logic.py`: code for inferring the LLM provider from a given model name.
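
As a rough illustration, a minimal sketch of how a couple of these helpers might be used together; the import paths follow the file names above, but the return values in the comments are assumptions rather than verified outputs:

from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider

# Normalize a provider-specific finish reason into an OpenAI-compatible one
# (the specific mapping here is assumed for illustration).
print(map_finish_reason("stop_sequence"))  # e.g. "stop"

# Infer the provider from a bare model name; assumed to return a tuple whose
# second element is the provider name.
model, provider, _, _ = get_llm_provider(model="claude-3-5-sonnet-20240620")
print(provider)  # e.g. "anthropic"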

27 changes: 27 additions & 0 deletions litellm/litellm_core_utils/core_helpers.py
@@ -3,6 +3,8 @@
import os
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union

import httpx

from litellm._logging import verbose_logger

if TYPE_CHECKING:
@@ -99,3 +101,28 @@ def _get_parent_otel_span_from_kwargs(
"Error in _get_parent_otel_span_from_kwargs: " + str(e)
)
return None


def process_response_headers(response_headers: Union[httpx.Headers, dict]) -> dict:
    from litellm.types.utils import OPENAI_RESPONSE_HEADERS

    openai_headers = {}
    processed_headers = {}
    additional_headers = {}

    for k, v in response_headers.items():
        if k in OPENAI_RESPONSE_HEADERS:  # return openai-compatible headers
            openai_headers[k] = v
        if k.startswith(
            "llm_provider-"
        ):  # return raw provider headers (incl. openai-compatible ones)
            processed_headers[k] = v
        else:
            additional_headers["{}-{}".format("llm_provider", k)] = v

    additional_headers = {
        **openai_headers,
        **processed_headers,
        **additional_headers,
    }
    return additional_headers
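
A small usage sketch of the helper added above; it assumes `x-ratelimit-remaining-requests` is one of the keys in `OPENAI_RESPONSE_HEADERS`, which is an assumption made for illustration:

import httpx

from litellm.litellm_core_utils.core_helpers import process_response_headers

headers = httpx.Headers(
    {
        "x-ratelimit-remaining-requests": "99",  # assumed openai-compatible header
        "anthropic-version": "2023-06-01",  # provider-specific header
    }
)
print(process_response_headers(headers))
# Roughly: openai-compatible headers pass through under their own name, and each
# header not already prefixed with "llm_provider-" is also returned under a
# "llm_provider-" prefixed key, e.g.
# {"x-ratelimit-remaining-requests": "99",
#  "llm_provider-x-ratelimit-remaining-requests": "99",
#  "llm_provider-anthropic-version": "2023-06-01"}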
21 changes: 21 additions & 0 deletions litellm/litellm_core_utils/default_encoding.py
@@ -0,0 +1,21 @@
import os

import litellm

try:
    # New and recommended way to access resources
    from importlib import resources

    filename = str(resources.files(litellm).joinpath("llms/tokenizers"))
except (ImportError, AttributeError):
    # Old way to access resources, which setuptools deprecated some time ago
    import pkg_resources  # type: ignore

    filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")

os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
    "CUSTOM_TIKTOKEN_CACHE_DIR", filename
)  # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")
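
For reference, a quick sketch of how the shared `encoding` object can be used to count tokens; this is standard tiktoken usage:

from litellm.litellm_core_utils.default_encoding import encoding

# cl100k_base token count for a short prompt
tokens = encoding.encode("Hey, how's it going?")
print(len(tokens))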
50 changes: 50 additions & 0 deletions litellm/litellm_core_utils/rules.py
@@ -0,0 +1,50 @@
from typing import Optional

import litellm


class Rules:
    """
    Fail calls based on the input or llm api output

    Example usage:
    import litellm
    def my_custom_rule(input):  # receives the model response
        if "i don't think i can answer" in input:  # trigger fallback if the model refuses to answer
            return False
        return True
    litellm.post_call_rules = [my_custom_rule]  # have these be functions that can be called to fail a call
    response = litellm.completion(model="gpt-3.5-turbo", messages=[{"role": "user",
        "content": "Hey, how's it going?"}], fallbacks=["openrouter/mythomax"])
    """

    def __init__(self) -> None:
        pass

    def pre_call_rules(self, input: str, model: str):
        for rule in litellm.pre_call_rules:
            if callable(rule):
                decision = rule(input)
                if decision is False:
                    raise litellm.APIResponseValidationError(message="LLM Response failed post-call-rule check", llm_provider="", model=model)  # type: ignore
        return True

    def post_call_rules(self, input: Optional[str], model: str) -> bool:
        if input is None:
            return True
        for rule in litellm.post_call_rules:
            if callable(rule):
                decision = rule(input)
                if isinstance(decision, bool):
                    if decision is False:
                        raise litellm.APIResponseValidationError(message="LLM Response failed post-call-rule check", llm_provider="", model=model)  # type: ignore
                elif isinstance(decision, dict):
                    decision_val = decision.get("decision", True)
                    decision_message = decision.get(
                        "message", "LLM Response failed post-call-rule check"
                    )
                    if decision_val is False:
                        raise litellm.APIResponseValidationError(message=decision_message, llm_provider="", model=model)  # type: ignore
        return True
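
A hedged sketch of how a post-call rule could be wired up, based on the docstring and the dict-handling branch above; the rule function itself is hypothetical:

import litellm


def no_refusals(output: str):
    # Hypothetical rule: fail the call when the model refuses to answer.
    if "i can't help with that" in output.lower():
        return {"decision": False, "message": "Model refused to answer"}
    return {"decision": True}


litellm.post_call_rules = [no_refusals]

# Per post_call_rules() above, a failing rule raises
# litellm.APIResponseValidationError with the message "Model refused to answer".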
(Diffs for the remaining 6 changed files are not shown here.)
