diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py index d94f8af2..3fdcffa6 100644 --- a/refact_known_models/passthrough.py +++ b/refact_known_models/passthrough.py @@ -1,6 +1,7 @@ # refer to https://docs.litellm.ai/docs/providers/ passthrough_mini_db = { + # OpenAI models "gpt-4o": { "backend": "litellm", "provider": "openai", @@ -12,28 +13,41 @@ "pp1000t_generated": 15_000, # $15.00 / 1M tokens (2024 may) "filter_caps": ["chat", "tools", "completion"], }, - "gpt-4-turbo": { + "gpt-4o-2024-05-13": { "backend": "litellm", "provider": "openai", - "tokenizer_path": "Xenova/gpt-4", - "resolve_as": "gpt-4-turbo", + "tokenizer_path": "Xenova/gpt-4o", + "resolve_as": "gpt-4o-2024-05-13", "T": 128_000, "T_out": 4096, - "pp1000t_prompt": 10_000, - "pp1000t_generated": 30_000, # $30.00 / 1M tokens (2024 may) + "pp1000t_prompt": 5_000, + "pp1000t_generated": 15_000, # $15.00 / 1M tokens "filter_caps": ["chat", "tools", "completion"], }, - "gpt-3.5-turbo": { + "gpt-4o-2024-08-06": { "backend": "litellm", "provider": "openai", - "tokenizer_path": "Xenova/gpt-3.5-turbo-16k", - "resolve_as": "gpt-3.5-turbo-1106", - "T": 16_000, + "tokenizer_path": "Xenova/gpt-4o", + "resolve_as": "gpt-4o-2024-08-06", + "T": 128_000, "T_out": 4096, - "pp1000t_prompt": 1000, - "pp1000t_generated": 2000, + "pp1000t_prompt": 2_500, + "pp1000t_generated": 10_000, # $10.00 / 1M tokens + "filter_caps": ["chat", "tools", "completion"] }, + "gpt-4o-mini": { + "backend": "litellm", + "provider": "openai", + "tokenizer_path": "Xenova/gpt-4o", + "resolve_as": "gpt-4o-mini-2024-07-18", + "T": 128_000, + "T_out": 4096, + "pp1000t_prompt": 150, + "pp1000t_generated": 600, # $0.60 / 1M tokens + "filter_caps": ["chat", "tools", "completion"], }, + + # Anthropic models "claude-3-5-sonnet": { "backend": "litellm", "provider": "anthropic", @@ -78,39 +92,6 @@ "pp1000t_generated": 15_000, "filter_caps": ["chat", "tools", "completion"], }, - "gpt-4o-2024-05-13": { - "backend": 
"litellm", - "provider": "openai", - "tokenizer_path": "Xenova/gpt-4o", - "resolve_as": "gpt-4o-2024-05-13", - "T": 128_000, - "T_out": 4096, - "pp1000t_prompt": 5_000, - "pp1000t_generated": 15_000, # $15.00 / 1M tokens - "filter_caps": ["chat", "tools", "completion"], - }, - "gpt-4o-2024-08-06": { - "backend": "litellm", - "provider": "openai", - "tokenizer_path": "Xenova/gpt-4o", - "resolve_as": "gpt-4o-2024-08-06", - "T": 128_000, - "T_out": 4096, - "pp1000t_prompt": 2_500, - "pp1000t_generated": 10_000, # $15.00 / 1M tokens - "filter_caps": ["chat", "tools", "completion"] - }, - "gpt-4o-mini": { - "backend": "litellm", - "provider": "openai", - "tokenizer_path": "Xenova/gpt-4o", - "resolve_as": "gpt-4o-mini-2024-07-18", - "T": 128_000, - "T_out": 4096, - "pp1000t_prompt": 150, - "pp1000t_generated": 600, # $0.60 / 1M tokens - "filter_caps": ["chat", "tools", "completion"], - }, "claude-3-5-sonnet-20241022": { "backend": "litellm", "provider": "anthropic", @@ -122,6 +103,8 @@ "pp1000t_generated": 15_000, # $15.00 / 1M tokens (2024 oct) "filter_caps": ["chat", "tools", "completion"], }, + + # Groq models "groq-llama-3.1-8b": { "backend": "litellm", "provider": "groq", @@ -188,6 +171,8 @@ "pp1000t_generated": 600, # TODO: don't know the price "filter_caps": ["chat", "completion"], }, + + # Cerebras models "cerebras-llama3.1-8b": { "backend": "litellm", "provider": "cerebras", diff --git a/refact_utils/finetune/utils.py b/refact_utils/finetune/utils.py index 69558235..51874c03 100644 --- a/refact_utils/finetune/utils.py +++ b/refact_utils/finetune/utils.py @@ -103,6 +103,20 @@ def _add_results_for_passthrough_provider(provider: str) -> None: if 'completion' in v.get('filter_caps', []): result['completion'].append(k) + for k, v in data.get("model_assign", {}).items(): + if model_dict := [d for d in data['models'] if d['name'] == k]: + model_dict = model_dict[0] + + add_result(k, model_dict) + + if not model_dict.get('has_finetune'): + continue + + finetune_info = 
model_dict.get('finetune_info', []) or [] + for run in finetune_info: + val = f"{k}:{run['run_id']}:{run['checkpoint']}" + add_result(val, model_dict) + if data.get("openai_api_enable"): _add_results_for_passthrough_provider('openai') @@ -121,20 +135,6 @@ def _add_results_for_passthrough_provider(provider: str) -> None: if data.get('xai_api_enable'): _add_results_for_passthrough_provider('xai') - for k, v in data.get("model_assign", {}).items(): - if model_dict := [d for d in data['models'] if d['name'] == k]: - model_dict = model_dict[0] - - add_result(k, model_dict) - - if not model_dict.get('has_finetune'): - continue - - finetune_info = model_dict.get('finetune_info', []) or [] - for run in finetune_info: - val = f"{k}:{run['run_id']}:{run['checkpoint']}" - add_result(val, model_dict) - return result diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py index 6fbfa0ec..47e39125 100644 --- a/refact_webgui/webgui/selfhost_fastapi_completions.py +++ b/refact_webgui/webgui/selfhost_fastapi_completions.py @@ -270,7 +270,7 @@ def _caps_base_data(self) -> Dict[str, Any]: "telemetry_basic_dest": "/stats/telemetry-basic", "telemetry_corrected_snippets_dest": "/stats/telemetry-snippets", "telemetry_basic_retrieve_my_own": "/stats/rh-stats", - "running_models": [r for r in [*running['completion'], *running['chat']]], + "running_models": list(set(r for r in [*running['completion'], *running['chat']])), "code_completion_default_model": code_completion_default_model, "multiline_code_completion_default_model": multiline_code_completion_default_model, "code_chat_default_model": code_chat_default_model,