Skip to content

Commit

Permalink
fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
infwinston committed Nov 10, 2023
1 parent 1a4e948 commit 2c1b9a2
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 26 deletions.
2 changes: 1 addition & 1 deletion fastchat/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
MODERATION_MSG = "$MODERATION$ YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES."
CONVERSATION_LIMIT_MSG = "YOU HAVE REACHED THE CONVERSATION LENGTH LIMIT. PLEASE CLEAR HISTORY AND START A NEW CONVERSATION."
INACTIVE_MSG = "THIS SESSION HAS BEEN INACTIVE FOR TOO LONG. PLEASE REFRESH THIS PAGE."
SLOW_MODEL_MSG = '⚠️ Both models will show the responses all at once. Please stay patient as it may take over 30 seconds.'
SLOW_MODEL_MSG = "⚠️ Both models will show the responses all at once. Please stay patient as it may take over 30 seconds."
# Maximum input length
INPUT_CHAR_LEN_LIMIT = int(os.getenv("FASTCHAT_INPUT_CHAR_LEN_LIMIT", 12000))
# Maximum conversation turns
Expand Down
5 changes: 4 additions & 1 deletion fastchat/model/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ def get_model_info(name: str) -> ModelInfo:
["gpt-4"], "GPT-4", "https://openai.com/research/gpt-4", "ChatGPT-4 by OpenAI"
)
register_model_info(
["gpt-4-turbo"], "GPT-4-Turbo", "https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo", "GPT-4-Turbo by OpenAI"
["gpt-4-turbo"],
"GPT-4-Turbo",
"https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo",
"GPT-4-Turbo by OpenAI",
)
register_model_info(
["claude-2"],
Expand Down
37 changes: 27 additions & 10 deletions fastchat/serve/gradio_block_arena_anony.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,14 @@ def share_click(state0, state1, model_selector0, model_selector1, request: gr.Re
}

SAMPLING_BOOST_MODELS = ["openchat-3.5", "gpt-4-turbo", "gpt-3.5-turbo-1106"]
OUTAGE_MODELS = ["claude-1", "claude-2", "claude-instant-1", "zephyr-7b-alpha", "wizardlm-70b", "falcon-180b-chat"]
OUTAGE_MODELS = [
"claude-1",
"claude-2",
"claude-instant-1",
"zephyr-7b-alpha",
"wizardlm-70b",
"falcon-180b-chat",
]


def get_sample_weight(model):
Expand Down Expand Up @@ -233,11 +240,20 @@ def get_battle_pair():
"openchat-3.5": {"gpt-3.5-turbo", "llama-2-70b-chat", "zephyr-7b-beta"},
"qwen-14b-chat": {"vicuna-13b", "llama-2-13b-chat", "llama-2-70b-chat"},
"zephyr-7b-alpha": {"mistral-7b-instruct", "llama-2-13b-chat"},
"zephyr-7b-beta": {"mistral-7b-instruct", "llama-2-13b-chat", "llama-2-7b-chat", "wizardlm-13b"},
"zephyr-7b-beta": {
"mistral-7b-instruct",
"llama-2-13b-chat",
"llama-2-7b-chat",
"wizardlm-13b",
},
"llama-2-70b-chat": {"gpt-3.5-turbo", "vicuna-33b", "claude-instant-1"},
"llama-2-13b-chat": {"mistral-7b-instruct", "vicuna-13b", "llama-2-70b-chat"},
"llama-2-7b-chat": {"mistral-7b-instruct", "vicuna-7b", "llama-2-13b-chat"},
"mistral-7b-instruct": {"llama-2-7b-chat", "llama-2-13b-chat", "llama-2-70b-chat"},
"mistral-7b-instruct": {
"llama-2-7b-chat",
"llama-2-13b-chat",
"llama-2-70b-chat",
},
"vicuna-33b": {"llama-2-70b-chat", "gpt-3.5-turbo", "claude-instant-1"},
"vicuna-13b": {"llama-2-13b-chat", "llama-2-70b-chat"},
"vicuna-7b": {"llama-2-7b-chat", "mistral-7b-instruct", "llama-2-13b-chat"},
Expand All @@ -259,8 +275,7 @@ def get_battle_pair():
if model == chosen_model:
continue
weight = get_sample_weight(model)
if (weight != 0 and chosen_model in targets and
model in targets[chosen_model]):
if weight != 0 and chosen_model in targets and model in targets[chosen_model]:
# boost to 50% chance
weight = total_weight / len(targets[chosen_model])
rival_models.append(model)
Expand Down Expand Up @@ -314,9 +329,7 @@ def add_text(
model_list = [states[i].model_name for i in range(num_sides)]
flagged = moderation_filter(text, model_list)
if flagged:
logger.info(
f"violate moderation (anony). ip: {ip}. text: {text}"
)
logger.info(f"violate moderation (anony). ip: {ip}. text: {text}")
# overwrite the original text
text = MODERATION_MSG

Expand Down Expand Up @@ -541,7 +554,9 @@ def build_side_by_side_ui_anony(models):
flash_buttons, [], btn_list
)
clear_btn.click(
clear_history, None, states + chatbots + model_selectors + [textbox] + btn_list + [slow_warning]
clear_history,
None,
states + chatbots + model_selectors + [textbox] + btn_list + [slow_warning],
)

share_js = """
Expand Down Expand Up @@ -575,7 +590,9 @@ def build_side_by_side_ui_anony(models):
states + [temperature, top_p, max_output_tokens],
states + chatbots + btn_list,
).then(
flash_buttons, [], btn_list,
flash_buttons,
[],
btn_list,
)

send_btn.click(
Expand Down
4 changes: 1 addition & 3 deletions fastchat/serve/gradio_block_arena_named.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,7 @@ def add_text(
model_list = [states[i].model_name for i in range(num_sides)]
flagged = moderation_filter(text, model_list)
if flagged:
logger.info(
f"violate moderation (named). ip: {ip}. text: {text}"
)
logger.info(f"violate moderation (named). ip: {ip}. text: {text}")
# overwrite the original text
text = MODERATION_MSG

Expand Down
13 changes: 6 additions & 7 deletions fastchat/serve/gradio_web_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def clear_history(request: gr.Request):

def get_ip(request: gr.Request):
if "cf-connecting-ip" in request.headers:
ip = request.headers['cf-connecting-ip']
ip = request.headers["cf-connecting-ip"]
else:
ip = request.client.host
return ip
Expand All @@ -259,9 +259,7 @@ def add_text(state, model_selector, text, request: gr.Request):

flagged = moderation_filter(text, [state.model_name])
if flagged:
logger.info(
f"violate moderation. ip: {ip}. text: {text}"
)
logger.info(f"violate moderation. ip: {ip}. text: {text}")
# overwrite the original text
text = MODERATION_MSG

Expand Down Expand Up @@ -559,8 +557,8 @@ def get_model_description_md(models):
ct += 1
return model_description_md

def build_about():

def build_about():
about_markdown = f"""
# About Us
Chatbot Arena is an open-source research project developed by members from [LMSYS](https://lmsys.org/about/) and UC Berkeley [SkyLab](https://sky.cs.berkeley.edu/). Our mission is to build an open crowdsourced platform to collect human feedback and evaluate LLMs under real-world scenarios. We open-source our code at [GitHub](https://github.com/lm-sys/FastChat) and release chat and human feedback datasets [here](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md). We invite everyone to join us in this journey!
Expand Down Expand Up @@ -592,10 +590,11 @@ def build_about():
</div>
"""

#state = gr.State()
# state = gr.State()
gr.Markdown(about_markdown, elem_id="about_markdown")

#return [state]
# return [state]


def build_single_model_ui(models, add_promotion_links=False):
promotion = (
Expand Down
8 changes: 6 additions & 2 deletions fastchat/serve/gradio_web_server_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ def load_demo(url_params, request: gr.Request):
if args.anony_only_for_proprietary_model:
# Only enable these models in anony battles.
if args.add_chatgpt:
models_anony += ["gpt-4", "gpt-3.5-turbo", "gpt-4-turbo", "gpt-3.5-turbo-1106"]
models_anony += [
"gpt-4",
"gpt-3.5-turbo",
"gpt-4-turbo",
"gpt-3.5-turbo-1106",
]
if args.add_claude:
models_anony += ["claude-2", "claude-1", "claude-instant-1"]
if args.add_palm:
Expand Down Expand Up @@ -127,7 +132,6 @@ def build_demo(models, elo_results_file, leaderboard_table_file):
with gr.Tab("About Us", id=4):
about = build_about()


url_params = gr.JSON(visible=False)

if args.model_list_mode not in ["once", "reload"]:
Expand Down
4 changes: 2 additions & 2 deletions fastchat/serve/monitor/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def make_leaderboard_md(elo_results):
- [MT-Bench](https://arxiv.org/abs/2306.05685) - a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
- [MMLU](https://arxiv.org/abs/2009.03300) (5-shot) - a test to measure a model's multitask accuracy on 57 tasks.
💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are mostly computed by [InstructEval](https://github.com/declare-lab/instruct-eval). Higher values are better for all benchmarks. Empty cells mean not available. Last updated: October, 2023.
💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are mostly computed by [InstructEval](https://github.com/declare-lab/instruct-eval). Higher values are better for all benchmarks. Empty cells mean not available. Last updated: November, 2023.
"""
return leaderboard_md

Expand Down Expand Up @@ -218,7 +218,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file):
else:
pass


leader_component_values[:] = [md, p1, p2, p3, p4]

"""
Expand Down Expand Up @@ -253,6 +252,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file):
# return [md_1, plot_1, plot_2, plot_3, plot_4]
return [md_1]


def build_demo(elo_results_file, leaderboard_table_file):
from fastchat.serve.gradio_web_server import block_css

Expand Down
1 change: 1 addition & 0 deletions fastchat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def oai_moderation(text):
Check whether the text violates OpenAI moderation API.
"""
import openai

openai.api_base = "https://api.openai.com/v1"
openai.api_key = os.environ["OPENAI_API_KEY"]

Expand Down

0 comments on commit 2c1b9a2

Please sign in to comment.