Commit

notebooks
semio committed Jan 22, 2025
1 parent ed9b123 commit 35e976f
Showing 3 changed files with 109 additions and 20 deletions.
11 changes: 5 additions & 6 deletions experiments/20250109/summarize.py
@@ -8,14 +8,13 @@
 
 # File paths - edit these to point to your data files
 # REQUESTS_FILE = "./finished/mc044-question_prompts.jsonl"
-RESPONSES_FILE = "./finished/mc045-question_response.jsonl"
-EVAL1_FILE = "./finished/mc045-question_response-eval-prompts-gpt-4o-response.jsonl"
+RESPONSES_FILE = "./finished/mc043-question_response.jsonl"
+EVAL1_FILE = "./finished/mc043-question_response-eval-prompts-gpt-4o-response.jsonl"
 EVAL2_FILE = (
-    "./finished/mc045-question_response-eval-prompts-gemini-1-5-pro-002-response.jsonl"
+    "./finished/mc043-question_response-eval-prompts-gemini-1-5-pro-002-response.jsonl"
 )
-EVAL3_FILE = "./finished/"
-"mc045-question_response-eval-prompts-claude-3-5-sonnet-20241022-response.jsonl"
-OUTPUT_FILE = "./finished/mc045_output.parquet"
+EVAL3_FILE = "./finished/mc043-question_response-eval-prompts-claude-3-5-sonnet-20241022-response.jsonl"  # noqa
+OUTPUT_FILE = "./mc043_output.parquet"
 
 
 def load_jsonl(file_path: str) -> list[dict]:
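
The body of load_jsonl is collapsed in the diff above. For orientation, a minimal sketch of what a JSONL loader with this signature typically looks like (the body below is an assumption, not the committed implementation):

import json


def load_jsonl(file_path: str) -> list[dict]:
    # Parse one JSON object per line, skipping blank lines.
    with open(file_path) as f:
        return [json.loads(line) for line in f if line.strip()]
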
32 changes: 18 additions & 14 deletions experiments/20250109/upload_to_ai_eval_sheet.py
@@ -2,7 +2,6 @@
 Notebook to upload the results to ai eval sheets.
 """
 
-
 from lib.config import read_config
 from lib.pilot.utils import read_ai_eval_spreadsheet
 

@@ -53,23 +52,28 @@
 )
 
 # Select and rename columns for upload with correct order
-upload_df = res.select([
-    pl.col("question_id"),
-    pl.col("language"),
-    pl.col("prompt_variation_id"),
-    pl.col("model_config_id").alias("model_configuration_id"),
-    pl.col("last_evaluation_datetime"),
-    pl.col("percent_correct"),
-    pl.col("percent_wrong"),
-    pl.col("percent_very_wrong"),
-    pl.col("percent_eval_failed"),
-    pl.col("round").alias("rounds"),
-    pl.col("result")
-])
+upload_df = res.select(
+    [
+        pl.col("question_id"),
+        pl.col("language"),
+        pl.col("prompt_variation_id"),
+        pl.col("model_config_id").alias("model_configuration_id"),
+        pl.col("last_evaluation_datetime"),
+        pl.col("percent_correct"),
+        pl.col("percent_wrong"),
+        pl.col("percent_very_wrong"),
+        pl.col("percent_eval_failed"),
+        pl.col("round").alias("rounds"),
+        pl.col("result"),
+    ]
+)

 # Print shape and preview
 print(upload_df.shape)
 print(upload_df.head())
 
+backup.columns
+
+
 # upload it
 ai_eval_sheet.evaluation_results.replace_data(upload_df.to_pandas())
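
For reference, the select/alias pattern above as a self-contained polars example on toy data (the frame contents are hypothetical):

import polars as pl

res = pl.DataFrame({"model_config_id": ["mc043"], "round": [1]})

# select() fixes the output column order and renames columns via alias().
upload_df = res.select(
    [
        pl.col("model_config_id").alias("model_configuration_id"),
        pl.col("round").alias("rounds"),
    ]
)
print(upload_df)  # columns: model_configuration_id, rounds
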
86 changes: 86 additions & 0 deletions experiments/create_master_output.py
@@ -0,0 +1,86 @@
"""
Notebook to upload the results to ai eval sheets.
"""

from glob import glob
import polars as pl
from typing import List


def create_master_output(output_folder: str, language: str = "en-US") -> pl.DataFrame:
    """Create a master output DataFrame from parquet files in a folder.

    Args:
        output_folder: Folder containing parquet files with results
        language: Language code to add to results (default: "en-US")

    Returns:
        DataFrame with standardized columns for upload
    """
    # Define mapping for correctness values
    result_map = {-1: "n/a", 0: "fail", 1: "very_wrong", 2: "wrong", 3: "correct"}

    # Read and combine all parquet files
    res_list = [pl.read_parquet(x) for x in glob(f"{output_folder}/*parquet")]
    res = pl.concat(res_list)

    # Add metadata columns and map correctness
    res = res.with_columns(
        pl.lit(language).alias("language"),
        pl.lit(output_folder.split("/")[-1]).alias("last_evaluation_datetime"),
        pl.col("final_correctness").replace_strict(result_map).alias("result"),
    )

    # Select and rename columns for upload with correct order
    return res.select(
        [
            pl.col("question_id"),
            pl.col("language"),
            pl.col("prompt_variation_id"),
            pl.col("model_config_id").alias("model_configuration_id"),
            pl.col("last_evaluation_datetime"),
            pl.col("result"),
        ]
    )
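
# Note: replace_strict (unlike replace) raises if final_correctness contains a
# code that is missing from result_map, so unexpected values fail loudly.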


final_df1 = create_master_output("20250109")
final_df1

final_df1.write_csv("master_output_20250109.csv")

final_df2 = create_master_output("./20250120")
final_df2

final_df2.write_csv("master_output_20250120.csv")


def create_combined_raw_output(output_folders: List[str]) -> pl.DataFrame:
    """Create a combined DataFrame from raw parquet files in multiple folders.

    Args:
        output_folders: List of folders containing raw parquet files

    Returns:
        Combined DataFrame with all raw data
    """
    # Find all parquet files in all folders
    parquet_files = []
    for folder in output_folders:
        parquet_files.extend(glob(f"{folder}/*parquet"))

    # Read and combine all parquet files
    dfs = [pl.read_parquet(file) for file in parquet_files]
    return pl.concat(dfs)
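
# Note: pl.concat defaults to how="vertical", which requires all folders'
# parquet files to share an identical schema; how="diagonal" would instead
# union mismatched columns and fill the gaps with nulls.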


raw_outputs = create_combined_raw_output(
    ["./20240921-20241205/", "./20250109/", "./20250120/"]
)

raw_outputs

raw_outputs["question_id"].unique()


raw_outputs.write_parquet("./latest_model_responses.parquet")
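
As a usage sketch, one way to sanity-check the combined master output; the column names follow the select above, while the data and the aggregation are illustrative only:

import polars as pl

# Hypothetical master-output rows following the schema selected above.
final_df = pl.DataFrame(
    {
        "model_configuration_id": ["mc043", "mc043", "mc044"],
        "result": ["correct", "wrong", "correct"],
    }
)

# Count results per model configuration as a quick sanity check.
print(final_df.group_by("model_configuration_id", "result").len())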
