Skip to content

Commit

Permalink
add total number of runs to logs & small fixes (#72)
Browse files Browse the repository at this point in the history
  • Loading branch information
janvanlooyml6 authored Jan 18, 2024
1 parent 29bdc30 commit ff5aa21
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 38 deletions.
31 changes: 17 additions & 14 deletions src/parameter_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**Check if GPU is available**"
"Check if **GPU** is available"
]
},
{
Expand Down Expand Up @@ -165,7 +165,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install -q -r ../requirements.txt --disable-pip-version-check && echo \"Success\""
"%pip install -q -r ../requirements.txt --disable-pip-version-check && echo \"Success\""
]
},
{
Expand Down Expand Up @@ -320,11 +320,11 @@
"outputs": [],
"source": [
"searchable_index_params = {\n",
" 'chunk_size' : [192, 256, 320],\n",
" 'chunk_overlap' : [64, 128, 192],\n",
" 'chunk_size' : [128, 256, 384],\n",
" 'chunk_overlap' : [16, 64, 128],\n",
"}\n",
"searchable_shared_params = {\n",
" 'embed_model' : [(\"huggingface\",\"all-MiniLM-L6-v2\")]\n",
" 'embed_model' : [(\"huggingface\",\"all-MiniLM-L6-v2\")] # add more as tuples: ,(\"huggingface\", \"BAAI/bge-base-en-v1.5\")\n",
"}\n",
"searchable_eval_params = {\n",
" 'retrieval_top_k' : [2, 4, 8]\n",
Expand Down Expand Up @@ -355,7 +355,7 @@
"shared_args = {\n",
" \"base_path\" : \"./data\", # where data goes\n",
" \"embed_api_key\" : {},\n",
" \"weaviate_url\" : f\"http://{get_host_ip()}:8081\" # IP address\n",
" \"weaviate_url\" : f\"http://{get_host_ip()}:8081\"\n",
"}\n",
"index_args = {\n",
" \"n_rows_to_load\" : 1000,\n",
Expand All @@ -366,8 +366,8 @@
" \"llm_module_name\": \"langchain.chat_models\",\n",
" \"llm_class_name\": \"ChatOpenAI\",\n",
" \"llm_kwargs\": {\n",
" \"openai_api_key\": \"\" , # TODO: Update with your key or use a different model\n",
" \"model_name\" : \"gpt-3.5-turbo\"\n",
" \"openai_api_key\": \"\" , # TODO: update with your key or use a different model\n",
" \"model_name\" : \"gpt-3.5-turbo\" # choose model, e.g. \"gpt-4\", \"gpt-3.5-turbo\"\n",
" },\n",
" \"evaluation_metrics\" : [\"context_precision\", \"context_relevancy\"]\n",
"}\n",
Expand Down Expand Up @@ -400,7 +400,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"from utils import ParameterSearch\n",
Expand All @@ -417,6 +419,7 @@
" search_method = search_method,\n",
" target_metric = target_metric,\n",
" evaluation_set_path=evaluation_set_path,\n",
" debug=False,\n",
")\n",
"\n",
"results = mysearch.run(weaviate_client)"
Expand Down Expand Up @@ -465,7 +468,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install -q \"plotly\" --disable-pip-version-check && echo \"Plotly installed successfully\""
"%pip install -q \"plotly\" --disable-pip-version-check && echo \"Plotly installed successfully\""
]
},
{
Expand All @@ -483,8 +486,8 @@
"source": [
"from utils import add_embed_model_numerical_column, show_legend_embed_models\n",
"\n",
"parameter_search_results = add_embed_model_numerical_column(parameter_search_results)\n",
"show_legend_embed_models(parameter_search_results)"
"results = add_embed_model_numerical_column(results)\n",
"show_legend_embed_models(results)"
]
},
{
Expand All @@ -503,7 +506,7 @@
"import plotly.express as px\n",
"\n",
"dimensions = ['chunk_size', 'chunk_overlap', 'embed_model_numerical', 'retrieval_top_k', 'context_precision']\n",
"fig = px.parallel_coordinates(parameter_search_results, color=\"context_precision\",\n",
"fig = px.parallel_coordinates(results, color=\"context_precision\",\n",
" dimensions=dimensions,\n",
" color_continuous_scale=px.colors.sequential.Bluered)\n",
"fig.show()"
Expand Down Expand Up @@ -614,7 +617,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.9.13"
}
},
"nbformat": 4,
Expand Down
40 changes: 16 additions & 24 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,12 @@ def run(self, weaviate_client: weaviate.Client):
while True:
configs = self.create_configs(run_count)

# stop running when out of configs
if configs is None:
break

# create configs
indexing_config, evaluation_config = configs
indexing_config, evaluation_config, n_runs = configs

# create pipeline objects
indexing_pipeline, evaluation_pipeline = self.create_pipelines(
Expand All @@ -186,18 +187,24 @@ def run(self, weaviate_client: weaviate.Client):
)

# run indexing pipeline
self.run_indexing_pipeline(run_count, indexing_config, indexing_pipeline)
logger.info(
f"Starting indexing pipeline of RUN {run_count}/{n_runs} with {indexing_config}",
)
self.runner.run(indexing_pipeline)

check_weaviate_class_exists(
weaviate_client,
indexing_config["weaviate_class"],
)

# run evaluation pipeline
self.run_evaluation_pipeline(
run_count,
evaluation_config,
evaluation_pipeline,
logger.info(
f"Starting evaluation pipeline of run #{run_count} / \
{n_runs} with {evaluation_config}",
)
self.runner.run(
input=evaluation_pipeline,
extra_volumes=self.extra_volumes,
)

# read metrics from pipeline output
Expand All @@ -222,6 +229,7 @@ def create_configs(self, run_count: int):
if self.search_method == "grid_search":
# all possible combinations of parameters
all_combinations = list(cartesian_product(self.searchable_params))
n_runs = len(all_combinations)

# when all combinations have been tried, stop searching
if run_count > len(all_combinations) - 1:
Expand Down Expand Up @@ -255,6 +263,7 @@ def create_configs(self, run_count: int):
variations_to_try = [
{keys_to_try[i]: values_to_try[i]} for i in range(len(keys_to_try))
]
n_runs = len(variations_to_try) + 1

# if there are no variations to try, just schedule one run
if len(variations_to_try) == 0:
Expand Down Expand Up @@ -315,7 +324,7 @@ def create_configs(self, run_count: int):
"embed_model"
] = indexing_config["embed_model"][1]

return indexing_config, evaluation_config
return indexing_config, evaluation_config, n_runs

def create_pipelines(self, indexing_config, evaluation_config):
# create indexing pipeline
Expand Down Expand Up @@ -352,20 +361,3 @@ def create_pipelines(self, indexing_config, evaluation_config):
logger.info({**self.shared_args, **self.eval_args, **evaluation_config})

return indexing_pipeline, evaluation_pipeline

def run_indexing_pipeline(self, run_count, indexing_config, indexing_pipeline):
    """Log the start of an indexing run, then execute the indexing pipeline.

    Args:
        run_count: identifier of the current run; only used in the log line.
        indexing_config: configuration of this run; only used in the log line.
        indexing_pipeline: pipeline object handed to ``self.runner.run``.
    """
    # Build the message first so the logging call stays on a single line.
    start_message = (
        f"Starting indexing pipeline of run #{run_count} with {indexing_config}"
    )
    logger.info(start_message)
    self.runner.run(indexing_pipeline)

def run_evaluation_pipeline(self, run_count, evaluation_config, evaluation_pipeline):
    """Log the start of an evaluation run, then execute the evaluation pipeline.

    Args:
        run_count: identifier of the current run; only used in the log line.
        evaluation_config: configuration of this run; only used in the log line.
        evaluation_pipeline: pipeline object passed to ``self.runner.run`` as
            ``input``, together with ``self.extra_volumes``.
    """
    # Build the message first so the logging call stays on a single line.
    start_message = (
        f"Starting evaluation pipeline of run #{run_count} with {evaluation_config}"
    )
    logger.info(start_message)
    self.runner.run(
        input=evaluation_pipeline,
        extra_volumes=self.extra_volumes,
    )

0 comments on commit ff5aa21

Please sign in to comment.