Skip to content

Commit

Permalink
Bump to 0.9.0 (#66)
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilippeMoussalli authored Jan 16, 2024
1 parent e4c02eb commit b910710
Show file tree
Hide file tree
Showing 10 changed files with 135 additions and 76 deletions.
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
fondant==0.8.0
notebook==7.0.6
fondant==0.9.0
notebook==7.0.6
weaviate-client==3.25.3
2 changes: 1 addition & 1 deletion src/components/aggregate_eval_results/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
fondant[component]==0.8.dev6
fondant[component]==0.9.0
2 changes: 1 addition & 1 deletion src/components/text_cleaning/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
fondant[component]==0.8.dev4
fondant[component]==0.9.0
65 changes: 36 additions & 29 deletions src/evaluation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"metadata": {},
"source": [
"## Set up environment"
]
Expand Down Expand Up @@ -177,9 +175,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"metadata": {},
"source": [
"## Spin up the Weaviate vector store"
]
Expand Down Expand Up @@ -217,7 +213,7 @@
"metadata": {},
"outputs": [],
"source": [
"!docker compose -f weaviate/docker-compose.yaml up --detach"
"!docker compose -f weaviate_service/docker-compose.yaml up --detach"
]
},
{
Expand All @@ -227,15 +223,6 @@
"Make sure you have **Weaviate client v3**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install -q \"weaviate-client==3.*\" --disable-pip-version-check && echo \"Weaviate client installed successfully\""
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -263,7 +250,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Indexing pipeline"
"#### Indexing pipeline"
]
},
{
Expand Down Expand Up @@ -329,8 +316,7 @@
"# Parameters for the indexing pipeline\n",
"indexing_args = {\n",
" \"n_rows_to_load\": 1000,\n",
" \"chunk_size\": 1024,\n",
" \"chunk_overlap\": 8,\n",
" \"chunk_args\": {\"chunk_size\": 512, \"chunk_overlap\": 32}\n",
"}\n",
"\n",
"# Parameters for the GPU resources\n",
Expand Down Expand Up @@ -421,15 +407,14 @@
"import os\n",
"import pipeline_eval\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"sk-***REDACTED***\"  # NOTE(review): a real-looking API key was exposed here — revoke it and load keys from the environment instead of hardcoding\n",
"\n",
"evaluation_args = {\n",
" \"retrieval_top_k\": 2,\n",
" \"evaluation_module\": \"langchain.chat_models\",\n",
" \"evaluation_llm\": \"ChatOpenAI\",\n",
" \"evaluation_llm_kwargs\": {\n",
" \"openai_api_key\": os.environ[\"OPENAI_API_KEY\"], # TODO: Update with your key or use a different model\n",
" \"model_name\" : \"gpt-3.5-turbo\"\n",
" \"llm_module_name\": \"langchain.chat_models\",\n",
" \"llm_class_name\": \"ChatOpenAI\",\n",
" \"llm_kwargs\": {\n",
" \"openai_api_key\":\"\" , # TODO: Update with your key or use a different model\n",
" \"model_name\" : \"gpt-3.5-turbo\"\n",
" },\n",
" \"evaluation_metrics\": [\"context_precision\", \"context_relevancy\"]\n",
"}\n",
Expand All @@ -450,9 +435,13 @@
"metadata": {},
"outputs": [],
"source": [
"runner = DockerRunner()\n",
"extra_volumes = [str(os.path.join(os.path.abspath('.'), \"evaluation_datasets\")) + \":/evaldata\"]\n",
"runner.run(evaluation_pipeline, extra_volumes=extra_volumes)"
"if utils.check_weaviate_class_exists(\n",
" local_weaviate_client,\n",
" shared_args[\"weaviate_class\"]\n",
"): \n",
" runner = DockerRunner()\n",
" extra_volumes = [str(os.path.join(os.path.abspath('.'), \"evaluation_datasets\")) + \":/evaldata\"]\n",
" runner.run(evaluation_pipeline, extra_volumes=extra_volumes)"
]
},
{
Expand Down Expand Up @@ -507,6 +496,24 @@
"run_explorer_app(base_path=BASE_PATH)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To stop the Explorer, run the cell below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from fondant.explore import stop_explorer_app\n",
"\n",
"stop_explorer_app()"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -565,7 +572,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down
55 changes: 31 additions & 24 deletions src/parameter_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"metadata": {},
"source": [
"## Set up environment"
]
Expand Down Expand Up @@ -172,9 +170,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"metadata": {},
"source": [
"## Spin up the Weaviate vector store"
]
Expand Down Expand Up @@ -219,16 +215,7 @@
"metadata": {},
"outputs": [],
"source": [
"!docker compose -f weaviate/docker-compose.yaml up --detach"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install -q \"weaviate-client==3.*\" --disable-pip-version-check && echo \"Weaviate client installed successfully\""
"!docker compose -f weaviate_service/docker-compose.yaml up --detach"
]
},
{
Expand Down Expand Up @@ -337,7 +324,7 @@
" 'chunk_overlap' : [64, 128, 192],\n",
"}\n",
"searchable_shared_params = {\n",
" 'embed_model' : [(\"huggingface\",\"all-MiniLM-L6-v2\"), (\"huggingface\", \"BAAI/bge-base-en-v1.5\")]\n",
" 'embed_model' : [(\"huggingface\",\"all-MiniLM-L6-v2\")]\n",
"}\n",
"searchable_eval_params = {\n",
" 'retrieval_top_k' : [2, 4, 8]\n",
Expand All @@ -363,7 +350,6 @@
"source": [
"from utils import get_host_ip\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"sk-***REDACTED***\"  # NOTE(review): a real-looking API key was exposed here — revoke it and load keys from the environment instead of hardcoding\n",
"\n",
"# configurable parameters\n",
"shared_args = {\n",
Expand All @@ -377,10 +363,12 @@
"eval_args = {\n",
" \"evaluation_set_filename\" : \"wikitext_1000_q.csv\",\n",
" \"evaluation_set_separator\" : \";\",\n",
" \"evaluation_module\": \"langchain.chat_models\",\n",
" \"evaluation_llm\": \"ChatOpenAI\",\n",
" \"evaluation_llm_kwargs\": {\"openai_api_key\": os.environ[\"OPENAI_API_KEY\"], #TODO Specify your key if you're using OpenAI\n",
" \"model_name\" : \"gpt-3.5-turbo\"}, # e.g. \"gpt-4\" or \"gpt-3.5-turbo\"\n",
" \"llm_module_name\": \"langchain.chat_models\",\n",
" \"llm_class_name\": \"ChatOpenAI\",\n",
" \"llm_kwargs\": {\n",
" \"openai_api_key\": \"\" , # TODO: Update with your key or use a different model\n",
" \"model_name\" : \"gpt-3.5-turbo\"\n",
" },\n",
" \"evaluation_metrics\" : [\"context_precision\", \"context_relevancy\"]\n",
"}\n",
"\n",
Expand Down Expand Up @@ -416,6 +404,7 @@
"outputs": [],
"source": [
"from utils import ParameterSearch\n",
"from utils import check_weaviate_class_exists\n",
"\n",
"mysearch = ParameterSearch(\n",
" searchable_index_params = searchable_index_params,\n",
Expand All @@ -430,7 +419,7 @@
" evaluation_set_path=evaluation_set_path,\n",
")\n",
"\n",
"results = mysearch.run()"
"results = mysearch.run(weaviate_client)"
]
},
{
Expand Down Expand Up @@ -549,6 +538,24 @@
"run_explorer_app(base_path=shared_args[\"base_path\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To stop the Explorer, run the cell below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from fondant.explore import stop_explorer_app\n",
"\n",
"stop_explorer_app()"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -607,7 +614,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down
33 changes: 26 additions & 7 deletions src/pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@
"from pathlib import Path\n",
"from fondant.pipeline import Pipeline, Resources\n",
"\n",
"BASE_PATH = \"./data-dir\"\n",
"BASE_PATH = \"./data\"\n",
"Path(BASE_PATH).mkdir(parents=True, exist_ok=True)\n",
"\n",
"pipeline = Pipeline(\n",
Expand Down Expand Up @@ -187,8 +187,7 @@
"chunks = text.apply(\n",
" \"chunk_text\",\n",
" arguments={\n",
" \"chunk_size\": 512,\n",
" \"chunk_overlap\": 32,\n",
" \"chunk_args\": {\"chunk_size\": 512, \"chunk_overlap\": 32}\n",
" }\n",
")\n",
"\n",
Expand Down Expand Up @@ -252,7 +251,7 @@
"metadata": {},
"outputs": [],
"source": [
"!docker compose -f weaviate/docker-compose.yaml up --detach --quiet-pull"
"!docker compose -f weaviate_service/docker-compose.yaml up --detach --quiet-pull"
]
},
{
Expand Down Expand Up @@ -307,7 +306,18 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"To stop the Explorer and continue the notebook, press the stop button at the top of the notebook."
"To stop the Explorer, run the cell below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from fondant.explore import stop_explorer_app\n",
"\n",
"stop_explorer_app()"
]
},
{
Expand Down Expand Up @@ -435,7 +445,7 @@
"outputs": [],
"source": [
"%%writefile components/text_cleaning/requirements.txt\n",
"fondant[component]==0.8.dev4"
"fondant[component]==0.9.0"
]
},
{
Expand Down Expand Up @@ -555,6 +565,15 @@
"!docker compose -f weaviate_service/docker-compose.yaml down"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stop_explorer_app()"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -581,7 +600,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down
12 changes: 6 additions & 6 deletions src/pipeline_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ def create_pipeline(
embed_model: str = "all-MiniLM-L6-v2",
embed_api_key: dict = {},
retrieval_top_k: int = 3,
evaluation_module: str = "langchain.llms",
evaluation_llm: str = "OpenAI",
evaluation_llm_kwargs: dict = {"model_name": "gpt-3.5-turbo"},
llm_module_name: str = "langchain.chat_models",
llm_class_name: str = "ChatOpenAI",
llm_kwargs: dict = {"model_name": "gpt-3.5-turbo"},
evaluation_metrics: list = ["context_precision", "context_relevancy"],
number_of_accelerators=None,
accelerator_name=None,
Expand Down Expand Up @@ -72,9 +72,9 @@ def create_pipeline(
retriever_eval = retrieve_chunks.apply(
"evaluate_ragas",
arguments={
"module": evaluation_module,
"llm_name": evaluation_llm,
"llm_kwargs": evaluation_llm_kwargs,
"llm_module_name": llm_module_name,
"llm_class_name": llm_class_name,
"llm_kwargs": llm_kwargs,
},
produces={metric: pa.float32() for metric in evaluation_metrics},
)
Expand Down
Loading

0 comments on commit b910710

Please sign in to comment.