ml6team · PhilippeMoussalli · Jan 16, 2024 · Jan 12, 2024 · Jan 15, 2024 · Jan 15, 2024
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,3 @@
-fondant==0.8.0
-notebook==7.0.6
+fondant==0.9.0
+notebook==7.0.6
+weaviate-client==3.25.3
diff --git a/src/components/aggregate_eval_results/requirements.txt b/src/components/aggregate_eval_results/requirements.txt
@@ -1 +1 @@
-fondant[component]==0.8.dev6
+fondant[component]==0.9.0
diff --git a/src/components/text_cleaning/requirements.txt b/src/components/text_cleaning/requirements.txt
@@ -1 +1 @@
-fondant[component]==0.8.dev4
+fondant[component]==0.9.0
diff --git a/src/evaluation.ipynb b/src/evaluation.ipynb
@@ -65,9 +65,7 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {
-    "jp-MarkdownHeadingCollapsed": true
-   },
+   "metadata": {},
    "source": [
     "## Set up environment"
    ]
@@ -177,9 +175,7 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {
-    "jp-MarkdownHeadingCollapsed": true
-   },
+   "metadata": {},
    "source": [
     "## Spin up the Weaviate vector store"
    ]
@@ -217,7 +213,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!docker compose -f weaviate/docker-compose.yaml up --detach"
+    "!docker compose -f weaviate_service/docker-compose.yaml up --detach"
    ]
   },
   {
@@ -227,15 +223,6 @@
     "Make sure you have **Weaviate client v3**"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install -q \"weaviate-client==3.*\" --disable-pip-version-check && echo \"Weaviate client installed successfully\""
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -263,7 +250,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Indexing pipeline"
+    "#### Indexing pipeline"
    ]
   },
   {
@@ -329,8 +316,7 @@
     "# Parameters for the indexing pipeline\n",
     "indexing_args = {\n",
     "    \"n_rows_to_load\": 1000,\n",
-    "    \"chunk_size\": 1024,\n",
-    "    \"chunk_overlap\": 8,\n",
+    "    \"chunk_args\": {\"chunk_size\": 512, \"chunk_overlap\": 32}\n",
     "}\n",
     "\n",
     "# Parameters for the GPU resources\n",
@@ -421,15 +407,14 @@
     "import os\n",
     "import pipeline_eval\n",
     "\n",
-    "os.environ[\"OPENAI_API_KEY\"] = \"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\"\n",
     "\n",
     "evaluation_args = {\n",
     "    \"retrieval_top_k\": 2,\n",
-    "    \"evaluation_module\": \"langchain.chat_models\",\n",
-    "    \"evaluation_llm\": \"ChatOpenAI\",\n",
-    "    \"evaluation_llm_kwargs\": {\n",
-    "                              \"openai_api_key\": os.environ[\"OPENAI_API_KEY\"],   # TODO: Update with your key or use a different model\n",
-    "                              \"model_name\" : \"gpt-3.5-turbo\"\n",
+    "    \"llm_module_name\": \"langchain.chat_models\",\n",
+    "    \"llm_class_name\": \"ChatOpenAI\",\n",
+    "    \"llm_kwargs\": {\n",
+    "      \"openai_api_key\":\"\" ,   # TODO: Update with your key or use a different model\n",
+    "      \"model_name\" : \"gpt-3.5-turbo\"\n",
     "    },\n",
     "    \"evaluation_metrics\": [\"context_precision\", \"context_relevancy\"]\n",
     "}\n",
@@ -450,9 +435,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "runner = DockerRunner()\n",
-    "extra_volumes = [str(os.path.join(os.path.abspath('.'), \"evaluation_datasets\")) + \":/evaldata\"]\n",
-    "runner.run(evaluation_pipeline, extra_volumes=extra_volumes)"
+    "if utils.check_weaviate_class_exists(\n",
+    "    local_weaviate_client,\n",
+    "    shared_args[\"weaviate_class\"]\n",
+    "): \n",
+    "    runner = DockerRunner()\n",
+    "    extra_volumes = [str(os.path.join(os.path.abspath('.'), \"evaluation_datasets\")) + \":/evaldata\"]\n",
+    "    runner.run(evaluation_pipeline, extra_volumes=extra_volumes)"
    ]
   },
   {
@@ -507,6 +496,24 @@
     "run_explorer_app(base_path=BASE_PATH)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To stop the Explorer, run the cell below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fondant.explore import stop_explorer_app\n",
+    "\n",
+    "stop_explorer_app()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -565,7 +572,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,

diff --git a/src/parameter_search.ipynb b/src/parameter_search.ipynb
@@ -59,9 +59,7 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {
-    "jp-MarkdownHeadingCollapsed": true
-   },
+   "metadata": {},
    "source": [
     "## Set up environment"
    ]
@@ -172,9 +170,7 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {
-    "jp-MarkdownHeadingCollapsed": true
-   },
+   "metadata": {},
    "source": [
     "## Spin up the Weaviate vector store"
    ]
@@ -219,16 +215,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!docker compose -f weaviate/docker-compose.yaml up --detach"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install -q \"weaviate-client==3.*\" --disable-pip-version-check && echo \"Weaviate client installed successfully\""
+    "!docker compose -f weaviate_service/docker-compose.yaml up --detach"
    ]
   },
   {
@@ -337,7 +324,7 @@
     "    'chunk_overlap' : [64, 128, 192],\n",
     "}\n",
     "searchable_shared_params = {\n",
-    "    'embed_model' : [(\"huggingface\",\"all-MiniLM-L6-v2\"), (\"huggingface\", \"BAAI/bge-base-en-v1.5\")]\n",
+    "    'embed_model' : [(\"huggingface\",\"all-MiniLM-L6-v2\")]\n",
     "}\n",
     "searchable_eval_params = {\n",
     "    'retrieval_top_k' : [2, 4, 8]\n",
@@ -363,7 +350,6 @@
    "source": [
     "from utils import get_host_ip\n",
     "\n",
-    "os.environ[\"OPENAI_API_KEY\"] = \"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\"\n",
     "\n",
     "# configurable parameters\n",
     "shared_args = {\n",
@@ -377,10 +363,12 @@
     "eval_args = {\n",
     "    \"evaluation_set_filename\" : \"wikitext_1000_q.csv\",\n",
     "    \"evaluation_set_separator\" : \";\",\n",
-    "    \"evaluation_module\": \"langchain.chat_models\",\n",
-    "    \"evaluation_llm\": \"ChatOpenAI\",\n",
-    "    \"evaluation_llm_kwargs\": {\"openai_api_key\": os.environ[\"OPENAI_API_KEY\"], #TODO Specify your key if you're using OpenAI\n",
-    "                              \"model_name\" : \"gpt-3.5-turbo\"}, # e.g. \"gpt-4\" or \"gpt-3.5-turbo\"\n",
+    "    \"llm_module_name\": \"langchain.chat_models\",\n",
+    "    \"llm_class_name\": \"ChatOpenAI\",\n",
+    "    \"llm_kwargs\": {\n",
+    "      \"openai_api_key\": \"\" ,   # TODO: Update with your key or use a different model\n",
+    "      \"model_name\" : \"gpt-3.5-turbo\"\n",
+    "    },\n",
     "    \"evaluation_metrics\" : [\"context_precision\", \"context_relevancy\"]\n",
     "}\n",
     "\n",
@@ -416,6 +404,7 @@
    "outputs": [],
    "source": [
     "from utils import ParameterSearch\n",
+    "from utils import check_weaviate_class_exists\n",
     "\n",
     "mysearch = ParameterSearch(\n",
     "    searchable_index_params = searchable_index_params,\n",
@@ -430,7 +419,7 @@
     "    evaluation_set_path=evaluation_set_path,\n",
     ")\n",
     "\n",
-    "results = mysearch.run()"
+    "results = mysearch.run(weaviate_client)"
    ]
   },
   {
@@ -549,6 +538,24 @@
     "run_explorer_app(base_path=shared_args[\"base_path\"])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To stop the Explorer, run the cell below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fondant.explore import stop_explorer_app\n",
+    "\n",
+    "stop_explorer_app()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -607,7 +614,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,

diff --git a/src/pipeline.ipynb b/src/pipeline.ipynb
@@ -130,7 +130,7 @@
     "from pathlib import Path\n",
     "from fondant.pipeline import Pipeline, Resources\n",
     "\n",
-    "BASE_PATH = \"./data-dir\"\n",
+    "BASE_PATH = \"./data\"\n",
     "Path(BASE_PATH).mkdir(parents=True, exist_ok=True)\n",
     "\n",
     "pipeline = Pipeline(\n",
@@ -187,8 +187,7 @@
     "chunks = text.apply(\n",
     "    \"chunk_text\",\n",
     "    arguments={\n",
-    "        \"chunk_size\": 512,\n",
-    "        \"chunk_overlap\": 32,\n",
+    "        \"chunk_args\": {\"chunk_size\": 512, \"chunk_overlap\": 32}\n",
     "    }\n",
     ")\n",
     "\n",
@@ -252,7 +251,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!docker compose -f weaviate/docker-compose.yaml up --detach --quiet-pull"
+    "!docker compose -f weaviate_service/docker-compose.yaml up --detach --quiet-pull"
    ]
   },
   {
@@ -307,7 +306,18 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "To stop the Explorer and continue the notebook, press the stop button at the top of the notebook."
+    "To stop the Explorer, run the cell below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fondant.explore import stop_explorer_app\n",
+    "\n",
+    "stop_explorer_app()"
    ]
   },
   {
@@ -435,7 +445,7 @@
    "outputs": [],
    "source": [
     "%%writefile components/text_cleaning/requirements.txt\n",
-    "fondant[component]==0.8.dev4"
+    "fondant[component]==0.9.0"
    ]
   },
   {
@@ -555,6 +565,15 @@
     "!docker compose -f weaviate/docker-compose.yaml down"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stop_explorer_app()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -581,7 +600,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,

diff --git a/src/pipeline_eval.py b/src/pipeline_eval.py
@@ -16,9 +16,9 @@ def create_pipeline(
     embed_model: str = "all-MiniLM-L6-v2",
     embed_api_key: dict = {},
     retrieval_top_k: int = 3,
-    evaluation_module: str = "langchain.llms",
-    evaluation_llm: str = "OpenAI",
-    evaluation_llm_kwargs: dict = {"model_name": "gpt-3.5-turbo"},
+    llm_module_name: str = "langchain.chat_models",
+    llm_class_name: str = "ChatOpenAI",
+    llm_kwargs: dict = {"model_name": "gpt-3.5-turbo"},
     evaluation_metrics: list = ["context_precision", "context_relevancy"],
     number_of_accelerators=None,
     accelerator_name=None,
@@ -72,9 +72,9 @@ def create_pipeline(
     retriever_eval = retrieve_chunks.apply(
         "evaluate_ragas",
         arguments={
-            "module": evaluation_module,
-            "llm_name": evaluation_llm,
-            "llm_kwargs": evaluation_llm_kwargs,
+            "llm_module_name": llm_module_name,
+            "llm_class_name": llm_class_name,
+            "llm_kwargs": llm_kwargs,
         },
         produces={metric: pa.float32() for metric in evaluation_metrics},
     )
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		fondant[component]==0.8.dev6
		fondant[component]==0.9.0
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		fondant[component]==0.8.dev4
		fondant[component]==0.9.0