diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7e584dd5..d7460d48 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -14,6 +14,7 @@ on: jobs: notebook-tests: strategy: + fail-fast: false matrix: es_stack: - 8.14.2 diff --git a/notebooks/search/07-inference.ipynb b/notebooks/search/07-inference.ipynb index 44cff210..c92842d3 100644 --- a/notebooks/search/07-inference.ipynb +++ b/notebooks/search/07-inference.ipynb @@ -195,7 +195,7 @@ "source": [ "API_KEY = getpass(\"OpenAI API key: \")\n", "\n", - "client.inference.put_model(\n", + "client.inference.put(\n", " task_type=\"text_embedding\",\n", " inference_id=\"my_openai_embedding_model\",\n", " body={\n", @@ -211,7 +211,7 @@ "id": "1f2e48b7", "metadata": {}, "source": [ - "**NOTE:** If you use Elasticsearch 8.12, you must change `inference_id` in the snippet above to `model_id`! " + "**NOTE:** If you use Elasticsearch 8.12, you must change `inference_id` in the snippet above to `model_id`, and `inference.put` to `inference.put_model`!" 
] }, { @@ -409,7 +409,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.12.3 64-bit", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -423,7 +423,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.12.5" }, "vscode": { "interpreter": { diff --git a/notebooks/search/09-semantic-text.ipynb b/notebooks/search/09-semantic-text.ipynb index f531f9e2..390ee2fa 100644 --- a/notebooks/search/09-semantic-text.ipynb +++ b/notebooks/search/09-semantic-text.ipynb @@ -4,7 +4,10 @@ "cell_type": "markdown", "id": "c2907fddfeac343a", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "# Semantic Search with Semantic Text\n", @@ -18,7 +21,10 @@ "cell_type": "markdown", "id": "3db37d2cf8264468", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Requirements\n", @@ -35,7 +41,10 @@ "cell_type": "markdown", "id": "7fe1ed0703a8d1d3", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Create Elastic Cloud deployment\n", @@ -47,7 +56,10 @@ "cell_type": "markdown", "id": "f9c8bd62c8241f90", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Install packages and connect with Elasticsearch Client\n", @@ -65,7 +77,10 @@ "execution_count": null, "id": "13fdf7656ced2da3", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -76,7 +91,10 @@ "cell_type": "markdown", "id": "9d54b112361d2f3d", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "Next, we need to import the modules we need. 
\n", @@ -89,7 +107,10 @@ "execution_count": null, "id": "9a60627704e77ff6", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -104,7 +125,10 @@ "cell_type": "markdown", "id": "eb9498124146d8bb", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "Now we can instantiate the Python Elasticsearch client.\n", @@ -118,7 +142,10 @@ "execution_count": null, "id": "6e14437dcce0f235", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -141,7 +168,10 @@ "cell_type": "markdown", "id": "89b6b7721f6d8599", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "### Enable Telemetry\n", @@ -154,7 +184,10 @@ "execution_count": null, "id": "5a7af618fb61f358", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -168,7 +201,10 @@ "cell_type": "markdown", "id": "cbbdaf9118a97732", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "### Test the Client\n", @@ -180,7 +216,10 @@ "execution_count": null, "id": "4cb0685fae12e034", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -191,7 +230,10 @@ "cell_type": "markdown", "id": "59e2223bf2c4331", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "Refer to [the documentation](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html#connect-self-managed-new) to learn how to connect to a self-managed deployment.\n", @@ -203,7 +245,10 @@ "cell_type": "markdown", "id": "22fa643780acd44a", "metadata": { - "collapsed": false + 
"collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Create the Inference Endpoint\n", @@ -218,12 +263,15 @@ "execution_count": null, "id": "8ee2188ea71324f5", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ "try:\n", - " client.inference.delete_model(inference_id=\"my-elser-endpoint\")\n", + " client.inference.delete(inference_id=\"my-elser-endpoint\")\n", "except exceptions.NotFoundError:\n", " # Inference endpoint does not exist\n", " pass\n", @@ -231,7 +279,7 @@ "try:\n", " client.options(\n", " request_timeout=60, max_retries=3, retry_on_timeout=True\n", - " ).inference.put_model(\n", + " ).inference.put(\n", " task_type=\"sparse_embedding\",\n", " inference_id=\"my-elser-endpoint\",\n", " body={\n", @@ -251,7 +299,10 @@ "cell_type": "markdown", "id": "e94fd66761fd8087", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "Once the endpoint is created, we must wait until the backing ELSER service is deployed.\n", @@ -263,13 +314,14 @@ "execution_count": null, "id": "adb33329ce20b2f1", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ - "inference_endpoint_info = client.inference.get_model(\n", - " inference_id=\"my-elser-endpoint\",\n", - ")\n", + "inference_endpoint_info = client.inference.get(inference_id=\"my-elser-endpoint\")\n", "model_id = inference_endpoint_info[\"endpoints\"][0][\"service_settings\"][\"model_id\"]\n", "\n", "while True:\n", @@ -296,7 +348,10 @@ "cell_type": "markdown", "id": "818f7a72a83b5776", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Create the Index\n", @@ -309,7 +364,10 @@ "execution_count": null, "id": "ace87760606f67c6", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + 
"outputs_hidden": false + } }, "outputs": [], "source": [ @@ -334,7 +392,10 @@ "cell_type": "markdown", "id": "abc3ee7a1fddfa9b", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "Notice how we configured the mappings. We defined `plot_semantic` as a `semantic_text` field.\n", @@ -346,7 +407,10 @@ "cell_type": "markdown", "id": "2b5a46b60660a489", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Populate the Index\n", @@ -359,7 +423,10 @@ "execution_count": null, "id": "24f0133923553d28", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -378,7 +445,10 @@ "cell_type": "markdown", "id": "6fff5932fcbac1b0", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Semantic Search\n", @@ -396,7 +466,10 @@ "execution_count": null, "id": "ad417b4b3f50c889", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -423,7 +496,10 @@ "cell_type": "markdown", "id": "22c4d4d395adb472", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "### Semantic Search with the `semantic` Query\n", @@ -437,7 +513,10 @@ "execution_count": null, "id": "1a8520ffc8a3efb3", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -453,7 +532,10 @@ "cell_type": "markdown", "id": "148fda24a3964aa9", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "These results demonstrate the power of semantic search.\n", @@ -469,7 +551,10 @@ "cell_type": "markdown", "id": "7c9bab225a745746", "metadata": { - "collapsed": false + "collapsed": 
false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "### Hybrid Search with the `semantic` Query\n", @@ -483,7 +568,10 @@ "execution_count": null, "id": "4f72f7906b918dc1", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -516,7 +604,10 @@ "cell_type": "markdown", "id": "d50d10ced4389107", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "These results demonstrate that the application of lexical search techniques can help focus the results, while retaining many of the advantages of semantic search.\n", @@ -536,7 +627,10 @@ "cell_type": "markdown", "id": "78be304240d6c695", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Conclusion\n", @@ -548,21 +642,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, diff --git a/notebooks/search/10-semantic-reranking-retriever-cohere.ipynb b/notebooks/search/10-semantic-reranking-retriever-cohere.ipynb index 5958cdb5..aedb1301 100644 --- a/notebooks/search/10-semantic-reranking-retriever-cohere.ipynb +++ b/notebooks/search/10-semantic-reranking-retriever-cohere.ipynb @@ -4,7 +4,10 @@ "cell_type": "markdown", "id": "c2907fddfeac343a", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "# Semantic Reranking with Cohere Reranker\n", @@ -20,7 +23,10 @@ "cell_type": "markdown", "id": "3db37d2cf8264468", "metadata": 
{ - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "# Requirements\n", @@ -40,7 +46,10 @@ "cell_type": "markdown", "id": "7fe1ed0703a8d1d3", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Create Elastic Cloud deployment\n", @@ -52,7 +61,10 @@ "cell_type": "markdown", "id": "f9c8bd62c8241f90", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Install packages and connect with Elasticsearch Client\n", @@ -68,7 +80,10 @@ "execution_count": null, "id": "13fdf7656ced2da3", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -79,7 +94,10 @@ "cell_type": "markdown", "id": "9d54b112361d2f3d", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "Next, we need to import the modules we need.\n", @@ -92,7 +110,10 @@ "execution_count": null, "id": "9a60627704e77ff6", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -106,7 +127,10 @@ "cell_type": "markdown", "id": "eb9498124146d8bb", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "Now we can instantiate the Python Elasticsearch client.\n", @@ -120,7 +144,10 @@ "execution_count": null, "id": "6e14437dcce0f235", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -143,7 +170,10 @@ "cell_type": "markdown", "id": "89b6b7721f6d8599", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "### Enable telemetry\n", @@ -156,7 +186,10 @@ "execution_count": null, "id": "5a7af618fb61f358", "metadata": { 
- "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -170,7 +203,10 @@ "cell_type": "markdown", "id": "cbbdaf9118a97732", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "### Test the Client\n", @@ -183,7 +219,10 @@ "execution_count": null, "id": "4cb0685fae12e034", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -194,7 +233,10 @@ "cell_type": "markdown", "id": "59e2223bf2c4331", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "Refer to [the documentation](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html#connect-self-managed-new) to learn how to connect to a self-managed deployment.\n", @@ -228,7 +270,10 @@ "cell_type": "markdown", "id": "22fa643780acd44a", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "### Create the Inference Endpoint\n", @@ -246,7 +291,7 @@ "outputs": [], "source": [ "try:\n", - " client.inference.delete_model(inference_id=\"cohere-rerank-inference\")\n", + " client.inference.delete(inference_id=\"cohere-rerank-inference\")\n", "except exceptions.NotFoundError:\n", " # Inference endpoint does not exist\n", " pass\n", @@ -254,10 +299,10 @@ "try:\n", " client.options(\n", " request_timeout=60, max_retries=3, retry_on_timeout=True\n", - " ).inference.put_model(\n", + " ).inference.put(\n", " task_type=\"rerank\",\n", " inference_id=\"cohere-rerank-inference\",\n", - " model_config={\n", + " inference_config={\n", " \"service\": \"cohere\",\n", " \"service_settings\": {\n", " \"api_key\": COHERE_API_KEY,\n", @@ -277,7 +322,10 @@ "cell_type": "markdown", "id": "818f7a72a83b5776", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + 
"outputs_hidden": false + } }, "source": [ "## Create the Index\n", @@ -290,7 +338,10 @@ "execution_count": null, "id": "ace87760606f67c6", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -310,7 +361,10 @@ "cell_type": "markdown", "id": "2b5a46b60660a489", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Populate the Index\n", @@ -323,7 +377,10 @@ "execution_count": null, "id": "24f0133923553d28", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -342,7 +399,10 @@ "cell_type": "markdown", "id": "6fff5932fcbac1b0", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Search without reranking\n", @@ -360,7 +420,10 @@ "execution_count": null, "id": "ad417b4b3f50c889", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ @@ -531,7 +594,10 @@ "cell_type": "markdown", "id": "22c4d4d395adb472", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "Much better! Not only are the top results semantically close to our query _\"the Moon covers the Sun\"_, the irrelevant results with a low score were discarded from the response. 
As a result, the list of articles we ended up with are indeed those that provide the best answer to our question.\n", @@ -545,7 +611,10 @@ "cell_type": "markdown", "id": "78be304240d6c695", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "## Conclusion\n", @@ -556,7 +625,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -570,7 +639,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.5" } }, "nbformat": 4, diff --git a/notebooks/search/11-semantic-reranking-hugging-face.ipynb b/notebooks/search/11-semantic-reranking-hugging-face.ipynb index 3624dbee..4f2a6709 100644 --- a/notebooks/search/11-semantic-reranking-hugging-face.ipynb +++ b/notebooks/search/11-semantic-reranking-hugging-face.ipynb @@ -322,7 +322,7 @@ { "data": { "text/plain": [ - "ObjectApiResponse({'endpoints': [{'model_id': 'my-msmarco-minilm-model', 'inference_id': 'my-msmarco-minilm-model', 'task_type': 'rerank', 'service': 'elasticsearch', 'service_settings': {'num_allocations': 1, 'num_threads': 1, 'model_id': 'cross-encoder__ms-marco-minilm-l-6-v2'}, 'task_settings': {'return_documents': True}}]})" + "ObjectApiResponse({'inference_id': 'my-msmarco-minilm-model', 'task_type': 'rerank', 'service': 'elasticsearch', 'service_settings': {'num_allocations': 1, 'num_threads': 1, 'model_id': 'cross-encoder__ms-marco-minilm-l-6-v2'}, 'task_settings': {'return_documents': True}})" ] }, "execution_count": 20, @@ -358,7 +358,7 @@ "metadata": {}, "outputs": [], "source": [ - "client.inference.get()" + "client.inference.get().body" ] }, { @@ -621,13 +621,23 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", + "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "codemirror_mode": 
{ + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 4 }