From cf0c3e1778efe2d31780c13a897571ebb9de716f Mon Sep 17 00:00:00 2001 From: Ryan Lin Date: Tue, 26 Nov 2024 19:31:36 -0500 Subject: [PATCH 1/5] build rag with milvus and ollama tutorial --- .../build_RAG_with_milvus_and_ollama.ipynb | 612 ++++++++++++++++++ 1 file changed, 612 insertions(+) create mode 100644 bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb diff --git a/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb b/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb new file mode 100644 index 000000000..e2b4c2b15 --- /dev/null +++ b/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb @@ -0,0 +1,612 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "\"Open \n", + " \"GitHub" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Build RAG with Milvus and Ollama\n", + "\n", + "[Ollama](https://ollama.com/) is an open-source platform that simplifies running and customizing large language models (LLMs) locally. It provides a user-friendly, cloud-free experience, enabling effortless model downloads, installation, and interaction without requiring advanced technical skills. With a growing library of pre-trained LLMs—from general-purpose to domain-specific—Ollama makes it easy to manage and customize models for various applications. It ensures data privacy and flexibility, empowering users to fine-tune, optimize, and deploy AI-driven solutions entirely on their machines.\n", + "\n", + "In this guide, we’ll show you how to leverage Ollama and Milvus to build a RAG (Retrieval-Augmented Generation) pipeline efficiently and securely." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Preparation\n", + "### Dependencies and Environment" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "vscode": { + "languageId": "shellscript" + }, + "ExecuteTime": { + "end_time": "2024-11-27T00:28:41.664145Z", + "start_time": "2024-11-27T00:28:40.979648Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pymilvus in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (2.4.9)\r\n", + "Requirement already satisfied: ollama in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (0.4.1)\r\n", + "Requirement already satisfied: setuptools>69 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (75.1.0)\r\n", + "Requirement already satisfied: grpcio>=1.49.1 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (1.68.0)\r\n", + "Requirement already satisfied: protobuf>=3.20.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (4.25.5)\r\n", + "Requirement already satisfied: environs<=9.5.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (9.5.0)\r\n", + "Requirement already satisfied: ujson>=2.0.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (5.10.0)\r\n", + "Requirement already satisfied: pandas>=1.2.4 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (2.2.3)\r\n", + "Requirement already satisfied: milvus-lite<2.5.0,>=2.4.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (2.4.10)\r\n", + "Requirement already satisfied: httpx<0.28.0,>=0.27.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from ollama) (0.27.0)\r\n", + "Requirement already satisfied: 
pydantic<3.0.0,>=2.9.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from ollama) (2.10.2)\r\n", + "Requirement already satisfied: marshmallow>=3.0.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from environs<=9.5.0->pymilvus) (3.23.1)\r\n", + "Requirement already satisfied: python-dotenv in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from environs<=9.5.0->pymilvus) (1.0.1)\r\n", + "Requirement already satisfied: anyio in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (4.6.2)\r\n", + "Requirement already satisfied: certifi in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (2024.8.30)\r\n", + "Requirement already satisfied: httpcore==1.* in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.0.2)\r\n", + "Requirement already satisfied: idna in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (3.7)\r\n", + "Requirement already satisfied: sniffio in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.3.0)\r\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpcore==1.*->httpx<0.28.0,>=0.27.0->ollama) (0.14.0)\r\n", + "Requirement already satisfied: tqdm in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from milvus-lite<2.5.0,>=2.4.0->pymilvus) (4.67.0)\r\n", + "Requirement already satisfied: numpy>=1.23.2 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pandas>=1.2.4->pymilvus) (1.26.4)\r\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pandas>=1.2.4->pymilvus) (2.9.0.post0)\r\n", + "Requirement 
already satisfied: pytz>=2020.1 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pandas>=1.2.4->pymilvus) (2024.1)\r\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pandas>=1.2.4->pymilvus) (2024.2)\r\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (0.7.0)\r\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (2.27.1)\r\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (4.12.2)\r\n", + "Requirement already satisfied: packaging>=17.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from marshmallow>=3.0.0->environs<=9.5.0->pymilvus) (24.1)\r\n", + "Requirement already satisfied: six>=1.5 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas>=1.2.4->pymilvus) (1.16.0)\r\n" + ] + } + ], + "source": [ + "! pip install pymilvus ollama" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "> If you are using Google Colab, to enable dependencies just installed, you may need to **restart the runtime** (click on the \"Runtime\" menu at the top of the screen, and select \"Restart session\" from the dropdown menu)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare the data\n", + "\n", + "We use the FAQ pages from the [Milvus Documentation 2.4.x](https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip) as the private knowledge in our RAG, which is a good data source for a simple RAG pipeline.\n", + "\n", + "Download the zip file and extract documents to the folder `milvus_docs`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "vscode": { + "languageId": "shellscript" + }, + "ExecuteTime": { + "end_time": "2024-11-27T00:28:47.636969Z", + "start_time": "2024-11-27T00:28:46.764200Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-11-26 19:28:46-- https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip\r\n", + "Resolving github.com (github.com)... 140.82.114.3\r\n", + "Connecting to github.com (github.com)|140.82.114.3|:443... connected.\r\n", + "HTTP request sent, awaiting response... 
302 Found\r\n", + "Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T002847Z&X-Amz-Expires=300&X-Amz-Signature=ba4dfe2429fa286e39303161ff97e2026f24d2ddda65c45091aa19970e97bfc2&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream [following]\r\n", + "--2024-11-26 19:28:46-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T002847Z&X-Amz-Expires=300&X-Amz-Signature=ba4dfe2429fa286e39303161ff97e2026f24d2ddda65c45091aa19970e97bfc2&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream\r\n", + "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...\r\n", + "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.110.133|:443... connected.\r\n", + "HTTP request sent, awaiting response... 200 OK\r\n", + "Length: 613094 (599K) [application/octet-stream]\r\n", + "Saving to: ‘milvus_docs_2.4.x_en.zip’\r\n", + "\r\n", + "milvus_docs_2.4.x_e 100%[===================>] 598.72K 2.08MB/s in 0.3s \r\n", + "\r\n", + "2024-11-26 19:28:47 (2.08 MB/s) - ‘milvus_docs_2.4.x_en.zip’ saved [613094/613094]\r\n", + "\r\n" + ] + } + ], + "source": [ + "! wget https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip\n", + "! 
unzip -q milvus_docs_2.4.x_en.zip -d milvus_docs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We load all markdown files from the folder `milvus_docs/en/faq`. For each document, we just simply use \"# \" to separate the content in the file, which can roughly separate the content of each main part of the markdown file." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:28:50.837060Z", + "start_time": "2024-11-27T00:28:50.831879Z" + } + }, + "outputs": [], + "source": [ + "from glob import glob\n", + "\n", + "text_lines = []\n", + "\n", + "for file_path in glob(\"milvus_docs/en/faq/*.md\", recursive=True):\n", + " with open(file_path, \"r\") as file:\n", + " file_text = file.read()\n", + "\n", + " text_lines += file_text.split(\"# \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare the LLM and Embedding Model\n", + "\n", + "We initialize a function to prepare and interact with the embedding model. Ollama's `embed` API enables efficient local embedding generation without requiring cloud dependencies.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:28:53.000116Z", + "start_time": "2024-11-27T00:28:52.776698Z" + } + }, + "outputs": [], + "source": [ + "from ollama import embed\n", + "\n", + "def emb_text(text):\n", + " response = embed(model=\"llama3.2\", input=text)\n", + " return response[\"embeddings\"][0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generate a test embedding and print its dimension and first few elements." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:28:56.283931Z", + "start_time": "2024-11-27T00:28:54.342810Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3072\n", + "[-0.04002771, -0.026724102, 0.026219232, 0.013881813, 0.018835567, 0.0034403328, 0.0030376604, 0.04784506, -0.025060177, -0.02351082]\n" + ] + } + ], + "source": [ + "test_embedding = emb_text(\"This is a test\")\n", + "embedding_dim = len(test_embedding)\n", + "print(embedding_dim)\n", + "print(test_embedding[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data into Milvus" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the Collection" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:29:00.032753Z", + "start_time": "2024-11-27T00:28:58.636849Z" + } + }, + "outputs": [], + "source": [ + "from pymilvus import MilvusClient\n", + "\n", + "milvus_client = MilvusClient(uri=\"./milvus_demo.db\")\n", + "\n", + "collection_name = \"my_rag_collection\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "> As for the argument of `MilvusClient`:\n", + "> - Setting the `uri` as a local file, e.g.`./milvus.db`, is the most convenient method, as it automatically utilizes [Milvus Lite](https://milvus.io/docs/milvus_lite.md) to store all data in this file.\n", + "> - If you have large scale of data, you can set up a more performant Milvus server on [docker or kubernetes](https://milvus.io/docs/quickstart.md). 
In this setup, please use the server uri, e.g.`http://localhost:19530`, as your `uri`.\n", + "> - If you want to use [Zilliz Cloud](https://zilliz.com/cloud), the fully managed cloud service for Milvus, adjust the `uri` and `token`, which correspond to the [Public Endpoint and API key](https://docs.zilliz.com/docs/on-zilliz-cloud-console#free-cluster-details) in Zilliz Cloud." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if the collection already exists and drop it if it does." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:29:02.997548Z", + "start_time": "2024-11-27T00:29:02.990325Z" + } + }, + "outputs": [], + "source": [ + "if milvus_client.has_collection(collection_name):\n", + " milvus_client.drop_collection(collection_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a new collection with specified parameters. \n", + "\n", + "If we don't specify any field information, Milvus will automatically create a default `id` field for the primary key, and a `vector` field to store the vector data. A reserved JSON field is used to store non-schema-defined fields and their values." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:29:07.392215Z", + "start_time": "2024-11-27T00:29:06.877218Z" + } + }, + "outputs": [], + "source": [ + "milvus_client.create_collection(\n", + " collection_name=collection_name,\n", + " dimension=embedding_dim,\n", + " metric_type=\"IP\", # Inner product distance\n", + " consistency_level=\"Strong\", # Strong consistency level\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Insert data\n", + "Iterate through the text lines, create embeddings, and then insert the data into Milvus.\n", + "\n", + "Here is a new field `text`, which is a non-defined field in the collection schema. 
It will be automatically added to the reserved JSON dynamic field, which can be treated as a normal field at a high level." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:29:27.468575Z", + "start_time": "2024-11-27T00:29:08.982855Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Creating embeddings: 100%|██████████| 72/72 [00:18<00:00, 3.93it/s]\n" + ] + }, + { + "data": { + "text/plain": "{'insert_count': 72, 'ids': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71], 'cost': 0}" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from tqdm import tqdm\n", + "\n", + "data = []\n", + "\n", + "for i, line in enumerate(tqdm(text_lines, desc=\"Creating embeddings\")):\n", + " data.append({\"id\": i, \"vector\": emb_text(line), \"text\": line})\n", + "\n", + "milvus_client.insert(collection_name=collection_name, data=data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build RAG\n", + "\n", + "### Retrieve data for a query\n", + "\n", + "Let's specify a frequent question about Milvus." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:29:31.035877Z", + "start_time": "2024-11-27T00:29:31.031185Z" + } + }, + "outputs": [], + "source": [ + "question = \"How is data stored in milvus?\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Search for the question in the collection and retrieve the semantic top-3 matches." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:29:38.986015Z", + "start_time": "2024-11-27T00:29:38.064651Z" + } + }, + "outputs": [], + "source": [ + "search_res = milvus_client.search(\n", + " collection_name=collection_name,\n", + " data=[\n", + " emb_text(question)\n", + " ], # Use the `emb_text` function to convert the question to an embedding vector\n", + " limit=3, # Return top 3 results\n", + " search_params={\"metric_type\": \"IP\", \"params\": {}}, # Inner product distance\n", + " output_fields=[\"text\"], # Return the text field\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at the search results of the query\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:29:40.575325Z", + "start_time": "2024-11-27T00:29:40.569159Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " [\n", + " \"Does the query perform in memory? What are incremental data and historical data?\\n\\nYes. When a query request comes, Milvus searches both incremental data and historical data by loading them into memory. Incremental data are in the growing segments, which are buffered in memory before they reach the threshold to be persisted in storage engine, while historical data are from the sealed segments that are stored in the object storage. Incremental data and historical data together constitute the whole dataset to search.\\n\\n###\",\n", + " 0.4488127529621124\n", + " ],\n", + " [\n", + " \"Is Milvus available for concurrent search?\\n\\nYes. For queries on the same collection, Milvus concurrently searches the incremental and historical data. However, queries on different collections are conducted in series. 
Whereas the historical data can be an extremely huge dataset, searches on the historical data are relatively more time-consuming and essentially performed in series.\\n\\n###\",\n", + " 0.4455088973045349\n", + " ],\n", + " [\n", + " \"Does Milvus support inserting and searching data simultaneously?\\n\\nYes. Insert operations and query operations are handled by two separate modules that are mutually independent. From the client\\u2019s perspective, an insert operation is complete when the inserted data enters the message queue. However, inserted data are unsearchable until they are loaded to the query node. If the segment size does not reach the index-building threshold (512 MB by default), Milvus resorts to brute-force search and query performance may be diminished.\\n\\n###\",\n", + " 0.4366944432258606\n", + " ]\n", + "]\n" + ] + } + ], + "source": [ + "import json\n", + "\n", + "retrieved_lines_with_distances = [\n", + " (res[\"entity\"][\"text\"], res[\"distance\"]) for res in search_res[0]\n", + "]\n", + "print(json.dumps(retrieved_lines_with_distances, indent=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use LLM to get a RAG response\n", + "\n", + "Convert the retrieved documents into a string format." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:30:53.192432Z", + "start_time": "2024-11-27T00:30:53.187596Z" + } + }, + "outputs": [], + "source": [ + "context = \"\\n\".join(\n", + " [line_with_distance[0] for line_with_distance in retrieved_lines_with_distances]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define system and user prompts for the Language Model. This prompt is assembled with the retrieved documents from Milvus." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-27T00:30:54.944338Z", + "start_time": "2024-11-27T00:30:54.937625Z" + } + }, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"\n", + "Human: You are an AI assistant. You are able to find answers to the questions from the contextual passage snippets provided.\n", + "\"\"\"\n", + "USER_PROMPT = f\"\"\"\n", + "Use the following pieces of information enclosed in tags to provide an answer to the question enclosed in tags.\n", + "\n", + "{context}\n", + "\n", + "\n", + "{question}\n", + "\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the `llama3.2` model provided by Ollama to generate a response based on the prompts.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2024-11-27T00:31:00.160024Z", + "start_time": "2024-11-27T00:30:56.879973Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Based on the provided information, here's an answer to the question:\n", + "\n", + "Data in Milvus are stored in two segments: incremental data and historical data. 
\n", + "\n", + "- Incremental data are stored in growing segments that are buffered in memory before they reach the threshold to be persisted in storage engine.\n", + "- Historical data are stored in sealed segments that are stored in object storage.\n", + "\n", + "These two types of data together constitute the whole dataset to search.\n" + ] + } + ], + "source": [ + "from ollama import chat\n", + "from ollama import ChatResponse\n", + "\n", + "response: ChatResponse = chat(model='llama3.2', messages=[\n", + " {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n", + " {\"role\": \"user\", \"content\": USER_PROMPT},\n", + " ]\n", + ")\n", + "print(response['message']['content'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great! We have successfully built a RAG pipeline with Milvus and Ollama." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From c40bd517e2c97fa89108ffcacb6d3848ab13ce21 Mon Sep 17 00:00:00 2001 From: Ryan Lin Date: Tue, 26 Nov 2024 19:46:14 -0500 Subject: [PATCH 2/5] build rag with milvus and ollama tutorial: modified embedding function --- .../build_RAG_with_milvus_and_ollama.ipynb | 192 +++++++++--------- 1 file changed, 94 insertions(+), 98 deletions(-) diff --git a/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb b/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb index e2b4c2b15..01d159719 100644 --- a/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb +++ b/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb @@ -33,53 +33,13 @@ }, { "cell_type": 
"code", - "execution_count": 1, + "execution_count": null, "metadata": { "vscode": { "languageId": "shellscript" - }, - "ExecuteTime": { - "end_time": "2024-11-27T00:28:41.664145Z", - "start_time": "2024-11-27T00:28:40.979648Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: pymilvus in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (2.4.9)\r\n", - "Requirement already satisfied: ollama in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (0.4.1)\r\n", - "Requirement already satisfied: setuptools>69 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (75.1.0)\r\n", - "Requirement already satisfied: grpcio>=1.49.1 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (1.68.0)\r\n", - "Requirement already satisfied: protobuf>=3.20.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (4.25.5)\r\n", - "Requirement already satisfied: environs<=9.5.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (9.5.0)\r\n", - "Requirement already satisfied: ujson>=2.0.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (5.10.0)\r\n", - "Requirement already satisfied: pandas>=1.2.4 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (2.2.3)\r\n", - "Requirement already satisfied: milvus-lite<2.5.0,>=2.4.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pymilvus) (2.4.10)\r\n", - "Requirement already satisfied: httpx<0.28.0,>=0.27.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from ollama) (0.27.0)\r\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.9.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from ollama) (2.10.2)\r\n", - "Requirement already satisfied: marshmallow>=3.0.0 in 
/Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from environs<=9.5.0->pymilvus) (3.23.1)\r\n", - "Requirement already satisfied: python-dotenv in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from environs<=9.5.0->pymilvus) (1.0.1)\r\n", - "Requirement already satisfied: anyio in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (4.6.2)\r\n", - "Requirement already satisfied: certifi in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (2024.8.30)\r\n", - "Requirement already satisfied: httpcore==1.* in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.0.2)\r\n", - "Requirement already satisfied: idna in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (3.7)\r\n", - "Requirement already satisfied: sniffio in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.3.0)\r\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from httpcore==1.*->httpx<0.28.0,>=0.27.0->ollama) (0.14.0)\r\n", - "Requirement already satisfied: tqdm in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from milvus-lite<2.5.0,>=2.4.0->pymilvus) (4.67.0)\r\n", - "Requirement already satisfied: numpy>=1.23.2 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pandas>=1.2.4->pymilvus) (1.26.4)\r\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pandas>=1.2.4->pymilvus) (2.9.0.post0)\r\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pandas>=1.2.4->pymilvus) (2024.1)\r\n", - "Requirement already 
satisfied: tzdata>=2022.7 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pandas>=1.2.4->pymilvus) (2024.2)\r\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (0.7.0)\r\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (2.27.1)\r\n", - "Requirement already satisfied: typing-extensions>=4.12.2 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.9.0->ollama) (4.12.2)\r\n", - "Requirement already satisfied: packaging>=17.0 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from marshmallow>=3.0.0->environs<=9.5.0->pymilvus) (24.1)\r\n", - "Requirement already satisfied: six>=1.5 in /Users/jinhonglin/anaconda3/envs/myenv/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas>=1.2.4->pymilvus) (1.16.0)\r\n" - ] - } - ], + "outputs": [], "source": [ "! pip install pymilvus ollama" ] @@ -106,14 +66,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": { "vscode": { "languageId": "shellscript" }, "ExecuteTime": { - "end_time": "2024-11-27T00:28:47.636969Z", - "start_time": "2024-11-27T00:28:46.764200Z" + "end_time": "2024-11-27T00:44:58.181149Z", + "start_time": "2024-11-27T00:44:57.228382Z" } }, "outputs": [ @@ -121,21 +81,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2024-11-26 19:28:46-- https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip\r\n", - "Resolving github.com (github.com)... 140.82.114.3\r\n", - "Connecting to github.com (github.com)|140.82.114.3|:443... 
connected.\r\n", + "--2024-11-26 19:44:57-- https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip\r\n", + "Resolving github.com (github.com)... 140.82.113.4\r\n", + "Connecting to github.com (github.com)|140.82.113.4|:443... connected.\r\n", "HTTP request sent, awaiting response... 302 Found\r\n", - "Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T002847Z&X-Amz-Expires=300&X-Amz-Signature=ba4dfe2429fa286e39303161ff97e2026f24d2ddda65c45091aa19970e97bfc2&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream [following]\r\n", - "--2024-11-26 19:28:46-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T002847Z&X-Amz-Expires=300&X-Amz-Signature=ba4dfe2429fa286e39303161ff97e2026f24d2ddda65c45091aa19970e97bfc2&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream\r\n", + "Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T004457Z&X-Amz-Expires=300&X-Amz-Signature=506c30c65724ab7e7ebab4b6d2dec8ab849219e544049385edef378d6f9f9ff3&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream 
[following]\r\n", + "--2024-11-26 19:44:57-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T004457Z&X-Amz-Expires=300&X-Amz-Signature=506c30c65724ab7e7ebab4b6d2dec8ab849219e544049385edef378d6f9f9ff3&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream\r\n", "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...\r\n", "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.110.133|:443... connected.\r\n", "HTTP request sent, awaiting response... 200 OK\r\n", "Length: 613094 (599K) [application/octet-stream]\r\n", "Saving to: ‘milvus_docs_2.4.x_en.zip’\r\n", "\r\n", - "milvus_docs_2.4.x_e 100%[===================>] 598.72K 2.08MB/s in 0.3s \r\n", + "milvus_docs_2.4.x_e 100%[===================>] 598.72K 1.75MB/s in 0.3s \r\n", "\r\n", - "2024-11-26 19:28:47 (2.08 MB/s) - ‘milvus_docs_2.4.x_en.zip’ saved [613094/613094]\r\n", + "2024-11-26 19:44:57 (1.75 MB/s) - ‘milvus_docs_2.4.x_en.zip’ saved [613094/613094]\r\n", "\r\n" ] } @@ -154,11 +114,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:28:50.837060Z", - "start_time": "2024-11-27T00:28:50.831879Z" + "end_time": "2024-11-27T00:45:00.235620Z", + "start_time": "2024-11-27T00:45:00.229892Z" } }, "outputs": [], @@ -180,25 +140,64 @@ "source": [ "### Prepare the LLM and Embedding Model\n", "\n", - "We initialize a function to prepare and interact with the embedding model. 
Ollama's `embed` API enables efficient local embedding generation without requiring cloud dependencies.\n" + "Ollama supports several embedding models, making it easy to build retrieval-augmented generation (RAG) applications. \n", + "\n", + "#### Example Embedding Models\n", + "| Model | Parameter Size |\n", + "|---------------------|----------------|\n", + "| `mxbai-embed-large` | 334M |\n", + "| `nomic-embed-text` | 137M |\n", + "| `all-minilm` | 23M |\n", + "\n", + "To generate vector embeddings, first pull the desired model:" ] }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[?25lpulling manifest ⠋ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠙ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠹ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠸ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠼ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠴ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠦ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠧ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠇ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠏ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠋ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠙ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠹ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠸ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest \r\n", + "pulling 819c2adf5ce6... 100% ▕████████████████▏ 669 MB \r\n", + "pulling c71d239df917... 100% ▕████████████████▏ 11 KB \r\n", + "pulling b837481ff855... 100% ▕████████████████▏ 16 B \r\n", + "pulling 38badd946f91... 100% ▕████████████████▏ 408 B \r\n", + "verifying sha256 digest \r\n", + "writing manifest \r\n", + "success \u001B[?25h\r\n" + ] + } + ], + "source": [ + "! 
ollama pull mxbai-embed-large" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-11-27T00:45:03.445904Z", + "start_time": "2024-11-27T00:45:01.839287Z" + } + }, + "execution_count": 3 + }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:28:53.000116Z", - "start_time": "2024-11-27T00:28:52.776698Z" + "end_time": "2024-11-27T00:45:05.001727Z", + "start_time": "2024-11-27T00:45:04.779258Z" } }, "outputs": [], "source": [ - "from ollama import embed\n", + "import ollama \n", "\n", "def emb_text(text):\n", - " response = embed(model=\"llama3.2\", input=text)\n", - " return response[\"embeddings\"][0]" + " response = ollama.embeddings(model=\"mxbai-embed-large\", prompt=text)\n", + " return response[\"embedding\"]" ] }, { @@ -213,8 +212,8 @@ "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:28:56.283931Z", - "start_time": "2024-11-27T00:28:54.342810Z" + "end_time": "2024-11-27T00:45:16.665318Z", + "start_time": "2024-11-27T00:45:15.913156Z" } }, "outputs": [ @@ -222,8 +221,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "3072\n", - "[-0.04002771, -0.026724102, 0.026219232, 0.013881813, 0.018835567, 0.0034403328, 0.0030376604, 0.04784506, -0.025060177, -0.02351082]\n" + "1024\n", + "[0.23276396095752716, 0.4257211685180664, 0.19724100828170776, 0.46120673418045044, -0.46039995551109314, -0.1413791924715042, -0.18261606991291046, -0.07602324336767197, 0.39991313219070435, 0.8337644338607788]\n" ] } ], @@ -253,8 +252,8 @@ "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:29:00.032753Z", - "start_time": "2024-11-27T00:28:58.636849Z" + "end_time": "2024-11-27T00:45:20.893063Z", + "start_time": "2024-11-27T00:45:19.275761Z" } }, "outputs": [], @@ -290,8 +289,8 @@ "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:29:02.997548Z", - "start_time": "2024-11-27T00:29:02.990325Z" + 
"end_time": "2024-11-27T00:45:22.173403Z", + "start_time": "2024-11-27T00:45:22.166288Z" } }, "outputs": [], @@ -314,8 +313,8 @@ "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:29:07.392215Z", - "start_time": "2024-11-27T00:29:06.877218Z" + "end_time": "2024-11-27T00:45:24.073034Z", + "start_time": "2024-11-27T00:45:23.557970Z" } }, "outputs": [], @@ -343,8 +342,8 @@ "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:29:27.468575Z", - "start_time": "2024-11-27T00:29:08.982855Z" + "end_time": "2024-11-27T00:45:28.536601Z", + "start_time": "2024-11-27T00:45:25.242813Z" } }, "outputs": [ @@ -352,7 +351,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Creating embeddings: 100%|██████████| 72/72 [00:18<00:00, 3.93it/s]\n" + "Creating embeddings: 100%|██████████| 72/72 [00:03<00:00, 22.48it/s]\n" ] }, { @@ -391,8 +390,8 @@ "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:29:31.035877Z", - "start_time": "2024-11-27T00:29:31.031185Z" + "end_time": "2024-11-27T00:45:30.019679Z", + "start_time": "2024-11-27T00:45:30.012806Z" } }, "outputs": [], @@ -412,8 +411,8 @@ "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:29:38.986015Z", - "start_time": "2024-11-27T00:29:38.064651Z" + "end_time": "2024-11-27T00:45:32.515409Z", + "start_time": "2024-11-27T00:45:32.433024Z" } }, "outputs": [], @@ -441,8 +440,8 @@ "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:29:40.575325Z", - "start_time": "2024-11-27T00:29:40.569159Z" + "end_time": "2024-11-27T00:45:33.712461Z", + "start_time": "2024-11-27T00:45:33.707456Z" } }, "outputs": [ @@ -452,16 +451,16 @@ "text": [ "[\n", " [\n", - " \"Does the query perform in memory? What are incremental data and historical data?\\n\\nYes. When a query request comes, Milvus searches both incremental data and historical data by loading them into memory. 
Incremental data are in the growing segments, which are buffered in memory before they reach the threshold to be persisted in storage engine, while historical data are from the sealed segments that are stored in the object storage. Incremental data and historical data together constitute the whole dataset to search.\\n\\n###\",\n", - " 0.4488127529621124\n", + " \" Where does Milvus store data?\\n\\nMilvus deals with two types of data, inserted data and metadata. \\n\\nInserted data, including vector data, scalar data, and collection-specific schema, are stored in persistent storage as incremental log. Milvus supports multiple object storage backends, including [MinIO](https://min.io/), [AWS S3](https://aws.amazon.com/s3/?nc1=h_ls), [Google Cloud Storage](https://cloud.google.com/storage?hl=en#object-storage-for-companies-of-all-sizes) (GCS), [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs), [Alibaba Cloud OSS](https://www.alibabacloud.com/product/object-storage-service), and [Tencent Cloud Object Storage](https://www.tencentcloud.com/products/cos) (COS).\\n\\nMetadata are generated within Milvus. Each Milvus module has its own metadata that are stored in etcd.\\n\\n###\",\n", + " 231.9398193359375\n", " ],\n", " [\n", - " \"Is Milvus available for concurrent search?\\n\\nYes. For queries on the same collection, Milvus concurrently searches the incremental and historical data. However, queries on different collections are conducted in series. Whereas the historical data can be an extremely huge dataset, searches on the historical data are relatively more time-consuming and essentially performed in series.\\n\\n###\",\n", - " 0.4455088973045349\n", + " \"How does Milvus flush data?\\n\\nMilvus returns success when inserted data are loaded to the message queue. However, the data are not yet flushed to the disk. Then Milvus' data node writes the data in the message queue to persistent storage as incremental logs. 
If `flush()` is called, the data node is forced to write all data in the message queue to persistent storage immediately.\\n\\n###\",\n", + " 226.48316955566406\n", " ],\n", " [\n", - " \"Does Milvus support inserting and searching data simultaneously?\\n\\nYes. Insert operations and query operations are handled by two separate modules that are mutually independent. From the client\\u2019s perspective, an insert operation is complete when the inserted data enters the message queue. However, inserted data are unsearchable until they are loaded to the query node. If the segment size does not reach the index-building threshold (512 MB by default), Milvus resorts to brute-force search and query performance may be diminished.\\n\\n###\",\n", - " 0.4366944432258606\n", + " \"What is the maximum dataset size Milvus can handle?\\n\\n \\nTheoretically, the maximum dataset size Milvus can handle is determined by the hardware it is run on, specifically system memory and storage:\\n\\n- Milvus loads all specified collections and partitions into memory before running queries. 
Therefore, memory size determines the maximum amount of data Milvus can query.\\n- When new entities and and collection-related schema (currently only MinIO is supported for data persistence) are added to Milvus, system storage determines the maximum allowable size of inserted data.\\n\\n###\",\n", + " 210.60745239257812\n", " ]\n", "]\n" ] @@ -490,8 +489,8 @@ "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:30:53.192432Z", - "start_time": "2024-11-27T00:30:53.187596Z" + "end_time": "2024-11-27T00:45:36.228542Z", + "start_time": "2024-11-27T00:45:36.222780Z" } }, "outputs": [], @@ -513,8 +512,8 @@ "execution_count": 14, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:30:54.944338Z", - "start_time": "2024-11-27T00:30:54.937625Z" + "end_time": "2024-11-27T00:45:37.092446Z", + "start_time": "2024-11-27T00:45:37.089081Z" } }, "outputs": [], @@ -548,8 +547,8 @@ "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2024-11-27T00:31:00.160024Z", - "start_time": "2024-11-27T00:30:56.879973Z" + "end_time": "2024-11-27T00:45:44.528788Z", + "start_time": "2024-11-27T00:45:39.509786Z" } }, "outputs": [ @@ -557,14 +556,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "Based on the provided information, here's an answer to the question:\n", - "\n", - "Data in Milvus are stored in two segments: incremental data and historical data. \n", + "According to the contextual passage snippet, Milvus stores its data in two types:\n", "\n", - "- Incremental data are stored in growing segments that are buffered in memory before they reach the threshold to be persisted in storage engine.\n", - "- Historical data are stored in sealed segments that are stored in object storage.\n", + "1. **Inserted data**: This includes vector data, scalar data, and collection-specific schema. 
It is stored in persistent storage as incremental logs using multiple object storage backends such as MinIO, AWS S3, Google Cloud Storage (GCS), Azure Blob Storage, Alibaba Cloud OSS, and Tencent Cloud Object Storage.\n", "\n", - "These two types of data together constitute the whole dataset to search.\n" + "2. **Metadata**: This is generated within Milvus and each module has its own metadata that are stored in etcd.\n" ] } ], From 79e7ffeb7464f18802ad1632c6581dd3cc2bcf26 Mon Sep 17 00:00:00 2001 From: Ryan Lin Date: Tue, 26 Nov 2024 19:48:48 -0500 Subject: [PATCH 3/5] build rag with milvus and ollama tutorial: run black --- .../build_RAG_with_milvus_and_ollama.ipynb | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb b/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb index 01d159719..08c7e9922 100644 --- a/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb +++ b/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb @@ -159,14 +159,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[?25lpulling manifest ⠋ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠙ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠹ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠸ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠼ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠴ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠦ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠧ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠇ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠏ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠋ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠙ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠹ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠸ 
\u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest \r\n", + "\u001b[?25lpulling manifest ⠋ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠙ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠹ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠸ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠼ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠴ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠦ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠧ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠇ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠏ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠋ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠙ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠹ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠸ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest \r\n", "pulling 819c2adf5ce6... 100% ▕████████████████▏ 669 MB \r\n", "pulling c71d239df917... 100% ▕████████████████▏ 11 KB \r\n", "pulling b837481ff855... 100% ▕████████████████▏ 16 B \r\n", "pulling 38badd946f91... 
100% ▕████████████████▏ 408 B \r\n", "verifying sha256 digest \r\n", "writing manifest \r\n", - "success \u001B[?25h\r\n" + "success \u001b[?25h\r\n" ] } ], @@ -193,7 +193,8 @@ }, "outputs": [], "source": [ - "import ollama \n", + "import ollama\n", + "\n", "\n", "def emb_text(text):\n", " response = ollama.embeddings(model=\"mxbai-embed-large\", prompt=text)\n", @@ -568,12 +569,14 @@ "from ollama import chat\n", "from ollama import ChatResponse\n", "\n", - "response: ChatResponse = chat(model='llama3.2', messages=[\n", + "response: ChatResponse = chat(\n", + " model=\"llama3.2\",\n", + " messages=[\n", " {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n", " {\"role\": \"user\", \"content\": USER_PROMPT},\n", - " ]\n", + " ],\n", ")\n", - "print(response['message']['content'])" + "print(response[\"message\"][\"content\"])" ] }, { From 10d42f52ef7b3647514f4920978f6391a304323e Mon Sep 17 00:00:00 2001 From: Ryan Lin Date: Tue, 26 Nov 2024 21:33:16 -0500 Subject: [PATCH 4/5] build rag with milvus and ollama tutorial: ollama pull --- .../build_RAG_with_milvus_and_ollama.ipynb | 170 +++++++++++------- 1 file changed, 107 insertions(+), 63 deletions(-) diff --git a/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb b/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb index 08c7e9922..88a7041d0 100644 --- a/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb +++ b/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb @@ -66,14 +66,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": { "vscode": { "languageId": "shellscript" }, "ExecuteTime": { - "end_time": "2024-11-27T00:44:58.181149Z", - "start_time": "2024-11-27T00:44:57.228382Z" + "end_time": "2024-11-27T02:31:19.941547Z", + "start_time": "2024-11-27T02:31:18.944349Z" } }, "outputs": [ @@ -81,21 +81,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2024-11-26 19:44:57-- 
https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip\r\n", - "Resolving github.com (github.com)... 140.82.113.4\r\n", - "Connecting to github.com (github.com)|140.82.113.4|:443... connected.\r\n", + "--2024-11-26 21:31:18-- https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip\r\n", + "Resolving github.com (github.com)... 140.82.113.3\r\n", + "Connecting to github.com (github.com)|140.82.113.3|:443... connected.\r\n", "HTTP request sent, awaiting response... 302 Found\r\n", - "Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T004457Z&X-Amz-Expires=300&X-Amz-Signature=506c30c65724ab7e7ebab4b6d2dec8ab849219e544049385edef378d6f9f9ff3&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream [following]\r\n", - "--2024-11-26 19:44:57-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T004457Z&X-Amz-Expires=300&X-Amz-Signature=506c30c65724ab7e7ebab4b6d2dec8ab849219e544049385edef378d6f9f9ff3&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream\r\n", - "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...\r\n", - "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.110.133|:443... 
connected.\r\n", + "Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T023119Z&X-Amz-Expires=300&X-Amz-Signature=728fe1b2ff9ec0209b56038092558b2e76e8641f16cd8d73f55ded255dfaa883&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream [following]\r\n", + "--2024-11-26 21:31:19-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T023119Z&X-Amz-Expires=300&X-Amz-Signature=728fe1b2ff9ec0209b56038092558b2e76e8641f16cd8d73f55ded255dfaa883&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream\r\n", + "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.108.133, ...\r\n", + "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.109.133|:443... connected.\r\n", "HTTP request sent, awaiting response... 
200 OK\r\n", "Length: 613094 (599K) [application/octet-stream]\r\n", "Saving to: ‘milvus_docs_2.4.x_en.zip’\r\n", "\r\n", - "milvus_docs_2.4.x_e 100%[===================>] 598.72K 1.75MB/s in 0.3s \r\n", + "milvus_docs_2.4.x_e 100%[===================>] 598.72K 1.84MB/s in 0.3s \r\n", "\r\n", - "2024-11-26 19:44:57 (1.75 MB/s) - ‘milvus_docs_2.4.x_en.zip’ saved [613094/613094]\r\n", + "2024-11-26 21:31:19 (1.84 MB/s) - ‘milvus_docs_2.4.x_en.zip’ saved [613094/613094]\r\n", "\r\n" ] } @@ -114,11 +114,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:00.235620Z", - "start_time": "2024-11-27T00:45:00.229892Z" + "end_time": "2024-11-27T02:31:21.292584Z", + "start_time": "2024-11-27T02:31:21.289126Z" } }, "outputs": [], @@ -159,14 +159,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[?25lpulling manifest ⠋ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠙ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠹ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠸ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠼ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠴ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠦ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠧ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠇ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠏ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠋ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠙ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠹ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠸ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest \r\n", + "\u001B[?25lpulling manifest ⠋ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠙ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠹ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠸ 
\u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠼ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠴ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest \r\n", "pulling 819c2adf5ce6... 100% ▕████████████████▏ 669 MB \r\n", "pulling c71d239df917... 100% ▕████████████████▏ 11 KB \r\n", "pulling b837481ff855... 100% ▕████████████████▏ 16 B \r\n", "pulling 38badd946f91... 100% ▕████████████████▏ 408 B \r\n", "verifying sha256 digest \r\n", "writing manifest \r\n", - "success \u001b[?25h\r\n" + "success \u001B[?25h\r\n" ] } ], @@ -176,19 +176,19 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-11-27T00:45:03.445904Z", - "start_time": "2024-11-27T00:45:01.839287Z" + "end_time": "2024-11-27T02:31:23.218390Z", + "start_time": "2024-11-27T02:31:22.412385Z" } }, - "execution_count": 3 + "execution_count": 4 }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:05.001727Z", - "start_time": "2024-11-27T00:45:04.779258Z" + "end_time": "2024-11-27T02:31:24.155302Z", + "start_time": "2024-11-27T02:31:23.911501Z" } }, "outputs": [], @@ -210,11 +210,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:16.665318Z", - "start_time": "2024-11-27T00:45:15.913156Z" + "end_time": "2024-11-27T02:31:24.995157Z", + "start_time": "2024-11-27T02:31:24.888531Z" } }, "outputs": [ @@ -250,11 +250,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:20.893063Z", - "start_time": "2024-11-27T00:45:19.275761Z" + "end_time": "2024-11-27T02:31:28.149547Z", + "start_time": "2024-11-27T02:31:26.423307Z" } }, "outputs": [], @@ -287,11 +287,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:22.173403Z", - 
"start_time": "2024-11-27T00:45:22.166288Z" + "end_time": "2024-11-27T02:31:29.781316Z", + "start_time": "2024-11-27T02:31:29.777964Z" } }, "outputs": [], @@ -311,11 +311,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:24.073034Z", - "start_time": "2024-11-27T00:45:23.557970Z" + "end_time": "2024-11-27T02:31:31.170874Z", + "start_time": "2024-11-27T02:31:30.658968Z" } }, "outputs": [], @@ -340,11 +340,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:28.536601Z", - "start_time": "2024-11-27T00:45:25.242813Z" + "end_time": "2024-11-27T02:31:34.695446Z", + "start_time": "2024-11-27T02:31:31.429091Z" } }, "outputs": [ @@ -352,14 +352,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Creating embeddings: 100%|██████████| 72/72 [00:03<00:00, 22.48it/s]\n" + "Creating embeddings: 100%|██████████| 72/72 [00:03<00:00, 22.58it/s]\n" ] }, { "data": { "text/plain": "{'insert_count': 72, 'ids': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71], 'cost': 0}" }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -388,11 +388,11 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:30.019679Z", - "start_time": "2024-11-27T00:45:30.012806Z" + "end_time": "2024-11-27T02:31:34.700660Z", + "start_time": "2024-11-27T02:31:34.695886Z" } }, "outputs": [], @@ -409,11 +409,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:32.515409Z", - 
"start_time": "2024-11-27T00:45:32.433024Z" + "end_time": "2024-11-27T02:31:35.646122Z", + "start_time": "2024-11-27T02:31:35.599692Z" } }, "outputs": [], @@ -438,11 +438,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:33.712461Z", - "start_time": "2024-11-27T00:45:33.707456Z" + "end_time": "2024-11-27T02:31:36.523433Z", + "start_time": "2024-11-27T02:31:36.518040Z" } }, "outputs": [ @@ -487,11 +487,11 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:36.228542Z", - "start_time": "2024-11-27T00:45:36.222780Z" + "end_time": "2024-11-27T02:31:37.829743Z", + "start_time": "2024-11-27T02:31:37.825794Z" } }, "outputs": [], @@ -510,11 +510,11 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T00:45:37.092446Z", - "start_time": "2024-11-27T00:45:37.089081Z" + "end_time": "2024-11-27T02:31:38.696682Z", + "start_time": "2024-11-27T02:31:38.693360Z" } }, "outputs": [], @@ -537,19 +537,50 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Use the `llama3.2` model provided by Ollama to generate a response based on the prompts.\n" + "Use the `llama3.2:1b` model provided by Ollama to generate a response based on the prompts. Let's pull the model first.\n" ] }, { "cell_type": "code", - "execution_count": 15, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[?25lpulling manifest ⠋ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠙ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠹ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠸ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠼ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest \r\n", + "pulling 74701a8c35f6... 100% ▕████████████████▏ 1.3 GB \r\n", + "pulling 966de95ca8a6... 
100% ▕████████████████▏ 1.4 KB \r\n", + "pulling fcc5a6bec9da... 100% ▕████████████████▏ 7.7 KB \r\n", + "pulling a70ff7e570d9... 100% ▕████████████████▏ 6.0 KB \r\n", + "pulling 4f659a1e86d7... 100% ▕████████████████▏ 485 B \r\n", + "verifying sha256 digest \r\n", + "writing manifest \r\n", + "success \u001B[?25h\r\n" + ] + } + ], + "source": [ + "! ollama pull llama3.2:1b" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-11-27T02:31:43.997697Z", + "start_time": "2024-11-27T02:31:40.274056Z" + } + }, + "execution_count": 16 + }, + { + "cell_type": "code", + "execution_count": 18, "metadata": { "pycharm": { "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2024-11-27T00:45:44.528788Z", - "start_time": "2024-11-27T00:45:39.509786Z" + "end_time": "2024-11-27T02:32:27.646330Z", + "start_time": "2024-11-27T02:32:25.070759Z" } }, "outputs": [ @@ -557,11 +588,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "According to the contextual passage snippet, Milvus stores its data in two types:\n", + "\n", + "\n", + "Milvus stores data in two main categories:\n", + "\n", + "1. **Inserted data**: This includes vector data, scalar data, and collection-specific schema that are persisted as incremental logs on a persistent storage system such as:\n", + " * MinIO\n", + " * AWS S3\n", + " * Google Cloud Storage (GCS)\n", + " * Azure Blob Storage\n", + " * Alibaba Cloud OSS\n", + " * Tencent Cloud Object Storage (COS)\n", + "\n", + "2. **Metadata**: These are generated within Milvus and stored in an etcd module, which is a distributed key-value store.\n", "\n", - "1. **Inserted data**: This includes vector data, scalar data, and collection-specific schema. 
It is stored in persistent storage as incremental logs using multiple object storage backends such as MinIO, AWS S3, Google Cloud Storage (GCS), Azure Blob Storage, Alibaba Cloud OSS, and Tencent Cloud Object Storage.\n", + "In terms of data storage size, the maximum dataset size that Milvus can handle is determined by the hardware it is run on, specifically:\n", "\n", - "2. **Metadata**: This is generated within Milvus and each module has its own metadata that are stored in etcd.\n" + "* System memory: This determines the maximum amount of data Milvus can query.\n", + "* Storage: When new entities and collection-related schema are added to Milvus, system storage also determines the maximum allowable size of inserted data.\n" ] } ], @@ -570,7 +614,7 @@ "from ollama import ChatResponse\n", "\n", "response: ChatResponse = chat(\n", - " model=\"llama3.2\",\n", + " model=\"llama3.2:1b\",\n", " messages=[\n", " {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n", " {\"role\": \"user\", \"content\": USER_PROMPT},\n", From 8841debc9daeeec135712c1b992038a08c7e53da Mon Sep 17 00:00:00 2001 From: Ryan Lin Date: Tue, 26 Nov 2024 21:49:13 -0500 Subject: [PATCH 5/5] build rag with milvus and ollama tutorial: llama3.2 (3B) --- .../build_RAG_with_milvus_and_ollama.ipynb | 193 +++++++++--------- 1 file changed, 93 insertions(+), 100 deletions(-) diff --git a/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb b/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb index 88a7041d0..9374b76c0 100644 --- a/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb +++ b/bootcamp/tutorials/integration/build_RAG_with_milvus_and_ollama.ipynb @@ -66,14 +66,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": { "vscode": { "languageId": "shellscript" }, "ExecuteTime": { - "end_time": "2024-11-27T02:31:19.941547Z", - "start_time": "2024-11-27T02:31:18.944349Z" + "end_time": "2024-11-27T02:47:21.129074Z", 
+ "start_time": "2024-11-27T02:47:19.934551Z" } }, "outputs": [ @@ -81,21 +81,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2024-11-26 21:31:18-- https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip\r\n", - "Resolving github.com (github.com)... 140.82.113.3\r\n", - "Connecting to github.com (github.com)|140.82.113.3|:443... connected.\r\n", + "--2024-11-26 21:47:19-- https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip\r\n", + "Resolving github.com (github.com)... 140.82.112.4\r\n", + "Connecting to github.com (github.com)|140.82.112.4|:443... connected.\r\n", "HTTP request sent, awaiting response... 302 Found\r\n", - "Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T023119Z&X-Amz-Expires=300&X-Amz-Signature=728fe1b2ff9ec0209b56038092558b2e76e8641f16cd8d73f55ded255dfaa883&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream [following]\r\n", - "--2024-11-26 21:31:19-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T023119Z&X-Amz-Expires=300&X-Amz-Signature=728fe1b2ff9ec0209b56038092558b2e76e8641f16cd8d73f55ded255dfaa883&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream\r\n", + "Location: 
https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T024720Z&X-Amz-Expires=300&X-Amz-Signature=7808b77cbdaa7e122196bcd75a73f29f2540333a350c4830bbdf5f286e876304&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream [following]\r\n", + "--2024-11-26 21:47:20-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/267273319/c52902a0-e13c-4ca7-92e0-086751098a05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241127T024720Z&X-Amz-Expires=300&X-Amz-Signature=7808b77cbdaa7e122196bcd75a73f29f2540333a350c4830bbdf5f286e876304&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dmilvus_docs_2.4.x_en.zip&response-content-type=application%2Foctet-stream\r\n", "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.108.133, ...\r\n", "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.109.133|:443... connected.\r\n", "HTTP request sent, awaiting response... 
200 OK\r\n", "Length: 613094 (599K) [application/octet-stream]\r\n", "Saving to: ‘milvus_docs_2.4.x_en.zip’\r\n", "\r\n", - "milvus_docs_2.4.x_e 100%[===================>] 598.72K 1.84MB/s in 0.3s \r\n", + "milvus_docs_2.4.x_e 100%[===================>] 598.72K 1.20MB/s in 0.5s \r\n", "\r\n", - "2024-11-26 21:31:19 (1.84 MB/s) - ‘milvus_docs_2.4.x_en.zip’ saved [613094/613094]\r\n", + "2024-11-26 21:47:20 (1.20 MB/s) - ‘milvus_docs_2.4.x_en.zip’ saved [613094/613094]\r\n", "\r\n" ] } @@ -114,11 +114,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:21.292584Z", - "start_time": "2024-11-27T02:31:21.289126Z" + "end_time": "2024-11-27T02:47:25.104740Z", + "start_time": "2024-11-27T02:47:25.101395Z" } }, "outputs": [], @@ -140,16 +140,12 @@ "source": [ "### Prepare the LLM and Embedding Model\n", "\n", - "Ollama supports several embedding models, making it easy to build retrieval-augmented generation (RAG) applications. \n", + "Ollama supports multiple models for both LLM-based tasks and embedding generation, making it easy to develop retrieval-augmented generation (RAG) applications. 
For this setup:\n", "\n", - "#### Example Embedding Models\n", - "| Model | Parameter Size |\n", - "|---------------------|----------------|\n", - "| `mxbai-embed-large` | 334M |\n", - "| `nomic-embed-text` | 137M |\n", - "| `all-minilm` | 23M |\n", + "- We will use **Llama 3.2 (3B)** as our LLM for text generation tasks.\n", + "- For embedding generation, we will use **mxbai-embed-large**, a 334M parameter model optimized for semantic similarity.\n", "\n", - "To generate vector embeddings, first pull the desired model:" + "Before starting, ensure both models are pulled locally:" ] }, { @@ -159,14 +155,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[?25lpulling manifest ⠋ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠙ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠹ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠸ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠼ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠴ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest \r\n", + "\u001b[?25lpulling manifest ⠋ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠙ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠹ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠸ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠼ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠴ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest \r\n", "pulling 819c2adf5ce6... 100% ▕████████████████▏ 669 MB \r\n", "pulling c71d239df917... 100% ▕████████████████▏ 11 KB \r\n", "pulling b837481ff855... 100% ▕████████████████▏ 16 B \r\n", "pulling 38badd946f91... 
100% ▕████████████████▏ 408 B \r\n", "verifying sha256 digest \r\n", "writing manifest \r\n", - "success \u001B[?25h\r\n" + "success \u001b[?25h\r\n" ] } ], @@ -176,19 +172,60 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-11-27T02:31:23.218390Z", - "start_time": "2024-11-27T02:31:22.412385Z" + "end_time": "2024-11-27T02:47:27.543374Z", + "start_time": "2024-11-27T02:47:26.773196Z" + } + }, + "execution_count": 3 + }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[?25lpulling manifest ⠋ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠙ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠹ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠸ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠼ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest ⠴ \u001b[?25h\u001b[?25l\u001b[2K\u001b[1Gpulling manifest \r\n", + "pulling dde5aa3fc5ff... 100% ▕████████████████▏ 2.0 GB \r\n", + "pulling 966de95ca8a6... 100% ▕████████████████▏ 1.4 KB \r\n", + "pulling fcc5a6bec9da... 100% ▕████████████████▏ 7.7 KB \r\n", + "pulling a70ff7e570d9... 100% ▕████████████████▏ 6.0 KB \r\n", + "pulling 56bb8bd477a5... 100% ▕████████████████▏ 96 B \r\n", + "pulling 34bb5ab01051... 100% ▕████████████████▏ 561 B \r\n", + "verifying sha256 digest \r\n", + "writing manifest \r\n", + "success \u001b[?25h\r\n" + ] + } + ], + "source": [ + "! 
ollama pull llama3.2" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-11-27T02:47:28.821964Z", + "start_time": "2024-11-27T02:47:27.994522Z" } }, "execution_count": 4 }, + { + "cell_type": "markdown", + "source": [ + "With these models ready, we can proceed to implement LLM-driven generation and embedding-based retrieval workflows.\n" + ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:24.155302Z", - "start_time": "2024-11-27T02:31:23.911501Z" + "end_time": "2024-11-27T02:47:37.869891Z", + "start_time": "2024-11-27T02:47:37.637416Z" } }, "outputs": [], @@ -213,8 +250,8 @@ "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:24.995157Z", - "start_time": "2024-11-27T02:31:24.888531Z" + "end_time": "2024-11-27T02:47:40.957739Z", + "start_time": "2024-11-27T02:47:40.093056Z" } }, "outputs": [ @@ -253,8 +290,8 @@ "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:28.149547Z", - "start_time": "2024-11-27T02:31:26.423307Z" + "end_time": "2024-11-27T02:47:43.669801Z", + "start_time": "2024-11-27T02:47:42.118638Z" } }, "outputs": [], @@ -290,8 +327,8 @@ "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:29.781316Z", - "start_time": "2024-11-27T02:31:29.777964Z" + "end_time": "2024-11-27T02:47:45.796899Z", + "start_time": "2024-11-27T02:47:45.787086Z" } }, "outputs": [], @@ -314,8 +351,8 @@ "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:31.170874Z", - "start_time": "2024-11-27T02:31:30.658968Z" + "end_time": "2024-11-27T02:47:47.144411Z", + "start_time": "2024-11-27T02:47:46.620312Z" } }, "outputs": [], @@ -343,8 +380,8 @@ "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:34.695446Z", - "start_time": "2024-11-27T02:31:31.429091Z" + "end_time": 
"2024-11-27T02:47:51.481223Z", + "start_time": "2024-11-27T02:47:48.221138Z" } }, "outputs": [ @@ -352,7 +389,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Creating embeddings: 100%|██████████| 72/72 [00:03<00:00, 22.58it/s]\n" + "Creating embeddings: 100%|██████████| 72/72 [00:03<00:00, 22.56it/s]\n" ] }, { @@ -391,8 +428,8 @@ "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:34.700660Z", - "start_time": "2024-11-27T02:31:34.695886Z" + "end_time": "2024-11-27T02:47:51.983084Z", + "start_time": "2024-11-27T02:47:51.977698Z" } }, "outputs": [], @@ -412,8 +449,8 @@ "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:35.646122Z", - "start_time": "2024-11-27T02:31:35.599692Z" + "end_time": "2024-11-27T02:47:53.074097Z", + "start_time": "2024-11-27T02:47:52.987898Z" } }, "outputs": [], @@ -441,8 +478,8 @@ "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:36.523433Z", - "start_time": "2024-11-27T02:31:36.518040Z" + "end_time": "2024-11-27T02:47:54.530671Z", + "start_time": "2024-11-27T02:47:54.525077Z" } }, "outputs": [ @@ -490,8 +527,8 @@ "execution_count": 14, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:37.829743Z", - "start_time": "2024-11-27T02:31:37.825794Z" + "end_time": "2024-11-27T02:47:56.619344Z", + "start_time": "2024-11-27T02:47:56.614058Z" } }, "outputs": [], @@ -513,8 +550,8 @@ "execution_count": 15, "metadata": { "ExecuteTime": { - "end_time": "2024-11-27T02:31:38.696682Z", - "start_time": "2024-11-27T02:31:38.693360Z" + "end_time": "2024-11-27T02:47:57.596480Z", + "start_time": "2024-11-27T02:47:57.592721Z" } }, "outputs": [], @@ -537,50 +574,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Use the `llama3.2:1b` model provided by Ollama to generate a response based on the prompts. 
Let's pull the model first.\n" + "Use the `llama3.2` model provided by Ollama to generate a response based on the prompts.\n" ] }, { "cell_type": "code", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001B[?25lpulling manifest ⠋ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠙ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠹ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠸ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest ⠼ \u001B[?25h\u001B[?25l\u001B[2K\u001B[1Gpulling manifest \r\n", - "pulling 74701a8c35f6... 100% ▕████████████████▏ 1.3 GB \r\n", - "pulling 966de95ca8a6... 100% ▕████████████████▏ 1.4 KB \r\n", - "pulling fcc5a6bec9da... 100% ▕████████████████▏ 7.7 KB \r\n", - "pulling a70ff7e570d9... 100% ▕████████████████▏ 6.0 KB \r\n", - "pulling 4f659a1e86d7... 100% ▕████████████████▏ 485 B \r\n", - "verifying sha256 digest \r\n", - "writing manifest \r\n", - "success \u001B[?25h\r\n" - ] - } - ], - "source": [ - "! ollama pull llama3.2:1b" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-11-27T02:31:43.997697Z", - "start_time": "2024-11-27T02:31:40.274056Z" - } - }, - "execution_count": 16 - }, - { - "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "metadata": { "pycharm": { "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2024-11-27T02:32:27.646330Z", - "start_time": "2024-11-27T02:32:25.070759Z" + "end_time": "2024-11-27T02:48:03.947222Z", + "start_time": "2024-11-27T02:48:00.029787Z" } }, "outputs": [ @@ -588,24 +594,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "\n", - "Milvus stores data in two main categories:\n", - "\n", - "1. 
**Inserted data**: This includes vector data, scalar data, and collection-specific schema that are persisted as incremental logs on a persistent storage system such as:\n", - " * MinIO\n", - " * AWS S3\n", - " * Google Cloud Storage (GCS)\n", - " * Azure Blob Storage\n", - " * Alibaba Cloud OSS\n", - " * Tencent Cloud Object Storage (COS)\n", - "\n", - "2. **Metadata**: These are generated within Milvus and stored in an etcd module, which is a distributed key-value store.\n", + "According to the provided context, data in Milvus is stored in two types:\n", "\n", - "In terms of data storage size, the maximum dataset size that Milvus can handle is determined by the hardware it is run on, specifically:\n", + "1. **Inserted data**: Storing data in persistent storage as incremental log. It supports multiple object storage backends such as MinIO, AWS S3, Google Cloud Storage (GCS), Azure Blob Storage, Alibaba Cloud OSS, and Tencent Cloud Object Storage.\n", "\n", - "* System memory: This determines the maximum amount of data Milvus can query.\n", - "* Storage: When new entities and collection-related schema are added to Milvus, system storage also determines the maximum allowable size of inserted data.\n" + "2. **Metadata**: Generated within Milvus and stored in etcd.\n" ] } ], @@ -614,7 +607,7 @@ "from ollama import ChatResponse\n", "\n", "response: ChatResponse = chat(\n", - " model=\"llama3.2:1b\",\n", + " model=\"llama3.2\",\n", " messages=[\n", " {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n", " {\"role\": \"user\", \"content\": USER_PROMPT},\n",