From 6a255943b4768f7fb606d013932d8533d8f638d4 Mon Sep 17 00:00:00 2001 From: Rohit Prasad Date: Wed, 13 Nov 2024 14:46:54 -0800 Subject: [PATCH] Cleaning up old file. (#48) --- examples/RAG.ipynb | 286 --------------------------------------------- 1 file changed, 286 deletions(-) delete mode 100644 examples/RAG.ipynb diff --git a/examples/RAG.ipynb b/examples/RAG.ipynb deleted file mode 100644 index 0f9cd445..00000000 --- a/examples/RAG.ipynb +++ /dev/null @@ -1,286 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-28T19:09:30.151056Z", - "start_time": "2024-07-28T19:09:30.144028Z" - } - }, - "source": [ - "# load api keys from the `.env` file. An example of this can be found in `.env.sample`\n", - "from dotenv import load_dotenv; load_dotenv()" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 9 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-28T18:59:06.786489Z", - "start_time": "2024-07-28T18:56:34.662611Z" - } - }, - "source": [ - "# Get Data\n", - "#import csv\n", - "\n", - "from datasets import load_dataset\n", - "ds = load_dataset(\"stanfordnlp/sentiment140\")\n" - ], - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading data: 100%|██████████| 81.4M/81.4M [01:55<00:00, 706kB/s] \n", - "Generating train split: 100%|██████████| 1600000/1600000 [00:34<00:00, 46744.97 examples/s]\n", - "Generating test split: 100%|██████████| 498/498 [00:00<00:00, 30145.67 examples/s]\n" - ] - } - ], - "execution_count": 3 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-28T18:59:53.134465Z", - "start_time": "2024-07-28T18:59:53.114471Z" - } - }, - "source": [ - "docs = ds['train'][:2000]['text']" - ], - "outputs": [], - "execution_count": 4 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-28T19:02:35.199055Z", - "start_time": "2024-07-28T18:59:58.617763Z" - } - }, - "source": [ - "# Get our encoder to encode \n", - "from sentence_transformers import SentenceTransformer \n", - "model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')\n", - "data_emb = model.encode(docs) #16 seconds on M1 Mac 8gb\n" - ], - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/ksolo/Library/Caches/pypoetry/virtualenvs/aisuite-HUywTnIy-py3.12/lib/python3.12/site-packages/sentence_transformers/evaluation/SentenceEvaluator.py:81: SyntaxWarning: invalid escape sequence '\\g'\n", - " return re.sub(r\"([a-z])([A-Z])\", \"\\g<1> \\g<2>\", class_name)\n", - "/Users/ksolo/Library/Caches/pypoetry/virtualenvs/aisuite-HUywTnIy-py3.12/lib/python3.12/site-packages/sentence_transformers/model_card.py:524: SyntaxWarning: invalid escape sequence '\\d'\n", - " if dataset_name and re.match(\"_dataset_\\d+\", dataset_name):\n", - "/Users/ksolo/Library/Caches/pypoetry/virtualenvs/aisuite-HUywTnIy-py3.12/lib/python3.12/site-packages/sentence_transformers/losses/DenoisingAutoEncoderLoss.py:16: SyntaxWarning: invalid escape sequence '\\_'\n", - " \"\"\"\n" - ] - } - ], - "execution_count": 5 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-28T19:03:06.618999Z", - "start_time": "2024-07-28T19:03:03.296772Z" - } - }, - "source": [ - "# Now set up the vector store to accept the data\n", - "\n", - "import chromadb\n", - 
"\n", - "chroma_client = chromadb.Client()\n", - "collection = chroma_client.create_collection(name=\"SampleDB\")\n", - "\n", - "collection.add(\n", - " embeddings=data_emb.tolist(),\n", - " documents=docs,\n", - " ids=[str(idx) for idx in range(len(data_emb))]) # Doc ID's are required\n", - "\n" - ], - "outputs": [], - "execution_count": 6 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-28T19:03:49.751009Z", - "start_time": "2024-07-28T19:03:49.727998Z" - } - }, - "source": [ - "#prep the question in the encoding space\n", - "\n", - "question = 'What is the status of the chalkboard?'\n", - "question_emb = model.encode(question)\n", - "\n", - "results = collection.query(query_embeddings=question_emb.tolist(), n_results=10)\n", - "\n", - "context = ' '.join(results['documents'][0]) # Pulling out a lists of lists\n" - ], - "outputs": [], - "execution_count": 7 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-28T19:04:36.973296Z", - "start_time": "2024-07-28T19:04:36.952794Z" - } - }, - "source": [ - "# Call the models to determine the answer by response\n", - "\n", - "prompt = f'Given the following data, Please answer the question: \\n\\n ##question \\n {question}\\n\\n ##context \\n {context}'\n", - "\n", - "import aisuite as ai\n", - "client = ai.Client()\n", - "\n", - "messages = [\n", - " {\"role\": \"system\", \"content\": \"You are a helpful agent, who answers with brevity. \"},\n", - " {\"role\": \"user\", \"content\": prompt},\n", - "]\n" - ], - "outputs": [], - "execution_count": 8 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-28T19:09:41.132595Z", - "start_time": "2024-07-28T19:09:40.634038Z" - } - }, - "source": [ - "#groq_llama3_8b = \"groq:llama3-8b-8192\" \n", - "response = client.chat.completions.create(model=\"groq:llama3-70b-8192\", messages=messages)\n", - "\n", - "print(response.choices[0].message.content)\n" - ], - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The status of the chalkboard is: USELESS (because there is no chalk).\n" - ] - } - ], - "execution_count": 10 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-28T19:10:50.448134Z", - "start_time": "2024-07-28T19:10:50.443295Z" - } - }, - "source": [ - "results['documents'][0][0]" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "\"Damn... I don't have any chalk! 
MY CHALKBOARD IS USELESS \"" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 11 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-28T19:11:15.766909Z", - "start_time": "2024-07-28T19:11:12.732107Z" - } - }, - "cell_type": "code", - "source": [ - "response = client.chat.completions.create(model=\"anthropic:claude-3-opus-20240229\", messages=messages)\n", - "print(response.choices[0].message.content)" - ], - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Based on the context provided, the status of the chalkboard is useless because the person does not have any chalk.\n" - ] - } - ], - "execution_count": 12 - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}
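
For reference, the removed examples/RAG.ipynb walked through a minimal retrieval-augmented
generation (RAG) flow: embed a slice of the sentiment140 tweets with sentence-transformers,
index them in an in-memory Chroma collection, retrieve the nearest documents for a question,
and have an LLM answer from that context via aisuite. The condensed sketch below is reassembled
from the deleted cells only and has not been re-run; the model names, the "SampleDB" collection
name, and the sample question all come from the notebook and can be swapped out.

    # Condensed RAG sketch reconstructed from the deleted examples/RAG.ipynb cells.
    # Assumes provider API keys are set in a local .env file (see .env.sample).
    from dotenv import load_dotenv
    from datasets import load_dataset
    from sentence_transformers import SentenceTransformer
    import chromadb
    import aisuite as ai

    load_dotenv()

    # Use a small slice of the sentiment140 tweets as the document corpus.
    ds = load_dataset("stanfordnlp/sentiment140")
    docs = ds["train"][:2000]["text"]

    # Embed the documents with a compact sentence-transformer model.
    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    data_emb = model.encode(docs)

    # Index the embeddings in an in-memory Chroma collection (document IDs are required).
    collection = chromadb.Client().create_collection(name="SampleDB")
    collection.add(
        embeddings=data_emb.tolist(),
        documents=docs,
        ids=[str(idx) for idx in range(len(data_emb))],
    )

    # Embed the question, retrieve the 10 nearest documents, and join them into one context string.
    question = "What is the status of the chalkboard?"
    question_emb = model.encode(question)
    results = collection.query(query_embeddings=question_emb.tolist(), n_results=10)
    context = " ".join(results["documents"][0])

    # Ask an LLM to answer the question from the retrieved context via aisuite.
    prompt = f"Given the following data, please answer the question:\n\n##question\n{question}\n\n##context\n{context}"
    messages = [
        {"role": "system", "content": "You are a helpful agent, who answers with brevity."},
        {"role": "user", "content": prompt},
    ]
    client = ai.Client()
    response = client.chat.completions.create(model="groq:llama3-70b-8192", messages=messages)
    print(response.choices[0].message.content)

As in the notebook, switching providers only means changing the model string, e.g.
"anthropic:claude-3-opus-20240229" instead of "groq:llama3-70b-8192".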