From a5049d67cfbcacb18559e064e5ca1c726c14fe3b Mon Sep 17 00:00:00 2001 From: Joan Martinez Date: Wed, 28 Feb 2024 16:12:55 +0100 Subject: [PATCH] fix: fix notebook --- .../node_postprocessor/JinaRerank.ipynb | 261 +++++++++++++----- 1 file changed, 192 insertions(+), 69 deletions(-) diff --git a/docs/examples/node_postprocessor/JinaRerank.ipynb b/docs/examples/node_postprocessor/JinaRerank.ipynb index 7ba9321894680..0f765ecdbc02e 100644 --- a/docs/examples/node_postprocessor/JinaRerank.ipynb +++ b/docs/examples/node_postprocessor/JinaRerank.ipynb @@ -29,7 +29,8 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-postprocessor-jinaai-rerank" + "%pip install llama-index-postprocessor-jinaai-rerank\n", + "%pip install llama-index-embeddings-jinaai" ] }, { @@ -43,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -55,53 +56,156 @@ ] }, { - "attachments": {}, + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.embeddings.jinaai import JinaEmbedding\n", + "\n", + "api_key = os.environ[\"JINA_API_KEY\"]\n", + "jina_embeddings = JinaEmbedding(api_key=api_key)\n", + "\n", + "# load documents\n", + "import requests\n", + "\n", + "url = \"https://niketeam-asset-download.nike.net/catalogs/2024/2024_Nike%20Kids_02_09_24.pdf?cb=09302022\"\n", + "response = requests.get(url)\n", + "with open(\"Nike_Catalog.pdf\", \"wb\") as f:\n", + " f.write(response.content)\n", + "reader = SimpleDirectoryReader(input_files=[\"Nike_Catalog.pdf\"])\n", + "documents = reader.load_data()\n", + "\n", + "# build index\n", + "index = VectorStoreIndex.from_documents(\n", + " documents=documents, embed_model=jina_embeddings\n", + ")" + ] + }, + { "cell_type": "markdown", "metadata": {}, "source": [ - "Download Data" + "#### Retrieve top 10 most relevant nodes, without using a reranker" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "!mkdir -p 'data/paul_graham/'\n", - "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'" + "query_engine = index.as_query_engine(similarity_top_k=10)\n", + "response = query_engine.query(\n", + " \"What is the best jersey by Nike in terms of fabric?\",\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "56\n", + "Sustainable MaterialsNIKE KIDS SOCCER – GOALKEEPER\n", + "KIDS NIKE DRY LS US PARK IV GK JERSEY \n", + "CJ6073 $42.00\n", + "SIZES: XS, S, M, L, XL\n", + "FABRIC: 100% polyester.\n", + "OFFER DATE: 04/01/20\n", + "END DATE: 12/31/25\n", + "Goal keepers jersey with graphic print on sleeves and across upper back panel, mesh back for breathability, \n", + "slim fit with soft hand feel, shoulder seams rolled forward for better graphic visibility, straight seam across \n", + "back, mesh back for breathability – gameday graphic print inspired by retro campos gk design . \n", + "Body width: 16.3\", Body length: 22\" (size medium).\n", + "010 Black/White/(White) 012 Wolf Grey/White/(Black) 702 Volt/White/(Black)\n", + "KIDS NIKE DRY PARK III SHORT \n", + "BV6866 $20.00\n", + "SIZES: XS, S, M, L, XL\n", + "FABRIC: 100% polyester.\n", + "OFFER DATE: 04/01/20\n", + "END DATE: 12/31/25\n", + "Dri-FIT angled side seam short (slim fit) with soft hand feel updated fit for better mobility/comfort . \n", + "Hip width: 16.9\", Inseam length: 7\" (size medium).\n", + "010 Black/White/(White) 012 Wolf Grey/Black/(Black) 702 Volt/(Black)\n", + "NIKE ACADEMY OTC SOCK (UNISEX) \n", + "SX5728 $12.00\n", + "Sold in prepacks of 6.\n", + "SIZES: XS, S, M, L, XL\n", + "FABRIC: 93% nylon/6% polyester/1% spandex.\n", + "OFFER DATE: 01/01/17\n", + "END DATE: 12/31/23\n", + "Game day sock with fold-over cuff, articulated foot specific footbed for superior fit and contrast Swoosh \n", + "design trademark at ankle. Sold in prepacks of 6.\n", + "010 Black/(White) 018 Wolf Grey/(Black) 702 Volt/(Black)\n", + "Sustainable Materials 0.8641328028479249\n", + "\n", + "\n", + "NIKE KIDS SOCCER – STOCK42\n", + "Sustainable Materials\n", + "KIDS NIKE DRI-FIT US SS \n", + "CHALLENGE IV JERSEY\n", + "DH8368 $42.00\n", + "SIZES: XS, S, M, L, XL\n", + "FABRIC: 100% polyester.\n", + "OFFER DATE: 01/01/22\n", + "END DATE: 12/31/23\n", + "The Nike Dri-FIT Challenge IV Jersey brings subtle style and modern performance to the field. Sweat-\n", + "wicking fabric helps keep you dry and comfortable from the first whistle to the last minute.\n", + "010 Black/Black/White/(White) 012 Wolf Grey/Wolf Grey/Black/(Black)\n", + "100 White/White/White/(Black) 341 Gorge Green/Gorge Green/White/(White)\n", + "419 College Navy/College Navy/White/(White) 448 Valor Blue/Valor Blue/White/(White)\n", + "480 Game Royal/Game Royal/White/(White) 657 University Red/University Red/White/(White)\n", + "692 Team Maroon/Team Maroon/White/(White) 702 Volt/Volt/Black/(Black)\n", + "891 Team Orange/Team Orange/Black/(Black)\n", + "NEW\n", + "KIDS NIKE DRI-FIT CHALLENGE V JERSEY \n", + "SS US\n", + "FD7427 $47.00\n", + "SIZES: XS, S, M, L, XL\n", + "FABRIC: 100% polyester.\n", + "OFFER DATE: 01/01/24\n", + "END DATE: 12/31/25\n", + "The Nike Dri-FIT Challenge Jersey V is designed to keep your players cool and comfortable through 90 \n", + "minutes and beyond. Mesh on the back and side panels offer breathability where athletes need it most. \n", + "Body and sleeves are a Nike Dri-FIT knit fabric that moves sweat away to help keep players dry. This top \n", + "is made with 100% recycled material. Side panel construction uses a more efficient pattern to help reduce \n", + "material waste. Slim fit for a tailored look and feel.\n", + "010 Black/White/(White) 012 Wolf Grey/Black/(Black) 100 White/Black/(Black)\n", + "341 Gorge Green/White/(White) 419 College Navy/White/(White) 448 Valor Blue/White/(White)\n", + "480 Game Royal/White/(White) 657 University Red/White/(White) 692 Team Maroon/White/(White)\n", + "702 Volt/Black/(Black) 891 Team Orange/Black/(Black)\n", + "BACK VIEW 0.863721033128725\n" + ] + } + ], "source": [ - "from llama_index.embeddings.jinaai import JinaEmbedding\n", - "\n", - "api_key = os.environ[\"JINA_API_KEY\"]\n", - "jina_embeddings = JinaEmbedding(api_key=api_key)\n", - "\n", - "# load documents\n", - "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()\n", - "\n", - "# build index\n", - "index = VectorStoreIndex.from_documents(\n", - " documents=documents, embed_model=jina_embeddings\n", - ")" + "print(response.source_nodes[0].text, response.source_nodes[0].score)\n", + "print(\"\\n\")\n", + "print(response.source_nodes[1].text, response.source_nodes[1].score)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Retrieve top 10 most relevant nodes, then rerank with Jina Rerank" + "#### Retrieve top 10 most relevant nodes, but then rerank using Jina Reranker" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By employing a reranker model, the prompt can be given more relevant context. This will lead to a more accurate response by the LLM." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -113,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -121,63 +225,81 @@ " similarity_top_k=10, node_postprocessors=[jina_rerank]\n", ")\n", "response = query_engine.query(\n", - " \"What did Sam Altman do in this essay?\",\n", + " \"What is the best jersey by Nike in terms of fabric?\",\n", ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "This seemed strange advice, because YC was doing great. But if there was one thing rarer than Rtm offering advice, it was Rtm being wrong. So this set me thinking. It was true that on my current trajectory, YC would be the last thing I did, because it was only taking up more of my attention. It had already eaten Arc, and was in the process of eating essays too. Either YC was my life's work or I'd have to leave eventually. And it wasn't, so I would.\n", - "\n", - "In the summer of 2012 my mother had a stroke, and the cause turned out to be a blood clot caused by colon cancer. The stroke destroyed her balance, and she was put in a nursing home, but she really wanted to get out of it and back to her house, and my sister and I were determined to help her do it. I used to fly up to Oregon to visit her regularly, and I had a lot of time to think on those flights. On one of them I realized I was ready to hand YC over to someone else.\n", - "\n", - "I asked Jessica if she wanted to be president, but she didn't, so we decided we'd try to recruit Sam Altman. We talked to Robert and Trevor and we agreed to make it a complete changing of the guard. Up till that point YC had been controlled by the original LLC we four had started. But we wanted YC to last for a long time, and to do that it couldn't be controlled by the founders. So if Sam said yes, we'd let him reorganize YC. Robert and I would retire, and Jessica and Trevor would become ordinary partners.\n", - "\n", - "When we asked Sam if he wanted to be president of YC, initially he said no. He wanted to start a startup to make nuclear reactors. But I kept at it, and in October 2013 he finally agreed. We decided he'd take over starting with the winter 2014 batch. For the rest of 2013 I left running YC more and more to Sam, partly so he could learn the job, and partly because I was focused on my mother, whose cancer had returned.\n", - "\n", - "She died on January 15, 2014. We knew this was coming, but it was still hard when it did.\n", - "\n", - "I kept working on YC till March, to help get that batch of startups through Demo Day, then I checked out pretty completely. (I still talk to alumni and to new startups working on things I'm interested in, but that only takes a few hours a week.)\n", - "\n", - "What should I do next? Rtm's advice hadn't included anything about that. I wanted to do something completely different, so I decided I'd paint. I wanted to see how good I could get if I really focused on it. So the day after I stopped working on YC, I started painting. I was rusty and it took a while to get back into shape, but it was at least completely engaging. [18]\n", - "\n", - "I spent most of the rest of 2014 painting. I'd never been able to work so uninterruptedly before, and I got to be better than I had been. Not good enough, but better. Then in November, right in the middle of a painting, I ran out of steam. Up till that point I'd always been curious to see how the painting I was working on would turn out, but suddenly finishing this one seemed like a chore. So I stopped working on it and cleaned my brushes and haven't painted since. So far anyway.\n", - "\n", - "I realize that sounds rather wimpy. But attention is a zero sum game. If you can choose what to work on, and you choose a project that's not the best one (or at least a good one) for you, then it's getting in the way of another project that is. And at 50 there was some opportunity cost to screwing around.\n", - "\n", - "I started writing essays again, and wrote a bunch of new ones over the next few months. I even wrote a couple that weren't about startups. Then in March 2015 I started working on Lisp again.\n", - "\n", - "The distinctive thing about Lisp is that its core is a language defined by writing an interpreter in itself. It wasn't originally intended as a programming language in the ordinary sense. It was meant to be a formal model of computation, an alternative to the Turing machine. If you want to write an interpreter for a language in itself, what's the minimum set of predefined operators you need? The Lisp that John McCarthy invented, or more accurately discovered, is an answer to that question. 0.09585607796907425\n", - "\n", - "\n", - "If he even knew about the strange classes I was taking, he never said anything.\n", - "\n", - "So now I was in a PhD program in computer science, yet planning to be an artist, yet also genuinely in love with Lisp hacking and working away at On Lisp. In other words, like many a grad student, I was working energetically on multiple projects that were not my thesis.\n", - "\n", - "I didn't see a way out of this situation. I didn't want to drop out of grad school, but how else was I going to get out? I remember when my friend Robert Morris got kicked out of Cornell for writing the internet worm of 1988, I was envious that he'd found such a spectacular way to get out of grad school.\n", - "\n", - "Then one day in April 1990 a crack appeared in the wall. I ran into professor Cheatham and he asked if I was far enough along to graduate that June. I didn't have a word of my dissertation written, but in what must have been the quickest bit of thinking in my life, I decided to take a shot at writing one in the 5 weeks or so that remained before the deadline, reusing parts of On Lisp where I could, and I was able to respond, with no perceptible delay \"Yes, I think so. I'll give you something to read in a few days.\"\n", - "\n", - "I picked applications of continuations as the topic. In retrospect I should have written about macros and embedded languages. There's a whole world there that's barely been explored. But all I wanted was to get out of grad school, and my rapidly written dissertation sufficed, just barely.\n", - "\n", - "Meanwhile I was applying to art schools. I applied to two: RISD in the US, and the Accademia di Belli Arti in Florence, which, because it was the oldest art school, I imagined would be good. RISD accepted me, and I never heard back from the Accademia, so off to Providence I went.\n", - "\n", - "I'd applied for the BFA program at RISD, which meant in effect that I had to go to college again. This was not as strange as it sounds, because I was only 25, and art schools are full of people of different ages. RISD counted me as a transfer sophomore and said I had to do the foundation that summer. The foundation means the classes that everyone has to take in fundamental subjects like drawing, color, and design.\n", - "\n", - "Toward the end of the summer I got a big surprise: a letter from the Accademia, which had been delayed because they'd sent it to Cambridge England instead of Cambridge Massachusetts, inviting me to take the entrance exam in Florence that fall. This was now only weeks away. My nice landlady let me leave my stuff in her attic. I had some money saved from consulting work I'd done in grad school; there was probably enough to last a year if I lived cheaply. Now all I had to do was learn Italian.\n", - "\n", - "Only stranieri (foreigners) had to take this entrance exam. In retrospect it may well have been a way of excluding them, because there were so many stranieri attracted by the idea of studying art in Florence that the Italian students would otherwise have been outnumbered. I was in decent shape at painting and drawing from the RISD foundation that summer, but I still don't know how I managed to pass the written exam. I remember that I answered the essay question by writing about Cezanne, and that I cranked up the intellectual level as high as I could to make the most of my limited vocabulary. [2]\n", + "NIKE KIDS SOCCER – STOCK41Sustainable Materials\n", + "Sustainable Materials\n", + "KIDS DRI-FIT ADV VAPOR IV JERSEY US SS\n", + "DR0837 $77.00\n", + "SIZES: XS, S, M, L, XL\n", + "FABRIC: 100% polyester.\n", + "OFFER DATE: 01/01/23\n", + "END DATE: 12/31/24\n", + "Step on to the field ready for fast-paced play in the Nike Dri-FIT ADV Vapor Jersey. Engineered for \n", + "optimal breathability, its moisture-wicking design helps keep you dry and cool under match-day pressure. \n", + "Lightweight fabric in a relaxed, easy fit combats cling so you can focus on being the first to the ball. Lower \n", + "insets line up perfectly with design details on the Nike Dri-FIT ADV Vapor IV Shorts to create an on-field \n", + "look worthy of pro-level play. \n", + "010 Black/Black/Black/(White) 100 White/White/White/(Black)\n", + "419 College Navy/College Navy/Game Royal/(White) 480 Game Royal/Game Royal/College Navy/(White)\n", + "657 University Red/University Red/Bright Crimson/(White)\n", + "BACK VIEW\n", + "GRAPHIC KNIT DETAIL\n", + "KIDS NIKE DRI-FIT US SS STRIKE III JERSEY\n", + "DR0913 $50.00\n", + "SIZES: XS, S, M, L, XL\n", + "FABRIC: 100% polyester.\n", + "OFFER DATE: 01/01/23\n", + "END DATE: 12/31/24\n", + "Take the field in match-ready style in the lightweight Nike Strike Jersey. A relaxed, easy fit ensures that \n", + "nothing comes between you and the ball, and sweat-wicking fabric works with breathable mesh to help \n", + "keep you cool and composed during fast-paced play. Ribbed insets stretch with you to let you move without \n", + "restrictions. Embroidered Swoosh design trademark. \n", + "010 Black/Black/Black/(White) 011 Black/Volt/Volt/(White)\n", + "012 Wolf Grey/Black/Black/(White) 100 White/White/White/(Black)\n", + "419 College Navy/College Navy/Game Royal/(White) 448 Valor Blue/College Navy/College Navy/(White)\n", + "480 Game Royal/College Navy/College Navy/(White) 657 University Red/Bright Crimson/Bright Crimson/(White)\n", + "GRAPHIC KNIT DETAIL 0.3603765070438385\n", "\n", - "I'm only up to age 25 and already there are such conspicuous patterns. Here I was, yet again about to attend some august institution in the hopes of learning about some prestigious subject, and yet again about to be disappointed. The students and faculty in the painting department at the Accademia were the nicest people you could imagine, but they had long since arrived at an arrangement whereby the students wouldn't require the faculty to teach anything, and in return the faculty wouldn't require the students to learn anything. And at the same time all involved would adhere outwardly to the conventions of a 19th century atelier. We actually had one of those little stoves, fed with kindling, that you see in 19th century studio paintings, and a nude model sitting as close to it as possible without getting burned. Except hardly anyone else painted her besides me. The rest of the students spent their time chatting or occasionally trying to imitate things they'd seen in American art magazines.\n", "\n", - "Our model turned out to live just down the street from me. 0.058560825884342194\n" + "NIKE KIDS SOCCER – STOCK45\n", + "Sustainable MaterialsKIDS NIKE DRI-FIT US LS TIEMPO\n", + "PREMIER II JERSEY\n", + "DH8407 $32.00\n", + "SIZES: XS, S, M, L, XL\n", + "FABRIC: 100% polyester.\n", + "OFFER DATE: 01/01/22\n", + "END DATE: 12/31/26\n", + "The Nike Dri-FIT Tiempo Premier II Jersey brings you the cool performance of sweat-wicking fabric and a \n", + "mesh back panel kick in when the game heats up.\n", + "010 Black/White/(White) 100 White/White/(Black) 419 College Navy/White/(White)\n", + "480 Game Royal/White/(White) 657 University Red/White/(White)\n", + "KIDS NIKE DRI-FIT US SS TIEMPO\n", + "PREMIER II JERSEY\n", + "DH8390 $27.00\n", + "SIZES: XS, S, M, L, XL\n", + "FABRIC: 100% polyester.\n", + "OFFER DATE: 01/01/22\n", + "END DATE: 12/31/26\n", + "The Nike Dri-FIT Tiempo Premier II Jersey brings you the cool performance of sweat-wicking fabric and a \n", + "mesh back panel kick in when the game heats up.\n", + "010 Black/White/(White) 012 Wolf Grey/Black/(Black) 100 White/White/(Black)\n", + "341 Gorge Green/White/(White) 419 College Navy/White/(White) 448 Valor Blue/White/(White)\n", + "480 Game Royal/White/(White) 547 Court Purple/White/(White) 616 Vivid Pink/Black/(Black)\n", + "657 University Red/White/(White) 692 Team Maroon/White/(White) 702 Volt/Black/(Black)\n", + "891 Team Orange/Black/(Black)\n", + "Sustainable Materials 0.35767972469329834\n" ] } ], @@ -203,7 +325,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.10.11" } }, "nbformat": 4,