Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve example #28

Merged
merged 1 commit into from
Mar 2, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 86 additions & 88 deletions example/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,42 @@ package main
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"runtime"
"strconv"
"strings"

"github.com/philippgille/chromem-go"
"github.com/sashabaranov/go-openai"
)

const (
question = "Wich smooth jazz album received a Grammy nomination in 2009? I want to know the album name and artist."
question = "How many Albatros L 74 planes were produced?"
// We use a local LLM running in ollama: https://ollama.com/
ollamaBaseURL = "http://localhost:11434/v1"
// We use a very small model that doesn't need many resources and is fast, but
// doesn't have much knowledge: https://ollama.com/library/tinyllama
ollamaModel = "tinyllama:1.1b"
// doesn't have much knowledge: https://ollama.com/library/gemma
// We found Gemma 2B to be superior to TinyLlama (1.1B), Stable LM 2 (1.6B)
// and Phi-2 (2.7B) for the retrieval augmented generation (RAG) use case.
ollamaModel = "gemma:2b"
)

func main() {
ctx := context.Background()

// First we ask an LLM a fairly specific question that it won't know the answer
// to.
// Warm up ollama, in case the model isn't loaded yet
log.Println("Warming up ollama...")
_ = askLLM(ctx, nil, "Hello!")

// First we ask an LLM a fairly specific question that it likely won't know
// the answer to.
log.Println("Question: " + question)
log.Println("Asking LLM...")
reply := askLLM(ctx, "", question)
fmt.Printf("\nInitial reply from the LLM:\n" +
"===========================\n\n" +
reply + "\n\n")
reply := askLLM(ctx, nil, question)
log.Printf("Initial reply from the LLM: \"" + reply + "\"\n")

// Now we use our vector database for retrieval augmented generation (RAG),
// which means we provide the LLM with relevant knowledge.
Expand All @@ -45,48 +50,51 @@ func main() {
if err != nil {
panic(err)
}
// Create collection.
// Create collection if it wasn't loaded from persistent storage yet.
// We don't pass any embedding function, leading to the default being used (OpenAI
// text-embedding-3-small), which requires the OPENAI_API_KEY environment variable
// to be set.
collection, err := db.CreateCollection("Wikipedia", nil, nil)
if err != nil {
panic(err)
}
// Add docs to the collection.
// Here we use a DBpedia sample, where each line contains the lead section/introduction
// to some Wikipedia article and its category.
f, err := os.Open("dbpedia_sample.jsonl")
collection, err := db.GetOrCreateCollection("Wikipedia", nil, nil)
if err != nil {
panic(err)
}
d := json.NewDecoder(f)
var ids []string
var metadatas []map[string]string
var texts []string
log.Println("Reading JSON lines...")
// In this example we just read the first 20 lines, but in a real-world scenario
// you'd read the entire file.
for i := 0; i < 20; i++ {
var article struct {
Text string `json:"text"`
Category string `json:"category"`
// Add docs to the collection only if it is still empty, i.e. it was just
// created rather than loaded from persistent storage.
if collection.Count() == 0 {
// Here we use a DBpedia sample, where each line contains the lead section/introduction
// to some Wikipedia article and its category.
f, err := os.Open("dbpedia_sample.jsonl")
if err != nil {
panic(err)
}
d := json.NewDecoder(f)
var ids []string
var metadatas []map[string]string
var texts []string
log.Println("Reading JSON lines...")
for i := 1; ; i++ {
var article struct {
Text string `json:"text"`
Category string `json:"category"`
}
err := d.Decode(&article)
if err == io.EOF {
break // reached end of file
} else if err != nil {
panic(err)
}

ids = append(ids, strconv.Itoa(i))
metadatas = append(metadatas, map[string]string{"category": article.Category})
texts = append(texts, article.Text)
}
err := d.Decode(&article)
if err == io.EOF {
break // reached end of file
} else if err != nil {
log.Println("Adding documents to chromem-go, including creating their embeddings via OpenAI API...")
err = collection.AddConcurrently(ctx, ids, nil, metadatas, texts, runtime.NumCPU())
if err != nil {
panic(err)
}

ids = append(ids, strconv.Itoa(i))
metadatas = append(metadatas, map[string]string{"category": article.Category})
texts = append(texts, article.Text)
}
log.Println("Adding documents to chromem-go...")
err = collection.AddConcurrently(ctx, ids, nil, metadatas, texts, runtime.NumCPU())
if err != nil {
panic(err)
} else {
log.Println("Not reading JSON lines because collection was loaded from persistent storage.")
}

// Search for documents similar to the one we added just by passing the original
Expand All @@ -103,68 +111,58 @@ func main() {

// Now we can ask the LLM again, augmenting the question with the knowledge we retrieved.
// In this example we just use both retrieved documents as context.
context := docRes[0].Document + "\n\n" + docRes[1].Document
contexts := []string{docRes[0].Document, docRes[1].Document}
log.Println("Asking LLM with augmented question...")
reply = askLLM(ctx, context, question)
fmt.Printf("\nReply after augmenting the question with knowledge:\n" +
"===================================================\n\n" +
reply + "\n\n")
reply = askLLM(ctx, contexts, question)
log.Printf("Reply after augmenting the question with knowledge: \"" + reply + "\"\n")

/* Output (can differ slightly on each run):

2024/02/17 15:25:04 Asking LLM...

Initial reply from the LLM:
===========================

"The Album That Received A Grammy Nominated In 2009" or "A Smooth Jazz Album That Was Nominated For The Grammy Award In 2009".

2024/02/17 15:25:06 Setting up chromem-go...
2024/02/17 15:25:06 Reading JSON lines...
2024/02/17 15:25:06 Adding documents to chromem-go...
2024/02/17 15:25:08 Querying chromem-go...
2024/02/17 15:25:08 Asking LLM with augmented question...

Reply after augmenting the question with knowledge:
===================================================

"The Spice of Life" by Earl Klugh. The nomination was for Best Pop Instrumental Album at the 51st Grammy Awards in 2009.
2024/03/02 14:52:40 Warming up ollama...
2024/03/02 14:52:42 Question: How many Albatros L 74 planes were produced?
2024/03/02 14:52:42 Asking LLM...
2024/03/02 14:52:45 Initial reply from the LLM: "I am unable to provide a specific number for the number of Albatros L 74 planes produced, as I do not have access to real-time information or comprehensive records."
2024/03/02 14:52:45 Setting up chromem-go...
2024/03/02 14:52:45 Reading JSON lines...
2024/03/02 14:52:45 Adding documents to chromem-go, including creating their embeddings via OpenAI API...
2024/03/02 14:52:55 Querying chromem-go...
2024/03/02 14:52:55 Asking LLM with augmented question...
2024/03/02 14:53:01 Reply after augmenting the question with knowledge: "Answer: Only two Albatros L 74 planes were produced."
*/
}

func askLLM(ctx context.Context, context, question string) string {
func askLLM(ctx context.Context, contexts []string, question string) string {
// We use a local LLM running in ollama, which has an OpenAI-compatible API.
// openAIClient := openai.NewClient(os.Getenv("OPENAI_API_KEY"))
openAIClient := openai.NewClientWithConfig(openai.ClientConfig{
BaseURL: ollamaBaseURL,
HTTPClient: http.DefaultClient,
})
res, err := openAIClient.CreateChatCompletion(ctx, openai.ChatCompletionRequest{
// Model: openai.GPT3Dot5Turbo,
Model: ollamaModel,
Messages: []openai.ChatCompletionMessage{
{
Role: openai.ChatMessageRoleSystem,
Content: "You are a helpful assistant. You answer the user's questions. Combine your knowledge with the context that the user might provide, as it's likely relevant to the user's question. If you are not sure, say that you don't know the answer.",
},
{
Role: openai.ChatMessageRoleUser,
Content: "Context: " + context,
},
{
Role: openai.ChatMessageRoleUser,
Content: "Question: " + question,
},
{
Role: openai.ChatMessageRoleAssistant,
Content: "Based on your provided context and question, I think the answer is:",
},
messages := []openai.ChatCompletionMessage{
{
Role: openai.ChatMessageRoleSystem,
Content: "You are a helpful assistant. You answer the user's questions in a concise manner. If you are not sure, say that you don't know the answer. If the user provides contexts, use them to answer their question.",
},
}
// Add contexts in reverse order, so that contexts[0] ends up directly before
// the question: many LLMs prioritize the latest messages and tend to forget
// older ones, even when everything fits into the LLM's context window.
for i := len(contexts) - 1; i >= 0; i-- {
messages = append(messages, openai.ChatCompletionMessage{
Role: openai.ChatMessageRoleUser,
Content: "Context:" + contexts[i],
})
}
messages = append(messages, openai.ChatCompletionMessage{
Role: openai.ChatMessageRoleUser,
Content: "Question: " + question,
})
res, err := openAIClient.CreateChatCompletion(ctx, openai.ChatCompletionRequest{
Model: ollamaModel,
Messages: messages,
})
if err != nil {
panic(err)
}
reply := res.Choices[0].Message.Content
reply = strings.TrimSpace(reply)

return reply
}