KNN search results don't look right #2105

deepakbkar · 2024-11-24T07:18:01Z

I am using bleve v2.4.3 and I have also built blevesearch/faiss@b747c55a

With the following sample program, I get this output:
KNN search results:
Document ID: doc2, Score: 340282346638528859811704183484516925440.000000
Document ID: doc3, Score: 0.001000
Document ID: doc1, Score: 0.001000
Document ID: doc4, Score: 0.000917
Document ID: doc5, Score: 0.000752

When I set boost value to 0, I get this:
KNN search results:
Document ID: doc4, Score: NaN
Document ID: doc3, Score: NaN
Document ID: doc2, Score: 340282346638528859811704183484516925440.000000
Document ID: doc1, Score: NaN
Document ID: doc5, Score: NaN

I asked ChatGPT what the results should look like, and it said they should be:
Doc1: Score should be approximately 31.62 Doc2: Score should be 0 (exact match) Doc3: Score should be approximately 31.62 Doc4: Score should be approximately 22.47 Doc5: Score should be approximately 36.47

Please let me know if these numbers are alright or if the code has any issues.

// main runs the KNN example and exits with a non-zero status if any step
// fails. All work happens in run so that its deferred cleanup (closing the
// index) executes before log.Fatal terminates the process.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run recreates the on-disk index "example.bleve", indexes five documents
// that each carry a 10-dimensional vector, performs a KNN search against the
// "vec" field and prints the ID and score of every hit.
//
// It returns the first error from creating the index, indexing a document or
// executing the search. The index is closed before run returns.
func run() error {
	// Delete example.bleve folder before starting. A failure is only logged:
	// if stale data is really in the way, bleve.New below will report it.
	if err := os.RemoveAll("example.bleve"); err != nil {
		log.Printf("Error deleting example.bleve folder: %v", err)
	}

	// Define multiple documents with vector fields.
	docs := []struct {
		ID   string    `json:"id"`
		Text string    `json:"text"`
		Vec  []float32 `json:"vec"`
	}{
		{ID: "doc1", Text: "hello from united states", Vec: []float32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
		{ID: "doc2", Text: "greetings from canada", Vec: []float32{10, 11, 12, 13, 14, 15, 16, 17, 18, 19}},
		{ID: "doc3", Text: "bonjour from france", Vec: []float32{20, 21, 22, 23, 24, 25, 26, 27, 28, 29}},
		{ID: "doc4", Text: "hola from spain", Vec: []float32{5, 4, 3, 2, 1, 0, 9, 8, 7, 6}},
		{ID: "doc5", Text: "ciao from italy", Vec: []float32{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}},
	}

	// Define the text field mapping.
	textFieldMapping := mapping.NewTextFieldMapping()

	// Define the vector field mapping. Dims must equal the length of every
	// indexed vector and of the query vector used below.
	vectorFieldMapping := mapping.NewVectorFieldMapping()
	vectorFieldMapping.Dims = 10
	vectorFieldMapping.Similarity = "l2_norm" // Euclidean distance

	// Create an index mapping. Dynamic mapping is switched off, so only the
	// two fields registered explicitly here are mapped.
	bleveMapping := mapping.NewIndexMapping()
	bleveMapping.DefaultMapping = mapping.NewDocumentMapping()
	bleveMapping.DefaultMapping.Dynamic = false
	bleveMapping.DefaultMapping.AddFieldMappingsAt("text", textFieldMapping)
	bleveMapping.DefaultMapping.AddFieldMappingsAt("vec", vectorFieldMapping)

	// Create a new index.
	index, err := bleve.New("example.bleve", bleveMapping)
	if err != nil {
		return fmt.Errorf("creating index: %w", err)
	}
	// Release the index when done; the original program never closed it.
	defer func() {
		if cerr := index.Close(); cerr != nil {
			log.Printf("Error closing index: %v", cerr)
		}
	}()

	// Index the documents.
	for _, doc := range docs {
		if err := index.Index(doc.ID, doc); err != nil {
			return fmt.Errorf("indexing %q: %w", doc.ID, err)
		}
	}

	// Create a search request with KNN. The match-none text query means the
	// hits come from the KNN clause only.
	searchRequest := bleve.NewSearchRequest(query.NewMatchNoneQuery())
	searchRequest.AddKNN(
		"vec", // Vector field name
		[]float32{10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, // Query vector (must also be 10-dimensional)
		5, // k (number of nearest neighbors)
		1, // boost; the issue reports NaN scores when this is set to 0
	)

	// Execute the search.
	searchResult, err := index.Search(searchRequest)
	if err != nil {
		return fmt.Errorf("executing KNN search: %w", err)
	}

	// Print the search results.
	// NOTE(review): the scores reported for this program are consistent with
	// score = 1 / (squared L2 distance): doc1 and doc3 lie at squared
	// distance 1000 from the query and scored 0.001, and the exact match
	// (doc2, distance 0) scored the largest float32 value. Presumably a
	// higher score means a closer vector, not a raw distance — confirm
	// against bleve's vector search documentation.
	fmt.Println("KNN search results:")
	for _, hit := range searchResult.Hits {
		fmt.Printf("Document ID: %s, Score: %f\n", hit.ID, hit.Score)
	}
	return nil
}

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

KNN search results don't look right #2105

KNN search results don't look right #2105

deepakbkar commented Nov 24, 2024

KNN search results don't look right #2105

KNN search results don't look right #2105

Comments

deepakbkar commented Nov 24, 2024