From 7b8cbee91d7ab4e5db36277d4da4d89f77e7b0b0 Mon Sep 17 00:00:00 2001 From: Saikat Sarkar Date: Fri, 10 Jan 2025 11:49:03 -0700 Subject: [PATCH 1/9] Resolve merge conflict --- .../public/utils/create_query.ts | 87 +++++++------------ 1 file changed, 32 insertions(+), 55 deletions(-) diff --git a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts index 63cdcdf76bb65..57f416707fb0c 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts +++ b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts @@ -5,7 +5,7 @@ * 2.0. */ -import { RetrieverContainer } from '@elastic/elasticsearch/lib/api/types'; +import { RetrieverContainer, SearchHighlight } from '@elastic/elasticsearch/lib/api/types'; import { IndicesQuerySourceFields, QuerySourceFields } from '../types'; export type IndexFields = Record; @@ -36,6 +36,8 @@ const SUGGESTED_SOURCE_FIELDS = [ 'text_field', ]; +const SEMANTIC_FIELD_TYPE = 'semantic'; + interface Matches { queryMatches: any[]; knnMatches: any[]; @@ -52,7 +54,7 @@ export function createQuery( rerankOptions: ReRankOptions = { rrf: true, } -): { retriever: RetrieverContainer } { +): { retriever: RetrieverContainer; highlight?: SearchHighlight } { const indices = Object.keys(fieldDescriptors); const boolMatches = Object.keys(fields).reduce( (acc, index) => { @@ -64,60 +66,8 @@ export function createQuery( const semanticMatches = indexFields.map((field) => { const semanticField = indexFieldDescriptors.semantic_fields.find((x) => x.field === field); - const isSourceField = sourceFields[index].includes(field); - - // this is needed to get the inner_hits for the source field - // we cant rely on only the semantic field - // in future inner_hits option will be added to semantic - if (semanticField && isSourceField) { - if (semanticField.embeddingType === 'dense_vector') { - const filter = - semanticField.indices.length < indices.length - ? { filter: { terms: { _index: semanticField.indices } } } - : {}; - return { - nested: { - path: `${semanticField.field}.inference.chunks`, - query: { - knn: { - field: `${semanticField.field}.inference.chunks.embeddings`, - ...filter, - query_vector_builder: { - text_embedding: { - model_id: semanticField.inferenceId, - model_text: '{query}', - }, - }, - }, - }, - inner_hits: { - size: 2, - name: `${index}.${semanticField.field}`, - _source: [`${semanticField.field}.inference.chunks.text`], - }, - }, - }; - } else if (semanticField.embeddingType === 'sparse_vector') { - return { - nested: { - path: `${semanticField.field}.inference.chunks`, - query: { - sparse_vector: { - inference_id: semanticField.inferenceId, - field: `${semanticField.field}.inference.chunks.embeddings`, - query: '{query}', - }, - }, - inner_hits: { - size: 2, - name: `${index}.${semanticField.field}`, - _source: [`${semanticField.field}.inference.chunks.text`], - }, - }, - }; - } - } else if (semanticField) { + if (semanticField) { return { semantic: { field: semanticField.field, @@ -241,11 +191,24 @@ export function createQuery( // for single Elser support to make it easy to read - skips bool query if (boolMatches.queryMatches.length === 1 && boolMatches.knnMatches.length === 0) { + const semanticField = boolMatches.queryMatches[0].semantic?.field ?? null; return { retriever: { standard: { query: boolMatches.queryMatches[0], }, + ...(semanticField + ? { + highlight: { + fields: { + [semanticField]: { + type: SEMANTIC_FIELD_TYPE, + number_of_fragments: 2, + }, + }, + }, + } + : {}), }, }; } @@ -285,12 +248,26 @@ export function createQuery( }; }); + const semanticField = matches.find((match) => match.semantic)?.semantic.field ?? null; + return { retriever: { rrf: { retrievers, }, }, + ...(semanticField + ? { + highlight: { + fields: { + [semanticField]: { + type: SEMANTIC_FIELD_TYPE, + number_of_fragments: 2, + }, + }, + }, + } + : {}), }; } From bc73e738c718216ea0e170763a307881d873f629 Mon Sep 17 00:00:00 2001 From: Saikat Sarkar Date: Tue, 7 Jan 2025 12:07:36 -0700 Subject: [PATCH 2/9] Add option to highlight multiple semantic_text fields --- .../public/utils/create_query.ts | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts index 57f416707fb0c..8b0dbf5bb67ba 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts +++ b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts @@ -248,7 +248,9 @@ export function createQuery( }; }); - const semanticField = matches.find((match) => match.semantic)?.semantic.field ?? null; + const semanticFields = matches + .filter((match) => match.semantic) + .map((match) => match.semantic.field); return { retriever: { @@ -256,15 +258,16 @@ export function createQuery( retrievers, }, }, - ...(semanticField + ...(semanticFields.length > 0 ? { highlight: { - fields: { - [semanticField]: { + fields: semanticFields.reduce((acc, field) => { + acc[field] = { type: SEMANTIC_FIELD_TYPE, number_of_fragments: 2, - }, - }, + }; + return acc; + }, {}), }, } : {}), From 7d561d7d127d205bfc14c49ffc4ff8ca2194ac2c Mon Sep 17 00:00:00 2001 From: Saikat Sarkar Date: Tue, 7 Jan 2025 12:31:05 -0700 Subject: [PATCH 3/9] Build context from the highlight section --- .../server/utils/get_value_for_selected_field.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts index 5556e407de979..6bb379e8571d2 100644 --- a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts +++ b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts @@ -14,11 +14,8 @@ export const getValueForSelectedField = (hit: SearchHit, path: string): string = } // for semantic_text matches - const innerHitPath = `${hit._index}.${path}`; - if (!!hit.inner_hits?.[innerHitPath]) { - return hit.inner_hits[innerHitPath].hits.hits - .map((innerHit) => innerHit._source.text) - .join('\n --- \n'); + if (hit.highlight) { + return Object.values(hit.highlight).flat().join('\n --- \n'); } return has(hit._source, `${path}.text`) From f842c458ca72f292d9a4cccf351bb66bd53c3c2c Mon Sep 17 00:00:00 2001 From: Saikat Sarkar Date: Wed, 8 Jan 2025 16:01:28 -0700 Subject: [PATCH 4/9] Check whether semantic_text field is a source field --- .../public/utils/create_query.ts | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts index 8b0dbf5bb67ba..c947a9a5f5d75 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts +++ b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts @@ -192,24 +192,32 @@ export function createQuery( // for single Elser support to make it easy to read - skips bool query if (boolMatches.queryMatches.length === 1 && boolMatches.knnMatches.length === 0) { const semanticField = boolMatches.queryMatches[0].semantic?.field ?? null; + + let isSourceField = false; + indices.forEach((index) => { + if (sourceFields[index].includes(semanticField)) { + isSourceField = true; + } + }); + return { retriever: { standard: { query: boolMatches.queryMatches[0], }, - ...(semanticField - ? { - highlight: { - fields: { - [semanticField]: { - type: SEMANTIC_FIELD_TYPE, - number_of_fragments: 2, - }, + }, + ...(isSourceField + ? { + highlight: { + fields: { + [semanticField]: { + type: SEMANTIC_FIELD_TYPE, + number_of_fragments: 2, }, }, - } - : {}), - }, + }, + } + : {}), }; } From dfce42faabc63aef07fb10103aee92a3020fefa4 Mon Sep 17 00:00:00 2001 From: Saikat Sarkar Date: Wed, 8 Jan 2025 17:34:59 -0700 Subject: [PATCH 5/9] Add ordering on score --- .../search_playground/public/utils/create_query.ts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts index c947a9a5f5d75..cf0a1846bfb65 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts +++ b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts @@ -213,6 +213,7 @@ export function createQuery( [semanticField]: { type: SEMANTIC_FIELD_TYPE, number_of_fragments: 2, + order: 'score', }, }, }, @@ -258,7 +259,16 @@ export function createQuery( const semanticFields = matches .filter((match) => match.semantic) - .map((match) => match.semantic.field); + .map((match) => match.semantic.field) + .filter((field) => { + let isSourceField = false; + indices.forEach((index) => { + if (sourceFields[index].includes(field)) { + isSourceField = true; + } + }); + return isSourceField; + }); return { retriever: { @@ -273,6 +283,7 @@ export function createQuery( acc[field] = { type: SEMANTIC_FIELD_TYPE, number_of_fragments: 2, + order: 'score', }; return acc; }, {}), From b9624a17aa7e483c2e919c8b7cd446b789ac7834 Mon Sep 17 00:00:00 2001 From: Saikat Sarkar Date: Wed, 8 Jan 2025 18:12:08 -0700 Subject: [PATCH 6/9] Update the unit tests --- .../public/utils/create_query.test.ts | 56 ++++++++----------- .../server/lib/conversational_chain.test.ts | 14 +---- .../get_value_for_selected_field.test.ts | 41 ++++---------- 3 files changed, 36 insertions(+), 75 deletions(-) diff --git a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.test.ts b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.test.ts index c4c986e7b06e6..d6001dd1f2224 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.test.ts +++ b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.test.ts @@ -516,20 +516,9 @@ describe('create_query', () => { { standard: { query: { - nested: { - inner_hits: { - _source: ['field2.inference.chunks.text'], - name: 'index1.field2', - size: 2, - }, - path: 'field2.inference.chunks', - query: { - sparse_vector: { - field: 'field2.inference.chunks.embeddings', - inference_id: 'model2', - query: '{query}', - }, - }, + semantic: { + field: 'field2', + query: '{query}', }, }, }, @@ -542,6 +531,15 @@ describe('create_query', () => { ], }, }, + highlight: { + fields: { + field2: { + number_of_fragments: 2, + order: 'score', + type: 'semantic', + }, + }, + }, }); }); @@ -638,24 +636,9 @@ describe('create_query', () => { { standard: { query: { - nested: { - inner_hits: { - _source: ['field2.inference.chunks.text'], - name: 'index1.field2', - size: 2, - }, - path: 'field2.inference.chunks', - query: { - knn: { - field: 'field2.inference.chunks.embeddings', - query_vector_builder: { - text_embedding: { - model_id: 'model2', - model_text: '{query}', - }, - }, - }, - }, + semantic: { + field: 'field2', + query: '{query}', }, }, }, @@ -668,6 +651,15 @@ describe('create_query', () => { ], }, }, + highlight: { + fields: { + field2: { + number_of_fragments: 2, + order: 'score', + type: 'semantic', + }, + }, + }, }); }); diff --git a/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts b/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts index d8958da6ff112..76f03525724fc 100644 --- a/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts +++ b/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts @@ -237,19 +237,7 @@ describe('conversational chain', () => { { _index: 'index', _id: '1', - inner_hits: { - 'index.field': { - hits: { - hits: [ - { - _source: { - text: 'value', - }, - }, - ], - }, - }, - }, + highlight: { semantic_text_des: ['value'] }, }, ], expectedDocs: [ diff --git a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts index 7eae929cc70c0..a4b59b2818eda 100644 --- a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts +++ b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts @@ -78,49 +78,30 @@ describe('getValueForSelectedField', () => { expect(getValueForSelectedField(hit, 'bla.sources')).toBe(''); }); - test('should return when its a chunked passage', () => { + test('should return when it has highlighted messages', () => { const hit = { - _index: 'sample-index', + _index: 'books', _id: '8jSNY48B6iHEi98DL1C-', _score: 0.7789394, _source: { - test: 'The Shawshank Redemption', + test: 'The Big Bang and Black Holes', metadata: { source: - 'Over the course of several years, two convicts form a friendship, seeking consolation and, eventually, redemption through basic compassion', + 'This book explores the origins of the universe, beginning with the Big Bang—an immense explosion that created space, time, and matter. It delves into how black holes, regions of space where gravity is so strong that not even light can escape, play a crucial role in the evolution of galaxies and the universe as a whole. Stephen Hawking’s groundbreaking discoveries about black hole radiation, often referred to as Hawking Radiation, are also discussed in detail.', }, }, - inner_hits: { - 'sample-index.test': { - hits: { - hits: [ - { - _source: { - text: 'Over the course of several years', - }, - }, - { - _source: { - text: 'two convicts form a friendship', - }, - }, - { - _source: { - text: 'seeking consolation and, eventually, redemption through basic compassion', - }, - }, - ], - }, - }, + highlight: { + semantic_text_des: [ + 'This book explores the origins of the universe.', + 'The beginning with the Big Bang—an immense explosion that created space, time, and matter. It delves into how black holes, regions of space where gravity is so strong that not even light can escape, play a crucial role in the evolution of galaxies and the universe as a whole. Stephen Hawking’s groundbreaking discoveries about black hole radiation, often referred to as Hawking Radiation, are also discussed in detail.', + ], }, }; expect(getValueForSelectedField(hit as any, 'test')).toMatchInlineSnapshot(` - "Over the course of several years - --- - two convicts form a friendship + "This book explores the origins of the universe. --- - seeking consolation and, eventually, redemption through basic compassion" + The beginning with the Big Bang—an immense explosion that created space, time, and matter. It delves into how black holes, regions of space where gravity is so strong that not even light can escape, play a crucial role in the evolution of galaxies and the universe as a whole. Stephen Hawking’s groundbreaking discoveries about black hole radiation, often referred to as Hawking Radiation, are also discussed in detail." `); }); From cf0334d5d18681d41fa1d67376510ad679910481 Mon Sep 17 00:00:00 2001 From: Saikat Sarkar Date: Fri, 10 Jan 2025 12:49:02 -0700 Subject: [PATCH 7/9] Update the content of view code --- .../__snapshots__/py_lang_client.test.tsx.snap | 11 ++++++----- .../components/view_code/examples/py_lang_client.tsx | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap b/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap index 0001f45600ec2..00950530db5ed 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap +++ b/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap @@ -40,11 +40,12 @@ def get_elasticsearch_results(): def create_openai_prompt(results): context = \\"\\" for hit in results: - inner_hit_path = f\\"{hit['_index']}.{index_source_fields.get(hit['_index'])[0]}\\" - - ## For semantic_text matches, we need to extract the text from the inner_hits - if 'inner_hits' in hit and inner_hit_path in hit['inner_hits']: - context += '\\\\n --- \\\\n'.join(inner_hit['_source']['text'] for inner_hit in hit['inner_hits'][inner_hit_path]['hits']['hits']) + ## For semantic_text matches, we need to extract the text from the highlighted field + if "highlight" in hit: + highlighted_texts = [] + for values in hit["highlight"].values(): + highlighted_texts.extend(values) + context += "\\\\n --- \\\\n".join(highlighted_texts) else: source_field = index_source_fields.get(hit[\\"_index\\"])[0] hit_context = hit[\\"_source\\"][source_field] diff --git a/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/py_lang_client.tsx b/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/py_lang_client.tsx index a2d92583c6b63..746ecd293ad5e 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/py_lang_client.tsx +++ b/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/py_lang_client.tsx @@ -40,11 +40,12 @@ def get_elasticsearch_results(): def create_openai_prompt(results): context = "" for hit in results: - inner_hit_path = f"{hit['_index']}.{index_source_fields.get(hit['_index'])[0]}" - - ## For semantic_text matches, we need to extract the text from the inner_hits - if 'inner_hits' in hit and inner_hit_path in hit['inner_hits']: - context += '\\n --- \\n'.join(inner_hit['_source']['text'] for inner_hit in hit['inner_hits'][inner_hit_path]['hits']['hits']) + ## For semantic_text matches, we need to extract the text from the highlighted field + if "highlight" in hit: + highlighted_texts = [] + for values in hit["highlight"].values(): + highlighted_texts.extend(values) + context += "\\n --- \\n".join(highlighted_texts) else: source_field = index_source_fields.get(hit["_index"])[0] hit_context = hit["_source"][source_field] From 08b6918c152c7b3d17a4593d857f9f29ed7ce3de Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Fri, 17 Jan 2025 12:04:55 +0000 Subject: [PATCH 8/9] update for highlight to use field path --- .../search_playground/server/lib/conversational_chain.test.ts | 2 +- .../server/utils/get_value_for_selected_field.test.ts | 2 +- .../server/utils/get_value_for_selected_field.ts | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts b/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts index 76f03525724fc..5a59ddead7d9c 100644 --- a/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts +++ b/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts @@ -237,7 +237,7 @@ describe('conversational chain', () => { { _index: 'index', _id: '1', - highlight: { semantic_text_des: ['value'] }, + highlight: { field: ['value'] }, }, ], expectedDocs: [ diff --git a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts index a4b59b2818eda..11351c56adb97 100644 --- a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts +++ b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts @@ -91,7 +91,7 @@ describe('getValueForSelectedField', () => { }, }, highlight: { - semantic_text_des: [ + test: [ 'This book explores the origins of the universe.', 'The beginning with the Big Bang—an immense explosion that created space, time, and matter. It delves into how black holes, regions of space where gravity is so strong that not even light can escape, play a crucial role in the evolution of galaxies and the universe as a whole. Stephen Hawking’s groundbreaking discoveries about black hole radiation, often referred to as Hawking Radiation, are also discussed in detail.', ], diff --git a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts index 6bb379e8571d2..fe0772a314327 100644 --- a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts +++ b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts @@ -14,8 +14,8 @@ export const getValueForSelectedField = (hit: SearchHit, path: string): string = } // for semantic_text matches - if (hit.highlight) { - return Object.values(hit.highlight).flat().join('\n --- \n'); + if (hit.highlight && hit.highlight[path]) { + return hit.highlight[path].flat().join('\n --- \n'); } return has(hit._source, `${path}.text`) From 9efecdafa75bc1f6a39e39c886b174bce5aa6c4b Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Fri, 17 Jan 2025 17:07:32 +0000 Subject: [PATCH 9/9] update snapshot --- .../examples/__snapshots__/py_lang_client.test.tsx.snap | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap b/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap index 00950530db5ed..7944e0ecc188a 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap +++ b/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap @@ -41,11 +41,11 @@ def create_openai_prompt(results): context = \\"\\" for hit in results: ## For semantic_text matches, we need to extract the text from the highlighted field - if "highlight" in hit: + if \\"highlight\\" in hit: highlighted_texts = [] - for values in hit["highlight"].values(): + for values in hit[\\"highlight\\"].values(): highlighted_texts.extend(values) - context += "\\\\n --- \\\\n".join(highlighted_texts) + context += \\"\\\\n --- \\\\n\\".join(highlighted_texts) else: source_field = index_source_fields.get(hit[\\"_index\\"])[0] hit_context = hit[\\"_source\\"][source_field]