Skip to content

Commit

Permalink
Update semantic_text query to use highlighting option
Browse files Browse the repository at this point in the history
  • Loading branch information
saikatsarkar056 committed Jan 7, 2025
1 parent d5c804b commit cda8a3d
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export interface ChatMessage {
content: string;
}

interface SemanticField {
export interface SemanticField {
field: string;
inferenceId: string;
embeddingType: 'sparse_vector' | 'dense_vector';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
* 2.0.
*/

import { RetrieverContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import {
RetrieverContainer,
SearchHighlight,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { IndicesQuerySourceFields, QuerySourceFields } from '../types';

export type IndexFields = Record<string, string[]>;
Expand Down Expand Up @@ -52,7 +55,7 @@ export function createQuery(
rerankOptions: ReRankOptions = {
rrf: true,
}
): { retriever: RetrieverContainer } {
): { retriever: RetrieverContainer; highlight?: SearchHighlight } {
const indices = Object.keys(fieldDescriptors);
const boolMatches = Object.keys(fields).reduce<Matches>(
(acc, index) => {
Expand All @@ -64,60 +67,11 @@ export function createQuery(

const semanticMatches = indexFields.map((field) => {
const semanticField = indexFieldDescriptors.semantic_fields.find((x) => x.field === field);
const isSourceField = sourceFields[index].includes(field);

// This is needed to get the inner_hits for the source field;
// we can't rely on the semantic field alone.
// In the future, an inner_hits option will be added to the `semantic` query.
if (semanticField && isSourceField) {
if (semanticField.embeddingType === 'dense_vector') {
const filter =
semanticField.indices.length < indices.length
? { filter: { terms: { _index: semanticField.indices } } }
: {};

return {
nested: {
path: `${semanticField.field}.inference.chunks`,
query: {
knn: {
field: `${semanticField.field}.inference.chunks.embeddings`,
...filter,
query_vector_builder: {
text_embedding: {
model_id: semanticField.inferenceId,
model_text: '{query}',
},
},
},
},
inner_hits: {
size: 2,
name: `${index}.${semanticField.field}`,
_source: [`${semanticField.field}.inference.chunks.text`],
},
},
};
} else if (semanticField.embeddingType === 'sparse_vector') {
return {
nested: {
path: `${semanticField.field}.inference.chunks`,
query: {
sparse_vector: {
inference_id: semanticField.inferenceId,
field: `${semanticField.field}.inference.chunks.embeddings`,
query: '{query}',
},
},
inner_hits: {
size: 2,
name: `${index}.${semanticField.field}`,
_source: [`${semanticField.field}.inference.chunks.text`],
},
},
};
}
} else if (semanticField) {
if (semanticField) {
return {
semantic: {
field: semanticField.field,
Expand Down Expand Up @@ -241,11 +195,24 @@ export function createQuery(

// Single-match shortcut (e.g. a single ELSER field): when there is exactly one
// query match and no kNN match, return it directly as a `standard` retriever.
// This skips the bool query and keeps the generated request easy to read.
if (boolMatches.queryMatches.length === 1 && boolMatches.knnMatches.length === 0) {
  // Field name of the semantic match, or null when the single match is not a `semantic` query.
  const semanticField = boolMatches.queryMatches[0].semantic?.field ?? null;
  return {
    retriever: {
      standard: {
        query: boolMatches.queryMatches[0],
      },
    },
    // `highlight` is a sibling of `retriever`, as declared by the return type
    // `{ retriever; highlight? }`. Nesting it inside the retriever object would
    // leave the result without a top-level `highlight` and add a stray key to
    // the retriever definition.
    ...(semanticField
      ? {
          highlight: {
            fields: {
              [semanticField]: {
                type: 'semantic',
                number_of_fragments: 2,
              },
            },
          },
        }
      : {}),
  };
}
Expand Down Expand Up @@ -285,12 +252,26 @@ export function createQuery(
};
});

const semanticField = matches.find((match) => match.semantic)?.semantic.field ?? null;

return {
retriever: {
rrf: {
retrievers,
},
},
...(semanticField
? {
highlight: {
fields: {
[semanticField]: {
type: 'semantic',
number_of_fragments: 2,
},
},
},
}
: {}),
};
}

Expand Down

0 comments on commit cda8a3d

Please sign in to comment.