Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Add auto-generation of index mappings & settings based on processors #553

Merged
merged 1 commit into from
Dec 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 71 additions & 21 deletions common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,31 +77,81 @@ export const SEARCH_CONNECTORS_NODE_API_PATH = `${BASE_CONNECTOR_NODE_API_PATH}/
* based on the specified remote model from a remote service, if found
*/

// Cohere
export const COHERE_DIMENSIONS = {
[`embed-english-v3.0`]: 1024,
[`embed-english-light-v3.0`]: 384,
[`embed-multilingual-v3.0`]: 1024,
[`embed-multilingual-light-v3.0`]: 384,
[`embed-english-v2.0`]: 4096,
[`embed-english-light-v2.0`]: 1024,
[`embed-multilingual-v2.0`]: 768,
/**
 * Static metadata recorded for a known remote embedding model.
 */
interface RemoteEmbeddingModelConfig {
  /**
   * Field name associated with the model's embedding output
   * (e.g. 'embedding' or 'embeddings' in the tables below).
   * NOTE(review): presumably the key in the model response that holds the
   * vector - confirm against the consumers of these configs.
   */
  fieldName: string;
  /** Length of the embedding vector the model produces. */
  dimension: number;
}

// Amazon Bedrock
/**
 * Known embedding models keyed by their Amazon Bedrock model ID.
 * Each entry records the embedding vector dimension and the embedding
 * field name: 'embedding' for the Titan models, 'embeddings' for the
 * Cohere models.
 * NOTE(review): field names presumably mirror each model's response
 * payload - confirm against the code that reads these configs.
 */
export const BEDROCK_CONFIGS = {
  'amazon.titan-embed-text-v1': {
    dimension: 1536,
    fieldName: 'embedding',
  } as RemoteEmbeddingModelConfig,
  'amazon.titan-embed-text-v2': {
    dimension: 1024,
    fieldName: 'embedding',
  } as RemoteEmbeddingModelConfig,
  'amazon.titan-embed-image-v1': {
    dimension: 1024,
    fieldName: 'embedding',
  } as RemoteEmbeddingModelConfig,
  'cohere.embed-english-v3': {
    dimension: 1024,
    fieldName: 'embeddings',
  } as RemoteEmbeddingModelConfig,
  'cohere.embed-multilingual-v3': {
    dimension: 1024,
    fieldName: 'embeddings',
  } as RemoteEmbeddingModelConfig,
};

// OpenAI
export const OPENAI_DIMENSIONS = {
[`text-embedding-3-small`]: 1536,
[`text-embedding-3-large`]: 3072,
[`text-embedding-ada-002`]: 1536,
// Cohere
/**
 * Known Cohere embedding models keyed by model name.
 * Each entry records the embedding vector dimension; every model listed
 * here uses 'embeddings' as its embedding field name.
 * NOTE(review): the field name presumably mirrors the Cohere response
 * payload - confirm against the code that reads these configs.
 */
export const COHERE_CONFIGS = {
  'embed-english-v3.0': {
    dimension: 1024,
    fieldName: 'embeddings',
  } as RemoteEmbeddingModelConfig,
  'embed-english-light-v3.0': {
    dimension: 384,
    fieldName: 'embeddings',
  } as RemoteEmbeddingModelConfig,
  'embed-multilingual-v3.0': {
    dimension: 1024,
    fieldName: 'embeddings',
  } as RemoteEmbeddingModelConfig,
  'embed-multilingual-light-v3.0': {
    dimension: 384,
    fieldName: 'embeddings',
  } as RemoteEmbeddingModelConfig,
  'embed-english-v2.0': {
    dimension: 4096,
    fieldName: 'embeddings',
  } as RemoteEmbeddingModelConfig,
  'embed-english-light-v2.0': {
    dimension: 1024,
    fieldName: 'embeddings',
  } as RemoteEmbeddingModelConfig,
  'embed-multilingual-v2.0': {
    dimension: 768,
    fieldName: 'embeddings',
  } as RemoteEmbeddingModelConfig,
};

// Amazon Bedrock
export const BEDROCK_DIMENSIONS = {
[`amazon.titan-embed-text-v1`]: 1536,
[`amazon.titan-embed-text-v2`]: 1024,
[`amazon.titan-embed-image-v1`]: 1024,
[`cohere.embed-english-v3`]: 1024, // same as Cohere directly
[`cohere.embed-multilingual-v3`]: 1024, // same as Cohere directly
// OpenAI
/**
 * Known OpenAI embedding models keyed by model name.
 * Each entry records the embedding vector dimension; every model listed
 * here uses 'embedding' as its embedding field name.
 * NOTE(review): the field name presumably mirrors the OpenAI response
 * payload - confirm against the code that reads these configs.
 */
export const OPENAI_CONFIGS = {
  'text-embedding-3-small': {
    dimension: 1536,
    fieldName: 'embedding',
  } as RemoteEmbeddingModelConfig,
  'text-embedding-3-large': {
    dimension: 3072,
    fieldName: 'embedding',
  } as RemoteEmbeddingModelConfig,
  'text-embedding-ada-002': {
    dimension: 1536,
    fieldName: 'embedding',
  } as RemoteEmbeddingModelConfig,
};

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,129 @@
* SPDX-License-Identifier: Apache-2.0
*/

import React from 'react';
import React, { useEffect } from 'react';
import { useSelector } from 'react-redux';
import { isEmpty } from 'lodash';
import {
EuiAccordion,
EuiFlexGroup,
EuiFlexItem,
EuiSpacer,
} from '@elastic/eui';
import { JsonField } from '../input_fields';
import { getIn, useFormikContext } from 'formik';
import { WorkflowFormValues } from '../../../../../common';
import { AppState } from '../../../../store';
import {
getEmbeddingField,
getEmbeddingModelDimensions,
getUpdatedIndexMappings,
getUpdatedIndexSettings,
isKnnIndex,
removeVectorFieldFromIndexMappings,
} from '../../../../utils';

interface AdvancedSettingsProps {}

/**
* Input component for configuring ingest-side advanced settings
*/
export function AdvancedSettings(props: AdvancedSettingsProps) {
const { values, setFieldValue } = useFormikContext<WorkflowFormValues>();
const { models, connectors } = useSelector((state: AppState) => state.ml);
const ingestMLProcessors = (Object.values(
values?.ingest?.enrich
) as any[]).filter((ingestProcessor) => ingestProcessor?.model !== undefined);
const ingestProcessorModelIds = ingestMLProcessors
.map((ingestProcessor) => ingestProcessor?.model?.id as string | undefined)
.filter((modelId) => !isEmpty(modelId));
const indexMappingsPath = 'ingest.index.mappings';
const indexSettingsPath = 'ingest.index.settings';
const curMappings = getIn(values, indexMappingsPath);
const curSettings = getIn(values, indexSettingsPath);

// listen on when processor with models are added / removed. dynamically update index
// settings to be knn-enabled or knn-disabled.
useEffect(() => {
if (ingestProcessorModelIds.length > 0) {
ingestProcessorModelIds.forEach((ingestProcessorModelId) => {
const processorModel = Object.values(models).find(
(model) => model.id === ingestProcessorModelId
);
if (processorModel?.connectorId !== undefined) {
const processorConnector = connectors[processorModel?.connectorId];
const dimension = getEmbeddingModelDimensions(processorConnector);

// If a dimension is found, it is a known embedding model.
// Ensure the index is configured to be knn-enabled.
if (dimension !== undefined) {
if (!isKnnIndex(curSettings)) {
setFieldValue(
indexSettingsPath,
getUpdatedIndexSettings(curSettings, true)
);
}
}
}
});
} else {
if (isKnnIndex(curSettings)) {
setFieldValue(
indexSettingsPath,
getUpdatedIndexSettings(curSettings, false)
);
}
}
}, [ingestProcessorModelIds.length]);

// listener on when there are updates to any ingest processors. Try to update
// any index mappings accordingly, such as setting the knn_vector mappings
// for models that output vector embeddings, or removing any mappings, if no ML
// processor defined.
useEffect(() => {
if (ingestMLProcessors.length > 0) {
ingestMLProcessors.forEach((ingestMLProcessor) => {
const processorModel = Object.values(models).find(
(model) => model.id === ingestMLProcessor?.model?.id
);
if (processorModel?.connectorId !== undefined) {
const processorConnector = connectors[processorModel?.connectorId];
const dimension = getEmbeddingModelDimensions(processorConnector);
const embeddingFieldName = getEmbeddingField(
processorConnector,
ingestMLProcessor
);
if (embeddingFieldName !== undefined && dimension !== undefined) {
setFieldValue(
indexMappingsPath,
getUpdatedIndexMappings(
curMappings,
embeddingFieldName,
dimension
)
);
}
}
});
} else {
setFieldValue(
indexMappingsPath,
removeVectorFieldFromIndexMappings(curMappings)
);
}
}, [getIn(values, 'ingest.enrich')]);

return (
<EuiFlexGroup direction="column">
<EuiFlexItem grow={false}>
<EuiAccordion id="advancedSettings" buttonContent="Advanced settings">
<EuiSpacer size="s" />
<EuiFlexGroup direction="column">
<EuiFlexItem>
<JsonField
label="Index mappings"
fieldPath={'ingest.index.mappings'}
/>
<JsonField label="Index mappings" fieldPath={indexMappingsPath} />
</EuiFlexItem>
<EuiFlexItem>
<JsonField
label="Index settings"
fieldPath={'ingest.index.settings'}
/>
<JsonField label="Index settings" fieldPath={indexSettingsPath} />
</EuiFlexItem>
</EuiFlexGroup>
</EuiAccordion>
Expand Down
36 changes: 5 additions & 31 deletions public/pages/workflows/new_workflow/quick_configure_inputs.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,18 @@ import {
EuiCompressedFieldNumber,
} from '@elastic/eui';
import {
BEDROCK_DIMENSIONS,
COHERE_DIMENSIONS,
DEFAULT_IMAGE_FIELD,
DEFAULT_LLM_RESPONSE_FIELD,
DEFAULT_TEXT_FIELD,
DEFAULT_VECTOR_FIELD,
MODEL_STATE,
Model,
ModelInterface,
OPENAI_DIMENSIONS,
QuickConfigureFields,
WORKFLOW_TYPE,
} from '../../../../common';
import { AppState } from '../../../store';
import { parseModelInputs } from '../../../utils';
import { getEmbeddingModelDimensions, parseModelInputs } from '../../../utils';
import { get } from 'lodash';

interface QuickConfigureInputsProps {
Expand Down Expand Up @@ -121,33 +118,10 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) {
if (selectedModel?.connectorId !== undefined) {
const connector = connectors[selectedModel.connectorId];
if (connector !== undefined) {
// some APIs allow specifically setting the dimensions at runtime,
// so we check for that first.
if (connector.parameters?.dimensions !== undefined) {
setFieldValues({
...fieldValues,
embeddingLength: connector.parameters?.dimensions,
});
} else if (connector.parameters?.model !== undefined) {
const dimensions =
// @ts-ignore
COHERE_DIMENSIONS[connector.parameters?.model] ||
// @ts-ignore
OPENAI_DIMENSIONS[connector.parameters?.model] ||
// @ts-ignore
BEDROCK_DIMENSIONS[connector.parameters?.model];
if (dimensions !== undefined) {
setFieldValues({
...fieldValues,
embeddingLength: dimensions,
});
}
} else {
setFieldValues({
...fieldValues,
embeddingLength: undefined,
});
}
setFieldValues({
...fieldValues,
embeddingLength: getEmbeddingModelDimensions(connector),
});
}
}
}, [fieldValues.modelId, deployedModels, connectors]);
Expand Down
Loading
Loading