Skip to content

Commit

Permalink
Add auto-generation of index mappings & settings based on processors (#…
Browse files Browse the repository at this point in the history
…552)

Signed-off-by: Tyler Ohlsen <[email protected]>
(cherry picked from commit 08b97d3)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
github-actions[bot] committed Dec 28, 2024
1 parent 8ee7843 commit fae5cdf
Show file tree
Hide file tree
Showing 4 changed files with 336 additions and 62 deletions.
92 changes: 71 additions & 21 deletions common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,31 +77,81 @@ export const SEARCH_CONNECTORS_NODE_API_PATH = `${BASE_CONNECTOR_NODE_API_PATH}/
* based on the specified remote model from a remote service, if found
*/

// Cohere
export const COHERE_DIMENSIONS = {
[`embed-english-v3.0`]: 1024,
[`embed-english-light-v3.0`]: 384,
[`embed-multilingual-v3.0`]: 1024,
[`embed-multilingual-light-v3.0`]: 384,
[`embed-english-v2.0`]: 4096,
[`embed-english-light-v2.0`]: 1024,
[`embed-multilingual-v2.0`]: 768,
/**
 * Static configuration recorded for a known remote embedding model.
 * Used as the value type of the per-provider config maps in this file.
 */
interface RemoteEmbeddingModelConfig {
// Dimension (vector length) of the embeddings associated with the model.
dimension: number;
// NOTE(review): presumably the name of the field in the model output that
// holds the embedding(s) - confirm against the consumers of this value.
fieldName: string;
}

/**
 * Amazon Bedrock: known embedding models, keyed by model name.
 *
 * Each entry records the embedding dimension and the associated field name.
 * The map is annotated once as a string-keyed record instead of asserting every
 * entry with `as`, so the compiler verifies each entry (missing or misspelled
 * properties are errors) and lookups by a runtime model name are type-safe.
 */
export const BEDROCK_CONFIGS: Record<string, RemoteEmbeddingModelConfig> = {
  'amazon.titan-embed-text-v1': {
    dimension: 1536,
    fieldName: 'embedding',
  },
  'amazon.titan-embed-text-v2': {
    dimension: 1024,
    fieldName: 'embedding',
  },
  'amazon.titan-embed-image-v1': {
    dimension: 1024,
    fieldName: 'embedding',
  },
  'cohere.embed-english-v3': {
    dimension: 1024,
    fieldName: 'embeddings',
  },
  'cohere.embed-multilingual-v3': {
    dimension: 1024,
    fieldName: 'embeddings',
  },
};

// OpenAI
export const OPENAI_DIMENSIONS = {
[`text-embedding-3-small`]: 1536,
[`text-embedding-3-large`]: 3072,
[`text-embedding-ada-002`]: 1536,
/**
 * Cohere: known embedding models, keyed by model name.
 *
 * Each entry records the embedding dimension and the associated field name.
 * The map is annotated once as a string-keyed record instead of asserting every
 * entry with `as`, so the compiler verifies each entry (missing or misspelled
 * properties are errors) and lookups by a runtime model name are type-safe.
 */
export const COHERE_CONFIGS: Record<string, RemoteEmbeddingModelConfig> = {
  'embed-english-v3.0': {
    dimension: 1024,
    fieldName: 'embeddings',
  },
  'embed-english-light-v3.0': {
    dimension: 384,
    fieldName: 'embeddings',
  },
  'embed-multilingual-v3.0': {
    dimension: 1024,
    fieldName: 'embeddings',
  },
  'embed-multilingual-light-v3.0': {
    dimension: 384,
    fieldName: 'embeddings',
  },
  'embed-english-v2.0': {
    dimension: 4096,
    fieldName: 'embeddings',
  },
  'embed-english-light-v2.0': {
    dimension: 1024,
    fieldName: 'embeddings',
  },
  'embed-multilingual-v2.0': {
    dimension: 768,
    fieldName: 'embeddings',
  },
};

// Amazon BedRock
export const BEDROCK_DIMENSIONS = {
[`amazon.titan-embed-text-v1`]: 1536,
[`amazon.titan-embed-text-v2`]: 1024,
[`amazon.titan-embed-image-v1`]: 1024,
[`cohere.embed-english-v3`]: 1024, // same as Cohere directly
[`cohere.embed-multilingual-v3`]: 1024, // same as Cohere directly
/**
 * OpenAI: known embedding models, keyed by model name.
 *
 * Each entry records the embedding dimension and the associated field name.
 * The map is annotated once as a string-keyed record instead of asserting every
 * entry with `as`, so the compiler verifies each entry (missing or misspelled
 * properties are errors) and lookups by a runtime model name are type-safe.
 */
export const OPENAI_CONFIGS: Record<string, RemoteEmbeddingModelConfig> = {
  'text-embedding-3-small': {
    dimension: 1536,
    fieldName: 'embedding',
  },
  'text-embedding-3-large': {
    dimension: 3072,
    fieldName: 'embedding',
  },
  'text-embedding-ada-002': {
    dimension: 1536,
    fieldName: 'embedding',
  },
};

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,129 @@
* SPDX-License-Identifier: Apache-2.0
*/

import React from 'react';
import React, { useEffect } from 'react';
import { useSelector } from 'react-redux';
import { isEmpty } from 'lodash';
import {
EuiAccordion,
EuiFlexGroup,
EuiFlexItem,
EuiSpacer,
} from '@elastic/eui';
import { JsonField } from '../input_fields';
import { getIn, useFormikContext } from 'formik';
import { WorkflowFormValues } from '../../../../../common';
import { AppState } from '../../../../store';
import {
getEmbeddingField,
getEmbeddingModelDimensions,
getUpdatedIndexMappings,
getUpdatedIndexSettings,
isKnnIndex,
removeVectorFieldFromIndexMappings,
} from '../../../../utils';

/**
 * Props accepted by the AdvancedSettings component. Currently empty: the
 * component declares no props of its own.
 */
interface AdvancedSettingsProps {}

/**
* Input component for configuring ingest-side advanced settings
*/
export function AdvancedSettings(props: AdvancedSettingsProps) {
const { values, setFieldValue } = useFormikContext<WorkflowFormValues>();
const { models, connectors } = useSelector((state: AppState) => state.ml);
const ingestMLProcessors = (Object.values(
values?.ingest?.enrich
) as any[]).filter((ingestProcessor) => ingestProcessor?.model !== undefined);
const ingestProcessorModelIds = ingestMLProcessors
.map((ingestProcessor) => ingestProcessor?.model?.id as string | undefined)
.filter((modelId) => !isEmpty(modelId));
const indexMappingsPath = 'ingest.index.mappings';
const indexSettingsPath = 'ingest.index.settings';
const curMappings = getIn(values, indexMappingsPath);
const curSettings = getIn(values, indexSettingsPath);

// Listen for when processors with models are added / removed, and dynamically
// update the index settings to be knn-enabled or knn-disabled.
useEffect(() => {
if (ingestProcessorModelIds.length > 0) {
ingestProcessorModelIds.forEach((ingestProcessorModelId) => {
const processorModel = Object.values(models).find(
(model) => model.id === ingestProcessorModelId
);
if (processorModel?.connectorId !== undefined) {
const processorConnector = connectors[processorModel?.connectorId];
const dimension = getEmbeddingModelDimensions(processorConnector);

// If a dimension is found, it is a known embedding model.
// Ensure the index is configured to be knn-enabled.
if (dimension !== undefined) {
if (!isKnnIndex(curSettings)) {
setFieldValue(
indexSettingsPath,
getUpdatedIndexSettings(curSettings, true)
);
}
}
}
});
} else {
if (isKnnIndex(curSettings)) {
setFieldValue(
indexSettingsPath,
getUpdatedIndexSettings(curSettings, false)
);
}
}
}, [ingestProcessorModelIds.length]);

// Listen for updates to any ingest processors. Try to update
// any index mappings accordingly, such as setting the knn_vector mappings
// for models that output vector embeddings, or removing any mappings if no ML
// processor is defined.
useEffect(() => {
if (ingestMLProcessors.length > 0) {
ingestMLProcessors.forEach((ingestMLProcessor) => {
const processorModel = Object.values(models).find(
(model) => model.id === ingestMLProcessor?.model?.id
);
if (processorModel?.connectorId !== undefined) {
const processorConnector = connectors[processorModel?.connectorId];
const dimension = getEmbeddingModelDimensions(processorConnector);
const embeddingFieldName = getEmbeddingField(
processorConnector,
ingestMLProcessor
);
if (embeddingFieldName !== undefined && dimension !== undefined) {
setFieldValue(
indexMappingsPath,
getUpdatedIndexMappings(
curMappings,
embeddingFieldName,
dimension
)
);
}
}
});
} else {
setFieldValue(
indexMappingsPath,
removeVectorFieldFromIndexMappings(curMappings)
);
}
}, [getIn(values, 'ingest.enrich')]);

return (
<EuiFlexGroup direction="column">
<EuiFlexItem grow={false}>
<EuiAccordion id="advancedSettings" buttonContent="Advanced settings">
<EuiSpacer size="s" />
<EuiFlexGroup direction="column">
<EuiFlexItem>
<JsonField
label="Index mappings"
fieldPath={'ingest.index.mappings'}
/>
<JsonField label="Index mappings" fieldPath={indexMappingsPath} />
</EuiFlexItem>
<EuiFlexItem>
<JsonField
label="Index settings"
fieldPath={'ingest.index.settings'}
/>
<JsonField label="Index settings" fieldPath={indexSettingsPath} />
</EuiFlexItem>
</EuiFlexGroup>
</EuiAccordion>
Expand Down
36 changes: 5 additions & 31 deletions public/pages/workflows/new_workflow/quick_configure_inputs.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,18 @@ import {
EuiCompressedFieldNumber,
} from '@elastic/eui';
import {
BEDROCK_DIMENSIONS,
COHERE_DIMENSIONS,
DEFAULT_IMAGE_FIELD,
DEFAULT_LLM_RESPONSE_FIELD,
DEFAULT_TEXT_FIELD,
DEFAULT_VECTOR_FIELD,
MODEL_STATE,
Model,
ModelInterface,
OPENAI_DIMENSIONS,
QuickConfigureFields,
WORKFLOW_TYPE,
} from '../../../../common';
import { AppState } from '../../../store';
import { parseModelInputs } from '../../../utils';
import { getEmbeddingModelDimensions, parseModelInputs } from '../../../utils';
import { get } from 'lodash';

interface QuickConfigureInputsProps {
Expand Down Expand Up @@ -121,33 +118,10 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) {
if (selectedModel?.connectorId !== undefined) {
const connector = connectors[selectedModel.connectorId];
if (connector !== undefined) {
// some APIs allow specifically setting the dimensions at runtime,
// so we check for that first.
if (connector.parameters?.dimensions !== undefined) {
setFieldValues({
...fieldValues,
embeddingLength: connector.parameters?.dimensions,
});
} else if (connector.parameters?.model !== undefined) {
const dimensions =
// @ts-ignore
COHERE_DIMENSIONS[connector.parameters?.model] ||
// @ts-ignore
OPENAI_DIMENSIONS[connector.parameters?.model] ||
// @ts-ignore
BEDROCK_DIMENSIONS[connector.parameters?.model];
if (dimensions !== undefined) {
setFieldValues({
...fieldValues,
embeddingLength: dimensions,
});
}
} else {
setFieldValues({
...fieldValues,
embeddingLength: undefined,
});
}
setFieldValues({
...fieldValues,
embeddingLength: getEmbeddingModelDimensions(connector),
});
}
}
}, [fieldValues.modelId, deployedModels, connectors]);
Expand Down
Loading

0 comments on commit fae5cdf

Please sign in to comment.