Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Support optional params for all processors; onboard text_chunking processor #266

Merged
merged 1 commit into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,13 @@ export enum WORKFLOW_TYPE {
UNKNOWN = 'Unknown',
}

/**
 * Supported processor types.
 *
 * The values should be consistent with the underlying implementation: they are
 * used when generating the final ingest/search pipeline configurations. Each
 * member is declared exactly once; declaring a member twice (e.g. a stale
 * `ML = 'ml_processor'` next to `ML = 'ml_inference'`) is a duplicate-identifier
 * compile error.
 */
export enum PROCESSOR_TYPE {
  ML = 'ml_inference',
  SPLIT = 'split',
  SORT = 'sort',
  TEXT_CHUNKING = 'text_chunking',
}

export enum MODEL_TYPE {
Expand Down Expand Up @@ -118,6 +121,24 @@ export const ML_INFERENCE_DOCS_LINK =
'https://opensearch.org/docs/latest/ingest-pipelines/processors/ml-inference/#configuration-parameters';
/** Documentation link for choosing a model. */
export const ML_CHOOSE_MODEL_LINK =
  'https://opensearch.org/docs/latest/ml-commons-plugin/integrating-ml-models/#choosing-a-model';

/** Documentation link for the text chunking ingest processor. */
export const TEXT_CHUNKING_PROCESSOR_LINK =
  'https://opensearch.org/docs/latest/ingest-pipelines/processors/text-chunking/';

/**
 * Text chunking algorithm constants
 */

/** The algorithms selectable for the text chunking processor. */
export enum TEXT_CHUNKING_ALGORITHM {
  FIXED_TOKEN_LENGTH = 'fixed_token_length',
  DELIMITER = 'delimiter',
}

/** IDs of the optional fields listed for the `fixed_token_length` algorithm. */
export const FIXED_TOKEN_LENGTH_OPTIONAL_FIELDS: string[] = ['token_limit', 'tokenizer', 'overlap_rate'];

/** IDs of the optional fields listed for the `delimiter` algorithm. */
export const DELIMITER_OPTIONAL_FIELDS: string[] = ['delimiter'];

/** IDs of the optional fields shared across both algorithms. */
export const SHARED_OPTIONAL_FIELDS: string[] = [
  'max_chunk_limit',
  'description',
  'tag',
];

/**
* MISCELLANEOUS
*/
Expand Down
7 changes: 4 additions & 3 deletions common/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ export type ConfigFieldType =
| 'select'
| 'model'
| 'map'
| 'mapArray';
| 'mapArray'
| 'boolean'
| 'number';

/**
 * The value a config field may hold.
 * Note: `{}` accepts any non-nullish value, which is why the processor configs
 * can assign numbers and booleans (e.g. `value: 384`, `value: false`) here.
 */
export type ConfigFieldValue = string | {};

/**
 * A single configurable field of a config (e.g. one processor parameter).
 */
export interface IConfigField {
  // the kind of input; the config field list switches on this to pick which input to render
  type: ConfigFieldType;
  // identifier of the field; used as the last segment of the form field path
  id: string;
  // NOTE(review): this listing comes from a diff view; `optional` and `label` look like
  // lines removed by the change (optional params moved to `optionalFields`) - confirm
  optional?: boolean;
  // display label; the older rendering code fell back to `id` when this was not provided
  label?: string;
  // presumably the initial / default value for the field - TODO confirm against the form init logic
  value?: ConfigFieldValue;
  // the available options when `type` is 'select'
  selectOptions?: ConfigFieldValue[];
}
Expand All @@ -41,6 +41,7 @@ export interface IConfig {
id: string;
name: string;
fields: IConfigField[];
optionalFields?: IConfigField[];
}

export interface IProcessorConfig extends IConfig {
Expand Down
5 changes: 3 additions & 2 deletions public/configs/base_config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ export abstract class BaseConfig implements IConfig {
id: string;
name: string;
fields: IConfigField[];
// TODO: have a dedicated optional fields list to display more fields & have more
// flexibility for the users to customize
optionalFields?: IConfigField[];

// No-op constructor. If there are general / defaults for field values, add in here.
constructor() {
this.id = '';
this.name = '';
this.fields = [];
this.optionalFields = [];
}

// Persist a standard toObj() fn that all component classes can use. This is necessary
Expand All @@ -29,6 +29,7 @@ export abstract class BaseConfig implements IConfig {
id: this.id,
name: this.name,
fields: this.fields,
optionalFields: this.optionalFields,
} as IConfig;
}
}
1 change: 1 addition & 0 deletions public/configs/ingest_processors/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
export * from './ml_ingest_processor';
export * from './split_ingest_processor';
export * from './sort_ingest_processor';
export * from './text_chunking_ingest_processor';
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

import { PROCESSOR_TYPE, TEXT_CHUNKING_ALGORITHM } from '../../../common';
import { generateId } from '../../utils';
import { Processor } from '../processor';

/**
 * The text chunking ingest processor.
 *
 * Required fields: the field map and the chunking algorithm. Optional fields are the
 * union of the optional params of both algorithms plus the algorithm-independent ones;
 * the per-algorithm / shared groupings are persisted in common/constants.ts. For more
 * details, see https://opensearch.org/docs/latest/ingest-pipelines/processors/text-chunking/
 */
export class TextChunkingIngestProcessor extends Processor {
  constructor() {
    super();
    this.id = generateId('text_chunking_processor_ingest');
    this.type = PROCESSOR_TYPE.TEXT_CHUNKING;
    this.name = 'Text Chunking Processor';

    // required params
    this.fields = [
      { id: 'field_map', type: 'map' },
      {
        id: 'algorithm',
        type: 'select',
        selectOptions: [
          TEXT_CHUNKING_ALGORITHM.FIXED_TOKEN_LENGTH,
          TEXT_CHUNKING_ALGORITHM.DELIMITER,
        ],
      },
    ];

    // optional params; fields without a `value` have no preset default
    this.optionalFields = [
      // fixed_token_length optional params
      { id: 'token_limit', type: 'number', value: 384 },
      { id: 'tokenizer', type: 'string', value: 'standard' },
      { id: 'overlap_rate', type: 'number', value: 0 },
      // delimiter optional params
      { id: 'delimiter', type: 'string' },
      // shared optional params (independent of algorithm)
      { id: 'max_chunk_limit', type: 'number', value: 100 },
      { id: 'description', type: 'string' },
      { id: 'tag', type: 'string' },
    ];
  }
}
38 changes: 36 additions & 2 deletions public/configs/ml_processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,47 @@ export abstract class MLProcessor extends Processor {
type: 'model',
},
{
id: 'inputMap',
id: 'input_map',
type: 'mapArray',
},
{
id: 'outputMap',
id: 'output_map',
type: 'mapArray',
},
];
this.optionalFields = [
{
id: 'description',
type: 'string',
},
{
id: 'model_config',
type: 'json',
},
{
id: 'full_response_path',
type: 'boolean',
value: false,
},
{
id: 'ignore_missing',
type: 'boolean',
value: false,
},
{
id: 'ignore_failure',
type: 'boolean',
value: false,
},
{
id: 'max_prediction_tasks',
type: 'number',
value: 10,
},
{
id: 'tag',
type: 'string',
},
];
}
}
17 changes: 14 additions & 3 deletions public/configs/sort_processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,27 @@ export abstract class SortProcessor extends Processor {
{
id: 'field',
type: 'string',
label: 'Field',
},
];
this.optionalFields = [
{
id: 'order',
type: 'select',
label: 'Order',
optional: true,
selectOptions: [SORT_ORDER.ASC, SORT_ORDER.DESC],
value: SORT_ORDER.ASC,
},
{
id: 'target_field',
type: 'string',
},
{
id: 'description',
type: 'string',
},
{
id: 'tag',
type: 'string',
},
];
}
}
23 changes: 21 additions & 2 deletions public/configs/split_processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,31 @@ export abstract class SplitProcessor extends Processor {
{
id: 'field',
type: 'string',
label: 'Field',
},
{
id: 'separator',
type: 'string',
label: 'Separator',
},
];
this.optionalFields = [
// TODO: although listed in docs, this field doesn't seem to exist. Fails
// at regular API level.
// {
// id: 'preserve_field',
// type: 'boolean',
// value: false,
// },
{
id: 'target_field',
type: 'string',
},
{
id: 'description',
type: 'string',
},
{
id: 'tag',
type: 'string',
},
];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,41 @@

import React from 'react';
import { EuiFlexItem, EuiSpacer } from '@elastic/eui';
import { TextField, ModelField, SelectField } from './input_fields';
import { IConfig } from '../../../../common';
import {
TextField,
SelectField,
BooleanField,
NumberField,
} from './input_fields';
import { IConfigField } from '../../../../common';
import { camelCaseToTitleString } from '../../../utils';

/**
* A helper component to format all of the input fields for a component. Dynamically
* render based on the input type.
*/

/**
 * Props for the ConfigFieldList component.
 * NOTE(review): this listing comes from a diff view; `config` looks like the older
 * prop that `configId` + `configFields` replace - confirm which set is current.
 */
interface ConfigFieldListProps {
  // the config whose `fields` and `id` were read by the older rendering code
  config: IConfig;
  // ID of the config; used as the middle segment of each field's form path
  configId: string;
  // the fields to render, one input per entry
  configFields: IConfigField[];
  baseConfigPath: string; // the base path of the nested config, if applicable. e.g., 'ingest.enrich'
  // callback handed to every rendered input field
  onFormChange: () => void;
}

// size of the spacer rendered beneath each input field
const CONFIG_FIELD_SPACER_SIZE = 'm';

export function ConfigFieldList(props: ConfigFieldListProps) {
const configFields = props.config.fields || [];
const configId = props.config.id;
return (
<EuiFlexItem grow={false}>
{configFields.map((field, idx) => {
{props.configFields.map((field, idx) => {
let el;
switch (field.type) {
case 'string': {
el = (
<EuiFlexItem key={idx}>
<TextField
// Default to ID if no optional formatted / prettified label provided
label={field.label || field.id}
fieldPath={`${props.baseConfigPath}.${configId}.${field.id}`}
label={camelCaseToTitleString(field.id)}
fieldPath={`${props.baseConfigPath}.${props.configId}.${field.id}`}
showError={true}
onFormChange={props.onFormChange}
/>
Expand All @@ -49,7 +53,43 @@ export function ConfigFieldList(props: ConfigFieldListProps) {
<EuiFlexItem key={idx}>
<SelectField
field={field}
fieldPath={`${props.baseConfigPath}.${configId}.${field.id}`}
fieldPath={`${props.baseConfigPath}.${props.configId}.${field.id}`}
onFormChange={props.onFormChange}
/>
<EuiSpacer size={CONFIG_FIELD_SPACER_SIZE} />
</EuiFlexItem>
);
break;
}
case 'boolean': {
el = (
<EuiFlexItem key={idx}>
<BooleanField
label={camelCaseToTitleString(field.id)}
fieldPath={`${props.baseConfigPath}.${props.configId}.${field.id}`}
onFormChange={props.onFormChange}
enabledOption={{
id: 'true',
label: 'True',
}}
disabledOption={{
id: 'false',
label: 'False',
}}
showLabel={true}
/>
<EuiSpacer size={CONFIG_FIELD_SPACER_SIZE} />
</EuiFlexItem>
);
break;
}
case 'number': {
el = (
<EuiFlexItem key={idx}>
<NumberField
label={camelCaseToTitleString(field.id)}
fieldPath={`${props.baseConfigPath}.${props.configId}.${field.id}`}
showError={true}
onFormChange={props.onFormChange}
/>
<EuiSpacer size={CONFIG_FIELD_SPACER_SIZE} />
Expand Down
Loading
Loading