Skip to content

Commit

Permalink
Support optional params for all processors; onboard text_chunking pro…
Browse files Browse the repository at this point in the history
…cessor (#265) (#266)

Signed-off-by: Tyler Ohlsen <[email protected]>
(cherry picked from commit e2d22c3)

Co-authored-by: Tyler Ohlsen <[email protected]>
  • Loading branch information
opensearch-trigger-bot[bot] and ohltyler authored Aug 6, 2024
1 parent fe93f51 commit 03b5c44
Show file tree
Hide file tree
Showing 26 changed files with 695 additions and 79 deletions.
27 changes: 24 additions & 3 deletions common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,13 @@ export enum WORKFLOW_TYPE {
UNKNOWN = 'Unknown',
}

// The values must stay consistent with the processor names of the underlying implementation,
// because they are used when generating the final ingest/search pipeline configurations.
export enum PROCESSOR_TYPE {
ML = 'ml_processor',
SPLIT = 'split_processor',
SORT = 'sort_processor',
ML = 'ml_inference',
SPLIT = 'split',
SORT = 'sort',
TEXT_CHUNKING = 'text_chunking',
}

export enum MODEL_TYPE {
Expand Down Expand Up @@ -118,6 +121,24 @@ export const ML_INFERENCE_DOCS_LINK =
'https://opensearch.org/docs/latest/ingest-pipelines/processors/ml-inference/#configuration-parameters';
export const ML_CHOOSE_MODEL_LINK =
'https://opensearch.org/docs/latest/ml-commons-plugin/integrating-ml-models/#choosing-a-model';
export const TEXT_CHUNKING_PROCESSOR_LINK =
'https://opensearch.org/docs/latest/ingest-pipelines/processors/text-chunking/';

/**
 * Text chunking algorithm constants
 */

// The algorithms offered as select options for the text chunking processor.
export enum TEXT_CHUNKING_ALGORITHM {
  FIXED_TOKEN_LENGTH = 'fixed_token_length',
  DELIMITER = 'delimiter',
}

// IDs of the optional params, grouped by the algorithm they belong to.
// The IDs match the optional field IDs declared on the text chunking processor config.

// Optional params specific to the fixed_token_length algorithm.
export const FIXED_TOKEN_LENGTH_OPTIONAL_FIELDS: string[] = ['token_limit', 'tokenizer', 'overlap_rate'];

// Optional params specific to the delimiter algorithm.
export const DELIMITER_OPTIONAL_FIELDS: string[] = ['delimiter'];

// Optional params shared across algorithms.
export const SHARED_OPTIONAL_FIELDS: string[] = ['max_chunk_limit', 'description', 'tag'];

/**
* MISCELLANEOUS
*/
Expand Down
7 changes: 4 additions & 3 deletions common/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ export type ConfigFieldType =
| 'select'
| 'model'
| 'map'
| 'mapArray';
| 'mapArray'
| 'boolean'
| 'number';

export type ConfigFieldValue = string | {};

export interface IConfigField {
type: ConfigFieldType;
id: string;
optional?: boolean;
label?: string;
value?: ConfigFieldValue;
selectOptions?: ConfigFieldValue[];
}
Expand All @@ -41,6 +41,7 @@ export interface IConfig {
id: string;
name: string;
fields: IConfigField[];
optionalFields?: IConfigField[];
}

export interface IProcessorConfig extends IConfig {
Expand Down
5 changes: 3 additions & 2 deletions public/configs/base_config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ export abstract class BaseConfig implements IConfig {
id: string;
name: string;
fields: IConfigField[];
// TODO: have a dedicated optional fields list to display more fields & have more
// flexibility for the users to customize
optionalFields?: IConfigField[];

// No-op constructor. If there are general / defaults for field values, add in here.
constructor() {
this.id = '';
this.name = '';
this.fields = [];
this.optionalFields = [];
}

// Persist a standard toObj() fn that all component classes can use. This is necessary
Expand All @@ -29,6 +29,7 @@ export abstract class BaseConfig implements IConfig {
id: this.id,
name: this.name,
fields: this.fields,
optionalFields: this.optionalFields,
} as IConfig;
}
}
1 change: 1 addition & 0 deletions public/configs/ingest_processors/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
export * from './ml_ingest_processor';
export * from './split_ingest_processor';
export * from './sort_ingest_processor';
export * from './text_chunking_ingest_processor';
75 changes: 75 additions & 0 deletions public/configs/ingest_processors/text_chunking_ingest_processor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

import { PROCESSOR_TYPE, TEXT_CHUNKING_ALGORITHM } from '../../../common';
import { generateId } from '../../utils';
import { Processor } from '../processor';

/**
 * The text chunking ingest processor.
 *
 * Declares the config for a processor of type PROCESSOR_TYPE.TEXT_CHUNKING:
 * two entries in `fields` (`field_map` and `algorithm`) plus a flat list of
 * `optionalFields` covering the optional params of both chunking algorithms
 * and the params shared between them.
 */
export class TextChunkingIngestProcessor extends Processor {
// Takes no arguments; every value below is hard-coded for this processor type.
constructor() {
super();
this.name = 'Text Chunking Processor';
this.type = PROCESSOR_TYPE.TEXT_CHUNKING;
// NOTE(review): generateId presumably derives a unique ID from the given prefix — confirm in utils.
this.id = generateId('text_chunking_processor_ingest');
// The non-optional fields: a field map, and the algorithm selection.
this.fields = [
{
id: 'field_map',
type: 'map',
},
{
id: 'algorithm',
type: 'select',
// The selectable algorithms come from the TEXT_CHUNKING_ALGORITHM enum.
selectOptions: [
TEXT_CHUNKING_ALGORITHM.FIXED_TOKEN_LENGTH,
TEXT_CHUNKING_ALGORITHM.DELIMITER,
],
},
];
// Optional params include all of those possible from both text chunking algorithms.
// For more details, see https://opensearch.org/docs/latest/ingest-pipelines/processors/text-chunking/
// The list of optional params per algorithm and shared across algorithms is persisted in
// common/constants.ts
// Entries with a `value` start out pre-populated with it; the others start with no value set.
this.optionalFields = [
// fixed_token_length optional params
{
id: 'token_limit',
type: 'number',
value: 384,
},
{
id: 'tokenizer',
type: 'string',
value: 'standard',
},
{
id: 'overlap_rate',
type: 'number',
value: 0,
},
// delimiter optional params
{
id: 'delimiter',
type: 'string',
},
// shared optional params (independent of algorithm)
{
id: 'max_chunk_limit',
type: 'number',
value: 100,
},
{
id: 'description',
type: 'string',
},
{
id: 'tag',
type: 'string',
},
];
}
}
38 changes: 36 additions & 2 deletions public/configs/ml_processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,47 @@ export abstract class MLProcessor extends Processor {
type: 'model',
},
{
id: 'inputMap',
id: 'input_map',
type: 'mapArray',
},
{
id: 'outputMap',
id: 'output_map',
type: 'mapArray',
},
];
this.optionalFields = [
{
id: 'description',
type: 'string',
},
{
id: 'model_config',
type: 'json',
},
{
id: 'full_response_path',
type: 'boolean',
value: false,
},
{
id: 'ignore_missing',
type: 'boolean',
value: false,
},
{
id: 'ignore_failure',
type: 'boolean',
value: false,
},
{
id: 'max_prediction_tasks',
type: 'number',
value: 10,
},
{
id: 'tag',
type: 'string',
},
];
}
}
17 changes: 14 additions & 3 deletions public/configs/sort_processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,27 @@ export abstract class SortProcessor extends Processor {
{
id: 'field',
type: 'string',
label: 'Field',
},
];
this.optionalFields = [
{
id: 'order',
type: 'select',
label: 'Order',
optional: true,
selectOptions: [SORT_ORDER.ASC, SORT_ORDER.DESC],
value: SORT_ORDER.ASC,
},
{
id: 'target_field',
type: 'string',
},
{
id: 'description',
type: 'string',
},
{
id: 'tag',
type: 'string',
},
];
}
}
23 changes: 21 additions & 2 deletions public/configs/split_processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,31 @@ export abstract class SplitProcessor extends Processor {
{
id: 'field',
type: 'string',
label: 'Field',
},
{
id: 'separator',
type: 'string',
label: 'Separator',
},
];
this.optionalFields = [
// TODO: although listed in docs, this field doesn't seem to exist. Fails
// at regular API level.
// {
// id: 'preserve_field',
// type: 'boolean',
// value: false,
// },
{
id: 'target_field',
type: 'string',
},
{
id: 'description',
type: 'string',
},
{
id: 'tag',
type: 'string',
},
];
}
Expand Down
60 changes: 50 additions & 10 deletions public/pages/workflow_detail/workflow_inputs/config_field_list.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,41 @@

import React from 'react';
import { EuiFlexItem, EuiSpacer } from '@elastic/eui';
import { TextField, ModelField, SelectField } from './input_fields';
import { IConfig } from '../../../../common';
import {
TextField,
SelectField,
BooleanField,
NumberField,
} from './input_fields';
import { IConfigField } from '../../../../common';
import { camelCaseToTitleString } from '../../../utils';

/**
* A helper component to format all of the input fields for a component. Dynamically
* render based on the input type.
*/

interface ConfigFieldListProps {
config: IConfig;
configId: string;
configFields: IConfigField[];
baseConfigPath: string; // the base path of the nested config, if applicable. e.g., 'ingest.enrich'
onFormChange: () => void;
}

const CONFIG_FIELD_SPACER_SIZE = 'm';

export function ConfigFieldList(props: ConfigFieldListProps) {
const configFields = props.config.fields || [];
const configId = props.config.id;
return (
<EuiFlexItem grow={false}>
{configFields.map((field, idx) => {
{props.configFields.map((field, idx) => {
let el;
switch (field.type) {
case 'string': {
el = (
<EuiFlexItem key={idx}>
<TextField
// Default to ID if no optional formatted / prettified label provided
label={field.label || field.id}
fieldPath={`${props.baseConfigPath}.${configId}.${field.id}`}
label={camelCaseToTitleString(field.id)}
fieldPath={`${props.baseConfigPath}.${props.configId}.${field.id}`}
showError={true}
onFormChange={props.onFormChange}
/>
Expand All @@ -49,7 +53,43 @@ export function ConfigFieldList(props: ConfigFieldListProps) {
<EuiFlexItem key={idx}>
<SelectField
field={field}
fieldPath={`${props.baseConfigPath}.${configId}.${field.id}`}
fieldPath={`${props.baseConfigPath}.${props.configId}.${field.id}`}
onFormChange={props.onFormChange}
/>
<EuiSpacer size={CONFIG_FIELD_SPACER_SIZE} />
</EuiFlexItem>
);
break;
}
case 'boolean': {
el = (
<EuiFlexItem key={idx}>
<BooleanField
label={camelCaseToTitleString(field.id)}
fieldPath={`${props.baseConfigPath}.${props.configId}.${field.id}`}
onFormChange={props.onFormChange}
enabledOption={{
id: 'true',
label: 'True',
}}
disabledOption={{
id: 'false',
label: 'False',
}}
showLabel={true}
/>
<EuiSpacer size={CONFIG_FIELD_SPACER_SIZE} />
</EuiFlexItem>
);
break;
}
case 'number': {
el = (
<EuiFlexItem key={idx}>
<NumberField
label={camelCaseToTitleString(field.id)}
fieldPath={`${props.baseConfigPath}.${props.configId}.${field.id}`}
showError={true}
onFormChange={props.onFormChange}
/>
<EuiSpacer size={CONFIG_FIELD_SPACER_SIZE} />
Expand Down
Loading

0 comments on commit 03b5c44

Please sign in to comment.