/**
 * Vector-distance functions that can be requested for a k-NN index.
 *
 * Each value is sent verbatim as the `space_type` of the `embedding` field
 * mapping, so it must be spelled exactly as OpenSearch expects it.
 */
export enum SpaceType {
  L2 = 'l2',
  // OpenSearch names this space `cosinesimil`; `cosinesimilar` is rejected
  // when the index mapping is created.
  CosineSimil = 'cosinesimil',
  InnerProduct = 'innerproduct',
}

/**
 * Parameters for creating a k-NN (vector search) index.
 */
export class CreateKnnIndexDto {
  /** Logical index name; also used as the alias of the created index. */
  index: string;

  /** Requested dimensionality of the vectors stored in the `embedding` field. */
  dimension: number;

  /** Distance function used to compare embedding vectors. */
  spaceType: SpaceType;
}
/**
 * Parameters for a nearest-neighbour lookup against a k-NN index.
 */
export class GetSimilarDataDto {
  /** Name (or alias) of the index to search. */
  index: string;

  /** Number of nearest neighbours requested from the k-NN query. */
  topK: number;

  /** Query vector that is compared against the stored `embedding` field. */
  embedding: number[];
}
+ const index = faker.number.int().toString(); + const spaceType = SpaceType.CosineSimil; + const dimension = 3072; + const indexName = 'si_' + index.toLowerCase() + '_' + spaceType.toString().toLowerCase(); + jest.spyOn(osClient.indices, 'create'); + jest.spyOn(osClient.indices, 'putAlias'); + + await osRepo.createKNNIndex({ index, dimension, spaceType }); + + expect(osClient.indices.create).toBeCalledTimes(1); + expect(osClient.indices.create).toBeCalledWith({ + index: indexName, + body: { + settings: { + index: { + knn: true, + 'knn.algo_param.ef_search': 100, + }, + }, + mappings: { + properties: { + embedding: { + type: 'knn_vector', + dimension: 3072, + method: { + name: 'hnsw', + space_type: spaceType, + engine: 'nmslib', + parameters: { + ef_construction: 100, + m: 16, + }, + }, + }, + }, + } + }, + }); + expect(osClient.indices.putAlias).toBeCalledTimes(1); + expect(osClient.indices.putAlias).toBeCalledWith({ + index: indexName, + name: index, + }); + }); + }) + + describe('getSimilarData', () => { + return; + }) }); diff --git a/apps/api/src/common/repositories/opensearch.repository.ts b/apps/api/src/common/repositories/opensearch.repository.ts index c5ac43539..1c8160367 100644 --- a/apps/api/src/common/repositories/opensearch.repository.ts +++ b/apps/api/src/common/repositories/opensearch.repository.ts @@ -25,11 +25,11 @@ import { } from '@nestjs/common'; import { Client, errors } from '@opensearch-project/opensearch'; -import type { +import { CreateDataDto, - CreateIndexDto, + CreateIndexDto, CreateKnnIndexDto, DeleteBulkDataDto, - GetDataDto, + GetDataDto, GetSimilarDataDto, PutMappingsDto, ScrollDto, UpdateDataDto, @@ -76,6 +76,64 @@ export class OpensearchRepository { }); } + async createKNNIndex({ index, spaceType }: CreateKnnIndexDto) { + const indexName = 'si_' + index.toLowerCase() + '_' + spaceType.toString().toLowerCase(); + + await this.opensearchClient.indices.create({ + index: indexName, + body: { + settings: { + index: { + knn: true, + 
'knn.algo_param.ef_search': 100, + }, + }, + mappings: { + properties: { + embedding: { + type: 'knn_vector', + dimension: 3072, + method: { + name: 'hnsw', + space_type: spaceType, + engine: 'nmslib', + parameters: { + ef_construction: 100, + m: 16, + }, + }, + }, + }, + } + }, + }); + await this.opensearchClient.indices.putAlias({ + index: indexName, + name: index, + }); + } + + async getSimilarData({ index, topK, embedding }: GetSimilarDataDto) { + const { body } = await this.opensearchClient.search({ + index: index, + body: { + query: { + knn: { + embedding: { + vector: embedding, + k: topK, + }, + }, + }, + }, + }); + + return body.hits.hits.map((v) => ({ + id: v._id, + score: v._score, + })); + } + async putMappings({ index, mappings }: PutMappingsDto) { const { statusCode } = await this.opensearchClient.indices.exists({ index,