From e97e59a928eb809fce1550ac99e4d27aaf466515 Mon Sep 17 00:00:00 2001 From: yoseplee Date: Fri, 21 Mar 2025 23:10:01 +0900 Subject: [PATCH 1/8] feat(opensearch): add CreateKnnIndexDto and implement createKnnIndex method for KNN index creation --- .../repositories/dtos/create-knn-index.dto.ts | 26 ++++++++++++ .../api/src/common/repositories/dtos/index.ts | 1 + .../repositories/opensearch.repository.ts | 41 ++++++++++++++++++- 3 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 apps/api/src/common/repositories/dtos/create-knn-index.dto.ts diff --git a/apps/api/src/common/repositories/dtos/create-knn-index.dto.ts b/apps/api/src/common/repositories/dtos/create-knn-index.dto.ts new file mode 100644 index 000000000..9e38f3147 --- /dev/null +++ b/apps/api/src/common/repositories/dtos/create-knn-index.dto.ts @@ -0,0 +1,26 @@ +/** + * Copyright 2023 LINE Corporation + * + * LINE Corporation licenses this file to you under the Apache License, + * version 2.0 (the "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at: + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +export class CreateKnnIndexDto { + index: string; + dimension: number; + spaceType: SpaceType; +} + +export enum SpaceType { + L2 = 'l2', + CosineSimil = 'cosinesimilar', + InnerProduct = 'innerproduct', +} \ No newline at end of file diff --git a/apps/api/src/common/repositories/dtos/index.ts b/apps/api/src/common/repositories/dtos/index.ts index 180deb782..693c084e9 100644 --- a/apps/api/src/common/repositories/dtos/index.ts +++ b/apps/api/src/common/repositories/dtos/index.ts @@ -14,6 +14,7 @@ * under the License. */ export { CreateIndexDto } from './create-index.dto'; +export { CreateKnnIndexDto } from './create-knn-index.dto'; export { PutMappingsDto } from './put-mappings.dto'; export { CreateDataDto } from './create-data.dto'; export { GetDataDto } from './get-data.dto'; diff --git a/apps/api/src/common/repositories/opensearch.repository.ts b/apps/api/src/common/repositories/opensearch.repository.ts index c5ac43539..7cc1ec5bb 100644 --- a/apps/api/src/common/repositories/opensearch.repository.ts +++ b/apps/api/src/common/repositories/opensearch.repository.ts @@ -25,9 +25,9 @@ import { } from '@nestjs/common'; import { Client, errors } from '@opensearch-project/opensearch'; -import type { +import { CreateDataDto, - CreateIndexDto, + CreateIndexDto, CreateKnnIndexDto, DeleteBulkDataDto, GetDataDto, PutMappingsDto, @@ -76,6 +76,43 @@ export class OpensearchRepository { }); } + async createKnnIndex({ index, spaceType }: CreateKnnIndexDto) { + const indexName = 'si_' + index + '_' + spaceType.toString(); + + await this.opensearchClient.indices.create({ + index: indexName, + body: { + settings: { + index: { + knn: true, + 'knn.algo_param.ef_search': 100, + }, + }, + mappings: { + properties: { + embedding: { + type: 'knn_vector', + dimension: 3072, + method: { + name: 'hnsw', + space_type: spaceType, + engine: 'nmslib', + parameters: { + ef_construction: 100, + m: 16, + }, + }, + }, + }, + } + }, + }); + await this.opensearchClient.indices.putAlias({ + index: indexName, + name: index, + }); + } + async putMappings({ index, mappings }: PutMappingsDto) { const { statusCode } = await this.opensearchClient.indices.exists({ index, From 46fda9b27d2ff654329b936cce0c5c60884393e3 Mon Sep 17 00:00:00 2001 From: yoseplee Date: Fri, 21 Mar 2025 23:17:21 +0900 Subject: [PATCH 2/8] feat(opensearch): add GetSimilarDataDto and implement searchSimilarData method for KNN search functionality --- .../repositories/dtos/get-similar-data.dto.ts | 19 +++++++++++++++ .../api/src/common/repositories/dtos/index.ts | 1 + .../repositories/opensearch.repository.ts | 23 ++++++++++++++++++- 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 apps/api/src/common/repositories/dtos/get-similar-data.dto.ts diff --git a/apps/api/src/common/repositories/dtos/get-similar-data.dto.ts b/apps/api/src/common/repositories/dtos/get-similar-data.dto.ts new file mode 100644 index 000000000..1a5a9f1bd --- /dev/null +++ b/apps/api/src/common/repositories/dtos/get-similar-data.dto.ts @@ -0,0 +1,19 @@ +/** + * Copyright 2023 LINE Corporation + * + * LINE Corporation licenses this file to you under the Apache License, + * version 2.0 (the "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at: + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +export class GetSimilarDataDto { + index: string; + embedding: number[]; +} diff --git a/apps/api/src/common/repositories/dtos/index.ts b/apps/api/src/common/repositories/dtos/index.ts index 693c084e9..9447696cf 100644 --- a/apps/api/src/common/repositories/dtos/index.ts +++ b/apps/api/src/common/repositories/dtos/index.ts @@ -18,6 +18,7 @@ export { CreateKnnIndexDto } from './create-knn-index.dto'; export { PutMappingsDto } from './put-mappings.dto'; export { CreateDataDto } from './create-data.dto'; export { GetDataDto } from './get-data.dto'; +export { GetSimilarDataDto } from './get-similar-data.dto'; export { UpdateDataDto } from './update-data.dto'; export { DeleteBulkDataDto } from './delete-bulk-data.dto'; export { ScrollDto } from './scroll.dto'; diff --git a/apps/api/src/common/repositories/opensearch.repository.ts b/apps/api/src/common/repositories/opensearch.repository.ts index 7cc1ec5bb..c04a2b9b7 100644 --- a/apps/api/src/common/repositories/opensearch.repository.ts +++ b/apps/api/src/common/repositories/opensearch.repository.ts @@ -29,7 +29,7 @@ import { CreateDataDto, CreateIndexDto, CreateKnnIndexDto, DeleteBulkDataDto, - GetDataDto, + GetDataDto, GetSimilarDataDto, PutMappingsDto, ScrollDto, UpdateDataDto, @@ -113,6 +113,27 @@ export class OpensearchRepository { }); } + async searchSimilarData({ index, embedding }: GetSimilarDataDto) { + const { body } = await this.opensearchClient.search({ + index: index, + body: { + query: { + knn: { + embedding: { + vector: embedding, + k: 5, + }, + }, + }, + }, + }); + + return body.hits.hits.map((v) => ({ + id: v._id, + score: v._score, + })); + } + async putMappings({ index, mappings }: PutMappingsDto) { const { statusCode } = await this.opensearchClient.indices.exists({ index, From 43774242bfd06dd04188193f42e895f3f6d48614 Mon Sep 17 00:00:00 2001 From: yoseplee Date: Mon, 24 Mar 2025 19:09:41 +0900 Subject: [PATCH 3/8] feat(opensearch): rename createKnnIndex to createKNNIndex and normalize index name casing --- apps/api/src/common/repositories/opensearch.repository.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/common/repositories/opensearch.repository.ts b/apps/api/src/common/repositories/opensearch.repository.ts index c04a2b9b7..4b514532f 100644 --- a/apps/api/src/common/repositories/opensearch.repository.ts +++ b/apps/api/src/common/repositories/opensearch.repository.ts @@ -76,8 +76,8 @@ export class OpensearchRepository { }); } - async createKnnIndex({ index, spaceType }: CreateKnnIndexDto) { - const indexName = 'si_' + index + '_' + spaceType.toString(); + async createKNNIndex({ index, spaceType }: CreateKnnIndexDto) { + const indexName = 'si_' + index.toLowerCase() + '_' + spaceType.toString().toLowerCase(); await this.opensearchClient.indices.create({ index: indexName, From 2d8eeb856dc248d062963794f0f50d0f74e7bb88 Mon Sep 17 00:00:00 2001 From: yoseplee Date: Wed, 26 Mar 2025 22:20:47 +0900 Subject: [PATCH 4/8] feat(opensearch): add unit test for createKNNIndex method in OpensearchRepository --- .../opensearch.repository.spec.ts | 55 ++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/apps/api/src/common/repositories/opensearch.repository.spec.ts b/apps/api/src/common/repositories/opensearch.repository.spec.ts index c4075a903..4ecfb369d 100644 --- a/apps/api/src/common/repositories/opensearch.repository.spec.ts +++ b/apps/api/src/common/repositories/opensearch.repository.spec.ts @@ -23,8 +23,9 @@ import type { Client } from '@opensearch-project/opensearch'; import type { TextProperty } from '@opensearch-project/opensearch/api/_types/_common.mapping'; import { getMockProvider } from '@/test-utils/util-functions'; -import { CreateDataDto, PutMappingsDto } from './dtos'; +import {CreateDataDto, CreateKnnIndexDto, PutMappingsDto} from './dtos'; import { OpensearchRepository } from './opensearch.repository'; +import {SpaceType} from "@/common/repositories/dtos/create-knn-index.dto"; const MockClient = { indices: { @@ -283,4 +284,56 @@ describe('Opensearch Repository Test suite', () => { describe('getTotal', () => { return; }); + + describe('createKNNIndex', () => { + it('positive case', async () => { + const index = faker.number.int().toString(); + const spaceType = SpaceType.CosineSimil; + const indexName = 'si_' + index.toLowerCase() + '_' + spaceType.toString().toLowerCase(); + jest.spyOn(osClient.indices, 'create'); + jest.spyOn(osClient.indices, 'putAlias'); + + await osRepo.createIndex({ index }); + + expect(osClient.indices.create).toBeCalledTimes(1); + expect(osClient.indices.create).toBeCalledWith({ + index: indexName, + body: { + settings: { + index: { + knn: true, + 'knn.algo_param.ef_search': 100, + }, + }, + mappings: { + properties: { + embedding: { + type: 'knn_vector', + dimension: 3072, + method: { + name: 'hnsw', + space_type: spaceType, + engine: 'nmslib', + parameters: { + ef_construction: 100, + m: 16, + }, + }, + }, + }, + } + }, + }); + expect(osClient.indices.putAlias).toBeCalledTimes(1); + expect(osClient.indices.putAlias).toBeCalledWith({ + index: indexName, + name: index, + }); + }); + }) + + describe('getSimilarData', () => { + // TODO: Implement this test + return; + }) }); From f789c0bba05d724b66fc272feeacca55a8de7d13 Mon Sep 17 00:00:00 2001 From: yoseplee Date: Thu, 27 Mar 2025 07:41:51 +0900 Subject: [PATCH 5/8] refactor(opensearch): rename searchSimilarData to getSimilarData for clarity --- apps/api/src/common/repositories/opensearch.repository.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/common/repositories/opensearch.repository.ts b/apps/api/src/common/repositories/opensearch.repository.ts index 4b514532f..6b069eced 100644 --- a/apps/api/src/common/repositories/opensearch.repository.ts +++ b/apps/api/src/common/repositories/opensearch.repository.ts @@ -113,7 +113,7 @@ export class OpensearchRepository { }); } - async searchSimilarData({ index, embedding }: GetSimilarDataDto) { + async getSimilarData({ index, embedding }: GetSimilarDataDto) { const { body } = await this.opensearchClient.search({ index: index, body: { From d4357e6e722095d03444e6bd764958bee763ec81 Mon Sep 17 00:00:00 2001 From: yoseplee Date: Thu, 27 Mar 2025 07:43:04 +0900 Subject: [PATCH 6/8] feat(opensearch): add topK parameter to GetSimilarDataDto and update getSimilarData method to use it --- apps/api/src/common/repositories/dtos/get-similar-data.dto.ts | 1 + apps/api/src/common/repositories/opensearch.repository.ts | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/api/src/common/repositories/dtos/get-similar-data.dto.ts b/apps/api/src/common/repositories/dtos/get-similar-data.dto.ts index 1a5a9f1bd..3d0815dca 100644 --- a/apps/api/src/common/repositories/dtos/get-similar-data.dto.ts +++ b/apps/api/src/common/repositories/dtos/get-similar-data.dto.ts @@ -15,5 +15,6 @@ */ export class GetSimilarDataDto { index: string; + topK: number; embedding: number[]; } diff --git a/apps/api/src/common/repositories/opensearch.repository.ts b/apps/api/src/common/repositories/opensearch.repository.ts index 6b069eced..1c8160367 100644 --- a/apps/api/src/common/repositories/opensearch.repository.ts +++ b/apps/api/src/common/repositories/opensearch.repository.ts @@ -113,7 +113,7 @@ export class OpensearchRepository { }); } - async getSimilarData({ index, embedding }: GetSimilarDataDto) { + async getSimilarData({ index, topK, embedding }: GetSimilarDataDto) { const { body } = await this.opensearchClient.search({ index: index, body: { @@ -121,7 +121,7 @@ export class OpensearchRepository { knn: { embedding: { vector: embedding, - k: 5, + k: topK, }, }, }, From 7b8929a2dc4fe3180d2bf9d00a4a6a9dcb53e866 Mon Sep 17 00:00:00 2001 From: yoseplee Date: Thu, 27 Mar 2025 07:43:22 +0900 Subject: [PATCH 7/8] test(opensearch): remove TODO comment for getSimilarData test implementation --- apps/api/src/common/repositories/opensearch.repository.spec.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/api/src/common/repositories/opensearch.repository.spec.ts b/apps/api/src/common/repositories/opensearch.repository.spec.ts index 4ecfb369d..fa41d2340 100644 --- a/apps/api/src/common/repositories/opensearch.repository.spec.ts +++ b/apps/api/src/common/repositories/opensearch.repository.spec.ts @@ -333,7 +333,6 @@ describe('Opensearch Repository Test suite', () => { }) describe('getSimilarData', () => { - // TODO: Implement this test return; }) }); From 296109fa55b9cbd49e5754295c8eadcd2bc1b0a9 Mon Sep 17 00:00:00 2001 From: yoseplee Date: Thu, 27 Mar 2025 07:45:56 +0900 Subject: [PATCH 8/8] feat(opensearch): update createIndex to createKNNIndex with dimension parameter --- .../src/common/repositories/opensearch.repository.spec.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/api/src/common/repositories/opensearch.repository.spec.ts b/apps/api/src/common/repositories/opensearch.repository.spec.ts index fa41d2340..05a91f712 100644 --- a/apps/api/src/common/repositories/opensearch.repository.spec.ts +++ b/apps/api/src/common/repositories/opensearch.repository.spec.ts @@ -23,7 +23,7 @@ import type { Client } from '@opensearch-project/opensearch'; import type { TextProperty } from '@opensearch-project/opensearch/api/_types/_common.mapping'; import { getMockProvider } from '@/test-utils/util-functions'; -import {CreateDataDto, CreateKnnIndexDto, PutMappingsDto} from './dtos'; +import {CreateDataDto, PutMappingsDto} from './dtos'; import { OpensearchRepository } from './opensearch.repository'; import {SpaceType} from "@/common/repositories/dtos/create-knn-index.dto"; @@ -289,11 +289,12 @@ describe('Opensearch Repository Test suite', () => { it('positive case', async () => { const index = faker.number.int().toString(); const spaceType = SpaceType.CosineSimil; + const dimension = 3072; const indexName = 'si_' + index.toLowerCase() + '_' + spaceType.toString().toLowerCase(); jest.spyOn(osClient.indices, 'create'); jest.spyOn(osClient.indices, 'putAlias'); - await osRepo.createIndex({ index }); + await osRepo.createKNNIndex({ index, dimension, spaceType }); expect(osClient.indices.create).toBeCalledTimes(1); expect(osClient.indices.create).toBeCalledWith({