diff --git a/go.mod b/go.mod index 4638aa1..988c1d6 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.22.0 toolchain go1.22.5 require ( - github.com/ansys/allie-sharedtypes v0.0.0-20250203115929-b37a165edb46 + github.com/ansys/allie-sharedtypes v0.0.0-20250217150414-2bee9f9ca77e github.com/google/go-github/v56 v56.0.0 github.com/google/uuid v1.6.0 github.com/milvus-io/milvus-sdk-go/v2 v2.4.2 @@ -14,23 +14,24 @@ require ( github.com/tiktoken-go/tokenizer v0.2.0 github.com/tmc/langchaingo v0.1.12 golang.org/x/oauth2 v0.24.0 + golang.org/x/text v0.22.0 google.golang.org/grpc v1.70.0 nhooyr.io/websocket v1.8.17 ) require ( github.com/texttheater/golang-levenshtein v1.0.1 - google.golang.org/protobuf v1.36.4 // indirect + google.golang.org/protobuf v1.36.5 // indirect ) require ( github.com/AssemblyAI/assemblyai-go-sdk v1.7.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/azcore v1.17.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.2 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.3.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.0 // indirect - github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2 // indirect + github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.3.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.1 // indirect + github.com/AzureAD/microsoft-authentication-library-for-go v1.3.3 // indirect github.com/PuerkitoBio/goquery v1.9.2 // indirect github.com/andybalholm/cascadia v1.3.2 // indirect github.com/aymerick/douceur v0.2.0 // indirect @@ -70,12 +71,11 @@ require ( gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.32.0 // indirect + golang.org/x/crypto v0.33.0 // indirect golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect - golang.org/x/net v0.34.0 // indirect - golang.org/x/sync v0.10.0 // indirect - golang.org/x/sys v0.29.0 // indirect - golang.org/x/text v0.21.0 // indirect + golang.org/x/net v0.35.0 // indirect + golang.org/x/sync v0.11.0 // indirect + golang.org/x/sys v0.30.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a // indirect gopkg.in/yaml.v2 v2.4.0 // indirect ) diff --git a/go.sum b/go.sum index 09e824c..4b76402 100644 --- a/go.sum +++ b/go.sum @@ -4,20 +4,20 @@ github.com/AssemblyAI/assemblyai-go-sdk v1.7.0 h1:BoqkOoDyffe8z7JeQVO8H7I1Al3REy github.com/AssemblyAI/assemblyai-go-sdk v1.7.0/go.mod h1:ytTvsjAVL+nXZnzBfDagQ/LxDQaKL9W/eTiCo3ZuPJA= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.17.0 h1:g0EZJwz7xkXQiZAI5xi9f3WWFYBlX1CPTrR+NDToRkQ= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.17.0/go.mod h1:XCW7KnZet0Opnr7HccfUw1PLc4CjHqpcaxW8DHklNkQ= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.1 h1:1mvYtZfWQAnwNah/C+Z+Jb9rQH95LPE2vlmMuWAHJk8= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.1/go.mod h1:75I/mXtme1JyWFtz8GocPHVFyH421IBoZErnO16dd0k= -github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.1 h1:Bk5uOhSAenHyR5P61D/NzeQCv+4fEVV8mOkJ82NqpWw= -github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.1/go.mod h1:QZ4pw3or1WPmRBxf0cHd1tknzrT54WPBOQoGutCPvSU= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.2 h1:F0gBpfdPLGsw+nsgk6aqqkZS1jiixa5WwFe3fk/T3Ys= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.2/go.mod h1:SqINnQ9lVVdRlyC8cd1lCI0SdX4n2paeABd2K8ggfnE= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2 h1:yz1bePFlP5Vws5+8ez6T3HWXPmwOK7Yvq8QxDBD3SKY= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2/go.mod h1:Pa9ZNPuoNu/GztvBSKk9J1cDJW6vk/n0zLtV4mgd8N8= github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 h1:ywEEhmNahHBihViHepv3xPBn1663uRv2t2q/ESv9seY= github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkYRNWPENUnqx6bJ2xnSDFI2tjwZNuY= -github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.3.0 h1:WLUIpeyv04H0RCcQHaA4TNoyrQ39Ox7V+re+iaqzTe0= -github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.3.0/go.mod h1:hd8hTTIY3VmUVPRHNH7GVCHO3SHgXkJKZHReby/bnUQ= -github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.0 h1:eXnN9kaS8TiDwXjoie3hMRLuwdUBUMW9KRgOqB3mCaw= -github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.0/go.mod h1:XIpam8wumeZ5rVMuhdDQLMfIPDf1WO3IzrCRO3e3e3o= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.3.1 h1:mrkDCdkMsD4l9wjFGhofFHFrV43Y3c53RSLKOCJ5+Ow= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.3.1/go.mod h1:hPv41DbqMmnxcGralanA/kVlfdH5jv3T4LxGku2E1BY= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.1 h1:bFWuoEKg+gImo7pvkiQEFAc8ocibADgXeiLAxWhWmkI= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.1/go.mod h1:Vih/3yc6yac2JzU4hzpaDupBJP0Flaia9rXXrU8xyww= github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM= github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE= -github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2 h1:kYRSnvJju5gYVyhkij+RTJ/VR6QIUaCfWeaFm2ycsjQ= -github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= +github.com/AzureAD/microsoft-authentication-library-for-go v1.3.3 h1:H5xDQaE3XowWfhZRUpnfC+rGZMEVoSiji+b+/HFAPU4= +github.com/AzureAD/microsoft-authentication-library-for-go v1.3.3/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53/go.mod h1:+3IMCy2vIlbG1XG/0ggNQv0SvxCAIpPM5b1nCz56Xno= github.com/CloudyKit/jet/v3 v3.0.0/go.mod h1:HKQPgSJmdK8hdoAbKUUWajkHyHo4RaU5rMdUywE7VMo= @@ -28,8 +28,8 @@ github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqR github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= -github.com/ansys/allie-sharedtypes v0.0.0-20250203115929-b37a165edb46 h1:HZckJx0ppRWjFeU1gAlqJQIlCZZt2t8d5muzJxqj2lg= -github.com/ansys/allie-sharedtypes v0.0.0-20250203115929-b37a165edb46/go.mod h1:w7m0j6P9+rJ+vD6s5YlwXDNM7JZFUDn/gGV5lSRuyfc= +github.com/ansys/allie-sharedtypes v0.0.0-20250217150414-2bee9f9ca77e h1:7NZ5yq5Z4aopY9THpbghhAJgkQ1S2v+QP1LHigZ5s9c= +github.com/ansys/allie-sharedtypes v0.0.0-20250217150414-2bee9f9ca77e/go.mod h1:U+gsqyWJdVbCKuDbVI4HEKnj+oW4OLHLT67SNWSvMAs= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= @@ -358,8 +358,8 @@ golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc= -golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc= +golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= +golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa h1:ELnwvuAXPNtPk1TJRuGkI9fDTwym6AYBu0qzT8AcHdI= golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ= @@ -393,8 +393,8 @@ golang.org/x/net v0.0.0-20211008194852-3b03d305991f/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= -golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= +golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= +golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= @@ -407,8 +407,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= -golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -436,8 +436,8 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= -golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -450,8 +450,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -504,8 +504,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM= -google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= +google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/main.go b/main.go index dcc831d..c3c6432 100644 --- a/main.go +++ b/main.go @@ -17,6 +17,9 @@ var externalFunctionsFile string //go:embed pkg/externalfunctions/dataextraction.go var dataExtractionFile string +//go:embed pkg/externalfunctions/milvus.go +var milvusFile string + //go:embed pkg/externalfunctions/generic.go var genericFile string @@ -56,6 +59,7 @@ func main() { "knowledge_db": knowledgeDBFile, "llm_handler": llmHandlerFile, "ansys_gpt": ansysGPTFile, + "milvus": milvusFile, } // Load function definitions diff --git a/pkg/externalfunctions/dataextraction.go b/pkg/externalfunctions/dataextraction.go index 5dbfde0..d16b72e 100644 --- a/pkg/externalfunctions/dataextraction.go +++ b/pkg/externalfunctions/dataextraction.go @@ -768,14 +768,6 @@ func StoreElementsInVectorDatabase(elements []sharedtypes.CodeGenerationElement, vectorElements = append(vectorElements, vectorElement) } - // Initialize the vector database. - milvusClient, err := milvus.Initialize() - if err != nil { - errMessage := fmt.Sprintf("Error initializing the vector database: %v", err) - logging.Log.Error(&logging.ContextMap{}, errMessage) - panic(errMessage) - } - // Create the schema for this collection schemaFields := []milvus.SchemaField{ { @@ -783,14 +775,12 @@ func StoreElementsInVectorDatabase(elements []sharedtypes.CodeGenerationElement, Type: "string", }, { - Name: "dense_vector", - Type: "[]float32", - Dimension: config.GlobalConfig.EMBEDDINGS_DIMENSIONS, + Name: "dense_vector", + Type: "[]float32", }, { - Name: "sparse_vector", - Type: "map[uint]float32", - Dimension: config.GlobalConfig.EMBEDDINGS_DIMENSIONS, + Name: "sparse_vector", + Type: "map[uint]float32", }, { Name: "type", @@ -822,7 +812,7 @@ func StoreElementsInVectorDatabase(elements []sharedtypes.CodeGenerationElement, } // Create the collection. - err = milvus.CreateCollection(schema, milvusClient) + err = milvus.CreateCollection(schema) if err != nil { errMessage := fmt.Sprintf("Error creating the collection: %v", err) logging.Log.Error(&logging.ContextMap{}, errMessage) @@ -1111,14 +1101,6 @@ func StoreExamplesInVectorDatabase(examples []sharedtypes.CodeGenerationExample, batchSize = 2 } - // Initialize the vector database. - milvusClient, err := milvus.Initialize() - if err != nil { - errMessage := "error initializing the vector database" - logging.Log.Errorf(&logging.ContextMap{}, "%s: %v", errMessage, err) - panic(fmt.Errorf("%s: %v", errMessage, err)) - } - // Create the schema for this collection schemaFields := []milvus.SchemaField{ { @@ -1138,14 +1120,12 @@ func StoreExamplesInVectorDatabase(examples []sharedtypes.CodeGenerationExample, Type: "string", }, { - Name: "dense_vector", - Type: "[]float32", - Dimension: config.GlobalConfig.EMBEDDINGS_DIMENSIONS, + Name: "dense_vector", + Type: "[]float32", }, { - Name: "sparse_vector", - Type: "map[uint]float32", - Dimension: config.GlobalConfig.EMBEDDINGS_DIMENSIONS, + Name: "sparse_vector", + Type: "map[uint]float32", }, { Name: "text", @@ -1165,7 +1145,7 @@ func StoreExamplesInVectorDatabase(examples []sharedtypes.CodeGenerationExample, } // Create the collection. - err = milvus.CreateCollection(schema, milvusClient) + err = milvus.CreateCollection(schema) if err != nil { errMessage := "error creating the collection" logging.Log.Errorf(&logging.ContextMap{}, "%s: %v", errMessage, err) @@ -1341,14 +1321,6 @@ func StoreUserGuideSectionsInVectorDatabase(sections []sharedtypes.CodeGeneratio batchSize = 2 } - // Initialize the vector database. - milvusClient, err := milvus.Initialize() - if err != nil { - errMessage := "error initializing the vector database" - logging.Log.Errorf(&logging.ContextMap{}, "%s: %v", errMessage, err) - panic(fmt.Errorf("%s: %v", errMessage, err)) - } - // Create the schema for this collection schemaFields := []milvus.SchemaField{ { @@ -1380,14 +1352,12 @@ func StoreUserGuideSectionsInVectorDatabase(sections []sharedtypes.CodeGeneratio Type: "string", }, { - Name: "dense_vector", - Type: "[]float32", - Dimension: config.GlobalConfig.EMBEDDINGS_DIMENSIONS, + Name: "dense_vector", + Type: "[]float32", }, { - Name: "sparse_vector", - Type: "map[uint]float32", - Dimension: config.GlobalConfig.EMBEDDINGS_DIMENSIONS, + Name: "sparse_vector", + Type: "map[uint]float32", }, { Name: "text", @@ -1403,7 +1373,7 @@ func StoreUserGuideSectionsInVectorDatabase(sections []sharedtypes.CodeGeneratio } // Create the collection. - err = milvus.CreateCollection(schema, milvusClient) + err = milvus.CreateCollection(schema) if err != nil { errMessage := "error creating the collection" logging.Log.Errorf(&logging.ContextMap{}, "%s: %v", errMessage, err) @@ -1465,16 +1435,6 @@ func StoreUserGuideSectionsInVectorDatabase(sections []sharedtypes.CodeGeneratio panic(errMessage) } - // dummyDenseVector := make([]float32, config.GlobalConfig.EMBEDDINGS_DIMENSIONS) - // for i := range dummyDenseVector { - // dummyDenseVector[i] = 0.5 - // } - - // dummySparseVector := make(map[uint]float32) - // for i := 0; i < config.GlobalConfig.EMBEDDINGS_DIMENSIONS; i++ { - // dummySparseVector[uint(i)] = 0.5 - // } - // Assign embeddings to the vector database objects. for i := range vectorUserGuideSectionChunks { vectorUserGuideSectionChunks[i].DenseVector = denseEmbeddings[i] @@ -1505,15 +1465,77 @@ func StoreUserGuideSectionsInVectorDatabase(sections []sharedtypes.CodeGeneratio // // Parameters: // - elements: user guide sections. -func StoreUserGuideSectionsInGraphDatabase(sections []sharedtypes.CodeGenerationUserGuideSection) { +// - label: label for the sections (UserGuide by default). +func StoreUserGuideSectionsInGraphDatabase(sections []sharedtypes.CodeGenerationUserGuideSection, label string) { // Initialize the graph database. neo4j.Initialize(config.GlobalConfig.NEO4J_URI, config.GlobalConfig.NEO4J_USERNAME, config.GlobalConfig.NEO4J_PASSWORD) // Add the elements to the graph database. - neo4j.Neo4j_Driver.AddUserGuideSectionNodes(sections) + neo4j.Neo4j_Driver.AddUserGuideSectionNodes(sections, label) // Add the dependencies to the graph database. neo4j.Neo4j_Driver.CreateUserGuideSectionRelationships(sections) return } + +// CreateGeneralDataExtractionDocumentObjects creates general data extraction document objects from +// the provided document chunks, dense embeddings, and sparse embeddings. +// +// Tags: +// - @displayName: Create General Data Extraction Document Objects +// +// Parameters: +// - documentName: name of the document. +// - documentChunks: chunks of the document. +// - denseEmbeddings: dense embeddings of the document. +// - sparseEmbeddings: sparse embeddings of the document. +// +// Returns: +// - extractionData: general data extraction document objects in interface format. +func CreateGeneralDataExtractionDocumentObjects(documentName string, + documentChunks []string, + denseEmbeddings [][]float32, + sparseEmbeddings []map[uint]float32, +) (extractionData []interface{}) { + extractionDataObjects := []GeneralDataExtractionDocument{} + + // Generate GUIDs for each chunk in advance. + chunkGuids := make([]string, len(documentChunks)) + for j := 0; j < len(documentChunks); j++ { + guid := "d" + strings.ReplaceAll(uuid.New().String(), "-", "") + chunkGuids[j] = guid + } + + // Create vector database objects and assign PreviousChunk and NextChunk. + for j := 0; j < len(documentChunks); j++ { + documentChunkElement := GeneralDataExtractionDocument{ + Guid: chunkGuids[j], // Current chunk's GUID + DocumentName: documentName, + PreviousChunk: "", // Default empty + NextChunk: "", // Default empty + Text: documentChunks[j], + DenseVector: denseEmbeddings[j], + SparseVector: sparseEmbeddings[j], + } + + // Assign PreviousChunk and NextChunk GUIDs. + if j > 0 { + documentChunkElement.PreviousChunk = chunkGuids[j-1] + } + if j < len(documentChunks)-1 { + documentChunkElement.NextChunk = chunkGuids[j+1] + } + + // Add the new vector database object to the list. + extractionDataObjects = append(extractionDataObjects, documentChunkElement) + } + + // Convert []VectorDatabaseElement to []interface{} + extractionData = make([]interface{}, len(extractionDataObjects)) + for i, v := range extractionDataObjects { + extractionData[i] = v + } + + return extractionData +} diff --git a/pkg/externalfunctions/externalfunctions.go b/pkg/externalfunctions/externalfunctions.go index 0430c01..d6536d8 100644 --- a/pkg/externalfunctions/externalfunctions.go +++ b/pkg/externalfunctions/externalfunctions.go @@ -5,6 +5,7 @@ var ExternalFunctionsMap = map[string]interface{}{ "PerformVectorEmbeddingRequest": PerformVectorEmbeddingRequest, "PerformVectorEmbeddingRequestWithTokenLimitCatch": PerformVectorEmbeddingRequestWithTokenLimitCatch, "PerformBatchEmbeddingRequest": PerformBatchEmbeddingRequest, + "PerformBatchHybridEmbeddingRequest": PerformBatchHybridEmbeddingRequest, "PerformKeywordExtractionRequest": PerformKeywordExtractionRequest, "PerformGeneralRequest": PerformGeneralRequest, "PerformGeneralRequestWithImages": PerformGeneralRequestWithImages, @@ -48,16 +49,17 @@ var ExternalFunctionsMap = map[string]interface{}{ "AisPerformLLMFinalRequest": AisPerformLLMFinalRequest, // data extraction - "GetGithubFilesToExtract": GetGithubFilesToExtract, - "GetLocalFilesToExtract": GetLocalFilesToExtract, - "AppendStringSlices": AppendStringSlices, - "DownloadGithubFileContent": DownloadGithubFileContent, - "GetLocalFileContent": GetLocalFileContent, - "GetDocumentType": GetDocumentType, - "LangchainSplitter": LangchainSplitter, - "GenerateDocumentTree": GenerateDocumentTree, - "AddDataRequest": AddDataRequest, - "CreateCollectionRequest": CreateCollectionRequest, + "GetGithubFilesToExtract": GetGithubFilesToExtract, + "GetLocalFilesToExtract": GetLocalFilesToExtract, + "AppendStringSlices": AppendStringSlices, + "DownloadGithubFileContent": DownloadGithubFileContent, + "GetLocalFileContent": GetLocalFileContent, + "GetDocumentType": GetDocumentType, + "LangchainSplitter": LangchainSplitter, + "GenerateDocumentTree": GenerateDocumentTree, + "AddDataRequest": AddDataRequest, + "CreateCollectionRequest": CreateCollectionRequest, + "CreateGeneralDataExtractionDocumentObjects": CreateGeneralDataExtractionDocumentObjects, // generic "AssignStringToString": AssignStringToString, @@ -75,4 +77,8 @@ var ExternalFunctionsMap = map[string]interface{}{ "StoreExamplesInGraphDatabase": StoreExamplesInGraphDatabase, "StoreUserGuideSectionsInVectorDatabase": StoreUserGuideSectionsInVectorDatabase, "StoreUserGuideSectionsInGraphDatabase": StoreUserGuideSectionsInGraphDatabase, + + // milvus + "MilvusCreateCollection": MilvusCreateCollection, + "MilvusInsertData": MilvusInsertData, } diff --git a/pkg/externalfunctions/llmhandler.go b/pkg/externalfunctions/llmhandler.go index 59887c2..b143118 100644 --- a/pkg/externalfunctions/llmhandler.go +++ b/pkg/externalfunctions/llmhandler.go @@ -198,6 +198,50 @@ func PerformBatchEmbeddingRequest(input []string) (embeddedVectors [][]float32) return embedding32Array } +// PerformBatchHybridEmbeddingRequest performs a batch hybrid embedding request to LLM +// returning the sparse and dense embeddings +// +// Tags: +// - @displayName: Batch Hybrid Embeddings +// +// Parameters: +// - input: the input strings +// +// Returns: +// - denseEmbeddings: the dense embeddings in float32 format +// - sparseEmbeddings: the sparse embeddings in map format +func PerformBatchHybridEmbeddingRequest(input []string, maxBatchSize int) (denseEmbeddings [][]float32, sparseEmbeddings []map[uint]float32) { + processedEmbeddings := 0 + + // Process data in batches + for i := 0; i < len(input); i += maxBatchSize { + end := i + maxBatchSize + if end > len(input) { + end = len(input) + } + + // Create a batch of data to send to LLM handler + batchTextToEmbed := input[i:end] + + // Send http request + batchDenseEmbeddings, batchLexicalWeights, err := llmHandlerPerformVectorEmbeddingRequest(batchTextToEmbed, true) + if err != nil { + errMessage := fmt.Sprintf("Error performing batch embedding request: %v", err) + logging.Log.Error(&logging.ContextMap{}, errMessage) + panic(errMessage) + } + + // Add the embeddings to the list + denseEmbeddings = append(denseEmbeddings, batchDenseEmbeddings...) + sparseEmbeddings = append(sparseEmbeddings, batchLexicalWeights...) + + processedEmbeddings += len(batchTextToEmbed) + logging.Log.Infof(&logging.ContextMap{}, "Processed %d embeddings", processedEmbeddings) + } + + return denseEmbeddings, sparseEmbeddings +} + // PerformKeywordExtractionRequest performs a keywords extraction request to LLM // // Tags: diff --git a/pkg/externalfunctions/milvus.go b/pkg/externalfunctions/milvus.go new file mode 100644 index 0000000..e74c23d --- /dev/null +++ b/pkg/externalfunctions/milvus.go @@ -0,0 +1,92 @@ +package externalfunctions + +import ( + "github.com/ansys/allie-flowkit/pkg/privatefunctions/milvus" + "github.com/ansys/allie-sharedtypes/pkg/logging" +) + +// MilvusCreateCollection creates a collection in Milvus +// +// Tags: +// - @displayName: Create Milvus Collection +// +// Params: +// - collectionName (string): The name of the collection +// - schema (map[string]interface{}): The schema of the collection +func MilvusCreateCollection(collectionName string, schema []map[string]interface{}) { + // From schema to field schema + schemaObject := []milvus.SchemaField{} + if len(schema) != 0 { + for _, field := range schema { + schemaField := milvus.SchemaField{ + Name: field["name"].(string), + Type: field["type"].(string), + } + schemaObject = append(schemaObject, schemaField) + } + } else { + // Create default schema + schemaObject = []milvus.SchemaField{ + { + Name: "guid", + Type: "string", + }, + { + Name: "document_name", + Type: "string", + }, + { + Name: "previous_chunk", + Type: "string", + }, + { + Name: "next_chunk", + Type: "string", + }, + { + Name: "dense_vector", + Type: "[]float32", + }, + { + Name: "text", + Type: "string", + }, + } + } + + // Create custom schema + milvusSchema, err := milvus.CreateCustomSchema(collectionName, schemaObject, "") + if err != nil { + errorMessage := "Failed to create schema: " + err.Error() + logging.Log.Errorf(&logging.ContextMap{}, "%s", errorMessage) + panic(errorMessage) + } + + // Create collection + err = milvus.CreateCollection(milvusSchema) + if err != nil { + errorMessage := "Failed to create collection: " + err.Error() + logging.Log.Errorf(&logging.ContextMap{}, "%s", errorMessage) + panic(errorMessage) + } +} + +// MilvusInsertData inserts data into a collection in Milvus +// +// Tags: +// - @displayName: Insert Data into Milvus +// +// Params: +// - collectionName (string): The name of the collection +// - data ([]interface{}): The data to insert +// - idFieldName (string): The name of the field to use as the ID +// - idField (string): The ID field +func MilvusInsertData(collectionName string, data []interface{}, idFieldName string) { + // Insert data + err := milvus.InsertData(collectionName, data, idFieldName, idFieldName) + if err != nil { + errorMessage := "Failed to insert data: " + err.Error() + logging.Log.Errorf(&logging.ContextMap{}, "%s", errorMessage) + panic(errorMessage) + } +} diff --git a/pkg/externalfunctions/types.go b/pkg/externalfunctions/types.go index 43021f2..b26a2af 100644 --- a/pkg/externalfunctions/types.go +++ b/pkg/externalfunctions/types.go @@ -215,3 +215,13 @@ type TokenCountUpdateRequest struct { OutputToken int `json:"output_token"` Plattform string `json:"plattform"` } + +type GeneralDataExtractionDocument struct { + DocumentName string `json:"document_name"` + Guid string `json:"guid"` + PreviousChunk string `json:"previous_chunk"` + NextChunk string `json:"next_chunk"` + DenseVector []float32 `json:"dense_vector"` + SparseVector map[uint]float32 `json:"sparse_vector"` + Text string `json:"text"` +} diff --git a/pkg/privatefunctions/generic/generic.go b/pkg/privatefunctions/generic/generic.go index 0f43603..48bef69 100644 --- a/pkg/privatefunctions/generic/generic.go +++ b/pkg/privatefunctions/generic/generic.go @@ -6,8 +6,11 @@ import ( "fmt" "net/http" "reflect" + "strings" "github.com/ansys/allie-sharedtypes/pkg/logging" + "golang.org/x/text/cases" + "golang.org/x/text/language" ) // CreatePayloadAndSendHttpRequest creates a JSON payload from a request object and sends an HTTP POST request to the specified URL. @@ -91,21 +94,70 @@ func ExtractStringFieldFromStruct(data interface{}, fieldName string) (string, e v = v.Elem() } - // Ensure it's a struct - if v.Kind() != reflect.Struct { - return "", fmt.Errorf("expected struct but got %T", data) + // Ensure it's a struct or map[string]interface{} + if v.Kind() == reflect.Struct { + // Get field by name + field := v.FieldByName(fieldName) + if !field.IsValid() { + return "", fmt.Errorf("field '%s' not found", fieldName) + } + + // Ensure field is a string + if field.Kind() != reflect.String { + return "", fmt.Errorf("field '%s' is not a string", fieldName) + } + + return field.String(), nil + } else { + // If it's a map extract the field + field := v.MapIndex(reflect.ValueOf(fieldName)) + if !field.IsValid() { + return "", fmt.Errorf("field '%s' not found", fieldName) + } + + fieldValue := field.Interface() + + // Check if the field is of type string + strVal, ok := fieldValue.(string) + if !ok { + return "", fmt.Errorf("field '%s' is not a string", fieldName) + } + + return strVal, nil } +} - // Get field by name - field := v.FieldByName(fieldName) - if !field.IsValid() { - return "", fmt.Errorf("field '%s' not found", fieldName) +// SnakeToCamel converts a snake_case string to camelCase or PascalCase based on upperFirst flag +// +// Parameters: +// - s: the snake_case string to convert. +// - upperFirst: a flag to determine if the first letter should be capitalized. +// +// Returns: +// - the camelCase or PascalCase string. +func SnakeToCamel(s string, upperFirst bool) string { + parts := strings.Split(s, "_") + if len(parts) == 0 { + return s } - // Ensure field is a string - if field.Kind() != reflect.String { - return "", fmt.Errorf("field '%s' is not a string", fieldName) + // Use proper Unicode-aware title casing + titleCaser := cases.Title(language.English) + + // Process the first part based on upperFirst flag + var result string + if upperFirst { + result = titleCaser.String(parts[0]) // PascalCase: Capitalize first letter + } else { + result = strings.ToLower(parts[0]) // camelCase: Keep lowercase for first word + } + + // Capitalize the first letter of subsequent parts + for _, part := range parts[1:] { + if len(part) > 0 { + result += titleCaser.String(part) // Capitalize each word properly + } } - return field.String(), nil + return result } diff --git a/pkg/privatefunctions/milvus/milvus.go b/pkg/privatefunctions/milvus/milvus.go index 0466e2a..663f769 100644 --- a/pkg/privatefunctions/milvus/milvus.go +++ b/pkg/privatefunctions/milvus/milvus.go @@ -204,11 +204,10 @@ func loadCollection(collectionName string, milvusClient client.Client) (funcErro // // Parameters: // - schema: Schema of the collection to be created. -// - milvusClient: Milvus client. // // Returns: // - error: Error if any issue occurs during creating the collection. -func CreateCollection(schema *entity.Schema, milvusClient client.Client) (funcError error) { +func CreateCollection(schema *entity.Schema) (funcError error) { defer func() { r := recover() if r != nil { @@ -217,6 +216,13 @@ func CreateCollection(schema *entity.Schema, milvusClient client.Client) (funcEr return } }() + // Create Milvus client + milvusClient, err := newClient() + if err != nil { + logging.Log.Errorf(&logging.ContextMap{}, "error during NewMilvusClient: %s", err.Error()) + return err + } + // Check if the collection already exists hasColl, err := milvusClient.HasCollection( context.Background(), // ctx @@ -229,6 +235,11 @@ func CreateCollection(schema *entity.Schema, milvusClient client.Client) (funcEr if hasColl { logging.Log.Infof(&logging.ContextMap{}, "Collection already exists: %s\n", schema.CollectionName) + // Load the collection + err = loadCollection(schema.CollectionName, milvusClient) + if err != nil { + logging.Log.Errorf(&logging.ContextMap{}, "Error during LoadCollection: %v", err) + } return nil } @@ -368,21 +379,23 @@ func CreateIndexes(collectionName string, milvusClient client.Client, guidFieldN } // Create a vector index for the sparseVectorFieldName field - sparseIdx, err := entity.NewIndexSparseInverted(entity.IP, 0) - if err != nil { - logging.Log.Errorf(&logging.ContextMap{}, "failed to create index %v", err.Error()) - return err - } - err = milvusClient.CreateIndex( - context.Background(), - collectionName, - sparseVectorFieldName, - sparseIdx, - false, - ) - if err != nil { - logging.Log.Errorf(&logging.ContextMap{}, "failed to create index %v", err.Error()) - return err + if sparseVectorFieldName != "" { + sparseIdx, err := entity.NewIndexSparseInverted(entity.IP, 0) + if err != nil { + logging.Log.Errorf(&logging.ContextMap{}, "failed to create index %v", err.Error()) + return err + } + err = milvusClient.CreateIndex( + context.Background(), + collectionName, + sparseVectorFieldName, + sparseIdx, + false, + ) + if err != nil { + logging.Log.Errorf(&logging.ContextMap{}, "failed to create index %v", err.Error()) + return err + } } return nil @@ -416,8 +429,10 @@ func CreateCustomSchema(collectionName string, fields []SchemaField, description fieldSchema.DataType = entity.FieldTypeJSON case "[]float32": fieldSchema.DataType = entity.FieldTypeFloatVector + field.Dimension = config.GlobalConfig.EMBEDDINGS_DIMENSIONS case "map[uint]float32": fieldSchema.DataType = entity.FieldTypeSparseVector + field.Dimension = config.GlobalConfig.EMBEDDINGS_DIMENSIONS case "[]bool": fieldSchema.DataType = entity.FieldTypeBinaryVector default: diff --git a/pkg/privatefunctions/neo4j/neo4j.go b/pkg/privatefunctions/neo4j/neo4j.go index ecee7f5..9fbe84a 100644 --- a/pkg/privatefunctions/neo4j/neo4j.go +++ b/pkg/privatefunctions/neo4j/neo4j.go @@ -256,10 +256,11 @@ func (neo4j_context *neo4j_Context) AddCodeGenerationExampleNodes(nodes []shared // // Parameters: // - nodes: List of nodes to be added. +// - label: Label for the nodes. // // Returns: // - funcError: Error object. -func (neo4j_context *neo4j_Context) AddUserGuideSectionNodes(nodes []sharedtypes.CodeGenerationUserGuideSection) (funcError error) { +func (neo4j_context *neo4j_Context) AddUserGuideSectionNodes(nodes []sharedtypes.CodeGenerationUserGuideSection, label string) (funcError error) { defer func() { r := recover() if r != nil { @@ -281,6 +282,9 @@ func (neo4j_context *neo4j_Context) AddUserGuideSectionNodes(nodes []sharedtypes for _, node := range nodes { // Convert the node object to a map nodeType := "UserGuide" + if label != "" { + nodeType = label + } nodeName := node.Name nodeMap := make(map[string]any) nodeJSON, err := json.Marshal(node) // Convert struct to JSON @@ -558,7 +562,7 @@ func (neo4j_context *neo4j_Context) CreateUserGuideSectionRelationships(nodes [] // Check if reference link references a document and create relationship _, err = transaction.Run(db_ctx, - "MATCH (a {Name: $a}) MATCH (b:UserGuide {document_name: $b}) WITH a, b ORDER BY b.level ASC LIMIT 1 MERGE (a)-[:REFERENCES]->(b)", + "MATCH (a {Name: $a}) MATCH (b {document_name: $b}) WITH a, b ORDER BY b.level ASC LIMIT 1 MERGE (a)-[:REFERENCES]->(b)", map[string]any{ "a": node.Name, "b": referenceLink,