diff --git a/CosmosDbDataMigrationTool.sln b/CosmosDbDataMigrationTool.sln index 4ed8f39..10126ef 100644 --- a/CosmosDbDataMigrationTool.sln +++ b/CosmosDbDataMigrationTool.sln @@ -103,6 +103,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "PostgreSQL", "PostgreSQL", Extensions\PostgreSQL\README.md = Extensions\PostgreSQL\README.md EndProjectSection EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Cosmos.DataTransfer.MongoExtension", "Extensions\Mongo\Cosmos.DataTransfer.MongoExtension\Cosmos.DataTransfer.MongoExtension.csproj", "{31BC84E1-55E5-45AA-BFAC-90732F20588B}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -193,6 +195,10 @@ Global {85820167-DB94-458B-B09B-9E823996C692}.Debug|Any CPU.Build.0 = Debug|Any CPU {85820167-DB94-458B-B09B-9E823996C692}.Release|Any CPU.ActiveCfg = Release|Any CPU {85820167-DB94-458B-B09B-9E823996C692}.Release|Any CPU.Build.0 = Release|Any CPU + {31BC84E1-55E5-45AA-BFAC-90732F20588B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {31BC84E1-55E5-45AA-BFAC-90732F20588B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {31BC84E1-55E5-45AA-BFAC-90732F20588B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {31BC84E1-55E5-45AA-BFAC-90732F20588B}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -226,6 +232,7 @@ Global {40AD8890-BD78-48F5-AE76-2C2FC6F15B7E} = {39930280-DA29-4814-837B-FA7F252EB3EC} {85820167-DB94-458B-B09B-9E823996C692} = {1B927C5F-50FC-42A6-BAF6-B00E6D760543} {1B927C5F-50FC-42A6-BAF6-B00E6D760543} = {A8A1CEAB-2D82-460C-9B86-74ABD17CD201} + {31BC84E1-55E5-45AA-BFAC-90732F20588B} = {F18E789A-D32D-48D3-B75F-1196D7215F74} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {662B3F27-70D8-45E6-A1C0-1438A9C8A542} diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Context.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Context.cs similarity index 96% rename from Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Context.cs rename to Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Context.cs index 715f07c..2271d46 100644 --- a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Context.cs +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Context.cs @@ -2,7 +2,7 @@ using MongoDB.Driver; using MongoDB.Driver.Core.Events; -namespace Cosmos.DataTransfer.MongoVectorExtension; +namespace Cosmos.DataTransfer.MongoExtension; public class Context { private readonly IMongoDatabase database = null!; diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Cosmos.DataTransfer.MongoExtension.csproj b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Cosmos.DataTransfer.MongoExtension.csproj new file mode 100644 index 0000000..f2e3cac --- /dev/null +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Cosmos.DataTransfer.MongoExtension.csproj @@ -0,0 +1,24 @@ + + + + net6.0 + enable + enable + Exe + + + + + + + + + + + + + + + + + diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/IRepository.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/IRepository.cs similarity index 90% rename from Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/IRepository.cs rename to Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/IRepository.cs index 03ea5aa..302c17b 100644 --- a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/IRepository.cs +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/IRepository.cs @@ -1,6 +1,6 @@ using System.Linq.Expressions; -namespace Cosmos.DataTransfer.MongoVectorExtension; +namespace Cosmos.DataTransfer.MongoExtension; public interface IRepository { diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoDataItem.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataItem.cs similarity index 90% rename from Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoDataItem.cs rename to Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataItem.cs index 8bec99b..1f19a4b 100644 --- a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoDataItem.cs +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataItem.cs @@ -1,7 +1,7 @@ using Cosmos.DataTransfer.Interfaces; using MongoDB.Bson; -namespace Cosmos.DataTransfer.MongoVectorExtension; +namespace Cosmos.DataTransfer.MongoExtension; public class MongoDataItem : IDataItem { private readonly BsonDocument record; diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataSinkExtension.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataSinkExtension.cs new file mode 100644 index 0000000..c7beeb3 --- /dev/null +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataSinkExtension.cs @@ -0,0 +1,58 @@ +using System.ComponentModel.Composition; +using Cosmos.DataTransfer.Interfaces; +using Cosmos.DataTransfer.MongoExtension.Settings; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.Logging; +using MongoDB.Bson; + +namespace Cosmos.DataTransfer.MongoExtension; +[Export(typeof(IDataSinkExtension))] +public class MongoDataSinkExtension : IDataSinkExtensionWithSettings +{ + public string DisplayName => "MongoDB"; + + public async Task WriteAsync(IAsyncEnumerable dataItems, IConfiguration config, IDataSourceExtension dataSource, ILogger logger, CancellationToken cancellationToken = default) + { + var settings = config.Get(); + settings.Validate(); + + if (!string.IsNullOrEmpty(settings.ConnectionString) && !string.IsNullOrEmpty(settings.DatabaseName) && !string.IsNullOrEmpty(settings.Collection)) + { + var context = new Context(settings.ConnectionString, settings.DatabaseName); + var repo = context.GetRepository(settings.Collection); + + var batchSize = settings.BatchSize ?? 1000; + + var objects = new List(); + int itemCount = 0; + await foreach (var item in dataItems.WithCancellation(cancellationToken)) + { + var dict = item.BuildDynamicObjectTree(); + objects.Add(new BsonDocument(dict)); + itemCount++; + + if (objects.Count == batchSize) + { + await repo.AddRange(objects); + logger.LogInformation("Added {ItemCount} items to collection '{Collection}'", itemCount, settings.Collection); + objects.Clear(); + } + } + + if (objects.Any()) + { + await repo.AddRange(objects); + } + + if (itemCount > 0) + logger.LogInformation("Added {ItemCount} total items to collection '{Collection}'", itemCount, settings.Collection); + else + logger.LogWarning("No items added to collection '{Collection}'", settings.Collection); + } + } + + public IEnumerable GetSettings() + { + yield return new MongoSinkSettings(); + } +} diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoVectorDataSourceExtension.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataSourceExtension.cs similarity index 88% rename from Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoVectorDataSourceExtension.cs rename to Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataSourceExtension.cs index 4c844bc..5768800 100644 --- a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoVectorDataSourceExtension.cs +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataSourceExtension.cs @@ -1,16 +1,16 @@ using System.ComponentModel.Composition; using System.Runtime.CompilerServices; using Cosmos.DataTransfer.Interfaces; -using Cosmos.DataTransfer.MongoVectorExtension.Settings; +using Cosmos.DataTransfer.MongoExtension.Settings; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging; using MongoDB.Bson; -namespace Cosmos.DataTransfer.MongoVectorExtension; +namespace Cosmos.DataTransfer.MongoExtension; [Export(typeof(IDataSourceExtension))] -internal class MongoVectorDataSourceExtension : IDataSourceExtensionWithSettings +internal class MongoDataSourceExtension : IDataSourceExtensionWithSettings { - public string DisplayName => $"MongoDB-Vector{ExtensionExtensions.BetaExtensionTag}"; + public string DisplayName => "MongoDB"; public async IAsyncEnumerable ReadAsync(IConfiguration config, ILogger logger, [EnumeratorCancellation] CancellationToken cancellationToken = default) { diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoRepository.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoRepository.cs similarity index 95% rename from Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoRepository.cs rename to Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoRepository.cs index f104853..ec2f662 100644 --- a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoRepository.cs +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoRepository.cs @@ -1,7 +1,7 @@ using System.Linq.Expressions; using MongoDB.Driver; -namespace Cosmos.DataTransfer.MongoVectorExtension; +namespace Cosmos.DataTransfer.MongoExtension; public class MongoRepository : IRepository { diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Program.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Program.cs new file mode 100644 index 0000000..90fe8a7 --- /dev/null +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Program.cs @@ -0,0 +1 @@ +Console.WriteLine("Starting Mongo extension"); diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Properties/PublishProfiles/PublishToExtensionsFolder.pubxml b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Properties/PublishProfiles/PublishToExtensionsFolder.pubxml new file mode 100644 index 0000000..789090b --- /dev/null +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Properties/PublishProfiles/PublishToExtensionsFolder.pubxml @@ -0,0 +1,24 @@ + + + + + Debug + Any CPU + ..\..\..\Core\Cosmos.DataTransfer.Core\bin\Debug\net6.0\Extensions + FileSystem + <_TargetId>Folder + net6.0 + false + + + Release + Any CPU + ..\..\..\Core\Cosmos.DataTransfer.Core\bin\Release\net6.0\Extensions + FileSystem + <_TargetId>Folder + net6.0 + false + + \ No newline at end of file diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoBaseSettings.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoBaseSettings.cs similarity index 84% rename from Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoBaseSettings.cs rename to Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoBaseSettings.cs index 0e9af71..1c60153 100644 --- a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoBaseSettings.cs +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoBaseSettings.cs @@ -2,7 +2,7 @@ using Cosmos.DataTransfer.Interfaces; using Cosmos.DataTransfer.Interfaces.Manifest; -namespace Cosmos.DataTransfer.MongoVectorExtension.Settings; +namespace Cosmos.DataTransfer.MongoExtension.Settings; public class MongoBaseSettings : IDataExtensionSettings { [Required] diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoSinkSettings.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoSinkSettings.cs new file mode 100644 index 0000000..3c64b1b --- /dev/null +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoSinkSettings.cs @@ -0,0 +1,10 @@ +using System.ComponentModel.DataAnnotations; + +namespace Cosmos.DataTransfer.MongoExtension.Settings; +public class MongoSinkSettings : MongoBaseSettings +{ + [Required] + public string? Collection { get; set; } + + public int? BatchSize { get; set; } +} diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoSourceSettings.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoSourceSettings.cs similarity index 61% rename from Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoSourceSettings.cs rename to Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoSourceSettings.cs index 0cbbf36..95a9828 100644 --- a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoSourceSettings.cs +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoSourceSettings.cs @@ -1,4 +1,4 @@ -namespace Cosmos.DataTransfer.MongoVectorExtension.Settings; +namespace Cosmos.DataTransfer.MongoExtension.Settings; public class MongoSourceSettings : MongoBaseSettings { public string? Collection { get; set; } diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Cosmos.DataTransfer.MongoVectorExtension.csproj b/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Cosmos.DataTransfer.MongoVectorExtension.csproj index 37735d2..9ef48ac 100644 --- a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Cosmos.DataTransfer.MongoVectorExtension.csproj +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Cosmos.DataTransfer.MongoVectorExtension.csproj @@ -16,6 +16,7 @@ + diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoVectorDataSinkExtension.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoVectorDataSinkExtension.cs index fd4638c..c9d671f 100644 --- a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoVectorDataSinkExtension.cs +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoVectorDataSinkExtension.cs @@ -2,6 +2,7 @@ using Azure; using Azure.AI.OpenAI; using Cosmos.DataTransfer.Interfaces; +using Cosmos.DataTransfer.MongoExtension; using Cosmos.DataTransfer.MongoVectorExtension.Settings; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging; diff --git a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoVectorSinkSettings.cs b/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoVectorSinkSettings.cs index 6e6e53f..4148263 100644 --- a/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoVectorSinkSettings.cs +++ b/Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoVectorSinkSettings.cs @@ -1,4 +1,5 @@ using System.ComponentModel.DataAnnotations; +using Cosmos.DataTransfer.MongoExtension.Settings; namespace Cosmos.DataTransfer.MongoVectorExtension.Settings; public class MongoVectorSinkSettings : MongoBaseSettings diff --git a/Extensions/Mongo/README.md b/Extensions/Mongo/README.md index 9d5952c..f491f2e 100644 --- a/Extensions/Mongo/README.md +++ b/Extensions/Mongo/README.md @@ -20,6 +20,28 @@ Source and sink settings require both `ConnectionString` and `DatabaseName` para ### Sink +```json +{ + "ConnectionString": "", + "DatabaseName: "", + "Collection": "" +} +``` + +# MongoDB Vector Extension (Beta) + +The MongoDB Vector extension is a Sink only extension that builds on the MongoDB extension by providing additional capabilities for generating embeddings using Azure OpenAI APIs. + +> **Note**: When specifying the MongoDB Vector extension as the Sink property in configuration, utilize the name **MongoDB-Vector(beta)**. + +## Settings + +The settings are based on the MongoDB extension settings with additional parameters for generating embeddings. + +### Additional Sink Settings + +The sink settings require the following additional parameters: + - `GenerateEmbedding`: If set to true, the sink will generate embeddings for the records before writing them to the database. The sink requires the `OpenAIUrl`, `OpenAIKey`, and `OpenAIDeploymentModel` parameters to be set. Following paramaters are required if this is true - `OpenAIUrl`: The URL of the OpenAI API - `OpenAIKey`: The API key for the OpenAI API @@ -41,3 +63,4 @@ Source and sink settings require both `ConnectionString` and `DatabaseName` para "DestPropEmbedding": "" } ``` +