diff --git a/tools/blobstorage-backupdata/BackupJob.cs b/tools/blobstorage-backupdata/BackupJob.cs
index 5905c10..44c5dbc 100644
--- a/tools/blobstorage-backupdata/BackupJob.cs
+++ b/tools/blobstorage-backupdata/BackupJob.cs
@@ -13,7 +13,8 @@ namespace BackupData;
 internal sealed class BackupJob
 {
-    private readonly BlobContainerClient BlobContainerClient;
+    private readonly BlobContainerClient SnapshotBlobContainerClient;
+    private readonly BlobContainerClient DefinitionBlobContainerClient;
     private readonly IMongoClient MongoClient;
     private readonly DateTime Now;
     private readonly ILogger Logger;
@@ -25,9 +26,10 @@ internal sealed class BackupJob
     private const string UpdatedFieldName = "_meta.updated";
     private const string MetaFieldName = "_meta";
 
-    public BackupJob(BlobContainerClient blobContainerClient, IMongoClient mongoClient, DateTime now, ILoggerFactory loggerFactory, IFilterRenderer filterRenderer)
+    public BackupJob(BlobContainerClient snapshotBlobContainerClient, BlobContainerClient definitionBlobContainerClient, IMongoClient mongoClient, DateTime now, ILoggerFactory loggerFactory, IFilterRenderer filterRenderer)
     {
-        BlobContainerClient = blobContainerClient;
+        SnapshotBlobContainerClient = snapshotBlobContainerClient;
+        DefinitionBlobContainerClient = definitionBlobContainerClient;
         MongoClient = mongoClient;
         Now = now;
         Logger = loggerFactory.CreateLogger(nameof(BackupJob));
@@ -109,7 +111,15 @@ await Parallel.ForEachAsync(cursor.Current, async (document, _) =>
                 {
                     throw new Exception("Blob name is null or empty.");
                 }
-                await UploadString(jObject.ToString(), blobName);
+                var definitionCoordinates = jObject.ConstructBlobUrl();
+                if (string.IsNullOrWhiteSpace(definitionCoordinates))
+                {
+                    throw new Exception("Definition blob name is null or empty.");
+                }
+                // Get the data from the definition blob store
+                string definitionBlobData = await GetDefinition(definitionCoordinates);
+
+                await UploadString(definitionBlobData, blobName);
                 AddChangesToIndex(changesIndex, jObject, blobName);
             }
             catch (Exception e)
@@ -163,13 +173,32 @@ await UploadString(
             "changes/index");
     }
 
+    private async Task<string> ReadFromBlob(
+        BlobContainerClient blobContainerClient, string blobName)
+    {
+        try
+        {
+            var blobClient = blobContainerClient.GetBlobClient(blobName);
+            var response = await blobClient.DownloadContentAsync();
+            var content = response.Value.Content.ToString();
+            Logger.LogInformation("Content of the blob: {blobName}, content: {content}", blobName, content);
+
+            return content;
+        }
+        catch (Exception e)
+        {
+            Logger.LogError("Failed to read from definition blob: {blobName}, error message: {exceptionMessage}", blobName, e.Message);
+            throw;
+        }
+    }
+
     private async Task UploadString(
         string blobContent, string blobName)
     {
         try
         {
-            var blobClient = BlobContainerClient.GetBlobClient(blobName);
+            var blobClient = SnapshotBlobContainerClient.GetBlobClient(blobName);
             using var stream = new MemoryStream(Encoding.UTF8.GetBytes(blobContent));
             await blobClient.UploadAsync(stream, overwrite: true);
         }
@@ -180,13 +209,26 @@ private async Task UploadString(
         }
     }
 
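+    /// <summary>
+    /// Reads a previously stored definition document from the definition blob container.
+    /// </summary>
+    /// <param name="coordinatePath">The blob path built from the document's coordinates.</param>
+    /// <returns>The raw definition JSON as a string.</returns>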
+    internal async Task<string> GetDefinition(string coordinatePath)
+    {
+        try
+        {
+            var data = await ReadFromBlob(DefinitionBlobContainerClient, coordinatePath);
+            return data;
+        }
+        catch (Exception e)
+        {
+            Logger.LogError("Failed to get definition from blob storage, exception: {exceptionMessage}", e.Message);
+            throw;
+        }
+    }
+
     internal async Task<string[]> GetIndex()
     {
         try
         {
-            var blobClient = BlobContainerClient.GetBlobClient("changes/index");
-            return (await blobClient.DownloadContentAsync()).Value.Content.ToString()
-                .Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries);
+            var data = await ReadFromBlob(SnapshotBlobContainerClient, "changes/index");
+            return data.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries);
         }
         catch (Exception e)
         {
diff --git a/tools/blobstorage-backupdata/BackupJobTests.cs b/tools/blobstorage-backupdata/BackupJobTests.cs
index b77bb94..4aab77d 100644
--- a/tools/blobstorage-backupdata/BackupJobTests.cs
+++ b/tools/blobstorage-backupdata/BackupJobTests.cs
@@ -37,6 +37,14 @@ public override string ToString()
             {
                 ["updated"] = Updated.ToString("yyyy-MM-ddTHH:mm:ssZ")
-            }
+            },
+            ["coordinates"] = new JObject
+            {
+                ["type"] = "type",
+                ["provider"] = "provider",
+                ["namespace"] = "-",
+                ["name"] = "name",
+                ["revision"] = "revision"
+            }
         };
         return jsonObject.ToString();
     }
@@ -79,9 +87,10 @@ public void Initialize(List<List<BsonDocument>> documents) {
         _documents = documents;
         _index = -1;
     }
-    public IEnumerable<BsonDocument> Current
+    public IEnumerable<BsonDocument> Current
     {
-        get {
+        get
+        {
             lock (_lock) {
                 return _documents[_index];
             }
@@ -116,10 +125,11 @@ public string RenderFilter(FilterDefinition<BsonDocument> filter, IMongoCollecti
 [TestFixture]
 public class BackupJobTests
 {
-    private Mock<BlobContainerClient> mockBlobContainerClient = new();
+    private Mock<BlobContainerClient> mockSnapshotBlobContainerClient = new();
+    private Mock<BlobContainerClient> mockDefinitionBlobContainerClient = new();
     private Mock<IMongoClient> mockMongoClient = new();
     private MongoCursorWrapper mockCursor = new();
-
+
     ILoggerFactory loggerFactory = LoggerFactory.Create(builder =>
     {
         builder
@@ -133,9 +143,9 @@ public class BackupJobTests
     private DateTime parseStringUTC(string date)
     {
         return DateTime.SpecifyKind(
-            DateTime.ParseExact(date, "yyyy-MM-ddTHH:mm:ssZ", null, DateTimeStyles.AdjustToUniversal),
+            DateTime.ParseExact(date, "yyyy-MM-ddTHH:mm:ssZ", null, DateTimeStyles.AdjustToUniversal),
             DateTimeKind.Utc);
-
+
     }
     private void SetupMockBlobClient(Mock<BlobContainerClient> mockBlobContainerClient, Dictionary<string, string?> data)
     {
@@ -144,16 +154,18 @@ private void SetupMockBlobClient(Mock<BlobContainerClient> mockBlobContainerClie
             var mockBlobClient = new Mock<BlobClient>();
             mockBlobContainerClient.Setup(x => x.GetBlobClient(key)).Returns(mockBlobClient.Object);
             // mock download
-            mockBlobClient.Setup(x => x.DownloadContentAsync()).ReturnsAsync(() => {
-                var mockBlobDownloadResult = BlobsModelFactory.BlobDownloadResult(BinaryData.FromString(data[key]!));
-                Response<BlobDownloadResult> response = Response.FromValue(mockBlobDownloadResult, new Mock<Response>().Object);
-                return response;
+            mockBlobClient.Setup(x => x.DownloadContentAsync()).ReturnsAsync(() =>
+            {
+                var mockBlobDownloadResult = BlobsModelFactory.BlobDownloadResult(BinaryData.FromString(data[key]!));
+                Response<BlobDownloadResult> response = Response.FromValue(mockBlobDownloadResult, new Mock<Response>().Object);
+                return response;
             });
             // mock upload
             var tcs = new TaskCompletionSource<Response<BlobContentInfo>>();
             tcs.SetResult(new Mock<Response<BlobContentInfo>>().Object);
             mockBlobClient.Setup(x => x.UploadAsync(It.IsAny<Stream>(), true, It.IsAny<CancellationToken>()))
-                .Callback<Stream, bool, CancellationToken>((stream, overwrite, token) => {
+                .Callback<Stream, bool, CancellationToken>((stream, overwrite, token) =>
+                {
                     var reader = new StreamReader(stream);
                     var content = reader.ReadToEnd();
                     data[key] = content;
@@ -161,26 +173,29 @@ private void SetupMockBlobClient(Mock<BlobContainerClient> mockBlobContainerClie
                 .Returns(tcs.Task);
         }
     }
-
+
     [SetUp]
     public void SetUp()
     {
-        mockBlobContainerClient = new Mock<BlobContainerClient>();
+        mockSnapshotBlobContainerClient = new Mock<BlobContainerClient>();
+        mockDefinitionBlobContainerClient = new Mock<BlobContainerClient>();
         mockMongoClient = new Mock<IMongoClient>();
         mockCursor = new MongoCursorWrapper();
         var mockDatabase = new Mock<IMongoDatabase>();
         var mockMongoCollection = new Mock<IMongoCollection<BsonDocument>>();
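+        // Route GetDatabase/GetCollection to the mocks; the FindAsync callback below inspects the rendered filter to pick up the _meta.updated date range.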
x.GetDatabase("clearlydefined", null)).Returns(mockDatabase.Object); mockDatabase.Setup(x => x.GetCollection("definitions-trimmed", null)).Returns(mockMongoCollection.Object); mockMongoCollection .Setup(x => x.FindAsync(It.IsAny>(), It.IsAny>(), default)) - .Callback((FilterDefinition filter, FindOptions options, CancellationToken token) => { + .Callback((FilterDefinition filter, FindOptions options, CancellationToken token) => + { string dateFilter1 = "0001-01-01T00:00:00Z"; string dateFilter2 = "2999-12-31T23:59:59Z"; - foreach (var f in filter.Render(BsonSerializer.SerializerRegistry.GetSerializer(), BsonSerializer.SerializerRegistry).Elements) { - if (f.Name == "_meta.updated") { + foreach (var f in filter.Render(BsonSerializer.SerializerRegistry.GetSerializer(), BsonSerializer.SerializerRegistry).Elements) + { + if (f.Name == "_meta.updated") + { dateFilter1 = f.Value["$gte"].ToString()!; dateFilter2 = f.Value["$lt"].ToString()!; break; @@ -212,22 +227,38 @@ public void SetUp() [Test] public void TestGetIndex() { - var backupJob = new BackupJob(mockBlobContainerClient.Object, mockMongoClient.Object, DateTime.UtcNow, loggerFactory, new MockFilterRenderer()); - var changesets = new string[]{"2021-01-01-00", "2021-01-01-01", "2021-01-01-02"}; + var backupJob = new BackupJob(mockSnapshotBlobContainerClient.Object, mockDefinitionBlobContainerClient.Object, mockMongoClient.Object, DateTime.UtcNow, loggerFactory, new MockFilterRenderer()); + var changesets = new string[] { "2021-01-01-00", "2021-01-01-01", "2021-01-01-02" }; var binaryData = string.Join("\n", changesets); var data = new Dictionary { {"changes/index", binaryData} }; - SetupMockBlobClient(mockBlobContainerClient, data); + SetupMockBlobClient(mockSnapshotBlobContainerClient, data); var result = backupJob.GetIndex().Result; Assert.AreEqual(changesets, result); } + + [Test] + public void TestGetDefinitionData() + { + var backupJob = new BackupJob(mockSnapshotBlobContainerClient.Object, mockDefinitionBlobContainerClient.Object, mockMongoClient.Object, DateTime.UtcNow, loggerFactory, new MockFilterRenderer()); + var data = new Dictionary + { + {"type/provider/-/name/revision/revision.json", """{"licensed": { "toolScore": { "total": 17, "declared": 0, "discovered": 2, "consistency": 0, "spdx": 0, "texts": 15, "declared": "MIT" }}}"""} + }; + + SetupMockBlobClient(mockDefinitionBlobContainerClient, data); + + var result = backupJob.GetDefinition("type/provider/-/name/revision/revision.json").Result; + Assert.AreEqual(data["type/provider/-/name/revision/revision.json"], result); + } + [Test] public void TestSaveData_HappyCase() { - var backupJob = new BackupJob(mockBlobContainerClient.Object, mockMongoClient.Object, DateTime.UtcNow, loggerFactory, new MockFilterRenderer()); + var backupJob = new BackupJob(mockSnapshotBlobContainerClient.Object, mockDefinitionBlobContainerClient.Object, mockMongoClient.Object, DateTime.UtcNow, loggerFactory, new MockFilterRenderer()); // before start, data is empty // index can't be null because it must be read var data = new Dictionary @@ -241,22 +272,35 @@ public void TestSaveData_HappyCase() {"type/provider/-/name/3.json", null}, {"type/provider/-/name/4.json", null}, }; + SetupMockBlobClient(mockSnapshotBlobContainerClient, data); + + var definitionData = new Dictionary + { + {"type/provider/-/name/revision/1.json", """{"licensed": { "toolScore": { "total": 17, "declared": 0, "discovered": 2, "consistency": 0, "spdx": 0, "texts": 15, "declared": "MIT" }}}"""}, + 
{"type/provider/-/name/revision/2.json", """{"licensed": { "toolScore": { "total": 34, "declared": 0, "discovered": 2, "consistency": 0, "spdx": 0, "texts": 15, "declared": "Apache 2.0" }}}"""}, + {"type/provider/-/name/revision/3.json", """{"licensed": { "toolScore": { "total": 78, "declared": 0, "discovered": 2, "consistency": 0, "spdx": 0, "texts": 15, "declared": "MIT" }}}"""}, + {"type/provider/-/name/revision/4.json", """{"licensed": { "toolScore": { "total": 12, "declared": 0, "discovered": 2, "consistency": 0, "spdx": 0, "texts": 15, "declared": "Apache 2.0" }}}"""}, + }; + SetupMockBlobClient(mockDefinitionBlobContainerClient, definitionData); + + // Format the data to be used in the test + string[] definitions = definitionData + .Select(x => x.Value!.ToString()) + .ToArray(); // these are the definitions that database returns var bsonDefinitions = new List() { - """{"_id": "type/provider/-/name/1", "_meta": {"updated": "2023-01-01T00:00:00Z"}}""", - """{"_id": "type/provider/-/name/2", "_meta": {"updated": "2023-01-01T01:00:00Z"}}""", - """{"_id": "type/provider/-/name/3", "_meta": {"updated": "2023-01-02T00:00:00Z"}}""", - """{"_id": "type/provider/-/name/4", "_meta": {"updated": "2023-01-02T00:05:00Z"}}""", + """{"_id": "type/provider/-/name/1", "_meta": {"updated": "2023-01-01T00:00:00Z"}, "coordinates": {"type": "type", "provider": "provider", "namespace": "-", "name": "name", "revision": "1"}}""", + """{"_id": "type/provider/-/name/2", "_meta": {"updated": "2023-01-01T01:00:00Z"}, "coordinates": {"type": "type", "provider": "provider", "namespace": "-", "name": "name", "revision": "2"}}""", + """{"_id": "type/provider/-/name/3", "_meta": {"updated": "2023-01-02T00:00:00Z"}, "coordinates": {"type": "type", "provider": "provider", "namespace": "-", "name": "name", "revision": "3"}}""", + """{"_id": "type/provider/-/name/4", "_meta": {"updated": "2023-01-02T00:05:00Z"}, "coordinates": {"type": "type", "provider": "provider", "namespace": "-", "name": "name", "revision": "4"}}""", }.Select(x => BsonDocument.Parse(x)).ToList(); - SetupMockBlobClient(mockBlobContainerClient, data); + mockCursor.Initialize(new List>() { bsonDefinitions }); - mockCursor.Initialize(new List>(){bsonDefinitions}); - // run the test backupJob.ProcessJob().Wait(); - + // index should be replaced with new values data[indexPath]?.ToString().Should().Be("2023-01-01-00\n2023-01-01-01\n2023-01-02-00"); @@ -266,29 +310,40 @@ public void TestSaveData_HappyCase() // because of concurrency, we can't guarantee the order of changesets data["changes/2023-01-02-00"]?.Split("\n") .ToHashSet() - .Should().BeEquivalentTo(new HashSet() {"type/provider/-/name/3.json", "type/provider/-/name/4.json"}); + .Should().BeEquivalentTo(new HashSet() { "type/provider/-/name/3.json", "type/provider/-/name/4.json" }); // definitions should be uploaded data.Where(x => x.Key.StartsWith("type/provider/-/name/")) - .Select(x => BsonDocument.Parse(x.Value)) - .Should().BeEquivalentTo(bsonDefinitions).And.HaveCount(4); + .Select(x => x.Value) + .Should().BeEquivalentTo(definitions).And.HaveCount(4); } - + [Test] - public void TestSaveData_ShouldExcludeCurrentHour() { + public void TestSaveData_ShouldExcludeCurrentHour() + { var now = DateTime.Parse("2023-01-01T01:03:00Z"); - var backupJob = new BackupJob(mockBlobContainerClient.Object, mockMongoClient.Object, now, loggerFactory, new MockFilterRenderer()); + var backupJob = new BackupJob(mockSnapshotBlobContainerClient.Object, mockDefinitionBlobContainerClient.Object, 
         var now = DateTime.Parse("2023-01-01T01:03:00Z");
-        var backupJob = new BackupJob(mockBlobContainerClient.Object, mockMongoClient.Object, now, loggerFactory, new MockFilterRenderer());
+        var backupJob = new BackupJob(mockSnapshotBlobContainerClient.Object, mockDefinitionBlobContainerClient.Object, mockMongoClient.Object, now, loggerFactory, new MockFilterRenderer());
         var data = new Dictionary<string, string?>
         {
             {indexPath, "2022-12-31-23"},
             {"changes/2023-01-01-00", null},
             {"type/provider/-/name/1.json", null},
         };
-        SetupMockBlobClient(mockBlobContainerClient, data);
+        SetupMockBlobClient(mockSnapshotBlobContainerClient, data);
+
+        var definitionData = new Dictionary<string, string?>
+        {
+            {"type/provider/-/name/revision/1.json", """{"licensed": { "declared": "MIT", "toolScore": { "total": 17, "declared": 0, "discovered": 2, "consistency": 0, "spdx": 0, "texts": 15 }}}"""},
+        };
+        SetupMockBlobClient(mockDefinitionBlobContainerClient, definitionData);
+
+        string[] definitions = definitionData
+            .Select(x => x.Value!.ToString())
+            .ToArray();
 
         var bsonDefinitions = new List<List<string>>()
         {
-            new List<string>() {"""{"_id": "type/provider/-/name/1", "_meta": {"updated": "2023-01-01T00:00:00Z"}}""",},
-            new List<string>() {"""{"_id": "type/provider/-/name/2", "_meta": {"updated": "2023-01-01T01:01:00Z"}}""",},
+            new List<string>() {"""{"_id": "type/provider/-/name/1", "_meta": {"updated": "2023-01-01T00:00:00Z"}, "coordinates": {"type": "type", "provider": "provider", "namespace": "-", "name": "name", "revision": "1"}}""",},
+            new List<string>() {"""{"_id": "type/provider/-/name/2", "_meta": {"updated": "2023-01-01T01:01:00Z"}, "coordinates": {"type": "type", "provider": "provider", "namespace": "-", "name": "name", "revision": "2"}}""",},
         }.Select(x => x.Select(x => BsonDocument.Parse(x)).ToList()).ToList();
         mockCursor.Initialize(bsonDefinitions);
         backupJob.ProcessJob().Wait();
@@ -296,11 +351,53 @@ public void TestSaveData_ShouldExcludeCurrentHour() {
         data[indexPath]?.ToString().Should().Be("2022-12-31-23\n2023-01-01-00");
         data["changes/2023-01-01-00"]?.Split("\n")
             .ToHashSet()
-            .Should().Contain(new HashSet<string>(){"type/provider/-/name/1.json"})
-            .And.NotContain(new HashSet<string>(){"type/provider/-/name/2.json"});
+            .Should().Contain(new HashSet<string>() { "type/provider/-/name/1.json" })
+            .And.NotContain(new HashSet<string>() { "type/provider/-/name/2.json" });
         data.Where(x => x.Key.StartsWith("type/provider/-/name/"))
-            .Select(x => BsonDocument.Parse(x.Value))
-            .Should().HaveCount(1);
+            .Select(x => x.Value)
+            .Should().BeEquivalentTo(definitions).And.HaveCount(1);
+    }
+
+    [Test]
+    public void TestConstructBlobUrl()
+    {
+        var jsonObject = new JObject
+        {
+            ["coordinates"] = new JObject
+            {
+                ["type"] = "type",
+                ["provider"] = "provider",
+                ["namespace"] = "-",
+                ["name"] = "name",
+                ["revision"] = "revision"
+            }
+        };
+        var result = jsonObject.ConstructBlobUrl();
+        Assert.AreEqual("type/provider/-/name/revision/revision.json", result);
+
+        // test with null namespace
+        jsonObject = new JObject
+        {
+            ["coordinates"] = new JObject
+            {
+                ["type"] = "type",
+                ["provider"] = "provider",
+                ["namespace"] = null,
+                ["name"] = "name",
+                ["revision"] = "revision"
+            }
+        };
+
+        result = jsonObject.ConstructBlobUrl();
+        Assert.AreEqual("type/provider/-/name/revision/revision.json", result);
+    }
+
+    [Test]
+    public void TestConstructInvalidBlobUrl()
+    {
+        var jsonObject = JObject.Parse("{}");
+        var result = jsonObject.ConstructBlobUrl();
+        Assert.AreEqual("", result);
     }
 }
\ No newline at end of file
diff --git a/tools/blobstorage-backupdata/JObjectExtensions.cs b/tools/blobstorage-backupdata/JObjectExtensions.cs
index fb707b3..865dfdb 100644
--- a/tools/blobstorage-backupdata/JObjectExtensions.cs
+++ b/tools/blobstorage-backupdata/JObjectExtensions.cs
@@ -1,5 +1,6 @@
 namespace BackupData;
 
+using System.Web;
 using Newtonsoft.Json.Linq;
 
 internal static class JObjectExtensions
@@ -24,4 +25,39 @@ internal static DateTime GetDateTime(this JObject jObject)
     {
         return DateTime.Parse(jObject["_meta"]?["updated"]?.ToString()!);
     }
+
+    /// <summary>
+    /// Constructs the definition blob url from the coordinates field.
+    /// </summary>
+    /// <param name="jObject">The definition document as a JObject.</param>
+    /// <returns>The constructed definition url, or an empty string when the coordinates are missing or incomplete.</returns>
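+    /// <example>
+    /// Coordinates { "type": "type", "provider": "provider", "namespace": null, "name": "name", "revision": "revision" }
+    /// produce "type/provider/-/name/revision/revision.json".
+    /// </example>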
+    internal static string ConstructBlobUrl(this JObject jObject)
+    {
+        if (jObject?["coordinates"]?.Type == null || jObject["coordinates"]!.Type == JTokenType.Null)
+        {
+            return string.Empty;
+        }
+
+        var defaultNamespace = (jObject["coordinates"]?["namespace"]?.ToString() ?? "") == "" ? "-"
+            : jObject["coordinates"]?["namespace"]?.ToString();
+
+        var type = HttpUtility.UrlEncode(jObject["coordinates"]?["type"]?.ToString().Trim());
+        var provider = HttpUtility.UrlEncode(jObject["coordinates"]?["provider"]?.ToString().Trim());
+        var namespaceName = HttpUtility.UrlEncode(defaultNamespace!.Trim());
+        var name = HttpUtility.UrlEncode(jObject["coordinates"]?["name"]?.ToString().Trim());
+        var revision = HttpUtility.UrlEncode(jObject["coordinates"]?["revision"]?.ToString().Trim());
+
+        if (string.IsNullOrEmpty(type) ||
+            string.IsNullOrEmpty(provider) ||
+            string.IsNullOrEmpty(namespaceName) ||
+            string.IsNullOrEmpty(name) ||
+            string.IsNullOrEmpty(revision))
+        {
+            return string.Empty;
+        }
+
+        var constructedUrl = $"{type}/{provider}/{namespaceName}/{name}/revision/{revision}.json".ToLower();
+
+        return constructedUrl;
+    }
 }
\ No newline at end of file
diff --git a/tools/blobstorage-backupdata/Program.cs b/tools/blobstorage-backupdata/Program.cs
index af9ae89..90c7e00 100644
--- a/tools/blobstorage-backupdata/Program.cs
+++ b/tools/blobstorage-backupdata/Program.cs
@@ -16,7 +16,7 @@ internal static void Main()
         ILogger logger = loggerFactory.CreateLogger(nameof(Program));
         logger.LogInformation("Backup job started.");
         var backupJob = CreateBackupJob(loggerFactory);
-        try
+        try
         {
             backupJob.ProcessJob().Wait();
         }
@@ -25,7 +25,7 @@ internal static void Main()
             foreach (var e in ae.InnerExceptions)
             {
                 logger.LogError(e, "Backup job failed.");
-            }
+            }
         }
         catch (Exception e)
         {
@@ -49,6 +49,7 @@ private static BackupJob CreateBackupJob(ILoggerFactory loggerFactory)
         string mongoClientConnectionString = GetEnvironmentVariable("MONGO_CONNECTION_STRING");
         string blobServiceConnectionString = GetEnvironmentVariable("BLOB_SERVICE_CONNECTION_STRING");
         string blobContainerName = GetEnvironmentVariable("BLOB_CONTAINER_NAME");
+        string definitionBlobContainerName = GetEnvironmentVariable("DEFINITION_BLOB_CONTAINER_NAME");
 
         var dbClient = new MongoClient(mongoClientConnectionString);
         var blobOptions = new BlobClientOptions
@@ -62,9 +63,13 @@ private static BackupJob CreateBackupJob(ILoggerFactory loggerFactory)
                 NetworkTimeout = TimeSpan.FromSeconds(100)
             }
         };
+
         var blobServiceClient = new BlobServiceClient(blobServiceConnectionString, blobOptions);
+
+        var definitionBlobContainerClient = blobServiceClient.GetBlobContainerClient(definitionBlobContainerName);
         var blobContainerClient = blobServiceClient.GetBlobContainerClient(blobContainerName);
-        return new BackupJob(blobContainerClient, dbClient, DateTime.SpecifyKind(DateTime.UtcNow, DateTimeKind.Utc), loggerFactory, new FilterRenderer());
+
+        return new BackupJob(blobContainerClient, definitionBlobContainerClient, dbClient, DateTime.SpecifyKind(DateTime.UtcNow, DateTimeKind.Utc), loggerFactory, new FilterRenderer());
     }
 }
diff --git a/tools/blobstorage-backupdata/template.env b/tools/blobstorage-backupdata/template.env
index 7527dd7..2bce56f 100644
--- a/tools/blobstorage-backupdata/template.env
+++ b/tools/blobstorage-backupdata/template.env
@@ -1,3 +1,4 @@
 MONGO_CONNECTION_STRING=
 BLOB_SERVICE_CONNECTION_STRING=
-BLOB_CONTAINER_NAME=production-snapshots
\ No newline at end of file
+BLOB_CONTAINER_NAME=production-snapshots
+DEFINITION_BLOB_CONTAINER_NAME=production-definition
\ No newline at end of file