From 0015e9f954d441027fad0d260edf5b9ef1eced55 Mon Sep 17 00:00:00 2001 From: esoubiran-aneo Date: Mon, 3 Jul 2023 13:52:03 +0200 Subject: [PATCH 1/8] feat: generate a real world run --- tools/mongodb/generate-real-world-run.sh | 5 + .../scripts/generate-real-world-run.js | 143 ++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100755 tools/mongodb/generate-real-world-run.sh create mode 100644 tools/mongodb/scripts/generate-real-world-run.js diff --git a/tools/mongodb/generate-real-world-run.sh b/tools/mongodb/generate-real-world-run.sh new file mode 100755 index 000000000..15699d7ff --- /dev/null +++ b/tools/mongodb/generate-real-world-run.sh @@ -0,0 +1,5 @@ +#! /usr/bin/env bash + +DIR="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" +# Description: Generate results for ArmoniK +"$DIR/utils/execute-mongo-shell-script.sh" generate-real-world-run diff --git a/tools/mongodb/scripts/generate-real-world-run.js b/tools/mongodb/scripts/generate-real-world-run.js new file mode 100644 index 000000000..90c712d82 --- /dev/null +++ b/tools/mongodb/scripts/generate-real-world-run.js @@ -0,0 +1,143 @@ +const localRequire = require("module").createRequire(__filename) +const faker = localRequire("@faker-js/faker").fakerEN + +// Move to the correct database in MongoDB +db = db.getSiblingDB("database") + +const application = { + ApplicationName: faker.commerce.productName().split(" ").join("."), + ApplicationService: faker.hacker.verb(), + ApplicationVersion: faker.system.semver(), + ApplicationNamespace: faker.commerce.productName().split(" ").join(".") +} + +const partitionId = faker.string.uuid() + +db.PartitionData.insertOne({ + _id: partitionId, + ParentPartitionIds: [], + PodConfiguration: null, + PodMax: faker.number.int({ + min: 20, + max: 100 + }), + PodReserved: faker.number.int({ + min: 0, + max: 20 + }), + PreemptionPercentage: faker.number.int({ + min: 0, + max: 100 + }), + Priority: faker.number.int({ + min: 0, + max: 4 + }), +}) + +const sessionId = faker.string.uuid() +const sessionCreationDate = faker.date.past() + +const options = { + MaxDuration: "00:00:00", + MaxRetries: faker.number.int({ + min: 0, + max: 10 + }), + Options: {}, + Priority: faker.number.int({ + min: 0, + max: 4 + }), + PartitionId: partitionId, + EngineType: faker.commerce.productAdjective(), + ...application +} + +db.SessionData.insertOne({ + _id: sessionId, + Status: 1, // Running + PartitionIds: [partitionId], + CreationDate: sessionCreationDate, + CancellationDate: null, + Options: { + ...options, + }, +}) + +const tasksNumber = 1_000 + +const resultsIds = [] +for (let i = 0; i < tasksNumber; i++) { + const creationDate = faker.date.between({ from: sessionCreationDate, to: new Date() }) + const submittedDate = faker.date.between({ from: creationDate, to: new Date() }) + const startDate = faker.date.future({ refDate: submittedDate }) + const endDate = faker.date.future({ refDate: startDate }) + + const expectedOutputIds = Array.from({ + length: faker.number.int({ + min: 0, + max: 2 + }) + }, () => faker.string.uuid()) + + const taskId = faker.string.uuid() + + resultsIds.push({ + taskId: taskId, + expectedOutputIds: expectedOutputIds + }) + + db.TaskData.insertOne({ + _id: taskId, + SessionId: sessionId, + OwnerPodId: "", + OwnerPodName: "", + PayloadId: faker.string.uuid(), + ParentTaskIds: [], + DataDependencies: [], + RemainingDataDependencies: {}, + ExpectedOutputIds: expectedOutputIds, + InitialTaskId: null, + Status: 4, // Completed + StatusMessage: "", + Options: { + ...options, + }, + CreationDate: creationDate, + SubmittedDate: submittedDate, + StartDate: startDate, + EndDate: endDate, + ReceptionDate: null, + AcquisitionDate: null, + PodTtl: null, + // TODO: Is it a date? + ProcessingToEndDuration: null, + // TODO: Is it a date? + CreationToEndDuration: null, + Output: { + Success: false, + Error: "" + } + }) +} + +resultsIds.forEach(({ taskId, expectedOutputIds }) => { + const creationDate = faker.date.past(); + const completionDate = faker.date.future({ refDate: creationDate }); + + expectedOutputIds.forEach((expectedOutputId) => { + db.Result.insertOne({ + SessionId: sessionId, + Name: faker.word.sample(), + OwnerTaskId: taskId, + Status: 2, // Completed + DependentTasks: [], + // Binary data + Data: faker.number.octal(), + CreationDate: creationDate, + CompletionDate: completionDate, + _id: expectedOutputId, + }) + }) +}) From 0aa9f85d1cabdc55729f059273596bb26f854e79 Mon Sep 17 00:00:00 2001 From: esoubiran-aneo Date: Mon, 3 Jul 2023 13:56:31 +0200 Subject: [PATCH 2/8] docs: remove useless comments --- tools/mongodb/scripts/generate-real-world-run.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/mongodb/scripts/generate-real-world-run.js b/tools/mongodb/scripts/generate-real-world-run.js index 90c712d82..2cfdd6068 100644 --- a/tools/mongodb/scripts/generate-real-world-run.js +++ b/tools/mongodb/scripts/generate-real-world-run.js @@ -111,9 +111,7 @@ for (let i = 0; i < tasksNumber; i++) { ReceptionDate: null, AcquisitionDate: null, PodTtl: null, - // TODO: Is it a date? ProcessingToEndDuration: null, - // TODO: Is it a date? CreationToEndDuration: null, Output: { Success: false, From e8a4f8bea33185a5d8533227b9296f71b998a76a Mon Sep 17 00:00:00 2001 From: esoubiran-aneo Date: Mon, 3 Jul 2023 13:56:44 +0200 Subject: [PATCH 3/8] docs: update populate database guide --- .docs/content/2.guide/populate-database.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.docs/content/2.guide/populate-database.md b/.docs/content/2.guide/populate-database.md index a04506a21..940139141 100644 --- a/.docs/content/2.guide/populate-database.md +++ b/.docs/content/2.guide/populate-database.md @@ -53,14 +53,15 @@ chmod +x ./tools/mongodb/.sh ### Available scripts -| Script name | Description | -|---------------------------------------|---------------------------------------------------| -| `export-all` | Export all collections in the `.database` folder. | -| `generate-partitions` | Generate 100 partitions | -| `generate-sessions` | Generate 100 sessions | -| `generate-session-with-related-tasks` | Generate 1 session and 100 related tasks | -| `generate-tasks` | Generate 100 tasks | -| `generate-results` | Generate 100 results | +| Script name | Description | +|---------------------------------------|-------------------------------------------------------------------------------------| +| `export-all` | Export all collections in the `.database` folder. | +| `generate-partitions` | Generate 100 partitions | +| `generate-sessions` | Generate 100 sessions | +| `generate-session-with-related-tasks` | Generate 1 session and 100 related tasks | +| `generate-tasks` | Generate 100 tasks | +| `generate-results` | Generate 100 results | +| `generate-real-world-run` | Generate 1 partition, 1 session, 1 application and 1 000 tasks with related results | ::alert{type="info"} To generate applications, you must generate tasks. From 83b12793d88bf60163cec59ed710ef5f5ce44a3d Mon Sep 17 00:00:00 2001 From: esoubiran-aneo Date: Tue, 11 Jul 2023 09:48:58 +0200 Subject: [PATCH 4/8] fix: pod name and ip to null --- tools/mongodb/scripts/generate-real-world-run.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/mongodb/scripts/generate-real-world-run.js b/tools/mongodb/scripts/generate-real-world-run.js index 2cfdd6068..844a99add 100644 --- a/tools/mongodb/scripts/generate-real-world-run.js +++ b/tools/mongodb/scripts/generate-real-world-run.js @@ -91,8 +91,8 @@ for (let i = 0; i < tasksNumber; i++) { db.TaskData.insertOne({ _id: taskId, SessionId: sessionId, - OwnerPodId: "", - OwnerPodName: "", + OwnerPodId: null, + OwnerPodName: null, PayloadId: faker.string.uuid(), ParentTaskIds: [], DataDependencies: [], From e7721566a0c2090943e8ae55e85be4fa0ba0c06d Mon Sep 17 00:00:00 2001 From: esoubiran-aneo Date: Tue, 11 Jul 2023 10:18:15 +0200 Subject: [PATCH 5/8] feat: support retry --- .../scripts/generate-real-world-run.js | 78 +++++++++++++------ 1 file changed, 53 insertions(+), 25 deletions(-) diff --git a/tools/mongodb/scripts/generate-real-world-run.js b/tools/mongodb/scripts/generate-real-world-run.js index 844a99add..875886a5b 100644 --- a/tools/mongodb/scripts/generate-real-world-run.js +++ b/tools/mongodb/scripts/generate-real-world-run.js @@ -69,12 +69,48 @@ const tasksNumber = 1_000 const resultsIds = [] for (let i = 0; i < tasksNumber; i++) { + const { taskId, expectedOutputIds } = createTask() + + resultsIds.push({ + taskId: taskId, + expectedOutputIds: expectedOutputIds + }) +} + +resultsIds.forEach(({ taskId, expectedOutputIds }) => { + const creationDate = faker.date.past(); + const completionDate = faker.date.future({ refDate: creationDate }); + + expectedOutputIds.forEach((expectedOutputId) => { + db.Result.insertOne({ + SessionId: sessionId, + Name: faker.word.sample(), + OwnerTaskId: taskId, + Status: 2, // Completed + DependentTasks: [], + // Binary data + Data: faker.number.octal(), + CreationDate: creationDate, + CompletionDate: completionDate, + _id: expectedOutputId, + }) + }) +}) + + +/** + * Create a task + * + * If id and outputsIds are given, the task is created from a retry. + * Only return the last task id when a retry is created. + */ +function createTask(id, outputsIds) { const creationDate = faker.date.between({ from: sessionCreationDate, to: new Date() }) const submittedDate = faker.date.between({ from: creationDate, to: new Date() }) const startDate = faker.date.future({ refDate: submittedDate }) const endDate = faker.date.future({ refDate: startDate }) - const expectedOutputIds = Array.from({ + const expectedOutputIds = outputsIds ?? Array.from({ length: faker.number.int({ min: 0, max: 2 @@ -83,9 +119,8 @@ for (let i = 0; i < tasksNumber; i++) { const taskId = faker.string.uuid() - resultsIds.push({ - taskId: taskId, - expectedOutputIds: expectedOutputIds + const isRetried = faker.datatype.boolean({ + probability: 0.2 }) db.TaskData.insertOne({ @@ -98,8 +133,8 @@ for (let i = 0; i < tasksNumber; i++) { DataDependencies: [], RemainingDataDependencies: {}, ExpectedOutputIds: expectedOutputIds, - InitialTaskId: null, - Status: 4, // Completed + InitialTaskId: id ?? null, + Status: isRetried ? 11 /* Retry */ : 4 /* Completed */, StatusMessage: "", Options: { ...options, @@ -118,24 +153,17 @@ for (let i = 0; i < tasksNumber; i++) { Error: "" } }) -} -resultsIds.forEach(({ taskId, expectedOutputIds }) => { - const creationDate = faker.date.past(); - const completionDate = faker.date.future({ refDate: creationDate }); + if (isRetried) { + const { taskId: id } = createTask(taskId, expectedOutputIds) + return { + taskId: id, + expectedOutputIds: expectedOutputIds + } + } - expectedOutputIds.forEach((expectedOutputId) => { - db.Result.insertOne({ - SessionId: sessionId, - Name: faker.word.sample(), - OwnerTaskId: taskId, - Status: 2, // Completed - DependentTasks: [], - // Binary data - Data: faker.number.octal(), - CreationDate: creationDate, - CompletionDate: completionDate, - _id: expectedOutputId, - }) - }) -}) + return { + taskId: taskId, + expectedOutputIds: expectedOutputIds + } +} From 81933bdddd44c206a116bd9e44c1d283a230f96c Mon Sep 17 00:00:00 2001 From: esoubiran-aneo Date: Tue, 11 Jul 2023 10:58:12 +0200 Subject: [PATCH 6/8] fix: retriedOfIds --- tools/mongodb/scripts/generate-real-world-run.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/mongodb/scripts/generate-real-world-run.js b/tools/mongodb/scripts/generate-real-world-run.js index 875886a5b..d02a6c162 100644 --- a/tools/mongodb/scripts/generate-real-world-run.js +++ b/tools/mongodb/scripts/generate-real-world-run.js @@ -103,8 +103,12 @@ resultsIds.forEach(({ taskId, expectedOutputIds }) => { * * If id and outputsIds are given, the task is created from a retry. * Only return the last task id when a retry is created. + * + * @param {string} id + * @param {string[]} retriedIds + * @param {string[]} outputsIds */ -function createTask(id, outputsIds) { +function createTask(id, retriedIds, outputsIds) { const creationDate = faker.date.between({ from: sessionCreationDate, to: new Date() }) const submittedDate = faker.date.between({ from: creationDate, to: new Date() }) const startDate = faker.date.future({ refDate: submittedDate }) @@ -134,6 +138,7 @@ function createTask(id, outputsIds) { RemainingDataDependencies: {}, ExpectedOutputIds: expectedOutputIds, InitialTaskId: id ?? null, + RetryOfIds: retriedIds ?? [], Status: isRetried ? 11 /* Retry */ : 4 /* Completed */, StatusMessage: "", Options: { @@ -155,7 +160,7 @@ function createTask(id, outputsIds) { }) if (isRetried) { - const { taskId: id } = createTask(taskId, expectedOutputIds) + const { taskId: id } = createTask(taskId, retriedIds ? [taskId, ...retriedIds] : [taskId], expectedOutputIds) return { taskId: id, expectedOutputIds: expectedOutputIds From 86b35ffbb42b5178f12349e1462785c256668960 Mon Sep 17 00:00:00 2001 From: esoubiran-aneo Date: Tue, 11 Jul 2023 10:59:44 +0200 Subject: [PATCH 7/8] fix: pod ip and pod name, can't be null --- tools/mongodb/scripts/generate-real-world-run.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/mongodb/scripts/generate-real-world-run.js b/tools/mongodb/scripts/generate-real-world-run.js index d02a6c162..d5d0dc167 100644 --- a/tools/mongodb/scripts/generate-real-world-run.js +++ b/tools/mongodb/scripts/generate-real-world-run.js @@ -130,8 +130,8 @@ function createTask(id, retriedIds, outputsIds) { db.TaskData.insertOne({ _id: taskId, SessionId: sessionId, - OwnerPodId: null, - OwnerPodName: null, + OwnerPodId: '', + OwnerPodName: '', PayloadId: faker.string.uuid(), ParentTaskIds: [], DataDependencies: [], From 81850bbca1ba95dfdfcbd818f7f828b5807053f5 Mon Sep 17 00:00:00 2001 From: esoubiran-aneo Date: Tue, 11 Jul 2023 11:08:47 +0200 Subject: [PATCH 8/8] fix: initial task id can't be null and taskId is always set --- tools/mongodb/scripts/generate-real-world-run.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/mongodb/scripts/generate-real-world-run.js b/tools/mongodb/scripts/generate-real-world-run.js index d5d0dc167..911bd607e 100644 --- a/tools/mongodb/scripts/generate-real-world-run.js +++ b/tools/mongodb/scripts/generate-real-world-run.js @@ -137,7 +137,7 @@ function createTask(id, retriedIds, outputsIds) { DataDependencies: [], RemainingDataDependencies: {}, ExpectedOutputIds: expectedOutputIds, - InitialTaskId: id ?? null, + InitialTaskId: id ?? taskId, RetryOfIds: retriedIds ?? [], Status: isRetried ? 11 /* Retry */ : 4 /* Completed */, StatusMessage: "",