feat: implements batch file upload with promises
- Updated `DataPackage.js` to use the renamed `batchSizeFetch` setting in place of `batchModeValue`.
- Added the `batchSizeUpload` configuration (and renamed `batchModeValue` to `batchSizeFetch`) in `AppModel.js`.
- Implemented `uploadFilesInBatch` method in `DataItemView.js` to handle batch file uploads using promises.
- Ensured that the `_.each` loop completes before proceeding to batch processing.

Closes #2224
vchendrix committed Dec 16, 2024
1 parent 4401c12 commit dacf926
Showing 3 changed files with 133 additions and 56 deletions.
4 changes: 1 addition & 3 deletions src/js/collections/DataPackage.js
@@ -415,7 +415,6 @@ define([
}
},


/**
* Fetches member models in batches to avoid fetching all members simultaneously.
*
@@ -800,8 +799,7 @@ define([
// Don't fetch each member model if the fetchModels property on this Collection is set to false
if (this.fetchModels !== false) {
// Start fetching member models
this.fetchMemberModels.call(this, models, 0, MetacatUI.appModel.get("batchModeValue"));

this.fetchMemberModels.call(this, models, 0, MetacatUI.appModel.get("batchSizeFetch"));
}
} catch (error) {
console.log(error);
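The call above passes the new `batchSizeFetch` setting as the third argument of `fetchMemberModels(models, index, batchSize)`. The implementation itself is collapsed in this diff, so the following is only a rough sketch of the fetch-in-batches idea that the call implies; the body, the use of `Promise.allSettled`, and the treatment of `batchSize = 0` as "no batching" are assumptions, not the actual MetacatUI code.

// Illustrative sketch only -- not the real fetchMemberModels body.
fetchMemberModels(models, index = 0, batchSize = 0) {
  if (index >= models.length) return; // all members fetched

  // A batchSize of 0 is assumed to mean "fetch everything remaining at once"
  const size = batchSize > 0 ? batchSize : models.length - index;
  const batch = models.slice(index, index + size);

  // Fetch this batch, then recurse into the next slice
  Promise.allSettled(batch.map((model) => model.fetch())).then(() =>
    this.fetchMemberModels(models, index + size, batchSize),
  );
},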
20 changes: 18 additions & 2 deletions src/js/models/AppModel.js
@@ -2414,7 +2414,7 @@ define(["jquery", "underscore", "backbone"], function ($, _, Backbone) {
*/
packageFormat: "application%2Fbagit-1.0",
/**
* Whether to batch requests to the DataONE API. This is an experimental feature
* Whether to batch fetch requests to the DataONE API. This is an experimental feature
* and should be used with caution. If set to a number greater than 0, MetacatUI will
* batch requests to the DataONE API and send them in groups of this size. This can
* improve performance when making many requests to the DataONE API, but can also
@@ -2428,7 +2428,23 @@ define(["jquery", "underscore", "backbone"], function ($, _, Backbone) {
* @default 0
* @example 20
*/
batchModeValue: 0,
batchSizeFetch: 0,
/**
* Whether to batch uploads to the DataONE API. This is an experimental feature
* and should be used with caution. If set to a number greater than 0, MetacatUI will
* batch uploads to the DataONE API and send them in groups of this size. This can
* improve performance when uploading many files to the DataONE API, but can also
* cause issues if the requests are too large or if the DataONE API is not able to
* handle the batched requests.
*
* Currently, this feature is only used in the DataItemView when uploading files
* to the DataONE API.
*
* @type {number}
* @default 0
* @example 20
*/
batchSizeUpload: 0,
},
MetacatUI.AppConfig,
),
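For context, these defaults are merged with `MetacatUI.AppConfig` (visible at the bottom of the hunk above), so a deployment can opt in to batching without editing `AppModel.js`. A minimal sketch of such an override follows; the placement and the concrete values are placeholders, not part of this commit.

// Somewhere in the deployment's MetacatUI configuration (location is illustrative).
// Both settings default to 0, which leaves batching disabled.
MetacatUI.AppConfig = Object.assign({}, MetacatUI.AppConfig, {
  batchSizeFetch: 20, // fetch package member models 20 at a time
  batchSizeUpload: 10, // upload at most 10 files concurrently
});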
165 changes: 114 additions & 51 deletions src/js/views/DataItemView.js
@@ -776,6 +776,83 @@ define([
event.preventDefault();
},

/**
* Method to handle batch file uploads.
* Files are uploaded concurrently, up to the batch size, so a single large file does not block the rest of the queue.
*
* @param {DataONEObject[]} fileList - The list of DataONEObject models to be uploaded.
* @param {number} [batchSize=10] - The number of files to upload concurrently.
*/
uploadFilesInBatch(fileList, batchSize = 10) {
const view = this;
let currentIndex = 0; // Index of the current file being processed
let activeUploads = 0; // Counter for the number of active uploads

// If batchSize is 0, set it to the total number of files
if (batchSize == 0) batchSize = fileList.length;

/**
* Function to upload the next file in the list.
* This function is called recursively to ensure that the number of concurrent uploads
* does not exceed the batch size.
*/
function uploadNextFile() {
// If all files have been processed, return
if (currentIndex >= fileList.length) {
return;
}

// If the number of active uploads is less than the batch size, start a new upload
if (activeUploads < batchSize) {
const dataONEObject = fileList[currentIndex];
currentIndex++; // Move to the next file
activeUploads++; // Increment the active uploads counter

// Create a new Promise to handle the file upload
new Promise((resolve, reject) => {
// If the file needs to be uploaded and its checksum is not calculated
if (dataONEObject.get("uploadFile") && !dataONEObject.get("checksum")) {
// Stop listening to previous checksumCalculated events
dataONEObject.stopListening(dataONEObject, "checksumCalculated");
// Listen to the checksumCalculated event to start the upload
dataONEObject.listenToOnce(dataONEObject, "checksumCalculated", () => {
dataONEObject.save(); // Save the file
// Listen to changes in the uploadStatus to resolve the Promise
dataONEObject.listenTo(dataONEObject, "change:uploadStatus", () => {
if (
  dataONEObject.get("uploadStatus") !== "p" &&
  dataONEObject.get("uploadStatus") !== "q" &&
  dataONEObject.get("uploadStatus") !== "l"
) {
resolve(); // Resolve the Promise when the upload is complete
}
});
});
try {
dataONEObject.calculateChecksum(); // Calculate the checksum
} catch (exception) {
reject(exception); // Reject the Promise if an error occurs
}
} else {
resolve(); // Resolve the Promise if the file does not need to be uploaded
}
})
.then(() => {
activeUploads--; // Decrement the active uploads counter
uploadNextFile(); // Start the next file upload
})
.catch((error) => {
console.error("Error uploading file:", error);
activeUploads--; // Decrement the active uploads counter
uploadNextFile(); // Start the next file upload
});

uploadNextFile(); // Start the next file upload
}
}

// Start the initial batch of uploads
for (let i = 0; i < batchSize; i++) {
uploadNextFile();
}
},

/**
With a file list from the file picker or drag and drop,
add the files to the collection
@@ -805,60 +882,46 @@
if (typeof event.delegateTarget.dataset.id !== "undefined") {
this.parentSciMeta = this.getParentScienceMetadata(event);
this.collection = this.getParentDataPackage(event);
// Queue the files for upload
const queueFilesPromise = new Promise((resolve) => {
_.each(
fileList,
function (file) {
var uploadStatus = "l",
errorMessage = "";

if (file.size == 0) {
uploadStatus = "e";
errorMessage =
"This is an empty file. It won't be included in the dataset.";
}

// Read each file, and make a DataONEObject
_.each(
fileList,
function (file) {
var uploadStatus = "l",
errorMessage = "";

if (file.size == 0) {
uploadStatus = "e";
errorMessage =
"This is an empty file. It won't be included in the dataset.";
}

var dataONEObject = new DataONEObject({
synced: true,
type: "Data",
fileName: file.name,
size: file.size,
mediaType: file.type,
uploadFile: file,
uploadStatus: uploadStatus,
errorMessage: errorMessage,
isDocumentedBy: [this.parentSciMeta.id],
isDocumentedByModels: [this.parentSciMeta],
resourceMap: [this.collection.packageModel.id],
});
var dataONEObject = new DataONEObject({
synced: true,
type: "Data",
fileName: file.name,
size: file.size,
mediaType: file.type,
uploadFile: file,
uploadStatus: uploadStatus,
errorMessage: errorMessage,
isDocumentedBy: [this.parentSciMeta.id],
isDocumentedByModels: [this.parentSciMeta],
resourceMap: [this.collection.packageModel.id],
});

// Add it to the parent collection
this.collection.add(dataONEObject);
// Add it to the parent collection
this.collection.add(dataONEObject);
},
this,
);
resolve();
});

// Asynchronously calculate the checksum
if (
dataONEObject.get("uploadFile") &&
!dataONEObject.get("checksum")
) {
dataONEObject.stopListening(
dataONEObject,
"checksumCalculated",
);
dataONEObject.listenToOnce(
dataONEObject,
"checksumCalculated",
dataONEObject.save,
);
try {
dataONEObject.calculateChecksum();
} catch (exception) {
// TODO: Fail gracefully here for the user
}
}
},
this,
);
queueFilesPromise.then(() => {
// Call the batch upload method
this.uploadFilesInBatch(
  this.collection.models,
  MetacatUI.appModel.get("batchSizeUpload"),
);
});
}
},
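The `uploadFilesInBatch` method added above is an instance of a bounded-concurrency ("promise pool") pattern: start up to `batchSize` uploads, and start another whenever one settles. The stripped-down sketch below shows the same pattern in isolation; it is not MetacatUI code, and the names (`runWithLimit`, `uploadOne`, `items`) are purely illustrative.

// Generic bounded-concurrency runner (illustrative sketch, not MetacatUI code).
function runWithLimit(items, limit, uploadOne) {
  let index = 0; // next item to start
  let active = 0; // uploads currently in flight

  return new Promise((resolveAll) => {
    function next() {
      // Finished once everything has started and nothing is still in flight
      if (index >= items.length && active === 0) {
        resolveAll();
        return;
      }
      // Fill the pool up to the concurrency limit
      while (active < limit && index < items.length) {
        const item = items[index];
        index += 1;
        active += 1;
        Promise.resolve(uploadOne(item))
          .catch((error) => console.error("Upload failed:", error))
          .then(() => {
            active -= 1;
            next(); // a slot freed up, start the next item
          });
      }
    }
    next();
  });
}

// Usage sketch: runWithLimit(files, 10, (file) => upload(file));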
