diff --git a/package.json b/package.json index 530d089..f59943c 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ "dependencies": { "@google-cloud/storage": "^7.7.0", "@google/earthengine": "^0.1.385", + "@googleapis/drive": "^8.11.0", "@openeo/js-commons": "^1.4.1", "@openeo/js-processgraphs": "^1.3.0", "@seald-io/nedb": "^4.0.4", @@ -47,6 +48,7 @@ "check-disk-space": "^3.4.0", "epsg-index": "^2.0.0", "fs-extra": "^11.2.0", + "googleapis-common": "^7.2.0", "luxon": "^3.4.4", "proj4": "^2.10.0", "restify": "^11.1.0" diff --git a/src/api/collections.js b/src/api/collections.js index 287ff47..ea66491 100644 --- a/src/api/collections.js +++ b/src/api/collections.js @@ -48,7 +48,7 @@ export default class Data { const b = Date.now(); const pContext = this.context.processingContext(); - this.ee = await pContext.connectGee(true); + this.ee = await pContext.connect(true); console.log(`Established connection to GEE for STAC (${Date.now()-b} ms)`); return num; diff --git a/src/api/files.js b/src/api/files.js index cf93080..bc9d2e0 100644 --- a/src/api/files.js +++ b/src/api/files.js @@ -94,8 +94,7 @@ export default class FilesAPI { size: newFileStat.size, modified: Utils.getISODateTime(newFileStat.mtime) }); - } - catch (e) { + } catch (e) { if (this.context.debug) { console.error(e); } diff --git a/src/api/jobs.js b/src/api/jobs.js index 3d43273..2986823 100644 --- a/src/api/jobs.js +++ b/src/api/jobs.js @@ -54,6 +54,9 @@ export default class JobsAPI { if (!req.user._id) { throw new Errors.AuthenticationRequired(); } + + // Update the task status + this.context.processingContext(req.user).startTaskMonitor(); } async getJobs(req, res) { @@ -200,6 +203,9 @@ export default class JobsAPI { } const makeStorageUrl = obj => { + if (Utils.isUrl(obj.href)) { + return obj; + } obj.href = API.getUrl("/storage/" + job.token + "/" + obj.href); return obj; }; @@ -246,7 +252,7 @@ export default class JobsAPI { if (this.storage.isFieldEditable(key)) { switch(key) { case 'process': { - const pg = new ProcessGraph(req.body.process, this.context.processingContext(req.user)); + const pg = new ProcessGraph(req.body.process, this.context.processingContext(req.user, job)); pg.allowUndefinedParameters(false); promises.push(pg.validate()); break; diff --git a/src/api/services.js b/src/api/services.js index ccbf2d7..550a000 100644 --- a/src/api/services.js +++ b/src/api/services.js @@ -118,7 +118,7 @@ export default class ServicesAPI { if (this.storage.isFieldEditable(key)) { switch(key) { case 'process': { - const pg = new ProcessGraph(req.body.process, this.context.processingContext(req.user)); + const pg = new ProcessGraph(req.body.process, this.context.processingContext(req.user, service)); // ToDo 1.0: Correctly handle service paramaters #79 pg.allowUndefinedParameters(false); promises.push(pg.validate()); diff --git a/src/api/worker/batchjob.js b/src/api/worker/batchjob.js index b22e019..6d6a344 100644 --- a/src/api/worker/batchjob.js +++ b/src/api/worker/batchjob.js @@ -3,6 +3,7 @@ import path from 'path'; import ProcessGraph from '../../processgraph/processgraph.js'; import GeeResults from '../../processes/utils/results.js'; import Utils from '../../utils/utils.js'; +import GDrive from '../../utils/gdrive.js'; const packageInfo = Utils.require('../../package.json'); export default async function run(config, storage, user, query) { @@ -19,26 +20,59 @@ export default async function run(config, storage, user, query) { await Promise.all(cleanupTasks); logger.info("Starting batch job"); + await storage.updateJobStatus(query, 'running'); - const context = config.processingContext(user); + const jobfolder = storage.getJobFolder(job._id); + await fse.ensureDir(path.dirname(jobfolder)); + + const context = config.processingContext(user, job); const pg = new ProcessGraph(job.process, context, logger); await pg.execute(); - const computeTasks = pg.getResults().map(async (datacube) => { - const response = await GeeResults.retrieve(context, datacube, logger); - const params = datacube.getOutputFormatParameters(); - const filename = (params.name || String(Utils.generateHash())) + GeeResults.getFileExtension(datacube, config); - const filepath = storage.getJobFile(job._id, filename); - logger.debug("Storing result to: " + filepath); - await fse.ensureDir(path.dirname(filepath)); - await new Promise((resolve, reject) => { - const writer = fse.createWriteStream(filepath); - response.data.pipe(writer); - writer.on('error', reject); - writer.on('close', resolve); - }); - return { filepath, datacube }; + const computeTasks = pg.getResults().map(async (dc) => { + const format = config.getOutputFormat(dc.getOutputFormat()); + const datacube = format.preprocess(GeeResults.BATCH, context, dc, logger); + + if (format.canExport()) { + // Ensure early that we have access to the Google Drive API + const drive = new GDrive(context.server(), user); + await drive.connect(); + // Start processing + const tasks = await format.export(context.ee, dc, context.getResource()); + storage.addTasks(job, tasks); + context.startTaskMonitor(); + const driveUrls = await new Promise((resolve, reject) => { + setInterval(async () => { + const updatedJob = await storage.getById(job._id, job.user_id); + if (!updatedJob) { + reject(new Error("Job was deleted")); + } + if (['canceled', 'error', 'finished'].includes(updatedJob.status)) { + resolve(job.googleDriveResults); + } + }, 10000); + }); + // Handle Google Drive specifics (permissions and public URLs) + const folderName = GDrive.getFolderName(job); + await drive.publishFoldersByName(folderName); + const files = await drive.getAssetsForFolder(folderName); + + return { files, datacube, links: driveUrls }; + } + else { + const response = await format.retrieve(context.ee, dc); + const params = datacube.getOutputFormatParameters(); + const filename = (params.name || String(Utils.generateHash())) + GeeResults.getFileExtension(datacube, config); + const filepath = storage.getJobFile(job._id, filename); + await new Promise((resolve, reject) => { + const writer = fse.createWriteStream(filepath); + response.data.pipe(writer); + writer.on('error', reject); + writer.on('close', resolve); + }); + return { files: [filepath], datacube }; + } }); await Promise.all(computeTasks); @@ -53,6 +87,7 @@ export default async function run(config, storage, user, query) { await fse.writeJSON(stacpath, item, {spaces: 2}); logger.info("Finished"); + // todo: set to error is any task failed storage.updateJobStatus(query, 'finished'); } catch(e) { logger.error(e); @@ -77,17 +112,11 @@ async function createSTAC(storage, job, results) { let startTime = null; let endTime = null; const extents = []; - for(const { filepath, datacube } of results) { - const filename = path.basename(filepath); - const stat = await fse.stat(filepath); - let asset = { - href: path.relative(folder, filepath), + for(const result of results) { + const files = result.files || []; + const datacube = result.datacube; + const baseAsset = { roles: ["data"], - type: Utils.extensionToMediaType(filepath), - title: filename, - "file:size": stat.size, - created: stat.birthtime, - updated: stat.mtime }; if (datacube.hasT()) { @@ -109,8 +138,8 @@ async function createSTAC(storage, job, results) { const extent = datacube.getSpatialExtent(); let wgs84Extent = extent; if (crs !== 4326) { - asset["proj:epsg"] = crs; - asset["proj:geometry"] = extent; + baseAsset["proj:epsg"] = crs; + baseAsset["proj:geometry"] = extent; wgs84Extent = Utils.projExtent(extent, 4326); } // Check the coordinates with a delta of 0.0001 or so @@ -120,8 +149,34 @@ async function createSTAC(storage, job, results) { } } - const params = datacube.getOutputFormatParameters(); - assets[filename] = Object.assign(asset, params.metadata); + for (const file of files) { + let asset; + let filename; + if (Utils.isUrl(file)) { + let url = new URL(file); + filename = path.basename(url.pathname || url.hash.substring(1)); + asset = { + href: file, + // type: Utils.extensionToMediaType(file), + title: filename + }; + } + else { + filename = path.basename(file); + const stat = await fse.stat(file); + asset = { + href: path.relative(folder, file), + type: Utils.extensionToMediaType(file), + title: filename, + "file:size": stat.size, + created: stat.birthtime, + updated: stat.mtime + }; + } + + const params = datacube.getOutputFormatParameters(); + assets[filename] = Object.assign(asset, baseAsset, params.metadata); + } } const item = { stac_version: packageInfo.stac_version, diff --git a/src/api/worker/sync.js b/src/api/worker/sync.js index 9934e1f..4ebe730 100644 --- a/src/api/worker/sync.js +++ b/src/api/worker/sync.js @@ -25,7 +25,11 @@ export default async function run(config, user, id, process, log_level) { if (pg.getResults().length > 1) { logger.warn("Multiple results can't be processed in synchronous mode. Only the result from the result node will be returned."); } - return await GeeResults.retrieve(context, resultNode.getResult(), logger); + + const dc = resultNode.getResult(); + const format = config.getOutputFormat(dc.getOutputFormat()); + const dc2 = format.preprocess(GeeResults.SYNC, context, dc, logger); + return await format.retrieve(context.ee, dc2); } export async function getResultLogs(user_id, id, log_level) { diff --git a/src/api/worker/webservice.js b/src/api/worker/webservice.js index 64075ee..ff5d5e4 100644 --- a/src/api/worker/webservice.js +++ b/src/api/worker/webservice.js @@ -9,7 +9,7 @@ export default async function run(config, storage, user, query, xyz) { try { const rect = storage.calculateXYZRect(...xyz); - const context = config.processingContext(user); + const context = config.processingContext(user, service); // Update user id to the user id, which stored the job. // See https://github.com/Open-EO/openeo-earthengine-driver/issues/19 context.setUserId(service.user_id); @@ -29,7 +29,9 @@ export default async function run(config, storage, user, query, xyz) { dc.setOutputFormat('png'); } - return await GeeResults.retrieve(context, dc, logger); + const format = config.getOutputFormat(dc.getOutputFormat()); + const dc2 = format.preprocess(GeeResults.SERVICE, context, dc, logger); + return await format.retrieve(context.ee, dc2); } catch(e) { logger.error(e); throw e; diff --git a/src/formats/bitmap.js b/src/formats/bitmap.js index 679a7d7..b2b0cb2 100644 --- a/src/formats/bitmap.js +++ b/src/formats/bitmap.js @@ -3,6 +3,7 @@ import GeeResults from "../processes/utils/results.js"; import DataCube from "../datacube/datacube.js"; import Utils from "../utils/utils.js"; import FileFormat, { EPSGCODE_PARAMETER, SIZE_PARAMETER } from "./fileformat.js"; +import HttpUtils from "../utils/http.js"; export const EPSGCODE_PARAMETER_BITMAP = Object.assign({}, EPSGCODE_PARAMETER); EPSGCODE_PARAMETER_BITMAP.default = 4326; @@ -96,7 +97,7 @@ export default class BitmapLike extends FileFormat { return renderer === 'filmstrip'; } - preprocess(context, dc, logger) { + preprocess(mode, context, dc, logger) { const ee = context.ee; const parameters = dc.getOutputFormatParameters(); @@ -175,7 +176,7 @@ export default class BitmapLike extends FileFormat { reject('Download URL provided by Google Earth Engine is empty.'); } else { - resolve(url); + resolve(HttpUtils.stream(url)); } }); }); diff --git a/src/formats/fileformat.js b/src/formats/fileformat.js index a812d0b..d8e2c23 100644 --- a/src/formats/fileformat.js +++ b/src/formats/fileformat.js @@ -16,7 +16,7 @@ export const SIZE_PARAMETER = { export const SCALE_PARAMETER = { type: 'number', - description: 'Scale of the image in meters per pixel.', + description: 'Scale of the image in meters per pixel. Defaults to native resolution in batch jobs, and 100 otherwise.', default: 100, minimum: 1 }; @@ -81,15 +81,19 @@ export default class FileFormat { }; } - preprocess(context, dc/*, logger*/) { + preprocess(mode, context, dc/*, logger*/) { return dc; } - async retrieve(/*ee, dc*/) { + async retrieve(/*ee, dc */) { throw new Error('Not implemented'); } - async export(/*ee, dc*/) { + canExport() { + return false; + } + + async export(/*ee, dc */) { throw new Error('Not implemented'); } diff --git a/src/formats/gif.js b/src/formats/gif.js index bc57ed6..27eb9c3 100644 --- a/src/formats/gif.js +++ b/src/formats/gif.js @@ -1,3 +1,4 @@ +import HttpUtils from "../utils/http.js"; import Utils from "../utils/utils.js"; import BitmapLike from "./bitmap.js"; @@ -53,7 +54,7 @@ export default class GifFormat extends BitmapLike { reject('Download URL provided by Google Earth Engine is empty.'); } else { - resolve(url); + resolve(HttpUtils.stream(url)); } }); }); diff --git a/src/formats/gtiff.js b/src/formats/gtiff.js index 979386d..e3ce7a7 100644 --- a/src/formats/gtiff.js +++ b/src/formats/gtiff.js @@ -2,28 +2,24 @@ import GeeResults from "../processes/utils/results.js"; import DataCube from "../datacube/datacube.js"; import Utils from "../utils/utils.js"; import FileFormat, { EPSGCODE_PARAMETER, SCALE_PARAMETER } from "./fileformat.js"; +import HttpUtils from "../utils/http.js"; export default class GTiffFormat extends FileFormat { constructor(title = 'GeoTiff', parameters = {}) { - super(title, parameters); + super(title, parameters, "Cloud-optimized in batch jobs, not cloud-optimized otherwise."); this.addParameter('scale', SCALE_PARAMETER); this.addParameter('epsgCode', EPSGCODE_PARAMETER); - this.addParameter('zipped', { - type: 'boolean', - description: 'Pack the GeoTiff files into ZIP files, one file per band.', - default: false - }); } getGisDataTypes() { return ['raster']; } - getFileExtension(parameters) { - return parameters.zipped ? '.zip' : '.tiff'; + getFileExtension(/*parameters*/) { + return '.tiff'; } - preprocess(context, dc, logger) { + preprocess(mode, context, dc, logger) { const ee = context.ee; const parameters = dc.getOutputFormatParameters(); const dc2 = new DataCube(ee, dc); @@ -33,7 +29,8 @@ export default class GTiffFormat extends FileFormat { if (dc2.hasT()) { dc2.dimT().drop(); } - return dc2.setData(GeeResults.toImageOrCollection(ee, logger, dc.getData())); + const allowMultiple = (mode === GeeResults.BATCH); + return dc2.setData(GeeResults.toImageOrCollection(ee, logger, dc.getData(), allowMultiple)); } async retrieve(ee, dc) { @@ -46,18 +43,13 @@ export default class GTiffFormat extends FileFormat { crs = Utils.crsToString(dc.getCrs()); } - const format = parameters.zipped ? 'ZIPPED_GEO_TIFF' : 'GEO_TIFF'; - const data = dc.getData(); - if (data instanceof ee.ImageCollection) { - // todo: implement - } - else if (data instanceof ee.Image) { + if (data instanceof ee.Image) { const eeOpts = { scale: parameters.scale || 100, region, crs, - format + format: 'GEO_TIFF' }; return await new Promise((resolve, reject) => { data.getDownloadURL(eeOpts, (url, err) => { @@ -68,12 +60,25 @@ export default class GTiffFormat extends FileFormat { reject('Download URL provided by Google Earth Engine is empty.'); } else { - resolve(url); + resolve(HttpUtils.stream(url)); } }); }); } + else { + throw new Error('Only single images are supported in this processing mode for GeoTIFF.'); + } + } + + canExport() { + return true; + } + async export(ee, dc, job) { + return await GeeResults.exportToDrive(ee, dc, job, 'GeoTIFF', { + cloudOptimized: true, + // noData: NaN + }); } } diff --git a/src/models/jobstore.js b/src/models/jobstore.js index 074fa16..90a21ec 100644 --- a/src/models/jobstore.js +++ b/src/models/jobstore.js @@ -4,11 +4,22 @@ import fse from 'fs-extra'; import path from 'path'; import Errors from '../utils/errors.js'; import Logs from './logs.js'; +import Utils from '../utils/utils.js'; + +const TASK_STATUS_MAP = { + PENDING: 'queued', + RUNNING : 'running', + CANCELLING: 'cancelled', + SUCCEEDED: 'finished', + CANCELLED: 'canceled', + FAILED: 'error' +}; export default class JobStore { constructor() { this.db = DB.load('jobs'); + this.taskDb = DB.load('tasks'); this.editableFields = ['title', 'description', 'process', 'plan', 'budget']; this.jobFolder = './storage/job_files'; this.logFileName = 'logs.db'; @@ -72,6 +83,182 @@ export default class JobStore { } } + async addTasks(job, tasks) { + const rows = []; + let logger = null; + for (const task of tasks) { + let status = 'queued'; + if (task.error) { + status = 'error'; + if (logger === null) { + logger = await this.getLogsById(job._id, job.log_level); + } + logger.error(task.error, {taskId: task.taskId}); + } + rows.push({ + task_id: task.taskId, + image_id: task.imageId, + job_id: job._id, + user_id: job.user_id, + google_user_id: Utils.isGoogleUser(job.user_id) ? job.user_id : '', + done: false, + script: null, + status, + stageOffset: 0, + progress: 0, + updated: Utils.toISODate(Date.now()), + results: [], + usage: null + }); + } + await this.taskDb.insertAsync(rows); + } + + async getTaskCount(google_user_id) { + return await this.taskDb.countAsync({google_user_id}); + } + + async updateTasks(ops) { + const documents = {}; + const newLogs = {}; + for (const op of ops) { + try { + const taskId = op.name.split('/').pop(); + const query = { task_id: taskId }; + const task = await this.taskDb.findOneAsync(query); + if (!task || (task.done && op.done)) { + // (1) Task not found, probably started in GEE directly, OR + // (2) Task is done and we synced it already, no need to update + continue; + } + + const jobId = task.job_id; + documents[jobId] = documents[jobId] || []; + newLogs[jobId] = newLogs[jobId] || []; + + if (task.attempt > 1 && task.attempt !== op.metadata.attempt) { + // New attempt, reset stage offset + task.stageOffset = 0; + newLogs[jobId].push({ + level: 'warning', + message: 'Started new attempt, previous attempt was aborted.' + }); + } + + if (Array.isArray(op.metadata.stages)) { + const stages = op.metadata.stages.slice(task.stageOffset); + for (const stage of stages) { + task.stageOffset++; + newLogs[jobId].push({ + level: 'info', + message: stage.description + }); + } + } + if (op.done && op.error) { + newLogs[jobId].push({ + level: 'error', + message: op.error.message, + code: String(op.error.message.code), + data: op.error.details + }); + } + + let progress = ops.done ? 100 : 0; + if (op.metadata.progress > 0) { + progress = op.metadata.progress * 100; + } + + if (!task.script && op.metadata.scriptUri) { + newLogs[jobId].push({ + level: 'info', + message: 'Google Earth Engine script can be found at: ' + op.metadata.scriptUri + }); + } + + const update = { + $set: { + status: TASK_STATUS_MAP[op.metadata.state], + progress, + attempt: op.metadata.attempt || 0, + done: op.done || false, + updated: op.metadata.updateTime, + script: op.metadata.scriptUri || null, + results: op.metadata.destinationUris || [], + usage: op.metadata.batchEecuUsageSeconds || 0, + stageOffset: task.stageOffset + } + }; + + const { affectedDocuments } = await this.taskDb.updateAsync(query, update, { returnUpdatedDocs: true }); + if (affectedDocuments) { + documents[task.job_id].push(affectedDocuments); + } + } catch (e) { + console.error(e); + } + + for(const jobId in documents) { + const tasks = documents[jobId]; + const job = await this.getById(jobId, tasks[0].user_id); + + const jobUpdates = { + updated: Utils.toISODate(Date.now()) + }; + if (tasks.some(t => t.status === 'running')) { + jobUpdates.status = 'running'; + } + else if (tasks.every(t => t.status === 'finished')) { + jobUpdates.status = 'finished'; + } + else if (tasks.some(t => t.status === 'error')) { + jobUpdates.status = 'error'; + } + else if (tasks.some(t => t.status === 'canceled')) { + jobUpdates.status = 'canceled'; + } + else { + jobUpdates.status = 'queued'; + } + + jobUpdates.progress = tasks.reduce((acc, t) => acc + t.progress, 0) / tasks.length; + + jobUpdates.usage = { + cpu: { + value: tasks.reduce((acc, t) => acc + t.batchEecuUsageSeconds, 0), + unit: 'eecu-seconds' + } + }; + + jobUpdates.googleDriveResults = tasks.reduce((acc, t) => acc.concat(t.results), []); + + await this.db.updateAsync({_id: jobId}, { $set: jobUpdates }); + + if (newLogs[jobId].length > 0) { + const logs = await this.getLogsById(jobId, job.log_level); + for (const log of newLogs[jobId]) { + logs.add(log.message, log.level, log.data, logs.trace, log.code); + } + } + } + } + } + + async removeTasks(taskIds) { + if (!Array.isArray(taskIds)) { + taskIds = [taskIds]; + } + + const query = { task_id: { $in: taskIds } }; + const { numRemoved } = await this.taskDb.removeAsync(query); + return numRemoved; + } + + async removeTasksForUser(userId) { + const { numRemoved } = await this.taskDb.removeAsync({ user_id: userId }); + return numRemoved; + } + async removeResults(jobId, removeLogs = true) { const p = this.makeFolder(this.jobFolder, [jobId]); if (!p) { diff --git a/src/models/userstore.js b/src/models/userstore.js index 5cfc0cd..146fc12 100644 --- a/src/models/userstore.js +++ b/src/models/userstore.js @@ -4,6 +4,7 @@ import Errors from '../utils/errors.js'; import crypto from "crypto"; import HttpUtils from '../utils/http.js'; import fse from 'fs-extra'; +import GDrive from '../utils/gdrive.js'; export default class UserStore { @@ -21,9 +22,7 @@ export default class UserStore { "openid", "email", "https://www.googleapis.com/auth/earthengine", - // "https://www.googleapis.com/auth/cloud-platform", - // "https://www.googleapis.com/auth/devstorage.full_control" - ]; + ].concat(GDrive.SCOPES); } database() { @@ -157,6 +156,10 @@ export default class UserStore { const jobCount = await jobDb.removeAsync({user_id}, {multi: true}); console.log(`- Removed ${jobCount} batch jobs from database`); + // note: doesn't cancel tasks, just removes them from the database + const taskCount = jobModel.removeTasksForUser(user_id); + console.log(`- Removed ${taskCount} GEE tasks from database`); + await Promise.all(jobs.map(job => jobModel.removeResults(job._id))); console.log('- Removed batch job results and logs from file system'); } catch (err) { @@ -207,7 +210,7 @@ export default class UserStore { async authenticateGoogle(token) { const userInfo = await this.getOidcUserInfo(token); const userData = this.emptyUser(false); - userData._id = "google_" + userInfo.sub; + userData._id = Utils.GOOGLE_USER_PREFIX + userInfo.sub; userData.name = userInfo.name || userInfo.email || null; userData.email = userInfo.email || null; userData.token = token; diff --git a/src/processes/utils/results.js b/src/processes/utils/results.js index 4f6f44e..7b2c8c4 100644 --- a/src/processes/utils/results.js +++ b/src/processes/utils/results.js @@ -1,8 +1,14 @@ +import GDrive from '../../utils/gdrive.js'; +import Utils from '../../utils/utils.js'; +import GeeProcessing from './processing.js'; import GeeTypes from './types.js'; -import HttpUtils from '../../utils/http.js'; const GeeResults = { + BATCH: 1, + SYNC: 2, + SERVICE: 3, + toImageOrCollection(ee, logger, data, allowMultiple = false) { const eeData = GeeTypes.toEE(ee, logger, data); if (eeData instanceof ee.Image) { @@ -37,21 +43,67 @@ const GeeResults = { return ext || ''; }, - // Returns AxiosResponse (object) or URL (string) - async retrieve(context, dc, logger) { - const ee = context.ee; - const config = context.server(); - - const format = config.getOutputFormat(dc.getOutputFormat()); - dc = format.preprocess(context, dc, logger); + async exportToDrive(ee, dc, job, fileFormat, formatOptions) { + const parameters = dc.getOutputFormatParameters(); - let response = await format.retrieve(ee, dc); - if (typeof response === 'string') { - logger.debug("Downloading data from Google: " + response); - response = await HttpUtils.stream(response); + let region = null; + let crs = null; + if (dc.hasXY()) { + region = Utils.bboxToGeoJson(dc.getSpatialExtent()); + crs = Utils.crsToString(dc.getCrs()); } - return response; + const data = ee.ImageCollection(dc.getData()); + const imageList = data.toList(data.size()); + const imgCount = await GeeProcessing.evaluate(imageList.size()); + const tasks = []; + for (let i = 0; i < imgCount; i++) { + let taskId = null; + let error = null; + let imageId; + try { + const image = ee.Image(imageList.get(i)); + imageId = await GeeProcessing.evaluate(image.id()); + + let crsTransform, scale; + if (parameters.scale > 0) { + scale = parameters.scale; + } + else { + const projection = await GeeProcessing.evaluate(image.projection()); + crsTransform = projection.transform; + } + + const task = ee.batch.Export.image.toDrive({ + image, + description: job.title, + folder: GDrive.getFolderName(job), + fileNamePrefix: imageId, + skipEmptyTiles: true, + crs, + crsTransform, + region, + scale, + fileFormat, + formatOptions + }); + taskId = await new Promise((resolve, reject) => { + task.start( + () => resolve(task.id), + (message) => reject(new Error(message)) + ) + }); + } catch (e) { + error = e.message; + } finally { + tasks.push({ + taskId, + imageId, + error + }); + } + } + return tasks; } }; diff --git a/src/processgraph/jsonschema.js b/src/processgraph/jsonschema.js index abff0fe..e19f702 100644 --- a/src/processgraph/jsonschema.js +++ b/src/processgraph/jsonschema.js @@ -18,7 +18,7 @@ export default class GeeJsonSchemaValidator extends JsonSchemaValidator { } async validateJobId(data) { - const job = await this.context.getJob(data); + const job = await this.context.server().jobs().getById(data); if (job === null) { throw new ajv.ValidationError([{ message: "Job doesn't exist." @@ -46,7 +46,7 @@ export default class GeeJsonSchemaValidator extends JsonSchemaValidator { } async validateProcessGraphId(data) { - const pg = await this.context.getStoredProcessGraph(data); + const pg = await this.context.server().storedProcessGraphs().getById(data); if (pg === null) { throw new ajv.ValidationError([{ message: "Stored process graph doesn't exist." diff --git a/src/processgraph/processgraph.js b/src/processgraph/processgraph.js index 83a4789..98d5c6a 100644 --- a/src/processgraph/processgraph.js +++ b/src/processgraph/processgraph.js @@ -63,7 +63,7 @@ export default class GeeProcessGraph extends ProcessGraph { async execute(args = null) { if (this.parentNode === null) { - await this.context.connectGee(); + await this.context.connect(); } return await super.execute(args); } diff --git a/src/utils/config.js b/src/utils/config.js index 2f1c4c2..746ceaa 100644 --- a/src/utils/config.js +++ b/src/utils/config.js @@ -29,6 +29,8 @@ export default class Config { certificate: null }; + // We need access to GEE + Drive + this.apiKey = null; this.serviceAccountCredentialsFile = null; this.googleAuthClients = []; diff --git a/src/utils/gdrive.js b/src/utils/gdrive.js new file mode 100644 index 0000000..788fc3a --- /dev/null +++ b/src/utils/gdrive.js @@ -0,0 +1,96 @@ +import drive from '@googleapis/drive'; +import { JWT } from 'googleapis-common'; +import Utils from './utils.js'; + +export default class GDrive { + + static SCOPES = [ + //"https://www.googleapis.com/auth/drive.file", + "https://www.googleapis.com/auth/drive" + ]; + + static getFolderName(job) { + return `gee-${job._id}`; + } + + constructor(context, user) { + this.drive = null; + this.context = context; + this.user = user; + } + + async connect() { + if (this.drive) { + return; + } + + let authType; + const options = { + version: 'v3' + }; + if (Utils.isGoogleUser(this.user._id)) { + authType = "user token"; + options.access_token = this.user.token; + options.auth = this.context.apiKey; + } + else if (this.context.eePrivateKey) { + authType = "private key"; + const client = this.context.eePrivateKey; + options.auth = new JWT(client.client_email, null, client.private_key, GDrive.SCOPES); + } + else { + throw new Error("No authentication method available, must have at least a private key configured."); + } + + this.drive = drive.drive(options); + // await this.drive.files.list({pageSize: 1}); + console.log(`Authenticated at Google Drive via ${authType}`); + } + + // Get the ID from URL + // https://drive.google.com/#folders/1rqL0rZqBCvNS9ZhgiJmPGN72y9ZfS3Ly + // => 1rqL0rZqBCvNS9ZhgiJmPGN72y9ZfS3Ly is the ID + getIdFromUrl(url) { + const parsed = new URL(url); + return parsed.hash.split('/').pop(); + } + + async publishFoldersByName(name) { + const res = await this.drive.files.list({ + q: `mimeType = 'application/vnd.google-apps.folder' and name = '${name}'`, + }); + const folders = res.data.files; + if (folders.length === 0) { + throw new Error(`Folder not found: ${name}`); + } + else { + console.log(folders); + const promises = folders.map(folder => this.publishFolder(folder.id)); + return Promise.all(promises); + } + } + + async publishFolder(id) { + if (Utils.isUrl(id)) { + id = this.getIdFromUrl(id); + } + return await this.drive.permissions.create({ + resource: { + 'type': 'anyone', + 'role': 'reader' + }, + fileId: id, + // fields: 'id', + }); + } + + async getAssetsForFolder(name) { + const res = await this.drive.files.list({ + pageSize: 1000, + q: `'${name}' in parents`, + }); + const files = res.data.files; + console.log(files); + return files; + } +} diff --git a/src/utils/processingcontext.js b/src/utils/processingcontext.js index c01e4d9..82131e4 100644 --- a/src/utils/processingcontext.js +++ b/src/utils/processingcontext.js @@ -1,36 +1,37 @@ import Utils from './utils.js'; -import fse from 'fs-extra'; export default class ProcessingContext { - constructor(serverContext, user = null) { + constructor(serverContext, user = null, parentResource = null) { this.serverContext = serverContext; this.user = user; this.userId = user ? user._id : null; + this.googleUserId = ''; + this.parentResource = parentResource; this.ee = Utils.require('@google/earthengine'); - this.eePrivateKey = null; + this.taskMonitorId = null; + this.connected = false; } - async connectGee(forcePrivateKey = false) { + async connect(forcePrivateKey = false) { + if (this.connected) { + return this.ee; + } + const user = this.getUser(); const ee = this.ee; - if (!forcePrivateKey && typeof this.userId === 'string' && this.userId.startsWith("google-")) { + if (!forcePrivateKey && Utils.isGoogleUser(this.userId)) { console.log("Authenticate via user token"); const expires = 59 * 60; // todo auth: get expiration from token and set more parameters #82 ee.apiclient.setAuthToken(null, 'Bearer', user.token, expires, [], null, false, false); + this.googleUserId = this.userId; } - else if (this.serverContext.serviceAccountCredentialsFile) { + else if (this.serverContext.eePrivateKey) { console.log("Authenticate via private key"); - if (!this.eePrivateKey) { - this.eePrivateKey = await fse.readJSON(this.serverContext.serviceAccountCredentialsFile); - } - if (!Utils.isObject(this.eePrivateKey)) { - console.error("ERROR: GEE private key not found."); - } await new Promise((resolve, reject) => { ee.data.authenticateViaPrivateKey( - this.eePrivateKey, + this.serverContext.eePrivateKey, () => resolve(), error => reject("ERROR: GEE Authentication failed: " + error.message) ); @@ -41,6 +42,7 @@ export default class ProcessingContext { } await ee.initialize(); + this.connected = true; return ee; } @@ -52,12 +54,8 @@ export default class ProcessingContext { return this.serverContext.collections().getData(id); } - getStoredProcessGraph(id) { // returns promise - return this.serverContext.storedProcessGraphs().getById(id); - } - - getJob(jobId) { // returns promise - return this.serverContext.jobs().getById(jobId); + getResource() { + return this.parentResource; } getVariable(id) { @@ -76,4 +74,32 @@ export default class ProcessingContext { return this.user; } + getGoogleUserId() { + return Utils.isGoogleUser(this.userId) ? this.userId : ''; + } + + startTaskMonitor() { + const googleUserId = this.getGoogleUserId(); + this.serverContext.removeTaskMonitor(googleUserId); + this.taskMonitorId = setTimeout(this.monitorTasks.bind(this), 60 * 1000); + this.serverContext.addTaskMonitor(googleUserId, this.taskMonitorId); + this.monitorTasks(); + } + + async monitorTasks() { + const jobModel = this.serverContext.jobs(); + const taskCount = await this.serverContext.jobs().getTaskCount(this.getGoogleUserId()); + if (taskCount === 0) { + // Nothing to monitor + return; + } + try { + await this.connect(); + this.ee.data.listOperations(null, ops => jobModel.updateTasks(ops)); + } catch (e) { + this.serverContext.removeTaskMonitor(this.googleUserId); + console.error(e); + } + } + } diff --git a/src/utils/servercontext.js b/src/utils/servercontext.js index 22bbce7..a1912c4 100644 --- a/src/utils/servercontext.js +++ b/src/utils/servercontext.js @@ -10,6 +10,8 @@ import JobStore from '../models/jobstore.js'; import UserStore from '../models/userstore.js'; import ServiceStore from '../models/servicestore.js'; +import fse from 'fs-extra'; + export default class ServerContext extends Config { constructor() { @@ -21,6 +23,23 @@ export default class ServerContext extends Config { this.jobStore = new JobStore(); this.userStore = new UserStore(this); this.serviceStore = new ServiceStore(); + this.taskMonitors = {}; + this.eePrivateKey = null; + this.loadPrivateKey(); + } + + loadPrivateKey() { + if (!this.serviceAccountCredentialsFile || this.eePrivateKey) { + return; + } + try { + this.eePrivateKey = fse.readJSONSync(this.serviceAccountCredentialsFile); + if (!Utils.isObject(this.eePrivateKey)) { + console.error("ERROR: GEE private key invalid."); + } + } catch (error) { + console.error("ERROR: GEE private key not provided. " + error.message); + } } jobs() { @@ -74,8 +93,19 @@ export default class ServerContext extends Config { return (typeof service_type === 'string' && Utils.isObject(this.services[service_type.toLowerCase()])); } - processingContext(user) { - return new ProcessingContext(this, user); + processingContext(user, parentResource = null) { + return new ProcessingContext(this, user, parentResource); + } + + addTaskMonitor(userId, monitorId) { + this.taskMonitors[userId] = monitorId; + } + + removeTaskMonitor(userId) { + if (userId in this.taskMonitors) { + clearTimeout(this.taskMonitors[userId]); + delete this.taskMonitors[userId]; + } } } diff --git a/src/utils/utils.js b/src/utils/utils.js index 0550d0f..586ae7e 100644 --- a/src/utils/utils.js +++ b/src/utils/utils.js @@ -9,6 +9,16 @@ const Utils = { crsBboxes: {}, + GOOGLE_USER_PREFIX: "google_", + + isGoogleUser(userId) { + return typeof userId === 'string' && userId.startsWith(Utils.GOOGLE_USER_PREFIX); + }, + + isUrl(url) { + return typeof url === 'string' && url.includes('://'); + }, + require(file) { const require = createRequire(import.meta.url); return require(file);