From dbcac54a600262aab96378e86a590aa0a7c739fe Mon Sep 17 00:00:00 2001 From: Bryan Date: Tue, 22 Oct 2024 18:24:00 -0700 Subject: [PATCH] Feature: Upload external datasets (#112) * add endpoint for uploading external datasets * chunk segments into payload sizes less than Jetty size limit * cleanup after dataset creation failure * fix codeql errors (squashable) * return external dataset upload errors to client --- package-lock.json | 11 + package.json | 1 + src/env.ts | 2 +- src/main.ts | 2 +- src/packages/auth/adapters/CAMAuthAdapter.ts | 2 +- src/packages/auth/adapters/NoAuthAdapter.ts | 2 +- src/packages/auth/functions.ts | 2 +- src/packages/auth/routes.ts | 2 +- src/packages/plan/gql.ts | 29 ++ src/packages/plan/plan.ts | 377 ++++++++++++++++-- src/{packages/auth/types.ts => types/auth.ts} | 0 src/types/dataset.ts | 22 + src/types/hasura.ts | 5 + src/{packages/plan/types.ts => types/plan.ts} | 2 +- src/types/time.ts | 20 + src/util/fileParser.ts | 28 ++ src/util/time.test.ts | 55 +++ src/util/time.ts | 130 ++++++ 18 files changed, 657 insertions(+), 35 deletions(-) rename src/{packages/auth/types.ts => types/auth.ts} (100%) create mode 100644 src/types/dataset.ts create mode 100644 src/types/hasura.ts rename src/{packages/plan/types.ts => types/plan.ts} (98%) create mode 100644 src/types/time.ts create mode 100644 src/util/fileParser.ts create mode 100644 src/util/time.test.ts create mode 100644 src/util/time.ts diff --git a/package-lock.json b/package-lock.json index ad7e4d3..1bfb677 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,7 @@ "altair-express-middleware": "^5.2.11", "cookie-parser": "^1.4.6", "cors": "^2.8.5", + "csv-parse": "^5.5.6", "express": "^4.18.2", "express-rate-limit": "^6.7.0", "helmet": "^7.0.0", @@ -1873,6 +1874,11 @@ "node": ">= 8" } }, + "node_modules/csv-parse": { + "version": "5.5.6", + "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.5.6.tgz", + "integrity": "sha512-uNpm30m/AGSkLxxy7d9yRXpJQFrZzVWLFBkS+6ngPcZkw/5k3L/jjFuj7tVnEpRn+QgmiXr21nDlhCiUK4ij2A==" + }, "node_modules/data-uri-to-buffer": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz", @@ -6355,6 +6361,11 @@ "which": "^2.0.1" } }, + "csv-parse": { + "version": "5.5.6", + "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.5.6.tgz", + "integrity": "sha512-uNpm30m/AGSkLxxy7d9yRXpJQFrZzVWLFBkS+6ngPcZkw/5k3L/jjFuj7tVnEpRn+QgmiXr21nDlhCiUK4ij2A==" + }, "data-uri-to-buffer": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz", diff --git a/package.json b/package.json index fad3011..4ff2c9a 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ "altair-express-middleware": "^5.2.11", "cookie-parser": "^1.4.6", "cors": "^2.8.5", + "csv-parse": "^5.5.6", "express": "^4.18.2", "express-rate-limit": "^6.7.0", "helmet": "^7.0.0", diff --git a/src/env.ts b/src/env.ts index b3663f7..0c43edb 100644 --- a/src/env.ts +++ b/src/env.ts @@ -1,5 +1,5 @@ import type { Algorithm } from 'jsonwebtoken'; -import { GroupRoleMapping } from './packages/auth/types'; +import { GroupRoleMapping } from './types/auth'; export type Env = { ALLOWED_ROLES: string[]; diff --git a/src/main.ts b/src/main.ts index c7a64be..fd27de5 100644 --- a/src/main.ts +++ b/src/main.ts @@ -12,7 +12,7 @@ import initHealthRoutes from './packages/health/health.js'; import initPlanRoutes from './packages/plan/plan.js'; import initSwaggerRoutes from './packages/swagger/swagger.js'; 
import cookieParser from 'cookie-parser'; -import { AuthAdapter } from './packages/auth/types.js'; +import { AuthAdapter } from './types/auth.js'; import { NoAuthAdapter } from './packages/auth/adapters/NoAuthAdapter.js'; import { CAMAuthAdapter } from './packages/auth/adapters/CAMAuthAdapter.js'; import { validateGroupRoleMappings } from './packages/auth/functions.js'; diff --git a/src/packages/auth/adapters/CAMAuthAdapter.ts b/src/packages/auth/adapters/CAMAuthAdapter.ts index ccccd3c..daedf5e 100644 --- a/src/packages/auth/adapters/CAMAuthAdapter.ts +++ b/src/packages/auth/adapters/CAMAuthAdapter.ts @@ -1,7 +1,7 @@ import { getEnv } from '../../../env.js'; import { authGroupMappingsExist, generateJwt, getUserRoles, mapGroupsToRoles, syncRolesToDB } from '../functions.js'; import fetch from 'node-fetch'; -import type { AuthAdapter, AuthResponse, ValidateResponse } from '../types.js'; +import type { AuthAdapter, AuthResponse, ValidateResponse } from '../../../types/auth.js'; import { Request } from 'express'; diff --git a/src/packages/auth/adapters/NoAuthAdapter.ts b/src/packages/auth/adapters/NoAuthAdapter.ts index 2d91b4a..da8882e 100644 --- a/src/packages/auth/adapters/NoAuthAdapter.ts +++ b/src/packages/auth/adapters/NoAuthAdapter.ts @@ -1,4 +1,4 @@ -import type { AuthAdapter, ValidateResponse } from '../types.js'; +import type { AuthAdapter, ValidateResponse } from '../../../types/auth.js'; export const NoAuthAdapter: AuthAdapter = { logout: async (): Promise => true, diff --git a/src/packages/auth/functions.ts b/src/packages/auth/functions.ts index 8f616f0..049f15d 100644 --- a/src/packages/auth/functions.ts +++ b/src/packages/auth/functions.ts @@ -12,7 +12,7 @@ import type { JwtSecret, SessionResponse, UserRoles, -} from './types.js'; +} from '../../types/auth.js'; import { loginSSO } from './adapters/CAMAuthAdapter.js'; const logger = getLogger('packages/auth/functions'); diff --git a/src/packages/auth/routes.ts b/src/packages/auth/routes.ts index 7d2a7fd..ee6cae4 100644 --- a/src/packages/auth/routes.ts +++ b/src/packages/auth/routes.ts @@ -2,7 +2,7 @@ import type { Express } from 'express'; import rateLimit from 'express-rate-limit'; import { getEnv } from '../../env.js'; import { login, session } from './functions.js'; -import { AuthAdapter } from './types.js'; +import { AuthAdapter } from '../../types/auth.js'; export default (app: Express, auth: AuthAdapter) => { const { RATE_LIMITER_LOGIN_MAX } = getEnv(); diff --git a/src/packages/plan/gql.ts b/src/packages/plan/gql.ts index 1f49ac0..a21e03e 100644 --- a/src/packages/plan/gql.ts +++ b/src/packages/plan/gql.ts @@ -1,4 +1,19 @@ export default { + ADD_EXTERNAL_DATASET: `#graphql + mutation AddExternalDataset( + $planId: Int!, + $simulationDatasetId: Int, + $datasetStart: String!, + $profileSet: ProfileSet!) { + addExternalDataset( + planId: $planId, + simulationDatasetId: $simulationDatasetId, + datasetStart: $datasetStart, + profileSet: $profileSet) { + datasetId + } + } + `, CREATE_ACTIVITY_DIRECTIVES: `#graphql mutation CreateActivityDirectives($activityDirectivesInsertInput: [activity_directive_insert_input!]!) { insert_activity_directive(objects: $activityDirectivesInsertInput) { @@ -50,6 +65,13 @@ export default { } } `, + DELETE_EXTERNAL_DATASET: `#graphql + mutation DeleteExternalDataset($id: Int!) { + delete_dataset_by_pk(id: $id) { + id + } + } + `, DELETE_PLAN: `#graphql mutation DeletePlan($id: Int!) 
{ deletePlan: delete_plan_by_pk(id: $id) { @@ -68,6 +90,13 @@ export default { } } `, + EXTEND_EXTERNAL_DATASET: `#graphql + mutation ExtendExternalDataset($datasetId: Int!, $profileSet: ProfileSet!) { + extendExternalDataset(datasetId: $datasetId, profileSet: $profileSet) { + datasetId + } + } + `, GET_TAGS: `#graphql query GetTags { tags(order_by: { name: desc }) { diff --git a/src/packages/plan/plan.ts b/src/packages/plan/plan.ts index 5e31dd2..1d4b2b1 100644 --- a/src/packages/plan/plan.ts +++ b/src/packages/plan/plan.ts @@ -1,10 +1,14 @@ import type { Express, Request, Response } from 'express'; import rateLimit from 'express-rate-limit'; import multer from 'multer'; -import { JSONParser } from '@streamparser/json'; +import { parse } from 'csv-parse'; import fetch from 'node-fetch'; +import { Readable } from 'stream'; import { auth } from '../auth/middleware.js'; +import { parseJSONFile } from '../../util/fileParser.js'; +import { convertDateToDoy, getTimeDifference } from '../../util/time.js'; +import { HasuraError } from '../../types/hasura.js'; import type { ActivityDirective, ActivityDirectiveInsertInput, @@ -14,7 +18,14 @@ import type { PlanTagsInsertInput, PlanTransfer, Tag, -} from './types.js'; +} from '../../types/plan.js'; +import { + ProfileSegment, + ProfileSet, + ProfileSets, + UploadPlanDatasetJSON, + UploadPlanDatasetPayload, +} from '../../types/dataset.js'; import gql from './gql.js'; import getLogger from '../../logger.js'; import { getEnv } from '../../env.js'; @@ -25,6 +36,9 @@ const { RATE_LIMITER_LOGIN_MAX, HASURA_API_URL } = getEnv(); const GQL_API_URL = `${HASURA_API_URL}/v1/graphql`; +// Limit imposed by Jetty server +const EXTERNAL_DATASET_MAX_SIZE = 1024; + const refreshLimiter = rateLimit({ legacyHeaders: false, max: RATE_LIMITER_LOGIN_MAX, @@ -32,7 +46,9 @@ const refreshLimiter = rateLimit({ windowMs: 15 * 60 * 1000, // 15 minutes }); -export async function importPlan(req: Request, res: Response) { +const timeColumnKey = 'time_utc'; + +async function importPlan(req: Request, res: Response) { const authorizationHeader = req.get('authorization'); const { @@ -55,30 +71,7 @@ export async function importPlan(req: Request, res: Response) { let createdTags: Tag[] = []; try { - const { activities, simulation_arguments }: PlanTransfer = await new Promise(resolve => { - const jsonParser = new JSONParser({ paths: ['$.*'], stringBufferSize: undefined }); - let finalJSON: any; - jsonParser.onToken = ({ value }) => { - if (finalJSON === undefined) { - if (value === '[') finalJSON = []; - else if (value === '{') finalJSON = {}; - } - }; - jsonParser.onValue = ({ parent }) => { - finalJSON = parent; - }; - jsonParser.onEnd = () => { - resolve(finalJSON); - }; - - if (file?.buffer) { - try { - jsonParser.write(file.buffer); - } catch (e) { - console.error(e); - } - } - }); + const { activities, simulation_arguments }: PlanTransfer = await parseJSONFile(file); // create the new plan first logger.info(`POST /importPlan: Creating new plan: ${name}`); @@ -339,7 +332,293 @@ export async function importPlan(req: Request, res: Response) { method: 'POST', }); } - res.sendStatus(500); + res.status(500); + res.send((error as Error).message); + } +} + +function profileHasSegments(profileSets: ProfileSets): boolean { + const profileKeys = Object.keys(profileSets); + for (let i = 0; i < profileKeys.length; i++) { + if (profileSets[profileKeys[i]].segments.length) { + return true; + } + } + + return false; +} + +function getSegmentByteSize(segment: ProfileSegment): number { + return 
Buffer.byteLength(JSON.stringify(segment)); +} + +async function uploadDataset(req: Request, res: Response) { + const authorizationHeader = req.get('authorization'); + + const { + headers: { 'x-hasura-role': roleHeader, 'x-hasura-user-id': userHeader }, + } = req; + + const { body, file } = req; + const { plan_id: planIdString, simulation_dataset_id: simulationDatasetIdString } = body as UploadPlanDatasetPayload; + + const headers: HeadersInit = { + Authorization: authorizationHeader ?? '', + 'Content-Type': 'application/json', + 'x-hasura-role': roleHeader ? `${roleHeader}` : '', + 'x-hasura-user-id': userHeader ? `${userHeader}` : '', + }; + + let createdDatasetId: number | undefined; + + try { + const planId: number = parseInt(planIdString); + const simulationDatasetId: number | undefined = + simulationDatasetIdString != null ? parseInt(simulationDatasetIdString) : undefined; + const matches = file?.originalname?.match(/\.(?\w+)$/); + + if (file && matches != null) { + const { groups: { extension = '' } = {} } = matches; + + logger.info(`POST /uploadDataset: Uploading plan dataset`); + + let uploadedPlanDataset: UploadPlanDatasetJSON; + switch (extension) { + case 'json': + uploadedPlanDataset = await parseJSONFile(file); + break; + case 'csv': + case 'txt': { + const parsedCSV: string[][] = []; + await new Promise((resolve, reject) => { + const parser = parse({ + delimiter: ',', + }); + + parser.on('readable', () => { + let record; + while ((record = parser.read()) !== null) { + parsedCSV.push(record); + } + }); + parser.on('error', error => { + reject(error); + }); + parser.on('end', () => { + resolve(parsedCSV); + }); + + const fileStream = Readable.from(file.buffer); + fileStream.pipe(parser); + }); + + // Keep track of the time column's index separately since the name of the column is static + let timeColumnIndex = -1; + + // Create a lookup for the profile name's index in each CSV row + const headerIndexMap: Record = parsedCSV[0].reduce( + (prevHeaderIndexMap: Record, header: string, headerIndex: number) => { + if (new RegExp(timeColumnKey).test(header)) { + timeColumnIndex = headerIndex; + + return prevHeaderIndexMap; + } else { + return { + ...prevHeaderIndexMap, + [header]: headerIndex, + }; + } + }, + {}, + ); + + if (timeColumnIndex === -1) { + throw new Error(`CSV file does not contain a "${timeColumnKey}" column.`); + } + + const parsedSegments: string[][] = parsedCSV.slice(1); + + // Use the first entry's time value in the CSV as the dataset start time + const startTime = convertDateToDoy(parsedSegments[0][timeColumnIndex]); + const parsedProfiles: ProfileSets = Object.keys(headerIndexMap).reduce( + (previousProfileSet: ProfileSets, header) => { + return { + ...previousProfileSet, + [header]: { + // default CSV profile schemas to `real` and type `discrete` + schema: { type: 'real' }, + segments: [], + type: 'discrete', + }, + }; + }, + {}, + ); + uploadedPlanDataset = parsedSegments.reduce( + ( + previousPlanDataset: UploadPlanDatasetJSON, + parsedSegment: string[], + parsedSegmentIndex, + parsedSegmentsArray, + ) => { + const nextParsedSegment = parsedSegmentsArray[parsedSegmentIndex + 1]; + + // Only process entries that have an entry after it. 
+ // The last entry is ignored on purpose as it is only used to get the duration of the previous entry + if (nextParsedSegment) { + const duration = getTimeDifference(parsedSegment[timeColumnIndex], nextParsedSegment[timeColumnIndex]); + if (duration) { + const profileSet: ProfileSets = Object.entries(headerIndexMap).reduce( + (previousProfileSet: ProfileSets, [header, index]) => { + const previousSegments = previousProfileSet[header].segments; + const value = parsedSegment[index]; + return { + ...previousProfileSet, + [header]: { + ...previousProfileSet[header], + segments: [ + ...previousSegments, + { duration, ...(value !== undefined ? { dynamics: parseFloat(value) } : {}) }, + ], + }, + }; + }, + previousPlanDataset.profileSet, + ); + + return { + ...previousPlanDataset, + profileSet, + } as UploadPlanDatasetJSON; + } + } + return previousPlanDataset; + }, + { datasetStart: startTime, profileSet: parsedProfiles } as UploadPlanDatasetJSON, + ); + break; + } + default: + throw new Error('File extension not supported'); + } + + const { datasetStart, profileSet } = uploadedPlanDataset; + + const profileNames = Object.keys(profileSet); + + // Insert an initial set of profiles that have empty segments + const initialProfileSet: ProfileSets = profileNames.reduce( + (currentProfileSet: ProfileSets, profileName: string) => { + return { + ...currentProfileSet, + [profileName]: { + ...profileSet[profileName], + segments: [], + }, + }; + }, + {}, + ); + + const response = await fetch(GQL_API_URL, { + body: JSON.stringify({ + query: gql.ADD_EXTERNAL_DATASET, + variables: { datasetStart, planId, profileSet: initialProfileSet, simulationDatasetId }, + }), + headers, + method: 'POST', + }); + + type AddExternalDatasetResponse = { data: { addExternalDataset: { datasetId: number } | null } }; + const jsonResponse = await response.json(); + const addExternalDatasetResponse = jsonResponse as AddExternalDatasetResponse | HasuraError; + + // If the initial insert was successful, follow up with multiple inserts to add the segments to each profile + if ((addExternalDatasetResponse as AddExternalDatasetResponse).data?.addExternalDataset != null) { + logger.info(`POST /uploadDataset: Uploaded initial plan dataset`); + + createdDatasetId = (addExternalDatasetResponse as AddExternalDatasetResponse).data.addExternalDataset + ?.datasetId; + + // Repeat as long as there is at least one profile with a segment left + while (profileHasSegments(profileSet)) { + // Initialize profile payload + let currentProfileSet: ProfileSets = initialProfileSet; + + // Get the initial profile payload byte size + let currentProfileSize: number = Buffer.byteLength(JSON.stringify(currentProfileSet)); + + let isMaxSizeReached: boolean = false; + + // Repeat until the maximum payload size is reached or there are no more segments left within the profile to send + while (profileHasSegments(profileSet) && !isMaxSizeReached) { + for (let i = 0; i < profileNames.length; i++) { + const profileName = profileNames[i]; + const profileSegments = profileSet[profileName].segments; + const nextProfileSegment = profileSegments[0]; + const nextProfileSegmentSize = nextProfileSegment ?
getSegmentByteSize(nextProfileSegment) : 0; + + if (nextProfileSegment !== undefined) { + // Check to see if including the next segment will be under the maximum payload size + if (currentProfileSize + nextProfileSegmentSize < EXTERNAL_DATASET_MAX_SIZE) { + // Add the next segment to the current profile set + currentProfileSet = { + ...currentProfileSet, + [profileName]: { + ...currentProfileSet[profileName], + segments: [...currentProfileSet[profileName].segments, nextProfileSegment], + } as ProfileSet, + }; + // Mutate the array to remove the segment that we just copied + profileSegments.shift(); + + currentProfileSize += nextProfileSegmentSize; + } else { + isMaxSizeReached = true; + break; + } + } + } + } + + logger.info(`POST /uploadDataset: Uploading extended plan dataset to dataset: ${createdDatasetId}`); + + await fetch(GQL_API_URL, { + body: JSON.stringify({ + query: gql.EXTEND_EXTERNAL_DATASET, + variables: { datasetId: createdDatasetId, profileSet: currentProfileSet }, + }), + headers, + method: 'POST', + }); + logger.info(`POST /uploadDataset: Uploaded extended plan dataset to dataset: ${createdDatasetId}`); + } + + res.json(createdDatasetId); + } else if ((addExternalDatasetResponse as HasuraError).errors) { + throw new Error(JSON.stringify((addExternalDatasetResponse as HasuraError).errors)); + } else { + throw new Error('Plan dataset upload unsuccessful.'); + } + } else { + throw new Error('File extension not supported'); + } + } catch (error) { + logger.error(`POST /uploadDataset: Error occurred during plan dataset upload`); + logger.error(error); + + // cleanup the plan dataset if it failed along the way + if (createdDatasetId !== undefined) { + // delete the dataset - profiles associated to the plan will be automatically cleaned up + await fetch(GQL_API_URL, { + body: JSON.stringify({ query: gql.DELETE_EXTERNAL_DATASET, variables: { id: createdDatasetId } }), + headers, + method: 'POST', + }); + } + + res.status(500); + res.send((error as Error).message); } } @@ -393,4 +672,46 @@ export default (app: Express) => { * - Hasura */ app.post('/importPlan', upload.single('plan_file'), refreshLimiter, auth, importPlan); + + /** + * @swagger + * /uploadDataset: + * post: + * security: + * - bearerAuth: [] + * consumes: + * - multipart/form-data + * produces: + * - application/json + * parameters: + * - in: header + * name: x-hasura-role + * schema: + * type: string + * required: false + * requestBody: + * content: + * multipart/form-data: + * schema: + * type: object + * properties: + * external_dataset: + * format: binary + * type: string + * plan_id: + * type: long + * simulation_dataset_id: + * type: integer + * responses: + * 200: + * description: ImportResponse + * 403: + * description: Unauthorized error + * 401: + * description: Unauthenticated error + * summary: Upload an external dataset to a plan + * tags: + * - Hasura + */ + app.post('/uploadDataset', upload.single('external_dataset'), refreshLimiter, auth, uploadDataset); }; diff --git a/src/packages/auth/types.ts b/src/types/auth.ts similarity index 100% rename from src/packages/auth/types.ts rename to src/types/auth.ts diff --git a/src/types/dataset.ts b/src/types/dataset.ts new file mode 100644 index 0000000..df118a9 --- /dev/null +++ b/src/types/dataset.ts @@ -0,0 +1,22 @@ +export type ProfileSegment = { + duration: number; + dynamics?: number | string | boolean | object; // `dynamics` should match `schema` +}; + +export type ProfileSet = { + type: 'discrete' | 'real'; + schema: object; // ValueSchema type + 
segments: ProfileSegment[]; +}; + +export type ProfileSets = Record; + +export type UploadPlanDatasetPayload = { + plan_id: string; + simulation_dataset_id?: string; +}; + +export type UploadPlanDatasetJSON = { + datasetStart: string; + profileSet: ProfileSets; +}; diff --git a/src/types/hasura.ts b/src/types/hasura.ts new file mode 100644 index 0000000..6a56855 --- /dev/null +++ b/src/types/hasura.ts @@ -0,0 +1,5 @@ +export type HasuraError = { + errors: { + message: string; + }[]; +}; diff --git a/src/packages/plan/types.ts b/src/types/plan.ts similarity index 98% rename from src/packages/plan/types.ts rename to src/types/plan.ts index 51a0adb..0075518 100644 --- a/src/packages/plan/types.ts +++ b/src/types/plan.ts @@ -1,4 +1,4 @@ -import type { UserId } from '../auth/types'; +import type { UserId } from './auth'; export type PlanSchema = { created_at: string; diff --git a/src/types/time.ts b/src/types/time.ts new file mode 100644 index 0000000..0f44fd3 --- /dev/null +++ b/src/types/time.ts @@ -0,0 +1,20 @@ +export type ParsedDoyString = { + doy: number; + hour: number; + min: number; + ms: number; + sec: number; + time: string; + year: number; +}; + +export type ParsedYmdString = { + day: number; + hour: number; + min: number; + month: number; + ms: number; + sec: number; + time: string; + year: number; +}; diff --git a/src/util/fileParser.ts b/src/util/fileParser.ts new file mode 100644 index 0000000..0685386 --- /dev/null +++ b/src/util/fileParser.ts @@ -0,0 +1,28 @@ +import { JSONParser } from '@streamparser/json'; + +export function parseJSONFile(file?: Express.Multer.File): Promise { + return new Promise(resolve => { + const jsonParser = new JSONParser({ paths: ['$.*'], stringBufferSize: undefined }); + let finalJSON: any; + jsonParser.onToken = ({ value }) => { + if (finalJSON === undefined) { + if (value === '[') finalJSON = []; + else if (value === '{') finalJSON = {}; + } + }; + jsonParser.onValue = ({ parent }) => { + finalJSON = parent; + }; + jsonParser.onEnd = () => { + resolve(finalJSON as T); + }; + + if (file?.buffer) { + try { + jsonParser.write(file.buffer); + } catch (e) { + console.error(e); + } + } + }); +} diff --git a/src/util/time.test.ts b/src/util/time.test.ts new file mode 100644 index 0000000..7418715 --- /dev/null +++ b/src/util/time.test.ts @@ -0,0 +1,55 @@ +import { describe, expect, test } from 'vitest'; +import { convertDateToDoy, getTimeDifference, parseDoyOrYmdTime } from './time'; + +describe('Time utility function tests', () => { + test('parseDoyOrYmdTime', () => { + expect(parseDoyOrYmdTime('2019-365T08:00:00.1234')).toEqual({ + doy: 365, + hour: 8, + min: 0, + ms: 123.4, + sec: 0, + time: '08:00:00.1234', + year: 2019, + }); + + expect(parseDoyOrYmdTime('2019-01-20T08:10:03.9')).toEqual({ + day: 20, + hour: 8, + min: 10, + month: 1, + ms: 900, + sec: 3, + time: '08:10:03.9', + year: 2019, + }); + + expect(parseDoyOrYmdTime('2022-01-2T00:00:00')).toEqual({ + day: 2, + hour: 0, + min: 0, + month: 1, + ms: 0, + sec: 0, + time: '00:00:00', + year: 2022, + }); + + expect(parseDoyOrYmdTime('2019-365T08:80:00.1234')).toEqual(null); + expect(parseDoyOrYmdTime('2022-20-2T00:00:00')).toEqual(null); + }); + + test('convertDateToDoy', () => { + expect(convertDateToDoy('2024-01-01T00:10:00')).toEqual('2024-001T00:10:00'); + expect(convertDateToDoy('2024-04-09T00:10:00')).toEqual('2024-100T00:10:00'); + expect(convertDateToDoy('2024-09-27T00:10:00')).toEqual('2024-271T00:10:00'); + }); + + test('getTimeDifference', () => { + 
expect(getTimeDifference('2024-01-01T00:10:00', '2024-01-01T00:11:00', 6)).toEqual(60000000); + expect(getTimeDifference('2024-01-01T00:01:00', '2024-01-01T00:11:00', 6)).toEqual(600000000); + expect(getTimeDifference('2024-245T00:01:00.0', '2024-245T00:02:00.0', 6)).toEqual(60000000); + expect(getTimeDifference('2024-245T00:01:00.0', '2024-245T12:02:00.0', 6)).toEqual(43260000000); + expect(getTimeDifference('2024-243T00:01:00.0', '2024-245T12:02:00.0', 6)).toEqual(216060000000); + }); +}); diff --git a/src/util/time.ts b/src/util/time.ts new file mode 100644 index 0000000..197a302 --- /dev/null +++ b/src/util/time.ts @@ -0,0 +1,130 @@ +import { ParsedDoyString, ParsedYmdString } from '../types/time'; + +function parseNumber(number: number | string): number { + return parseInt(`${number}`, 10); +} + +/** + * padBefore - function to pad leading 0s to a number + * + * @param {number} number - number to pad + * @param {number} numOfZeroes - number of zeroes to pad + * @return {string} + */ +function padBefore(number: number | string, numOfZeroes: number, shouldTruncate: boolean = true) { + return `${[...Array(numOfZeroes).keys()].map(() => '0').join('')}${number}`.slice( + -(shouldTruncate ? numOfZeroes : Math.max(numOfZeroes, `${number}`.length)), + ); +} + +/** + * padDoy - function to pad leading 0s for DOY format + * Note: This should only be used for Earth based time types, e.g. SCET and ERT + * + * @param {number | string} dayNumber - the day of year + * @return {string} + */ +function padDoy(dayNumber: number | string) { + return padBefore(parseNumber(dayNumber), 3); +} + +function getDOY(date: Date) { + const start = Date.UTC(date.getUTCFullYear(), 0, 0); + const diff = date.valueOf() - start.valueOf(); + const oneDay = 1000 * 60 * 60 * 24; + return padDoy(Math.floor(diff / oneDay)); +} + +/** + * Parses a date string (YYYY-MM-DDTHH:mm:ss) or DOY string (YYYY-DDDDTHH:mm:ss) into its separate components + */ +export function parseDoyOrYmdTime(dateString: string, numDecimals = 6): null | ParsedDoyString | ParsedYmdString { + const matches = (dateString ?? '').match( + new RegExp( + `^(?\\d{4})-(?:(?(?:[0]?[0-9])|(?:[1][1-2]))-(?(?:[0-2]?[0-9])|(?:[3][0-1]))|(?\\d{1,3}))(?:T(?