Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate s3key and filename #254

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
18 changes: 12 additions & 6 deletions backend/fixtures/5-model_file.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
"product": "model",
"createdAt": "2020-02-20T10:52:59.073Z",
"updatedAt": "2020-02-20T10:52:59.073Z",
"s3key": "20141205_mace-head_ecmwf.nc",
"s3key": "a5d1d5af-3667-41bc-b952-e684f627d91c/20141205_mace-head_ecmwf.nc",
"filename": "20141205_mace-head_ecmwf.nc",
"checksum": "255bcd9deae26851992c3da6352844fb9443203c48ae4f4ad8f1aa50ef2ab26f",
"size": 500452,
"format": "NetCDF3",
Expand All @@ -23,7 +24,8 @@
"measurementDate": "2020-01-26",
"createdAt": "2020-04-08T13:33:31.012Z",
"updatedAt": "2020-04-08T13:33:31.012Z",
"s3key": "20200126_granada_ecmwf.nc",
"s3key": "9e04d8ef-0f2b-4823-835d-33e458403c67/20200126_granada_ecmwf.nc",
"filename": "20200126_granada_ecmwf.nc",
"checksum": "c46e4c7fe4be5b38670b86f3ffc3101c6de8465eabf5c470226c733e69586031",
"size": 501452,
"format": "NetCDF3",
Expand All @@ -42,7 +44,8 @@
"model": "ecmwf",
"createdAt": "2020-02-20T10:52:59.073Z",
"updatedAt": "2020-02-20T10:52:59.073Z",
"s3key": "20141205_mace-head_ecmwf.nc",
"s3key": "b5d1d5af-3667-41bc-b952-e684f627d91c/20141205_mace-head_ecmwf.nc",
"filename": "20141205_mace-head_ecmwf.nc",
"checksum": "055bcd9deae26851992c3da6352844fb9443203c48ae4f4ad8f1aa50ef2ab26f",
"size": 500452,
"format": "NetCDF3",
Expand All @@ -59,7 +62,8 @@
"model": "icon-iglo-12-23",
"createdAt": "2020-02-20T10:52:59.073Z",
"updatedAt": "2020-02-20T10:52:59.073Z",
"s3key": "20141205_mace-head_icon-iglo-12-23.nc",
"s3key": "c5d1d5af-3667-41bc-b952-e684f627d91c/20141205_mace-head_icon-iglo-12-23.nc",
"filename": "20141205_mace-head_icon-iglo-12-23.nc",
"checksum": "155bcd9deae26851992c3da6352844fb9443203c48ae4f4ad8f1aa50ef2ab26f",
"size": 500452,
"format": "NetCDF3",
Expand All @@ -76,7 +80,8 @@
"model": "icon-iglo-12-23",
"createdAt": "2020-02-20T10:56:19.382Z",
"updatedAt": "2020-02-20T10:56:19.382Z",
"s3key": "20200122_bucharest_icon-iglo-12-23.nc",
"s3key": "88092c00-161d-4ca2-a29d-628cf8e960f6/20200122_bucharest_icon-iglo-12-23.nc",
"filename": "20200122_bucharest_icon-iglo-12-23.nc",
"checksum": "898688b011a511f8f0e9353371cf73ee86f60c89b0e02c6931d8c05542c64cdb",
"size": 12200657,
"format": "HDF5 (NetCDF4)",
Expand All @@ -93,7 +98,8 @@
"model": "ecmwf",
"createdAt": "2020-02-20T10:56:19.382Z",
"updatedAt": "2020-02-20T10:56:19.382Z",
"s3key": "20190716_bucharest_ecmwf.nc",
"s3key": "a45a2e9a-e39d-4af2-9798-5ea0fadf041e/20190716_bucharest_ecmwf.nc",
"filename": "20190716_bucharest_ecmwf.nc",
"checksum": "e7712f5a1a01fac7e0fa23d47f1879d5107de0c7e9c4b530d5ec98cf4091ddfd",
"size": 12200657,
"format": "HDF5 (NetCDF4)",
Expand Down
51 changes: 34 additions & 17 deletions backend/fixtures/5-regular_file.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"updatedAt": "2020-02-20T10:56:19.382Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20181115_mace-head_mira.nc",
"s3key": "38092c00-161d-4ca2-a29d-628cf8e960f6/20181115_mace-head_mira.nc",
"filename": "20181115_mace-head_mira.nc",
"checksum": "298688b011a511f8f0e9353371cf73ee86f60c89b0e02c6931d8c05542c64cdb",
"size": 12200657,
"format": "HDF5 (NetCDF4)",
Expand All @@ -31,7 +32,8 @@
"updatedAt": "2020-02-20T10:56:19.382Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20180609_mace-head_classification.nc",
"s3key": "bde7a35f-03aa-4bff-acfb-b4974ea9f217/20180609_mace-head_classification.nc",
"filename": "20180609_mace-head_classification.nc",
"checksum": "b3142dd3b179e8344b30a7c38d0280eab1d122b8683445006e7691fa88ed2c42",
"size": 130744,
"format": "HDF5 (NetCDF4)",
Expand All @@ -50,7 +52,8 @@
"updatedAt": "2020-02-20T10:47:33.775Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20190901_hyytiala_rpg-fmcw-94.nc",
"s3key": "d21d6a9b-6804-4465-a026-74ec429fe17d/20190901_hyytiala_rpg-fmcw-94.nc",
"filename": "20190901_hyytiala_rpg-fmcw-94.nc",
"checksum": "9e7c4d902494a254b80d873f13806fd0f01e83564a13a053d3902db98a372ad1",
"size": 16027310,
"format": "HDF5 (NetCDF4)",
Expand All @@ -70,7 +73,8 @@
"updatedAt": "2020-02-20T10:39:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20190715_bucharest_categorize.nc",
"s3key": "22b32746-faf0-4057-9076-ed2e698dcc34/20190715_bucharest_categorize.nc",
"filename": "20190715_bucharest_categorize.nc",
"checksum": "22d86b26f642a0befdcd7088ff38caab00ea3d95933f4ccc430179b4a29b74d3",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -89,7 +93,8 @@
"updatedAt": "2020-02-20T10:49:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20190715_bucharest_categorize.nc",
"s3key": "6cb32746-faf0-4057-9076-ed2e698dcf36/20190715_bucharest_categorize.nc",
"filename": "20190715_bucharest_categorize.nc",
"checksum": "43d86b26f642a0befdcd7088ff38caab00ea3d95933f4ccc430179b4a29b74d3",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -108,7 +113,8 @@
"updatedAt": "2020-02-20T10:59:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20190715_bucharest_categorize.nc",
"s3key": "8bb32746-faf0-4057-9076-ed2e698dcf36/20190715_bucharest_categorize.nc",
"filename": "20190715_bucharest_categorize.nc",
"checksum": "64d86b26f642a0befdcd7088ff38caab00ea3d95933f4ccc430179b4a29b74d3",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -127,7 +133,8 @@
"updatedAt": "2020-02-19T10:59:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20190715_bucharest_categorize.nc",
"s3key": "1bb32746-faf0-4057-9076-ed2e698dcf36/20190715_bucharest_categorize.nc",
"filename": "20190715_bucharest_categorize.nc",
"checksum": "14d86b26f642a0befdcd7088ff38caab00ea3d95933f4ccc430179b4a29b74d3",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -151,7 +158,8 @@
"updatedAt": "2020-02-19T10:59:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20190715_bucharest_categorize.nc",
"s3key": "2bb32746-faf0-4057-9076-ed2e698dcf36/20190715_bucharest_categorize.nc",
"filename": "20190715_bucharest_categorize.nc",
"checksum": "24d86b26f642a0befdcd7088ff38caab00ea3d95933f4ccc430179b4a29b74d3",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -172,7 +180,8 @@
"updatedAt": "2020-02-19T10:59:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "legacy/20090716_bucharest_classification.nc",
"s3key": "3bb32746-faf0-4057-9076-ed2e698dcf36/legacy/20090716_bucharest_classification.nc",
"filename": "20090716_bucharest_classification.nc",
"checksum": "44d86b26f642a0befdcd7088ff38caab00ea3d95933f4ccc430179b4a29b74d3",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -192,7 +201,8 @@
"updatedAt": "2020-02-20T10:39:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20210126_bucharest_categorize.nc",
"s3key": "52b32746-faf0-4057-9076-ed2e698dcc34/20210126_bucharest_categorize.nc",
"filename": "20210126_bucharest_categorize.nc",
"checksum": "52d86b26f642a0befdcd7088ff38caab00ea3d95933f4ccc430179b4a29b74d3",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -212,7 +222,8 @@
"updatedAt": "2020-02-21T10:49:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20210126_bucharest_categorize.nc",
"s3key": "62b32746-faf0-4057-9076-ed2e698dcc34/20210126_bucharest_categorize.nc",
"filename": "20210126_bucharest_categorize.nc",
"checksum": "62d86b26f642a0befdcd7088ff38caab00ea3d95933f4ccc430179b4a29b74d3",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -232,7 +243,8 @@
"updatedAt": "2020-02-21T10:39:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20210126_bucharest_categorize.nc",
"s3key": "72b32746-faf0-4057-9076-ed2e698dcc34/20210126_bucharest_categorize.nc",
"filename": "20210126_bucharest_categorize.nc",
"checksum": "72d86b26f642a0befdcd7088ff38caab00ea3d95933f4ccc430179b4a29b74d3",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -252,7 +264,8 @@
"updatedAt": "2020-02-22T10:39:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20210126_bucharest_categorize.nc",
"s3key": "82b32746-faf0-4057-9076-ed2e698dcc34/20210126_bucharest_categorize.nc",
"filename": "20210126_bucharest_categorize.nc",
"checksum": "82d86b26f642a0befdcd7088ff38caab00ea3d95933f4ccc430179b4a29b74d3",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -272,7 +285,8 @@
"updatedAt": "2021-02-22T10:39:58.449Z",
"startTime": "2021-02-20T10:56:19.382Z",
"stopTime": "2021-02-20T11:56:19.382Z",
"s3key": "20210126_bucharest_radar.nc",
"s3key": "acf78456-11b1-41a6-b2de-aa7590a75675/20210126_bucharest_radar.nc",
"filename": "20210126_bucharest_radar.nc",
"checksum": "96307ce8dd4fabbc2353cc3c4f32e384bd70280edceb9fb69509cf2db15ad70c",
"size": 3327282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -292,7 +306,8 @@
"updatedAt": "2020-02-19T10:59:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "20190715_bucharest_lidar.nc",
"s3key": "b6de8cf4-8825-47b0-aaa9-4fd413bbb0d7/20190715_bucharest_lidar.nc",
"filename": "20190715_bucharest_lidar.nc",
"checksum": "f5e059df0c0dccecc5a5d07d4306e17b9b2b4272ea176688e0ee5c1f651010df",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -314,7 +329,8 @@
"updatedAt": "2020-02-19T10:59:58.449Z",
"startTime": "2020-02-19T07:56:19.382Z",
"stopTime": "2020-02-19T08:56:19.382Z",
"s3key": "20190715_bucharest_radar.nc",
"s3key": "f036da43-c19c-4832-99f9-6cc88f3255c5/20190715_bucharest_radar.nc",
"filename": "20190715_bucharest_radar.nc",
"checksum": "e09b61366e9c1a4b2676d07dc9bfccff1436a73e46f1a3a8ea51429218d5cb50",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand All @@ -337,7 +353,8 @@
"updatedAt": "2020-02-19T10:59:58.449Z",
"startTime": "2018-11-15T10:56:19.382Z",
"stopTime": "2018-11-15T11:56:19.382Z",
"s3key": "legacy/20090716_newyork_classification.nc",
"s3key": "0afca83a-7b6b-4288-82f6-a59685346617/legacy/20090716_newyork_classification.nc",
"filename": "20090716_newyork_classification.nc",
"checksum": "65d29c507d0ebeec46a6c93deb8ebe3b86dfd678fc73e1a70816cca2fe279025",
"size": 7127282,
"format": "HDF5 (NetCDF4)",
Expand Down
9 changes: 4 additions & 5 deletions backend/src/entity/File.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ import { Site } from "./Site";
import { Product } from "./Product";
import { Visualization } from "./Visualization";
import { isValidDate } from "../lib";
import { basename } from "path";
import { Model } from "./Model";
import { ModelVisualization } from "./ModelVisualization";
import { ErrorLevel } from "./QualityReport";
import { Software } from "./Software";
import { Instrument, InstrumentInfo } from "./Instrument";

@Unique(["checksum"])
@Unique(["s3key", "version"])
@Index(["measurementDate", "site", "product"])
export abstract class File {
@PrimaryColumn("uuid")
Expand All @@ -31,6 +31,9 @@ export abstract class File {
@Column()
s3key!: string;

@Column()
filename!: string;

@Column()
version!: string;

Expand Down Expand Up @@ -93,10 +96,6 @@ export abstract class File {
@JoinTable()
software!: Software[];

get filename() {
return basename(this.s3key);
}

@BeforeInsert()
updateDateCreation() {
this.createdAt = new Date();
Expand Down
5 changes: 2 additions & 3 deletions backend/src/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,7 @@ export const augmentFile = (includeS3path: boolean) => (file: RegularFile | Mode
...file,
downloadUrl: `${env.DP_BACKEND_URL}/download/${getDownloadPathForFile(file)}`,
filename: basename(file.s3key),
s3key: undefined,
s3path: includeS3path ? getS3pathForFile(file) : undefined,
s3path: includeS3path ? getS3pathForFile(file) : undefined, // TODO: not used anymore?
model: "model" in file ? file.model : undefined,
software: parseSoftware(file),
timeliness: calcTimeliness(file),
Expand Down Expand Up @@ -128,7 +127,7 @@ export const getS3pathForFile = (file: File) => `/${getBucketForFile(file)}/${fi

/** Returns the given image key prefixed with `/cloudnet-img/`. */
export const getS3pathForImage = (s3key: string): string => {
  return `/cloudnet-img/${s3key}`;
};

export const getDownloadPathForFile = (file: File) => `product/${file.uuid}/${file.s3key}`;
export const getDownloadPathForFile = (file: File) => `product/${file.uuid}/${file.filename}`;

export async function checkFileExists(s3path: string) {
const headers = {
Expand Down
19 changes: 19 additions & 0 deletions backend/src/migration/1730123930898-AddFilenameColumn.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { MigrationInterface, QueryRunner } from "typeorm";

/**
 * Migration that introduces a `filename` column on the `regular_file` and
 * `model_file` tables.
 *
 * `up` adds the column as nullable, fills it from `s3key` with the part matched
 * by `.+/` removed, and then marks the column NOT NULL.
 * `down` drops the column from both tables.
 */
export class AddFilenameColumn1730123930898 implements MigrationInterface {
  name = "AddFilenameColumn1730123930898";

  /** Runs the forward statements one at a time, in the listed order. */
  public async up(queryRunner: QueryRunner): Promise<void> {
    // Per table the order matters: add the column, populate it, then forbid NULLs.
    const forwardStatements = [
      `ALTER TABLE "regular_file" ADD "filename" character varying`,
      `UPDATE "regular_file" SET "filename" = regexp_replace(s3key, '.+/', '')`,
      `ALTER TABLE "regular_file" ALTER COLUMN "filename" SET NOT NULL`,
      `ALTER TABLE "model_file" ADD "filename" character varying`,
      `UPDATE "model_file" SET "filename" = regexp_replace(s3key, '.+/', '')`,
      `ALTER TABLE "model_file" ALTER COLUMN "filename" SET NOT NULL`,
    ];
    for (const sql of forwardStatements) {
      await queryRunner.query(sql);
    }
  }

  /** Drops the `filename` column again, `model_file` first. */
  public async down(queryRunner: QueryRunner): Promise<void> {
    const revertStatements = [
      `ALTER TABLE "model_file" DROP COLUMN "filename"`,
      `ALTER TABLE "regular_file" DROP COLUMN "filename"`,
    ];
    for (const sql of revertStatements) {
      await queryRunner.query(sql);
    }
  }
}
19 changes: 19 additions & 0 deletions backend/src/migration/1732796565757-MakeFileS3KeyUnique.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { MigrationInterface, QueryRunner } from "typeorm";

/**
 * Migration that adds a UNIQUE constraint over (`s3key`, `version`) to the
 * `regular_file` and `model_file` tables.
 *
 * `up` creates the two named constraints; `down` drops them in reverse order.
 */
export class MakeFileS3KeyUnique1732796565757 implements MigrationInterface {
  name = "MakeFileS3KeyUnique1732796565757";

  /** Adds the constraint to `regular_file` first, then to `model_file`. */
  public async up(queryRunner: QueryRunner): Promise<void> {
    const addRegularFileConstraint = `ALTER TABLE "regular_file" ADD CONSTRAINT "UQ_4ff8d5fa4adb59e4acdebfae2fa" UNIQUE ("s3key", "version")`;
    const addModelFileConstraint = `ALTER TABLE "model_file" ADD CONSTRAINT "UQ_b7415aaaa9ae376a3b62547086f" UNIQUE ("s3key", "version")`;

    await queryRunner.query(addRegularFileConstraint);
    await queryRunner.query(addModelFileConstraint);
  }

  /** Drops the constraint from `model_file` first, then from `regular_file`. */
  public async down(queryRunner: QueryRunner): Promise<void> {
    const dropModelFileConstraint = `ALTER TABLE "model_file" DROP CONSTRAINT "UQ_b7415aaaa9ae376a3b62547086f"`;
    const dropRegularFileConstraint = `ALTER TABLE "regular_file" DROP CONSTRAINT "UQ_4ff8d5fa4adb59e4acdebfae2fa"`;

    await queryRunner.query(dropModelFileConstraint);
    await queryRunner.query(dropRegularFileConstraint);
  }
}
5 changes: 3 additions & 2 deletions backend/src/routes/download.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ export class DownloadRoutes {
readonly citationService: CitationService;

product: RequestHandler = async (req, res, next) => {
const s3key = (req.params.s3key as unknown as string[]).join("/");
const file = await this.fileController.findAnyFile((repo) => repo.findOneBy({ uuid: req.params.uuid, s3key }));
const file = await this.fileController.findAnyFile((repo) =>
repo.findOneBy({ uuid: req.params.uuid, filename: req.params.filename }),
);
if (!file) return next({ status: 404, errors: ["File not found"] });
const upstreamRes = await this.makeFileRequest(file);
res.setHeader("Content-Type", "application/octet-stream");
Expand Down
18 changes: 7 additions & 11 deletions backend/src/routes/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ export class FileRoutes {
const repo = file instanceof RegularFile ? this.fileRepo : this.modelFileRepo;
const versions = await repo.find({
select,
where: { s3key: s3Key(file), tombstoneReason: IsNull() },
where: { filename: file.filename, tombstoneReason: IsNull() },
order: { createdAt: "DESC" },
});
res.send(versions);
Expand Down Expand Up @@ -222,7 +222,7 @@ export class FileRoutes {
);
return qb
.leftJoinAndSelect("file.site", "site")
.where("regexp_replace(s3key, '.+/', '') = :filename", { filename: basename(file.s3key) }) // eslint-disable-line quotes
.where("file.filename = :filename", { filename: file.filename })
.getOne();
};
const existingFile = await findFileByName(isModel);
Expand Down Expand Up @@ -375,7 +375,7 @@ export class FileRoutes {
// Hack to prevent loading of model files when instrument is selected without product
if (isModel && (query.instrument || query.instrumentPid)) qb.andWhere("1 = 0");

if (query.filename) qb.andWhere("regexp_replace(s3key, '.+/', '') IN (:...filename)", query); // eslint-disable-line quotes
if (query.filename) qb.andWhere("filename IN (:...filename)", query);
if (query.releasedBefore) qb.andWhere("file.updatedAt < :releasedBefore", query);
if (query.updatedAtFrom) qb.andWhere("file.updatedAt >= :updatedAtFrom", query);
if (query.updatedAtTo) qb.andWhere("file.updatedAt <= :updatedAtTo", query);
Expand Down Expand Up @@ -506,7 +506,7 @@ export class FileRoutes {

async fetchValidVersions(queryRunner: QueryRunner, file: File) {
return await queryRunner.manager.find(RegularFile, {
where: { s3key: s3Key(file), tombstoneReason: IsNull() },
where: { filename: file.filename, tombstoneReason: IsNull() },
relations: { product: true, site: true },
order: { createdAt: "DESC" },
});
Expand Down Expand Up @@ -582,11 +582,7 @@ function addCommonFilters(qb: any, query: any) {
}

function isValidFilename(file: any) {
const [date, site] = basename(file.s3key).split(".")[0].split("_");
return file.measurementDate.replace(/-/g, "") == date && (file.site == site || typeof file.site == "object");
}

function s3Key(file: File) {
// Handle legacy filenames with 'legacy/' prefix.
return Raw((alias) => `regexp_replace(${alias}, '.+/', '') = :filename`, { filename: file.filename });
if (!file.filename) return false;
const [date, site] = file.filename.split(".")[0].split("_");
return file.measurementDate.replace(/-/g, "") == date && file.site == site;
}
Loading
Loading