diff --git a/.github/ci.env b/.github/ci.env
index dec316d9a..9efe6ad8e 100644
--- a/.github/ci.env
+++ b/.github/ci.env
@@ -30,3 +30,4 @@ LABELLING_URL=http://localhost:5803
 HANDLE_API_URL=http://localhost:5804
 DVAS_URL=https://dvas.test
 DC_URL=https://dc.test
+PRIVATE_IP_RANGES=193.169.0.0/16
diff --git a/backend/package-lock.json b/backend/package-lock.json
index ec4b5fe1d..aff40fcd8 100644
--- a/backend/package-lock.json
+++ b/backend/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "dataportal",
-  "version": "2.107.0",
+  "version": "2.107.9",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "dataportal",
-      "version": "2.107.0",
+      "version": "2.107.9",
       "license": "MIT",
       "dependencies": {
         "apache-md5": "^1.1.8",
@@ -16,6 +16,7 @@
         "basic-auth": "^2.0.1",
         "express": "^5.0.1",
         "express-rate-limit": "^7.1.5",
+        "ipaddr.js": "^2.2.0",
         "maxmind": "^4.3.8",
         "nodemon": "^3.0.1",
         "pg": "^8.8.0",
@@ -2962,9 +2963,10 @@
       "devOptional": true
     },
     "node_modules/cross-spawn": {
-      "version": "7.0.3",
-      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
-      "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
+      "license": "MIT",
       "dependencies": {
         "path-key": "^3.1.0",
         "shebang-command": "^2.0.0",
@@ -4275,11 +4277,12 @@
       "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
     },
     "node_modules/ipaddr.js": {
-      "version": "1.9.1",
-      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
-      "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
+      "version": "2.2.0",
+      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz",
+      "integrity": "sha512-Ag3wB2o37wslZS19hZqorUnrnzSkpOVy+IiiDEiTqNubEYpYuHWIf6K4psgN2ZWKExS4xhVCrRVfb/wfW8fWJA==",
+      "license": "MIT",
       "engines": {
-        "node": ">= 0.10"
+        "node": ">= 10"
       }
     },
     "node_modules/is-arguments": {
@@ -6254,6 +6257,15 @@
         "node": ">= 0.10"
       }
     },
+    "node_modules/proxy-addr/node_modules/ipaddr.js": {
+      "version": "1.9.1",
+      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
+      "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.10"
+      }
+    },
     "node_modules/proxy-from-env": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
diff --git a/backend/package.json b/backend/package.json
index 1dc11704c..63eee44ec 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -14,6 +14,7 @@
     "basic-auth": "^2.0.1",
     "express": "^5.0.1",
     "express-rate-limit": "^7.1.5",
+    "ipaddr.js": "^2.2.0",
     "maxmind": "^4.3.8",
     "nodemon": "^3.0.1",
     "pg": "^8.8.0",
diff --git a/backend/src/entity/Download.ts b/backend/src/entity/Download.ts
index 8f4c73fa8..135b14857 100644
--- a/backend/src/entity/Download.ts
+++ b/backend/src/entity/Download.ts
@@ -77,10 +77,6 @@ export class Download {
       ) AS collection_file
       GROUP BY "collectionUuid", "measurementDate", "productType", "siteId"
     ) object ON "objectUuid" = object.uuid
-    WHERE
-      ip NOT IN ('', '::ffff:127.0.0.1')
-      AND ip NOT LIKE '192.168.%'
-      AND ip NOT LIKE '193.166.223.%'
     GROUP BY "downloadDate", "ip", "country", "measurementDate", "productType", "siteId"
   `,
   materialized: true,
diff --git a/backend/src/lib/env.ts b/backend/src/lib/env.ts
index 1e0f99f62..df2bd0210 100644
--- a/backend/src/lib/env.ts
+++ b/backend/src/lib/env.ts
@@ -1,4 +1,5 @@
-import { URL } from "url";
+import { URL } from "node:url";
+import * as ipaddr from "ipaddr.js";
 
 const requiredVars = [
   "NODE_ENV",
@@ -78,6 +79,7 @@ interface Env {
   SLACK_NOTIFICATION_CHANNEL?: string;
   DVAS_URL: string;
   DC_URL: string;
+  PRIVATE_IP_RANGES: [ipaddr.IPv4 | ipaddr.IPv6, number][];
 }
 
 const env: Env = {
@@ -99,6 +101,7 @@ const env: Env = {
     typeof rawEnv.MATOMO_START_DATE !== "undefined" ? readIsoDate(rawEnv.MATOMO_START_DATE) : undefined,
   DVAS_URL: readUrl(rawEnv.DVAS_URL),
   DC_URL: readUrl(rawEnv.DC_URL),
+  PRIVATE_IP_RANGES: rawEnv.PRIVATE_IP_RANGES ? readIpRanges(rawEnv.PRIVATE_IP_RANGES) : [],
 };
 
 export default env;
@@ -136,3 +139,17 @@ function readIsoDate(input: string): string {
   }
   return input;
 }
+
+/**
+ * Parses a comma-separated list of CIDR blocks (IPv4 or IPv6) into the
+ * [address, prefix length] pairs returned by ipaddr.parseCIDR.
+ * Blank entries (e.g. from a trailing comma) are skipped; an entry that is
+ * not valid CIDR notation makes ipaddr.parseCIDR throw.
+ */
+function readIpRanges(input: string): [ipaddr.IPv4 | ipaddr.IPv6, number][] {
+  return input
+    .split(",")
+    .map((entry) => entry.trim())
+    .filter((entry) => entry !== "")
+    .map((entry) => ipaddr.parseCIDR(entry));
+}
diff --git a/backend/src/migration/1732801567740-RemoveFilterFromDownloadStats.ts b/backend/src/migration/1732801567740-RemoveFilterFromDownloadStats.ts
new file mode 100644
index 000000000..94c198a87
--- /dev/null
+++ b/backend/src/migration/1732801567740-RemoveFilterFromDownloadStats.ts
@@ -0,0 +1,135 @@
+import { MigrationInterface, QueryRunner } from "typeorm";
+
+/**
+ * Recreates the "download_stats" materialized view without the IP address
+ * WHERE filter. down() restores the previous definition with the filter.
+ */
+export class RemoveFilterFromDownloadStats1732801567740 implements MigrationInterface {
+  name = "RemoveFilterFromDownloadStats1732801567740";
+
+  /** Drops the view and recreates it without the IP filter, updating typeorm_metadata. */
+  public async up(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`DELETE FROM "typeorm_metadata" WHERE "type" = $1 AND "name" = $2 AND "schema" = $3`, [
+      "MATERIALIZED_VIEW",
+      "download_stats",
+      "public",
+    ]);
+    await queryRunner.query(`DROP MATERIALIZED VIEW "download_stats"`);
+    await queryRunner.query(`CREATE MATERIALIZED VIEW "download_stats" AS
+    SELECT
+      "createdAt"::date AS "downloadDate",
+      "ip",
+      "country",
+      "measurementDate",
+      "productType",
+      "siteId",
+      SUM("downloads") AS "downloads"
+    FROM download
+    JOIN (
+      SELECT uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"
+      FROM (
+        SELECT uuid, "measurementDate", 'observation' AS "productType", "siteId"
+        FROM regular_file
+        JOIN product_variable USING ("productId")
+        WHERE product_variable."actrisName" IS NOT NULL
+        UNION ALL
+        SELECT uuid, "measurementDate", 'model' AS "productType", "siteId"
+        FROM model_file
+        JOIN product_variable USING ("productId")
+        WHERE product_variable."actrisName" IS NOT NULL
+      ) AS file
+      GROUP BY uuid, "measurementDate", "productType", "siteId"
+      UNION ALL
+      SELECT "collectionUuid" AS uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"
+      FROM (
+        SELECT "collectionUuid", "measurementDate", 'observation' AS "productType", "siteId"
+        FROM collection_regular_files_regular_file
+        JOIN regular_file ON "regularFileUuid" = regular_file.uuid
+        JOIN product_variable USING ("productId")
+        WHERE product_variable."actrisName" IS NOT NULL
+        UNION ALL
+        SELECT "collectionUuid", "measurementDate", 'model' AS "productType", "siteId"
+        FROM collection_model_files_model_file
+        JOIN model_file ON "modelFileUuid" = model_file.uuid
+        JOIN product_variable USING ("productId")
+        WHERE product_variable."actrisName" IS NOT NULL
+      ) AS collection_file
+      GROUP BY "collectionUuid", "measurementDate", "productType", "siteId"
+    ) object ON "objectUuid" = object.uuid
+    GROUP BY "downloadDate", "ip", "country", "measurementDate", "productType", "siteId"
+  `);
+    await queryRunner.query(
+      `INSERT INTO "typeorm_metadata"("database", "schema", "table", "type", "name", "value") VALUES (DEFAULT, $1, DEFAULT, $2, $3, $4)`,
+      [
+        "public",
+        "MATERIALIZED_VIEW",
+        "download_stats",
+        'SELECT\n "createdAt"::date AS "downloadDate",\n "ip",\n "country",\n "measurementDate",\n "productType",\n "siteId",\n SUM("downloads") AS "downloads"\n FROM download\n JOIN (\n SELECT uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"\n FROM (\n SELECT uuid, "measurementDate", \'observation\' AS "productType", "siteId"\n FROM regular_file\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n UNION ALL\n SELECT uuid, "measurementDate", \'model\' AS "productType", "siteId"\n FROM model_file\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n ) AS file\n GROUP BY uuid, "measurementDate", "productType", "siteId"\n UNION ALL\n SELECT "collectionUuid" AS uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"\n FROM (\n SELECT "collectionUuid", "measurementDate", \'observation\' AS "productType", "siteId"\n FROM collection_regular_files_regular_file\n JOIN regular_file ON "regularFileUuid" = regular_file.uuid\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n UNION ALL\n SELECT "collectionUuid", "measurementDate", \'model\' AS "productType", "siteId"\n FROM collection_model_files_model_file\n JOIN model_file ON "modelFileUuid" = model_file.uuid\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n ) AS collection_file\n GROUP BY "collectionUuid", "measurementDate", "productType", "siteId"\n ) object ON "objectUuid" = object.uuid\n GROUP BY "downloadDate", "ip", "country", "measurementDate", "productType", "siteId"',
+      ],
+    );
+  }
+
+  /** Drops the view and recreates it with the original IP filter, updating typeorm_metadata. */
+  public async down(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`DELETE FROM "typeorm_metadata" WHERE "type" = $1 AND "name" = $2 AND "schema" = $3`, [
+      "MATERIALIZED_VIEW",
+      "download_stats",
+      "public",
+    ]);
+    await queryRunner.query(`DROP MATERIALIZED VIEW "download_stats"`);
+    await queryRunner.query(`CREATE MATERIALIZED VIEW "download_stats" AS SELECT
+      "createdAt"::date AS "downloadDate",
+      "ip",
+      "country",
+      "measurementDate",
+      "productType",
+      "siteId",
+      SUM("downloads") AS "downloads"
+    FROM download
+    JOIN (
+      SELECT uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"
+      FROM (
+        SELECT uuid, "measurementDate", 'observation' AS "productType", "siteId"
+        FROM regular_file
+        JOIN product_variable USING ("productId")
+        WHERE product_variable."actrisName" IS NOT NULL
+        UNION ALL
+        SELECT uuid, "measurementDate", 'model' AS "productType", "siteId"
+        FROM model_file
+        JOIN product_variable USING ("productId")
+        WHERE product_variable."actrisName" IS NOT NULL
+      ) AS file
+      GROUP BY uuid, "measurementDate", "productType", "siteId"
+      UNION ALL
+      SELECT "collectionUuid" AS uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"
+      FROM (
+        SELECT "collectionUuid", "measurementDate", 'observation' AS "productType", "siteId"
+        FROM collection_regular_files_regular_file
+        JOIN regular_file ON "regularFileUuid" = regular_file.uuid
+        JOIN product_variable USING ("productId")
+        WHERE product_variable."actrisName" IS NOT NULL
+        UNION ALL
+        SELECT "collectionUuid", "measurementDate", 'model' AS "productType", "siteId"
+        FROM collection_model_files_model_file
+        JOIN model_file ON "modelFileUuid" = model_file.uuid
+        JOIN product_variable USING ("productId")
+        WHERE product_variable."actrisName" IS NOT NULL
+      ) AS collection_file
+      GROUP BY "collectionUuid", "measurementDate", "productType", "siteId"
+    ) object ON "objectUuid" = object.uuid
+    WHERE
+      ip NOT IN ('', '::ffff:127.0.0.1')
+      AND ip NOT LIKE '192.168.%'
+      AND ip NOT LIKE '193.166.223.%'
+    GROUP BY "downloadDate", "ip", "country", "measurementDate", "productType", "siteId"`);
+    await queryRunner.query(
+      `INSERT INTO "typeorm_metadata"("database", "schema", "table", "type", "name", "value") VALUES (DEFAULT, $1, DEFAULT, $2, $3, $4)`,
+      [
+        "public",
+        "MATERIALIZED_VIEW",
+        "download_stats",
+        'SELECT\n "createdAt"::date AS "downloadDate",\n "ip",\n "country",\n "measurementDate",\n "productType",\n "siteId",\n SUM("downloads") AS "downloads"\n FROM download\n JOIN (\n SELECT uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"\n FROM (\n SELECT uuid, "measurementDate", \'observation\' AS "productType", "siteId"\n FROM regular_file\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n UNION ALL\n SELECT uuid, "measurementDate", \'model\' AS "productType", "siteId"\n FROM model_file\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n ) AS file\n GROUP BY uuid, "measurementDate", "productType", "siteId"\n UNION ALL\n SELECT "collectionUuid" AS uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"\n FROM (\n SELECT "collectionUuid", "measurementDate", \'observation\' AS "productType", "siteId"\n FROM collection_regular_files_regular_file\n JOIN regular_file ON "regularFileUuid" = regular_file.uuid\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n UNION ALL\n SELECT "collectionUuid", "measurementDate", \'model\' AS "productType", "siteId"\n FROM collection_model_files_model_file\n JOIN model_file ON "modelFileUuid" = model_file.uuid\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n ) AS collection_file\n GROUP BY "collectionUuid", "measurementDate", "productType", "siteId"\n ) object ON "objectUuid" = object.uuid\n WHERE\n ip NOT IN (\'\', \'::ffff:127.0.0.1\')\n AND ip NOT LIKE \'192.168.%\'\n AND ip NOT LIKE \'193.166.223.%\'\n GROUP BY "downloadDate", "ip", "country", "measurementDate", "productType", "siteId"',
+      ],
+    );
+  }
+}
diff --git a/backend/src/routes/download.ts b/backend/src/routes/download.ts
index 119ce4e22..8ded24e32 100644
--- a/backend/src/routes/download.ts
+++ b/backend/src/routes/download.ts
@@ -21,6 +21,7 @@ import env from "../lib/env";
 import { UploadRoutes } from "./upload";
 import { CitationService } from "../lib/cite";
 import { citation2txt } from "./reference";
+import * as ipaddr from "ipaddr.js";
 
 const LICENSE_TEXT = readFileSync("data/CC-BY-4.0.txt");
 
@@ -165,7 +166,18 @@ export class DownloadRoutes {
   }
 
   private async trackDownload(req: Request, type: ObjectType, uuid: string) {
-    if (!req.ip) return;
+    if (!req.ip || !ipaddr.isValid(req.ip)) return;
+    // Unwrap IPv4-mapped IPv6 (e.g. ::ffff:127.0.0.1) so range checks see the IPv4 address.
+    const ip = ipaddr.process(req.ip);
+    const ipRange = ip.range();
+    // Skip internal traffic. match() throws when the address families differ,
+    // so only compare against configured ranges of the same kind as the client.
+    const isConfiguredPrivate = env.PRIVATE_IP_RANGES.some(
+      (range) => range[0].kind() === ip.kind() && ip.match(range),
+    );
+    if (ipRange === "loopback" || ipRange === "private" || isConfiguredPrivate) {
+      return;
+    }
     const result = this.ipLookup.get(req.ip);
     const dl = new Download(type, uuid, req.ip, result?.country?.iso_code);
     await this.downloadRepo.save(dl);
diff --git a/backend/test.env b/backend/test.env
index f3b954a20..22e754dd8 100644
--- a/backend/test.env
+++ b/backend/test.env
@@ -29,3 +29,4 @@ LABELLING_URL=http://localhost:5803
 HANDLE_API_URL=http://localhost:5804
 DVAS_URL=https://dvas.test
 DC_URL=https://dc.test
+PRIVATE_IP_RANGES=193.169.0.0/16
diff --git a/backend/tests/e2e/nc2api.test.ts b/backend/tests/e2e/nc2api.test.ts
index 225ab83e6..0c1c8815d 100644
--- a/backend/tests/e2e/nc2api.test.ts
+++ b/backend/tests/e2e/nc2api.test.ts
@@ -99,24 +99,45 @@ describe("after PUTting metadata to API", () => {
   });
 
   it("serves the file and increases download count", async () => {
-    return axios
-      .get(`${backendPublicUrl}download/product/${expectedJson.uuid}/${s3key}`, { responseType: "arraybuffer" })
-      .then((response) => {
-        expect(response.status).toEqual(200);
-        const hash = createHash("sha256");
-        hash.update(response.data);
-        expect(hash.digest("hex")).toEqual(expectedJson.checksum);
-        return expect(
-          downloadRepo.existsBy({
-            objectUuid: expectedJson.uuid,
-            objectType: ObjectType.Product,
-            ip: "2.125.160.216",
-            country: "GB",
-          }),
-        ).resolves.toBe(true);
-      });
+    const response = await axios.get(`${backendPublicUrl}download/product/${expectedJson.uuid}/${s3key}`, {
+      responseType: "arraybuffer",
+    });
+    expect(response.status).toEqual(200);
+    const hash = createHash("sha256");
+    hash.update(response.data);
+    expect(hash.digest("hex")).toEqual(expectedJson.checksum);
+    await expect(
+      downloadRepo.existsBy({
+        objectUuid: expectedJson.uuid,
+        objectType: ObjectType.Product,
+        ip: "2.125.160.216",
+        country: "GB",
+      }),
+    ).resolves.toBe(true);
   });
 
+  ["::ffff:127.0.0.1", "192.168.0.1", "193.169.0.1"].forEach((ip) =>
+    it(`serves the file but doesn't increase download count from ${ip}`, async () => {
+      const response = await axios.get(`${backendPublicUrl}download/product/${expectedJson.uuid}/${s3key}`, {
+        responseType: "arraybuffer",
+        headers: {
+          "X-Forwarded-For": ip,
+        },
+      });
+      expect(response.status).toEqual(200);
+      const hash = createHash("sha256");
+      hash.update(response.data);
+      expect(hash.digest("hex")).toEqual(expectedJson.checksum);
+      await expect(
+        downloadRepo.existsBy({
+          objectUuid: expectedJson.uuid,
+          objectType: ObjectType.Product,
+          ip,
+        }),
+      ).resolves.toBe(false);
+    }),
+  );
+
   it("responds with 400 if file not uploaded", async () => {
     return expect(axios.put(`${backendPrivateUrl}files/notfound`, inputJson)).rejects.toMatchObject({
       response: { status: 400 },