Skip to content

Commit

Permalink
Skip tracking of downloads from private IPs
Browse files Browse the repository at this point in the history
  • Loading branch information
siiptuo committed Nov 29, 2024
1 parent e7c08f1 commit 8ffde37
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 31 deletions.
1 change: 1 addition & 0 deletions .github/ci.env
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ LABELLING_URL=http://localhost:5803
HANDLE_API_URL=http://localhost:5804
DVAS_URL=https://dvas.test
DC_URL=https://dc.test
PRIVATE_IP_RANGES=193.169.0.0/16
30 changes: 21 additions & 9 deletions backend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"basic-auth": "^2.0.1",
"express": "^5.0.1",
"express-rate-limit": "^7.1.5",
"ipaddr.js": "^2.2.0",
"maxmind": "^4.3.8",
"nodemon": "^3.0.1",
"pg": "^8.8.0",
Expand Down
4 changes: 0 additions & 4 deletions backend/src/entity/Download.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,6 @@ export class Download {
) AS collection_file
GROUP BY "collectionUuid", "measurementDate", "productType", "siteId"
) object ON "objectUuid" = object.uuid
WHERE
ip NOT IN ('', '::ffff:127.0.0.1')
AND ip NOT LIKE '192.168.%'
AND ip NOT LIKE '193.166.223.%'
GROUP BY "downloadDate", "ip", "country", "measurementDate", "productType", "siteId"
`,
materialized: true,
Expand Down
9 changes: 8 additions & 1 deletion backend/src/lib/env.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { URL } from "url";
import { URL } from "node:url";
import * as ipaddr from "ipaddr.js";

const requiredVars = [
"NODE_ENV",
Expand Down Expand Up @@ -78,6 +79,7 @@ interface Env {
SLACK_NOTIFICATION_CHANNEL?: string;
DVAS_URL: string;
DC_URL: string;
PRIVATE_IP_RANGES: [ipaddr.IPv4 | ipaddr.IPv6, number][];
}

const env: Env = {
Expand All @@ -99,6 +101,7 @@ const env: Env = {
typeof rawEnv.MATOMO_START_DATE !== "undefined" ? readIsoDate(rawEnv.MATOMO_START_DATE) : undefined,
DVAS_URL: readUrl(rawEnv.DVAS_URL),
DC_URL: readUrl(rawEnv.DC_URL),
PRIVATE_IP_RANGES: rawEnv.PRIVATE_IP_RANGES ? readIpRanges(rawEnv.PRIVATE_IP_RANGES) : [],
};

export default env;
Expand Down Expand Up @@ -136,3 +139,7 @@ function readIsoDate(input: string): string {
}
return input;
}

/**
 * Parses a comma-separated list of CIDR ranges (e.g. "10.0.0.0/8, 2001:db8::/32")
 * into the `[address, prefixLength]` tuples returned by `ipaddr.parseCIDR`.
 *
 * Whitespace around each entry is ignored. Empty entries (for example the one
 * left behind by a trailing or doubled comma) are skipped rather than being
 * passed to the parser, where they would throw.
 *
 * @param input - Raw comma-separated list of CIDR ranges.
 * @returns One tuple per non-empty entry, in input order.
 * @throws Error naming the offending entry if it is not valid CIDR notation.
 */
function readIpRanges(input: string): [ipaddr.IPv4 | ipaddr.IPv6, number][] {
  return input
    .split(",")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0)
    .map((entry) => {
      try {
        return ipaddr.parseCIDR(entry);
      } catch (err: unknown) {
        // Re-throw with the entry included so a misconfigured list is easy to locate.
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Invalid IP range '${entry}': ${reason}`);
      }
    });
}
129 changes: 129 additions & 0 deletions backend/src/migration/1732801567740-RemoveFilterFromDownloadStats.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import { MigrationInterface, QueryRunner } from "typeorm";

/**
 * Recreates the "download_stats" materialized view without its IP filter.
 *
 * `up` builds the view with no WHERE clause on `ip`; `down` restores the
 * previous definition, which excluded rows whose `ip` was empty, was
 * '::ffff:127.0.0.1', or matched '192.168.%' or '193.166.223.%'.
 *
 * Both directions follow the same four steps: delete the view's row from
 * "typeorm_metadata", drop the view, create it again, and insert a fresh
 * "typeorm_metadata" row holding the view's SELECT text.
 *
 * NOTE(review): this migration does not touch rows already stored in
 * "download"; any existing rows from the previously filtered addresses will
 * presumably be counted by the unfiltered view — confirm whether a cleanup is needed.
 */
export class RemoveFilterFromDownloadStats1732801567740 implements MigrationInterface {
name = "RemoveFilterFromDownloadStats1732801567740";

/**
 * Replaces "download_stats" with the unfiltered definition.
 *
 * @param queryRunner - Runner used to execute each SQL statement.
 */
public async up(queryRunner: QueryRunner): Promise<void> {
// Remove the metadata row describing the current view definition.
await queryRunner.query(`DELETE FROM "typeorm_metadata" WHERE "type" = $1 AND "name" = $2 AND "schema" = $3`, [
"MATERIALIZED_VIEW",
"download_stats",
"public",
]);
// No IF EXISTS: the statement expects the view to be present.
await queryRunner.query(`DROP MATERIALIZED VIEW "download_stats"`);
// Same aggregation as before, but without the WHERE clause on "ip".
await queryRunner.query(`CREATE MATERIALIZED VIEW "download_stats" AS
SELECT
"createdAt"::date AS "downloadDate",
"ip",
"country",
"measurementDate",
"productType",
"siteId",
SUM("downloads") AS "downloads"
FROM download
JOIN (
SELECT uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"
FROM (
SELECT uuid, "measurementDate", 'observation' AS "productType", "siteId"
FROM regular_file
JOIN product_variable USING ("productId")
WHERE product_variable."actrisName" IS NOT NULL
UNION ALL
SELECT uuid, "measurementDate", 'model' AS "productType", "siteId"
FROM model_file
JOIN product_variable USING ("productId")
WHERE product_variable."actrisName" IS NOT NULL
) AS file
GROUP BY uuid, "measurementDate", "productType", "siteId"
UNION ALL
SELECT "collectionUuid" AS uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"
FROM (
SELECT "collectionUuid", "measurementDate", 'observation' AS "productType", "siteId"
FROM collection_regular_files_regular_file
JOIN regular_file ON "regularFileUuid" = regular_file.uuid
JOIN product_variable USING ("productId")
WHERE product_variable."actrisName" IS NOT NULL
UNION ALL
SELECT "collectionUuid", "measurementDate", 'model' AS "productType", "siteId"
FROM collection_model_files_model_file
JOIN model_file ON "modelFileUuid" = model_file.uuid
JOIN product_variable USING ("productId")
WHERE product_variable."actrisName" IS NOT NULL
) AS collection_file
GROUP BY "collectionUuid", "measurementDate", "productType", "siteId"
) object ON "objectUuid" = object.uuid
GROUP BY "downloadDate", "ip", "country", "measurementDate", "productType", "siteId"
`);
// Record the new SELECT text in "typeorm_metadata".
// NOTE(review): presumably this must match the view expression declared on the
// entity so TypeORM sees no pending change — confirm.
await queryRunner.query(
`INSERT INTO "typeorm_metadata"("database", "schema", "table", "type", "name", "value") VALUES (DEFAULT, $1, DEFAULT, $2, $3, $4)`,
[
"public",
"MATERIALIZED_VIEW",
"download_stats",
'SELECT\n "createdAt"::date AS "downloadDate",\n "ip",\n "country",\n "measurementDate",\n "productType",\n "siteId",\n SUM("downloads") AS "downloads"\n FROM download\n JOIN (\n SELECT uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"\n FROM (\n SELECT uuid, "measurementDate", \'observation\' AS "productType", "siteId"\n FROM regular_file\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n UNION ALL\n SELECT uuid, "measurementDate", \'model\' AS "productType", "siteId"\n FROM model_file\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n ) AS file\n GROUP BY uuid, "measurementDate", "productType", "siteId"\n UNION ALL\n SELECT "collectionUuid" AS uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"\n FROM (\n SELECT "collectionUuid", "measurementDate", \'observation\' AS "productType", "siteId"\n FROM collection_regular_files_regular_file\n JOIN regular_file ON "regularFileUuid" = regular_file.uuid\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n UNION ALL\n SELECT "collectionUuid", "measurementDate", \'model\' AS "productType", "siteId"\n FROM collection_model_files_model_file\n JOIN model_file ON "modelFileUuid" = model_file.uuid\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n ) AS collection_file\n GROUP BY "collectionUuid", "measurementDate", "productType", "siteId"\n ) object ON "objectUuid" = object.uuid\n GROUP BY "downloadDate", "ip", "country", "measurementDate", "productType", "siteId"',
],
);
}

/**
 * Restores the previous "download_stats" definition, including the WHERE
 * clause that excludes empty, '::ffff:127.0.0.1', '192.168.%' and
 * '193.166.223.%' addresses.
 *
 * @param queryRunner - Runner used to execute each SQL statement.
 */
public async down(queryRunner: QueryRunner): Promise<void> {
// Remove the metadata row describing the unfiltered view.
await queryRunner.query(`DELETE FROM "typeorm_metadata" WHERE "type" = $1 AND "name" = $2 AND "schema" = $3`, [
"MATERIALIZED_VIEW",
"download_stats",
"public",
]);
await queryRunner.query(`DROP MATERIALIZED VIEW "download_stats"`);
// Identical aggregation to `up`, plus the WHERE clause on "ip".
await queryRunner.query(`CREATE MATERIALIZED VIEW "download_stats" AS SELECT
"createdAt"::date AS "downloadDate",
"ip",
"country",
"measurementDate",
"productType",
"siteId",
SUM("downloads") AS "downloads"
FROM download
JOIN (
SELECT uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"
FROM (
SELECT uuid, "measurementDate", 'observation' AS "productType", "siteId"
FROM regular_file
JOIN product_variable USING ("productId")
WHERE product_variable."actrisName" IS NOT NULL
UNION ALL
SELECT uuid, "measurementDate", 'model' AS "productType", "siteId"
FROM model_file
JOIN product_variable USING ("productId")
WHERE product_variable."actrisName" IS NOT NULL
) AS file
GROUP BY uuid, "measurementDate", "productType", "siteId"
UNION ALL
SELECT "collectionUuid" AS uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"
FROM (
SELECT "collectionUuid", "measurementDate", 'observation' AS "productType", "siteId"
FROM collection_regular_files_regular_file
JOIN regular_file ON "regularFileUuid" = regular_file.uuid
JOIN product_variable USING ("productId")
WHERE product_variable."actrisName" IS NOT NULL
UNION ALL
SELECT "collectionUuid", "measurementDate", 'model' AS "productType", "siteId"
FROM collection_model_files_model_file
JOIN model_file ON "modelFileUuid" = model_file.uuid
JOIN product_variable USING ("productId")
WHERE product_variable."actrisName" IS NOT NULL
) AS collection_file
GROUP BY "collectionUuid", "measurementDate", "productType", "siteId"
) object ON "objectUuid" = object.uuid
WHERE
ip NOT IN ('', '::ffff:127.0.0.1')
AND ip NOT LIKE '192.168.%'
AND ip NOT LIKE '193.166.223.%'
GROUP BY "downloadDate", "ip", "country", "measurementDate", "productType", "siteId"`);
// Record the filtered SELECT text in "typeorm_metadata".
await queryRunner.query(
`INSERT INTO "typeorm_metadata"("database", "schema", "table", "type", "name", "value") VALUES (DEFAULT, $1, DEFAULT, $2, $3, $4)`,
[
"public",
"MATERIALIZED_VIEW",
"download_stats",
'SELECT\n "createdAt"::date AS "downloadDate",\n "ip",\n "country",\n "measurementDate",\n "productType",\n "siteId",\n SUM("downloads") AS "downloads"\n FROM download\n JOIN (\n SELECT uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"\n FROM (\n SELECT uuid, "measurementDate", \'observation\' AS "productType", "siteId"\n FROM regular_file\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n UNION ALL\n SELECT uuid, "measurementDate", \'model\' AS "productType", "siteId"\n FROM model_file\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n ) AS file\n GROUP BY uuid, "measurementDate", "productType", "siteId"\n UNION ALL\n SELECT "collectionUuid" AS uuid, "measurementDate", "productType", "siteId", COUNT(*) AS "downloads"\n FROM (\n SELECT "collectionUuid", "measurementDate", \'observation\' AS "productType", "siteId"\n FROM collection_regular_files_regular_file\n JOIN regular_file ON "regularFileUuid" = regular_file.uuid\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n UNION ALL\n SELECT "collectionUuid", "measurementDate", \'model\' AS "productType", "siteId"\n FROM collection_model_files_model_file\n JOIN model_file ON "modelFileUuid" = model_file.uuid\n JOIN product_variable USING ("productId")\n WHERE product_variable."actrisName" IS NOT NULL\n ) AS collection_file\n GROUP BY "collectionUuid", "measurementDate", "productType", "siteId"\n ) object ON "objectUuid" = object.uuid\n WHERE\n ip NOT IN (\'\', \'::ffff:127.0.0.1\')\n AND ip NOT LIKE \'192.168.%\'\n AND ip NOT LIKE \'193.166.223.%\'\n GROUP BY "downloadDate", "ip", "country", "measurementDate", "productType", "siteId"',
],
);
}
}
8 changes: 7 additions & 1 deletion backend/src/routes/download.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import env from "../lib/env";
import { UploadRoutes } from "./upload";
import { CitationService } from "../lib/cite";
import { citation2txt } from "./reference";
import * as ipaddr from "ipaddr.js";

const LICENSE_TEXT = readFileSync("data/CC-BY-4.0.txt");

Expand Down Expand Up @@ -165,7 +166,12 @@ export class DownloadRoutes {
}

private async trackDownload(req: Request, type: ObjectType, uuid: string) {
if (!req.ip) return;
if (!req.ip || !ipaddr.isValid(req.ip)) return;
const ip = ipaddr.process(req.ip);
const ipRange = ip.range();
if (ipRange === "loopback" || ipRange === "private" || env.PRIVATE_IP_RANGES.some((range) => ip.match(range))) {
return;
}
const result = this.ipLookup.get(req.ip);
const dl = new Download(type, uuid, req.ip, result?.country?.iso_code);
await this.downloadRepo.save(dl);
Expand Down
1 change: 1 addition & 0 deletions backend/test.env
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ LABELLING_URL=http://localhost:5803
HANDLE_API_URL=http://localhost:5804
DVAS_URL=https://dvas.test
DC_URL=https://dc.test
PRIVATE_IP_RANGES=193.169.0.0/16
53 changes: 37 additions & 16 deletions backend/tests/e2e/nc2api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,24 +99,45 @@ describe("after PUTting metadata to API", () => {
});

it("serves the file and increases download count", async () => {
return axios
.get(`${backendPublicUrl}download/product/${expectedJson.uuid}/${s3key}`, { responseType: "arraybuffer" })
.then((response) => {
expect(response.status).toEqual(200);
const hash = createHash("sha256");
hash.update(response.data);
expect(hash.digest("hex")).toEqual(expectedJson.checksum);
return expect(
downloadRepo.existsBy({
objectUuid: expectedJson.uuid,
objectType: ObjectType.Product,
ip: "2.125.160.216",
country: "GB",
}),
).resolves.toBe(true);
});
const response = await axios.get(`${backendPublicUrl}download/product/${expectedJson.uuid}/${s3key}`, {
responseType: "arraybuffer",
});
expect(response.status).toEqual(200);
const hash = createHash("sha256");
hash.update(response.data);
expect(hash.digest("hex")).toEqual(expectedJson.checksum);
await expect(
downloadRepo.existsBy({
objectUuid: expectedJson.uuid,
objectType: ObjectType.Product,
ip: "2.125.160.216",
country: "GB",
}),
).resolves.toBe(true);
});

// One test per client address, sent via X-Forwarded-For: the file must still be
// served, but no download row may exist for that address afterwards.
// 193.169.0.1 lies inside the PRIVATE_IP_RANGES value (193.169.0.0/16) set in the test env.
for (const ip of ["::ffff:127.0.0.1", "192.168.0.1", "193.169.0.1"]) {
  it(`serves the file but doesn't increase download count from ${ip}`, async () => {
    const fileUrl = `${backendPublicUrl}download/product/${expectedJson.uuid}/${s3key}`;
    const response = await axios.get(fileUrl, {
      responseType: "arraybuffer",
      headers: { "X-Forwarded-For": ip },
    });
    expect(response.status).toEqual(200);

    // The served bytes must match the checksum recorded in the metadata.
    const digest = createHash("sha256").update(response.data).digest("hex");
    expect(digest).toEqual(expectedJson.checksum);

    // No download row may have been stored for this address.
    const wasTracked = await downloadRepo.existsBy({
      objectUuid: expectedJson.uuid,
      objectType: ObjectType.Product,
      ip,
    });
    expect(wasTracked).toBe(false);
  });
}

it("responds with 400 if file not uploaded", async () => {
return expect(axios.put(`${backendPrivateUrl}files/notfound`, inputJson)).rejects.toMatchObject({
response: { status: 400 },
Expand Down

0 comments on commit 8ffde37

Please sign in to comment.