From 9dce492de402fe04a867668511181216710fcb27 Mon Sep 17 00:00:00 2001 From: Tiago R Date: Sun, 26 Nov 2023 12:04:39 +0000 Subject: [PATCH 1/4] remove trailing dot from FQDN for TLD check Signed-off-by: GitHub --- backend/src/utils.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/src/utils.ts b/backend/src/utils.ts index 1bb4d6ce6..da21ee467 100644 --- a/backend/src/utils.ts +++ b/backend/src/utils.ts @@ -647,7 +647,9 @@ export function getUrlsInString(str: string, onlyUnique = false): MatchedURL[] { return urls; } - const hostnameParts = matchUrl.hostname.split("."); + const hostname = matchUrl.hostname.endsWith(".") ? matchUrl.hostname.slice(0, -1) : matchUrl.hostname; + + const hostnameParts = hostname.split("."); const tld = hostnameParts[hostnameParts.length - 1]; if (tlds.includes(tld)) { urls.push(matchUrl); From abd5b1558869e4fe423bf946b3b79a813be57dff Mon Sep 17 00:00:00 2001 From: Tiago R Date: Sun, 26 Nov 2023 12:25:05 +0000 Subject: [PATCH 2/4] yeet all trailing characters from TLDs Signed-off-by: GitHub --- backend/src/utils.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/backend/src/utils.ts b/backend/src/utils.ts index da21ee467..1d31ac524 100644 --- a/backend/src/utils.ts +++ b/backend/src/utils.ts @@ -625,6 +625,7 @@ const plainLinkRegex = /((?!https?:\/\/)\S)+\.\S+/; // anything.anything, withou // Both of the above, with precedence on the first one const urlRegex = new RegExp(`(${realLinkRegex.source}|${plainLinkRegex.source})`, "g"); const protocolRegex = /^[a-z]+:\/\//; +const hostnameTldRegex = /^[a-z]$/; interface MatchedURL extends URL { input: string; @@ -647,7 +648,15 @@ export function getUrlsInString(str: string, onlyUnique = false): MatchedURL[] { return urls; } - const hostname = matchUrl.hostname.endsWith(".") ? matchUrl.hostname.slice(0, -1) : matchUrl.hostname; + //let hostname = matchUrl.hostname.endsWith(".") ? matchUrl.hostname.slice(0, -1) : matchUrl.hostname; + let hostname = matchUrl.hostname.toLowerCase(); + + if (hostname.length > 3) { + while (!hostnameTldRegex.test(hostname.at(-1)!)) { + if (!hostname.length) break; + hostname = hostname.slice(0, -1); + } + } const hostnameParts = hostname.split("."); const tld = hostnameParts[hostnameParts.length - 1]; From 089d7a7b65d07ade0d914527f8837a6c031c2ca1 Mon Sep 17 00:00:00 2001 From: Tiago R Date: Sun, 26 Nov 2023 12:34:15 +0000 Subject: [PATCH 3/4] oops Signed-off-by: GitHub --- backend/src/utils.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/src/utils.ts b/backend/src/utils.ts index 1d31ac524..7b5460601 100644 --- a/backend/src/utils.ts +++ b/backend/src/utils.ts @@ -648,7 +648,6 @@ export function getUrlsInString(str: string, onlyUnique = false): MatchedURL[] { return urls; } - //let hostname = matchUrl.hostname.endsWith(".") ? matchUrl.hostname.slice(0, -1) : matchUrl.hostname; let hostname = matchUrl.hostname.toLowerCase(); if (hostname.length > 3) { From d4c591b8eecb63b9e00ef773c07349c04d267c64 Mon Sep 17 00:00:00 2001 From: Tiago R Date: Fri, 29 Dec 2023 03:30:40 +0000 Subject: [PATCH 4/4] move dumb loop to regex replace Signed-off-by: GitHub --- backend/src/utils.ts | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/backend/src/utils.ts b/backend/src/utils.ts index 7b5460601..c6c705db6 100644 --- a/backend/src/utils.ts +++ b/backend/src/utils.ts @@ -625,7 +625,6 @@ const plainLinkRegex = /((?!https?:\/\/)\S)+\.\S+/; // anything.anything, withou // Both of the above, with precedence on the first one const urlRegex = new RegExp(`(${realLinkRegex.source}|${plainLinkRegex.source})`, "g"); const protocolRegex = /^[a-z]+:\/\//; -const hostnameTldRegex = /^[a-z]$/; interface MatchedURL extends URL { input: string; @@ -651,10 +650,7 @@ export function getUrlsInString(str: string, onlyUnique = false): MatchedURL[] { let hostname = matchUrl.hostname.toLowerCase(); if (hostname.length > 3) { - while (!hostnameTldRegex.test(hostname.at(-1)!)) { - if (!hostname.length) break; - hostname = hostname.slice(0, -1); - } + hostname = hostname.replace(/[^a-z]+$/, ""); } const hostnameParts = hostname.split(".");