Skip to content

Commit

Permalink
yeet all trailing characters from TLDs
Browse files Browse the repository at this point in the history
Signed-off-by: GitHub <noreply@github.com>
  • Loading branch information
metal0 committed Dec 29, 2023
1 parent 900428d commit 4beffd4
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion backend/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,7 @@ const plainLinkRegex = /((?!https?:\/\/)\S)+\.\S+/; // anything.anything, withou
// Both of the above, with precedence on the first one.
// Built from the two patterns' sources as a single capture group, compiled with the global ("g") flag.
const urlRegex = new RegExp(`(${realLinkRegex.source}|${plainLinkRegex.source})`, "g");
// Matches a leading scheme: one or more lowercase ASCII letters followed by "://" (e.g. "https://").
const protocolRegex = /^[a-z]+:\/\//;
// Matches a string that is exactly one lowercase ASCII letter.
// NOTE: despite the name, this does not match a whole TLD; it is meant to be tested
// against a single character (such as the last character of a hostname).
const hostnameTldRegex = /^[a-z]$/;

interface MatchedURL extends URL {
input: string;
Expand All @@ -647,7 +648,15 @@ export function getUrlsInString(str: string, onlyUnique = false): MatchedURL[] {
return urls;
}

const hostname = matchUrl.hostname.endsWith(".") ? matchUrl.hostname.slice(0, -1) : matchUrl.hostname;
//let hostname = matchUrl.hostname.endsWith(".") ? matchUrl.hostname.slice(0, -1) : matchUrl.hostname;
let hostname = matchUrl.hostname.toLowerCase();

if (hostname.length > 3) {
while (!hostnameTldRegex.test(hostname.at(-1)!)) {
if (!hostname.length) break;
hostname = hostname.slice(0, -1);
}
}

const hostnameParts = hostname.split(".");
const tld = hostnameParts[hostnameParts.length - 1];
Expand Down

0 comments on commit 4beffd4

Please sign in to comment.