-
Notifications
You must be signed in to change notification settings - Fork 0
/
cat_domains_to_regexp
executable file
·33 lines (29 loc) · 1.32 KB
/
cat_domains_to_regexp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#! /usr/bin/env bash
#
# Uses the data from the repo mcn-tools-norid-cat-domains and formats it in a
# way that can be used by the script that builds a regexp for .no domains.

# Absolute path of the directory that contains this script (empty if the
# directory cannot be entered).
# CDPATH is cleared for the `cd` because, when it is set, `cd` may print the
# directory it resolved to on stdout, which would end up inside the captured
# value. `--` keeps a path that starts with '-' from being read as an option.
SOURCE_DIR="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# The script pipes every domain through the external `idn` program, so bail
# out early when it is not on PATH. `command -v` is a shell builtin lookup:
# unlike the unquoted `$(which idn)` test it does not depend on `which` being
# installed and is not subject to word splitting of the resolved path.
if ! command -v idn >/dev/null 2>&1; then
  echo "[ERROR] Could not find 'idn', exiting." 1>&2
  exit 1
fi
# We want both the IDN and the non-IDN version of every applicable domain:
# each line printed by list-all is emitted unchanged and once more after
# being passed through `idn --no-tld`. Where both forms are identical the
# duplicate is removed by the trailing sort/uniq.
#
# list-all is resolved relative to this script's directory (SOURCE_DIR)
# rather than the caller's working directory, so the script can be started
# from anywhere. Falls back to the current directory if SOURCE_DIR is unset
# or empty.
LIST_ALL="${SOURCE_DIR:-.}/mcn-tools-norid-cat-domains/list-all"
if [[ ! -x "$LIST_ALL" ]]; then
  # Without this check a missing helper would silently produce an empty list.
  echo "[ERROR] Could not find executable '$LIST_ALL', exiting." 1>&2
  exit 1
fi

# `read -r` keeps backslashes literal; leading/trailing whitespace is still
# trimmed by the default IFS. The `|| [[ -n ... ]]` clause keeps a final line
# that is not newline-terminated.
WHOIS_AND_IDN_UNIQ="$(
  "$LIST_ALL" | while read -r domain || [[ -n "$domain" ]]; do
    printf '%s\n' "$domain"
    printf '%s\n' "$domain" | idn --no-tld
  done | sort | uniq
)"
# Valid chars:
# https://www.norid.no/en/domeneregistrering/idn/idn_nyetegn/

#######################################
# Convert a newline-separated list of domains into a '|'-separated list of
# regexp-escaped names.
#
# Per input line:
#   - a trailing ".no" is removed,
#   - a line that is exactly "no." is dropped (presumably the bare TLD entry
#     from list-all -- TODO confirm),
#   - every "." is escaped as "\.",
#   - each listed non-ASCII letter is replaced by a "\x{HEX}" escape holding
#     its Unicode code point.
# The resulting lines are sorted byte-wise, de-duplicated and joined with
# '|', with no trailing separator and no trailing newline.
#
# Arguments: none
# Inputs:    domain list on stdin
# Outputs:   the joined list on stdout (nothing for empty input)
#######################################
domains_to_regexp() {
  sed -e 's/\.no$//' -e '/^no\.$/d' -e 's/\./\\./g' \
    -e 's/à/\\x{E0}/g' -e 's/á/\\x{E1}/g' -e 's/ä/\\x{E4}/g' \
    -e 's/å/\\x{E5}/g' -e 's/æ/\\x{E6}/g' -e 's/ç/\\x{E7}/g' \
    -e 's/è/\\x{E8}/g' -e 's/é/\\x{E9}/g' -e 's/ê/\\x{EA}/g' \
    -e 's/ï/\\x{EF}/g' -e 's/ñ/\\x{F1}/g' -e 's/ò/\\x{F2}/g' \
    -e 's/ó/\\x{F3}/g' -e 's/ô/\\x{F4}/g' -e 's/ö/\\x{F6}/g' \
    -e 's/ø/\\x{F8}/g' -e 's/ü/\\x{FC}/g' -e 's/č/\\x{010D}/g' \
    -e 's/đ/\\x{0111}/g' -e 's/ŋ/\\x{014B}/g' -e 's/ń/\\x{0144}/g' \
    -e 's/š/\\x{0161}/g' -e 's/ŧ/\\x{0167}/g' -e 's/ž/\\x{017E}/g' \
    | LC_ALL=C sort -u \
    | tr '\n' '|' \
    | head -c -1 # drop the '|' produced from the final newline (GNU head)
}

# printf instead of `echo -n`: the data is written verbatim no matter what it
# starts with or which echo flavour the shell provides.
printf '%s' "$WHOIS_AND_IDN_UNIQ" | domains_to_regexp