From 78104f3a49ec0eaf9b6ac7b7230686eb5a4ab3f9 Mon Sep 17 00:00:00 2001 From: Jeremy Frasier Date: Thu, 29 Nov 2018 22:00:23 -0500 Subject: [PATCH 1/2] Bumped version from 1.0.6 to 1.0.7 --- version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.txt b/version.txt index af0b7dd..238d6e8 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.0.6 +1.0.7 From 22f357a5a2e049e68255d34181d30814b460c144 Mon Sep 17 00:00:00 2001 From: Jeremy Frasier Date: Thu, 29 Nov 2018 22:02:41 -0500 Subject: [PATCH 2/2] Remove carriage return characters and repeated dots from host names --- gather-domains.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/gather-domains.sh b/gather-domains.sh index 08bd37d..e4b9b61 100755 --- a/gather-domains.sh +++ b/gather-domains.sh @@ -100,6 +100,16 @@ cut -d"," -f1 gathered.csv > scanme.csv # Remove characters that might break parsing sed -i '/^ *$/d;/@/d;s/ //g;s/\"//g;s/'\''//g' scanme.csv +# The latest Censys snapshot contains a host name that contains a few +# carriage return characters in the middle of it. Let's get rid of +# those. +sed -i 's/\r//g' scanme.csv + +# We collect a few host names that contain consecutive dots. These +# seem to always be typos, so replace multiple dots in host names with +# a single dot. +sed -i 's/\.\+/\./g' scanme.csv + # Move the scanme to the output directory mv scanme.csv $OUTPUT_DIR/scanme.csv