From 3e6e9fc0e639e7684bb4d5c7b562f2a9a8171a8a Mon Sep 17 00:00:00 2001
From: Andrei
Date: Sun, 21 Jul 2019 22:40:43 +0200
Subject: [PATCH] Updated - much improved parser for IP and ASN

---
Review notes (this section is commentary and is not part of the commit):

* What the patch does: moves parseDomainWhois() out of index.js into the new
  parsers.js, adds parseSimpleWhois() there (used for IP/ASN responses),
  removes whoisAsn() and the inline group parsing in whoisIpOrAsn(), and
  updates the tests to assert on parsed fields (range, route, ASName, ASNumber).
* parseSimpleWhois(whois) takes the raw WHOIS response string and returns an
  object: grouped blocks are stored under the name found in lineToGroup (or
  "Contact <id>" for role/person blocks), all other labels are flattened onto
  the result (renamed through renameLabels), and comment lines are returned in
  data.text. Only the first result of a multi-result response is parsed.
* Fix applied in the added lines: "for (key in group)" assigned to an
  undeclared variable (implicit global; ReferenceError in strict mode). It now
  declares the loop variable with const.
* Fix applied in the added lines: a line without ":" was appended to
  group[lastLabel] even when no such entry existed in the current group
  (no label seen yet, a new group just started, or the previous value was
  skipped because it contained "---"), producing values that start with the
  literal text "undefined". Such orphan continuation lines are now skipped.
* Not changed, worth a follow-up: in parseDomainWhois() "addedLabel" is never
  set to true, so the "lines[index] = ''" branch is dead code; and
  assert.throws(() => whoiser('-abc'), Error) only passes if whoiser() throws
  synchronously for an unrecognised query.
* Both fixes replace lines in place, so hunk sizes and the diffstat below are
  unchanged. Indentation was reconstructed (tabs); the blob ids on the
  "index" lines refer to the originally committed content.

 index.js     | 183 +++-----------------------------------------
 parsers.js   | 209 +++++++++++++++++++++++++++++++++++++++++++++++++++
 test/test.js |  30 ++++----
 3 files changed, 234 insertions(+), 188 deletions(-)
 create mode 100644 parsers.js

diff --git a/index.js b/index.js
index 0b32da2..f3432ce 100644
--- a/index.js
+++ b/index.js
@@ -2,6 +2,7 @@ const net = require('net')
 const https = require('https')
 const url = require('url')
 const punycode = require('punycode')
+const { parseSimpleWhois, parseDomainWhois } = require('./parsers.js')
 const { splitStringBy, requestGetBody, isTld, isDomain } = require('./utils.js')
 
 // cache
@@ -21,12 +22,12 @@ const misspelledWhoisServer = {
 const whoisQuery = ({host = null, port = 43, timeout = 15000, query = '', querySuffix = "\r\n"} = {}) => {
 	return new Promise((resolve, reject) => {
 		let data = '';
-		const socket = net.connect({host: host, port: port}, () => socket.write(query + querySuffix));
-		socket.setTimeout(timeout);
-		socket.on('data', chunk => data += chunk);
-		socket.on('close', hadError => resolve(data));
-		socket.on('timeout', () => socket.destroy(new Error('Timeout')));
-		socket.on('error', reject);
+		const socket = net.connect({host: host, port: port}, () => socket.write(query + querySuffix))
+		socket.setTimeout(timeout)
+		socket.on('data', chunk => data += chunk)
+		socket.on('close', hadError => resolve(data))
+		socket.on('timeout', () => socket.destroy(new Error('Timeout')))
+		socket.on('error', reject)
 	});
 }
 
@@ -39,13 +40,13 @@ const allTlds = async () => {
 
 
 const whoisTld = async (tld, {timeout = 15000} = {}) => {
-	let result;
+	let result
 
 	try {
 		result = await whoisQuery({
 			host: 'whois.iana.org',
 			query: tld,
-			timeout: timeout
+			timeout
 		});
 	} catch (err) {
 		throw err
@@ -137,119 +138,6 @@ const whoisDomain = async (domain, {host = null, timeout = 15000, follow = 2} =
 }
 
 
-const parseDomainWhois = whois => {
-	const renameLabels = {
-		'domain name': 'Domain Name',
-		'nameserver': 'Name Server',
-		'nserver': 'Name Server',
-		'name servers': 'Name Server'
-	};
-	const ignoreLabels = ['note', 'notes', 'please note', 'important', 'notice', 'terms of use', 'web-based whois', 'https', 'to', 'registration service provider'];
-	const ignoreTexts = [
-		'more information',
-		'lawful purposes',
-		'to contact',
-		'use this data',
-		'register your domain',
-		'copy and paste',
-		'find out more',
-		'this',
-		'please',
-		'important',
-		'prices',
-		'payment',
-		'you agree',
-		'restrictions', // found on .co.uk domains
-		'queried object', // found in abc.tech
-		'service', // found in .au domains
-		'terms'
-	];
-
-	let text = [];
-	let data = {
-		'Domain Status': [],
-		'Name Server': []
-	};
-	let lines = whois.trim().split('\n').map(line => line.trim());
-
-	// Fix "label: \n value" format
-	lines.forEach((line, index) => {
-		if (!line.startsWith('%') && line.endsWith(':')) {
-			let addedLabel = false;
-
-			for (let i = 1; i <= 5; i++) {
-				if (!lines[index + i] || !lines[index + i].length || lines[index + i].includes(': ') || lines[index + i].endsWith(':')) {
-					break;
-				}
-
-				lines[index + i] = line + ' ' + lines[index + i];
-			}
-
-			if (addedLabel) {
-				lines[index] = '';
-			}
-		}
-	});
-
-	lines.forEach(line => {
-
-		if ((line.includes(': ') || line.endsWith(':')) && !line.startsWith('%')) {
-			let [label, value] = splitStringBy(line, line.indexOf(':')).map(info => info.trim())
-
-			if (renameLabels[label.toLowerCase()]) {
-				label = renameLabels[label.toLowerCase()]
-			}
-
-			if (data[label] && Array.isArray(data[label])) {
-				data[label].push(value);
-			} else if (!ignoreLabels.includes(label.toLowerCase()) && !ignoreTexts.some(text => label.toLowerCase().includes(text))) {
-				data[label] = data[label] ? data[label] + ' ' + value : value;
-			} else {
-				text.push(line);
-			}
-		} else {
-			text.push(line);
-		}
-
-	});
-
-	// remove invalid Name Servers (not valid hostname)
-	data['Name Server'] = data['Name Server'].map(nameServer => nameServer.split(' ')[0]).filter(isDomain)
-
-	// remove multiple empty lines
-	text = text.join("\n").trim();
-	while (text.includes("\n\n\n")) {
-		text = text.replace("\n\n\n", "\n")
-	}
-
-	data.text = text.split("\n");
-
-	return data;
-}
-
-
-const whoisAsn = async (asn, {timeout = 15000} = {}) => {
-	let result;
-
-	try {
-		result = await whoisQuery({
-			host: 'whois.iana.org',
-			query: asn,
-			timeout: timeout
-		});
-	} catch (err) {
-		throw err
-	}
-
-	const data = parseSimpleWhois(result);
-
-	if (!data['as-block']) {
-		throw new Error(`AS "${asn}" not found`)
-	}
-
-	return data
-}
-
 const whoisIpOrAsn = async (query, {host = null, timeout = 15000} = {}) => {
 	let data = {}
 	const type = net.isIP(query) ? 'ip' : 'asn'
@@ -279,7 +167,6 @@ const whoisIpOrAsn = async (query, {host = null, timeout = 15000} = {}) => {
 	}
 
 	try {
-		let query = ip;
 
 		// hardcoded custom queries..
 		if (host === 'whois.arin.net' && type === 'ip') {
@@ -295,58 +182,6 @@ const whoisIpOrAsn = async (query, {host = null, timeout = 15000} = {}) => {
 		throw new Error(`WHOIS error "${err.message}"`)
 	}
 
-	const groups = whois.split("\n\n").map(group => {
-		let lines = group.split("\n").filter(line => line && !line.startsWith('%'));
-		let type = false;
-		let contactType = false;
-
-		lines.forEach(line => {
-			const [label, value] = splitStringBy(line, line.indexOf(':')).map(info => info.trim())
-
-			if (!type) {
-				type = ['organisation', 'contact'].includes(label) ? label : 'line';
-			}
-
-			if (type === 'contact') {
-				if (!data.contact) {
-					data.contact = {};
-				}
-
-				if (label === 'contact') {
-					contactType = value;
-					data.contact[contactType] = {};
-				} else {
-					if (data.contact[contactType][label]) {
-						data.contact[contactType][label] += "\n" + value;
-					} else {
-						data.contact[contactType][label] = value;
-					}
-				}
-			} else if (type === 'organisation') {
-				if (!data.organisation) {
-					data.organisation = {};
-				}
-
-				if (data.organisation[label]) {
-					data.organisation[label] += "\n" + value;
-				} else {
-					data.organisation[label] = value;
-				}
-			} else {
-				if (data[label]) {
-					if (!Array.isArray(data[label])) {
-						data[label] = [data[label]];
-					}
-					data[label].push(value);
-				} else {
-					data[label] = value;
-				}
-			}
-		});
-
-		return lines
-	});
-
 	return data
 }
 
diff --git a/parsers.js b/parsers.js
new file mode 100644
index 0000000..1010bae
--- /dev/null
+++ b/parsers.js
@@ -0,0 +1,209 @@
+const { splitStringBy, isDomain } = require('./utils.js')
+
+
+const parseSimpleWhois = whois => {
+	let data = {}
+	let text = []
+
+	const renameLabels = {
+		NetRange: 'range',
+		inetnum: 'range',
+		CIDR: 'route',
+		origin: 'asn',
+		OriginAS: 'asn',
+	}
+	const lineToGroup = {
+		OrgName: 'organisation',
+		organisation: 'organisation',
+		OrgAbuseHandle: 'contactAbuse',
+		irt: 'contactAbuse',
+		RAbuseHandle: 'contactAbuse',
+		OrgTechHandle: 'contactTechnical',
+		RTechHandle: 'contactTechnical',
+		OrgNOCHandle: 'contactNoc',
+		RNOCHandle: 'contactNoc',
+	}
+
+	if (whois.includes('returned 0 objects') || whois.includes('No match found')) {
+		return data
+	}
+
+	let resultNum = 0
+	let groups = [{}]
+	let lastLabel
+
+	whois.split("\n").forEach(line => {
+
+		// catch comment lines
+		if (line.startsWith('%') || line.startsWith('#')) {
+
+			// detect if an ASN or IP has multiple WHOIS results
+			if (line.includes('# start')) {
+				// nothing
+			} else if (line.includes('# end')) {
+				resultNum++
+			} else {
+				text.push(line)
+			}
+
+		} else if (resultNum === 0) {
+			// for the moment, parse only first WHOIS result
+
+			if (line) {
+
+				if (line.includes(':')) {
+					const [label, value] = splitStringBy(line, line.indexOf(':')).map(info => info.trim())
+					lastLabel = label
+
+					// 1) Filter out unnecessary info, 2) then detect if the label is already added to group
+					if (value.includes('---')) {
+						// do nothing with useless data
+					} else if (groups[groups.length - 1][label]) {
+						groups[groups.length - 1][label] += "\n" + value
+					} else {
+						groups[groups.length - 1][label] = value
+					}
+
+				} else if (lastLabel && groups[groups.length - 1][lastLabel] !== undefined) { // skip orphan continuation lines (no value to append to)
+					groups[groups.length - 1][lastLabel] += "\n" + line.trim()
+				}
+
+			} else if (Object.keys(groups[groups.length - 1]).length) {
+
+				// if empty line, means another info group starts
+				groups.push({})
+			}
+		}
+
+	})
+
+	groups.filter(group => Object.keys(group).length).forEach(group => {
+		const groupLabels = Object.keys(group)
+		let isGroup = false
+
+		// check if a label is marked as group
+		groupLabels.forEach(groupLabel => {
+			if (Object.keys(lineToGroup).includes(groupLabel)) {
+				isGroup = lineToGroup[groupLabel]
+			}
+		})
+
+		// check if a info group is a Contact in APNIC result
+		// @Link https://www.apnic.net/manage-ip/using-whois/guide/role/
+		if (!isGroup && groupLabels.includes('role')) {
+			isGroup = 'Contact ' + group.role.split(' ')[1]
+		} else if (!isGroup && groupLabels.includes('person')) {
+			isGroup = 'Contact ' + group['nic-hdl']
+		}
+
+		if (isGroup) {
+			data[isGroup] = group
+		} else {
+			for (const key in group) { // declare the loop variable; a bare "key" was an implicit global
+				const label = renameLabels[key] || key
+				data[label] = group[key]
+			}
+		}
+
+	})
+
+	// Append the WHOIS comments
+	data.text = text
+
+	return data
+}
+
+
+const parseDomainWhois = whois => {
+	const renameLabels = {
+		'domain name': 'Domain Name',
+		'nameserver': 'Name Server',
+		'nserver': 'Name Server',
+		'name servers': 'Name Server'
+	}
+	const ignoreLabels = ['note', 'notes', 'please note', 'important', 'notice', 'terms of use', 'web-based whois', 'https', 'to', 'registration service provider']
+	const ignoreTexts = [
+		'more information',
+		'lawful purposes',
+		'to contact',
+		'use this data',
+		'register your domain',
+		'copy and paste',
+		'find out more',
+		'this',
+		'please',
+		'important',
+		'prices',
+		'payment',
+		'you agree',
+		'restrictions', // found on .co.uk domains
+		'queried object', // found in abc.tech
+		'service', // found in .au domains
+		'terms'
+	]
+
+	let text = []
+	let data = {
+		'Domain Status': [],
+		'Name Server': []
+	}
+	let lines = whois.trim().split('\n').map(line => line.trim())
+
+	// Fix "label: \n value" format
+	lines.forEach((line, index) => {
+		if (!line.startsWith('%') && line.endsWith(':')) {
+			let addedLabel = false
+
+			for (let i = 1; i <= 5; i++) {
+				if (!lines[index + i] || !lines[index + i].length || lines[index + i].includes(': ') || lines[index + i].endsWith(':')) {
+					break
+				}
+
+				lines[index + i] = line + ' ' + lines[index + i]
+			}
+
+			if (addedLabel) {
+				lines[index] = ''
+			}
+		}
+	})
+
+	lines.forEach(line => {
+
+		if ((line.includes(': ') || line.endsWith(':')) && !line.startsWith('%')) {
+			let [label, value] = splitStringBy(line, line.indexOf(':')).map(info => info.trim())
+
+			if (renameLabels[label.toLowerCase()]) {
+				label = renameLabels[label.toLowerCase()]
+			}
+
+			if (data[label] && Array.isArray(data[label])) {
+				data[label].push(value)
+			} else if (!ignoreLabels.includes(label.toLowerCase()) && !ignoreTexts.some(text => label.toLowerCase().includes(text))) {
+				data[label] = data[label] ? data[label] + ' ' + value : value
+			} else {
+				text.push(line)
+			}
+		} else {
+			text.push(line)
+		}
+
+	})
+
+	// remove invalid Name Servers (not valid hostname)
+	data['Name Server'] = data['Name Server'].map(nameServer => nameServer.split(' ')[0]).filter(isDomain)
+
+	// remove multiple empty lines
+	text = text.join("\n").trim()
+	while (text.includes("\n\n\n")) {
+		text = text.replace("\n\n\n", "\n")
+	}
+
+	data.text = text.split("\n")
+
+	return data
+}
+
+
+module.exports.parseSimpleWhois = parseSimpleWhois
+module.exports.parseDomainWhois = parseDomainWhois
diff --git a/test/test.js b/test/test.js
index 2a443cd..b20092b 100644
--- a/test/test.js
+++ b/test/test.js
@@ -17,16 +17,17 @@ describe('Whoiser', function() {
 
 		it('should return IP WHOIS for "1.1.1.1"', async function() {
 			let whois = await whoiser('1.1.1.1')
-			assert.ok(whois.length)
+			assert.equal(whois.range, '1.1.1.0 - 1.1.1.255', 'IP Range doesn\'t match')
+			assert.equal(whois.route, '1.1.1.0/24', 'IP Route doesn\'t match')
 		});
 
-		it('should return AS WHOIS for "1234"', async function() {
-			let whois = await whoiser('1234')
-			assert.equal(whois['as-block'], '1234-1235', 'AS Block range doesn\'t match')
+		it('should return AS WHOIS for "15169"', async function() {
+			let whois = await whoiser('15169')
+			assert.equal(whois.ASName, 'GOOGLE', 'AS Name doesn\'t match')
 		});
 
 		it('should reject for unrecognised query "-abc"', function() {
-			assert.rejects(whoiser('-abc'))
+			assert.throws(() => whoiser('-abc'), Error)
 		});
 	});
 
@@ -117,29 +118,30 @@ describe('Whoiser', function() {
 	});
 
 	describe('#whoiser.asn()', function() {
-		it('should return WHOIS for "1234"', async function() {
-			let whois = await whoiser.asn(1234)
-			assert.equal(whois['as-block'], '1234-1235', 'AS Block range doesn\'t match')
+		it('should return WHOIS for "15169"', async function() {
+			let whois = await whoiser.asn(15169)
+			assert.equal(whois.ASNumber, '15169', 'AS Number doesn\'t match')
+			assert.equal(whois.ASName, 'GOOGLE', 'AS Name doesn\'t match')
 		});
 
 		it('should return WHOIS for "AS13335"', async function() {
 			let whois = await whoiser.asn('AS13335')
-			assert.equal(whois['as-block'], '13321-13352', 'AS Block range doesn\'t match')
+			assert.equal(whois.ASNumber, '13335', 'AS Number doesn\'t match')
+			assert.equal(whois.ASName, 'CLOUDFLARENET', 'AS Name doesn\'t match')
 		});
 	});
 
 	describe('#whoiser.ip()', function() {
 		it('should return WHOIS for "8.8.8.8"', async function() {
 			let whois = await whoiser.ip('8.8.8.8')
-			whois = whois.join("\n")
-			assert.notStrictEqual(whois.indexOf('NetRange: 8.8.8.0 - 8.8.8.255'), -1, 'IP range doesn\'t match')
+			assert.equal(whois.range, '8.0.0.0 - 8.127.255.255', 'IP Range doesn\'t match')
+			assert.equal(whois.route, '8.0.0.0/9', 'IP Route doesn\'t match')
 		});
 
 		it('should return WHOIS for "2606:4700:4700::1111"', async function() {
 			let whois = await whoiser.ip('2606:4700:4700::1111')
-			whois = whois.join("\n")
-			assert.notStrictEqual(whois.indexOf('NetRange: 2606:4700:: - 2606:4700:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF'), -1, 'IP range doesn\'t match')
-			assert.notStrictEqual(whois.indexOf('NetName: CLOUDFLARENET'), -1, 'NetName doesn\'t match')
+			assert.equal(whois.range, '2606:4700:: - 2606:4700:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF', 'IP Range doesn\'t match')
+			assert.equal(whois.route, '2606:4700::/32', 'IP Route doesn\'t match')
 		});
 	});
 