/*
 * whoiser
 * Version:
 * Whois info for domains, TLDs, IPs, and ASN
 * 633 lines (632 loc) • 22.9 kB
 * JavaScript
 */
import { splitStringBy, isDomain } from "./utils.js";
/**
 * Turn a generic WHOIS response into a single result object.
 *
 * The raw text is split into info groups by `whoisDataToGroups`. Each group is
 * then either stored as a named sub-object (organisation, abuse/technical/NOC
 * contacts, `contacts[<type>]`, `Contact <handle>`) or, when nothing marks it
 * as a group, flattened into the result with a few labels renamed.
 *
 * @param {string} whois - Raw WHOIS text.
 * @returns {object} Parsed data; always carries `__comments` and `__raw`.
 * @throws {Error} 'No WHOIS data found' when the response reports no match.
 */
export function parseSimpleWhois(whois) {
  // labels normalised to a common name (from -> to)
  const labelAliases = {
    NetRange: 'range',
    inetnum: 'range',
    CIDR: 'route',
    origin: 'asn',
    OriginAS: 'asn',
  };
  // labels whose presence turns the whole info block into a named group
  const groupMarkers = {
    contact: 'contact',
    OrgName: 'organisation',
    organisation: 'organisation',
    OrgAbuseHandle: 'contactAbuse',
    irt: 'contactAbuse',
    RAbuseHandle: 'contactAbuse',
    OrgTechHandle: 'contactTechnical',
    RTechHandle: 'contactTechnical',
    OrgNOCHandle: 'contactNoc',
    RNOCHandle: 'contactNoc',
  };
  const markerLabels = Object.keys(groupMarkers);

  const hasNoResults = whois.includes('returned 0 objects') || whois.includes('No match found');
  if (hasNoResults) {
    throw new Error('No WHOIS data found');
  }

  const { comments, groups } = whoisDataToGroups(whois);
  const data = {
    __comments: comments,
    __raw: whois,
  };

  for (const group of groups) {
    const labels = Object.keys(group);

    // the first label (in group order) that is a known marker names the group
    const marker = labels.find((label) => markerLabels.includes(label));
    let groupName = marker === undefined ? false : groupMarkers[marker];

    /**
     * An unmarked block holding "role" or "person" is a Contact in APNIC results
     * @see https://www.apnic.net/manage-ip/using-whois/guide/role/
     */
    if (!groupName) {
      if (labels.includes('role')) {
        groupName = `Contact ${group['role'].split(' ')[1]}`;
      } else if (labels.includes('person')) {
        groupName = `Contact ${group['nic-hdl']}`;
      }
    }

    if (groupName === 'contact') {
      // "contact" blocks are collected under data.contacts, keyed by contact type
      data.contacts ||= {};
      data.contacts[group['contact']] = group;
      continue;
    }

    if (groupName) {
      data[groupName] = group;
      continue;
    }

    // plain block: copy every field to the top level, renaming known labels
    for (const [key, value] of Object.entries(group)) {
      data[labelAliases[key] || key] = value;
    }
  }

  return data;
}
/**
 * Parse the raw WHOIS response of a domain into a label -> value object.
 *
 * Steps: split the response into lines, run the TLD-specific normalisers
 * (selected with `domain.endsWith(...)`), then read every "label: value" line.
 * Labels are renamed to a common vocabulary, values listed in `noData` are
 * blanked when `ignorePrivacy` is set, and lines that are comments, carry an
 * ignored label or have no label at all are collected in `text`.
 *
 * @param {string} domain - Domain the response belongs to (only its suffix is inspected).
 * @param {string} whois - Raw WHOIS text.
 * @param {boolean} [ignorePrivacy=true] - Replace "redacted"-style placeholder values with ''.
 * @returns {object} Parsed data. Always contains the arrays 'Domain Status',
 *   'Name Server' and `text`; every other key is a string.
 */
export function parseDomainWhois(domain, whois, ignorePrivacy = true) {
  // Text saying there's no useful data in a field
  const noData = [
    '-',
    '.',
    'n/a',
    'no data',
    'redacted',
    'privado',
    'datos privados',
    'data protected',
    'not disclosed',
    'data protected, not disclosed',
    'data redacted',
    'not disclosed not disclosed',
    'not disclosed! visit www.eurid.eu for webbased whois.',
    'not available',
    'redacted for privacy',
    'redacted | eu data subject',
    'gdpr redacted',
    'non-public data',
    'gdpr masked',
    'statutory masking enabled',
    'redacted by privacy',
    'not applicable',
    'na',
    'redacted for privacy purposes',
    'redacted | eu registrar',
    'registration private',
    'none',
    'redacted.forprivacy',
    'redacted | registry policy',
    'redacted for gdpr privacy',
    'redacted for gdpr',
    'redacted redacted',
    'not available from registry',
    'hidden upon user request',
  ];
  // WHOIS labels to rename. "From" must be lowercase
  // from -> to
  const renameLabels = {
    'domain name': 'Domain Name',
    domain: 'Domain Name',
    'domain...............': 'Domain Name', // found in .ax
    'idn tag': 'IDN',
    'internationalized domain name': 'IDN',
    nameserver: 'Name Server',
    nameservers: 'Name Server',
    nserver: 'Name Server',
    'name servers': 'Name Server',
    'name server information': 'Name Server',
    dns: 'Name Server',
    'nserver..............': 'Name Server', // found in .ax
    hostname: 'Name Server',
    'domain nameservers': 'Name Server',
    'domain servers in listed order': 'Name Server', // found in .ly
    'domain servers': 'Name Server', // found in .tr
    'name servers dns': 'Name Server', // found in .mx
    'ns 1': 'Name Server', // found in .tm
    'ns 2': 'Name Server', // found in .tm
    'ns 3': 'Name Server', // found in .tm
    'ns 4': 'Name Server', // found in .tm
    flags: 'Domain Status',
    status: 'Domain Status',
    state: 'Domain Status', // found in .ru
    'registration status': 'Domain Status',
    eppstatus: 'Domain Status', // found in .fr
    'sponsoring registrar iana id': 'Registrar IANA ID',
    organisation: 'Registrar',
    registrar: 'Registrar',
    'registrar name': 'Registrar',
    'registrar organization': 'Registrar',
    'registrar............': 'Registrar', // found in .ax
    'record maintained by': 'Registrar',
    'sponsoring registrar': 'Registrar',
    'registrar organization name': 'Registrar', // found in .tr
    url: 'Registrar URL',
    'registrar website': 'Registrar URL',
    'registrar web': 'Registrar URL', // found in .it
    'www..................': 'Registrar URL', // found in .ax
    'mnt-by': 'Registrar ID', // found in .ua
    'creation date': 'Created Date',
    'registered on': 'Created Date',
    'registration date': 'Created Date',
    'relevant dates registered on': 'Created Date',
    created: 'Created Date',
    'created on': 'Created Date', // found in .mx
    'additional info created on..............': 'Created Date', // found in .tr
    'registration time': 'Created Date',
    registered: 'Created Date',
    'created..............': 'Created Date', // found in .ax
    'domain registered': 'Created Date',
    'registered date': 'Created Date', // found in .co.jp
    'last updated': 'Updated Date',
    changed: 'Updated Date',
    modified: 'Updated Date',
    updated: 'Updated Date', // found in .ly
    'modification date': 'Updated Date',
    'last modified': 'Updated Date',
    'relevant dates last updated': 'Updated Date', // found in .uk, .co.uk
    'last updated on': 'Updated Date', // found in .mx
    'last update': 'Updated Date', // found in .co.jp
    'last-update': 'Updated Date', // found in .fr
    'registrar registration expiration date': 'Expiry Date',
    'registry expiry date': 'Expiry Date',
    'expires on': 'Expiry Date',
    expires: 'Expiry Date',
    'expiration time': 'Expiry Date',
    'expire date': 'Expiry Date',
    'expiration date': 'Expiry Date',
    'expires..............': 'Expiry Date', // found in .ax
    'additional info expires on..............': 'Expiry Date', // found in .tr
    'paid-till': 'Expiry Date',
    'expiry date': 'Expiry Date',
    expiry: 'Expiry Date', // found in .tm
    expire: 'Expiry Date',
    'relevant dates expiry date': 'Expiry Date', // found in .uk, .co.uk
    'record will expire on': 'Expiry Date',
    expired: 'Expiry Date', // found in .ly
    'renewal date': 'Expiry Date', // found in .pl
    'registry registrantid': 'Registry Registrant ID', // found in .ai
    'owner name': 'Registrant Name', // found in .tm
    registrant: 'Registrant Name', // found in .ai
    'registrant contact': 'Registrant Name',
    'registrant contact name': 'Registrant Name',
    registrantname: 'Registrant Name', // found in .ai
    'registrant person': 'Registrant Name', // found in .ua
    'registrant email': 'Registrant Email', // found in .ua
    'registrant e-mail': 'Registrant Email', // found in .fr
    'registrant contact email': 'Registrant Email',
    registrantemail: 'Registrant Email', // found in .ai
    registrantstreet: 'Registrant Street', // found in .ai
    registrantcity: 'Registrant City', // found in .ai
    registrantcountry: 'Registrant Country', // found in .ai
    'registrant country': 'Registrant Country', // found in .fr
    'owner orgname': 'Registrant Organization', // found in .tm
    'registrant organisation': 'Registrant Organization',
    registrantphone: 'Registrant Phone',
    'trading as': 'Registrant Organization', // found in .uk, .co.uk
    org: 'Registrant Organization', // found in .ru
    'registrant state': 'Registrant State/Province',
    "registrant's address": 'Registrant Street',
    'owner addr': 'Registrant Street', // found in .tm
    dnssec: 'DNSSEC',
  };
  const ignoreLabels = [
    'note',
    'notes',
    'please note',
    'important',
    'notice',
    'terms of use',
    'web-based whois',
    'https',
    'to',
    'registration service provider',
    'you acknowledge that',
  ];
  const ignoreTexts = [
    'more information',
    'lawful purposes',
    'to contact',
    'use this data',
    'register your domain',
    'copy and paste',
    'find out more',
    'this',
    'please',
    'important',
    'prices',
    'payment',
    'you agree',
    'restrictions', // found on .co.uk domains
    'queried object', // found in abc.tech
    'service', // found in .au domains
    'terms',
  ];

  // WHOIS labels are untrusted input: only own keys count, so labels such as
  // "constructor" or "toString" never resolve to inherited Object.prototype members
  const hasOwn = (object, key) => Object.prototype.hasOwnProperty.call(object, key);
  const domainEndsWithAny = (suffixes) => suffixes.some((suffix) => domain.endsWith(suffix));

  let colon = ': ';
  const text = [];
  const data = {
    'Domain Status': [],
    'Name Server': [],
    text: [],
  };

  // Accept both LF and CRLF line endings, and turn EVERY tab into a space: with
  // only the first tab replaced, an indented "\tLabel:\tvalue" line keeps a tab
  // after the colon and is not recognised as a "label: value" pair
  let lines = whois
    .trim()
    .split(/\r?\n/)
    .map((line) => line.replace(/\t/g, ' '));

  // Parse WHOIS info for specific TLDs
  if (domainEndsWithAny(['.uk', '.be', '.nl', '.eu', '.ly', '.mx', '.gg', '.je', '.as'])) {
    lines = handleMultiLines(lines);
  }
  if (domainEndsWithAny(['.gg', '.je', '.as'])) {
    lines = handleMissingColons(lines);
  }
  if (domain.endsWith('.ua')) {
    lines = handleDotUa(lines);
    colon = ':';
  }
  if (domain.endsWith('.jp')) {
    lines = handleJpLines(lines);
  }
  if (domain.endsWith('.it')) {
    lines = handleDotIt(lines);
  } else if (domain.endsWith('.fr')) {
    lines = handleDotFr(lines);
  }
  if (domain.endsWith('.tr')) {
    lines = handleDotTr(lines);
  }

  lines = lines.map((l) => l.trim());

  for (const line of lines) {
    const isComment = line.startsWith('%') || line.startsWith(';') || line.startsWith('*');
    const hasLabel = line.includes(colon) || line.endsWith(':');
    if (!hasLabel || isComment) {
      text.push(line);
      continue;
    }

    let [label, value] = splitStringBy(line, line.indexOf(':')).map((info) => info.trim());

    // fix whois line with double colon, ex: "Label:: value"
    if (value.startsWith(':')) {
      value = value.slice(1);
    }
    value = value.trim();

    // rename labels to more common format
    const lookupKey = label.toLowerCase();
    if (hasOwn(renameLabels, lookupKey)) {
      label = renameLabels[lookupKey];
    }

    // remove redacted data
    if (ignorePrivacy && noData.includes(value.toLowerCase())) {
      value = '';
    }

    // multi-value fields ('Domain Status', 'Name Server', ...) collect every value
    if (Array.isArray(data[label])) {
      data[label].push(value);
      continue;
    }

    // computed after the rename on purpose: the ignore lists apply to the final label
    const normalizedLabel = label.toLowerCase();
    const isIgnored =
      ignoreLabels.includes(normalizedLabel) ||
      ignoreTexts.some((ignored) => normalizedLabel.includes(ignored));
    if (isIgnored) {
      text.push(line);
      continue;
    }

    // WHOIS field already exists, if so append data
    if (hasOwn(data, label) && data[label] && data[label] !== value) {
      data[label] = `${data[label]} ${value}`.trim();
    } else {
      data[label] = value;
    }
  }

  // remove invalid Name Servers (not valid hostname)
  data['Name Server'] = data['Name Server']
    .map((nameServer) => nameServer.split(' '))
    .flat()
    .filter(isDomain);

  // TODO: sometimes the whois includes duplicate NS for a domain

  // filter out empty status lines
  data['Domain Status'] = data['Domain Status'].filter(Boolean);

  // remove multiple empty lines
  let textString = text.join('\n').trim();
  while (textString.includes('\n\n\n')) {
    textString = textString.replace('\n\n\n', '\n');
  }
  data.text = textString.split('\n');

  return data;
}
/**
 * Normalise the .tr WHOIS layout into plain "label: value" lines.
 *
 * - "** Domain ..." lines keep their content, only the "** " marker is removed.
 * - Any other "** Section:" line starts a section; following "label: value"
 *   lines are prefixed with the section name.
 * - In the "Registrant" section only the first line is kept (as "Registrant Name").
 * - Lines without ": " are continuations and are appended to the previous output line.
 * - In the "Additional Info" section a trailing period is removed.
 *
 * @param {string[]} lines - WHOIS lines.
 * @returns {string[]} New array of normalised lines (blank lines are dropped).
 */
const handleDotTr = (lines) => {
  // Only the first Registrant line gets a label. No clue what the other 4 fields are, all domains have them hidden
  const registrantLines = ['Name', undefined, undefined, undefined, undefined];
  const replacement = [];
  let section = '';
  let sectionLine = 0;

  for (const rawLine of lines) {
    // Remove blank lines
    if (rawLine.trim() === '') {
      continue;
    }

    // Collapse whitespace and glue the first " :" to its label
    let line = rawLine.replace(/\s+/g, ' ').replace(' :', ':').trim();

    if (line.startsWith('** Domain')) {
      // Keep line for domain name/nameservers
      line = line.replace('** ', '');
    } else if (line.includes('** ')) {
      // Start new section
      section = line.replace(':', '').replace('** ', '').trim();
      sectionLine = 0;
      continue;
    } else if (section === 'Registrant') {
      // Add registrant info; unlabeled registrant lines are skipped
      if (!registrantLines[sectionLine]) {
        continue;
      }
      line = `Registrant ${registrantLines[sectionLine]}: ${line}`;
      sectionLine++;
    } else if (!line.includes(': ')) {
      // Add multi-line information to one line (nameservers and address)
      if (replacement.length === 0) {
        // Nothing to attach to yet: keep the line on its own instead of
        // writing "undefined ..." to replacement[-1] and losing it
        replacement.push(line);
      } else {
        replacement[replacement.length - 1] += ` ${line}`;
      }
      continue;
    } else if (section) {
      // Prefix each line with the section name
      line = `${section} ${line}`;
    }

    // Remove period at end of dates
    if (section === 'Additional Info') {
      line = line.replace(/\.$/, '');
    }
    replacement.push(line);
  }

  return replacement;
};
/**
 * Prefix .ua WHOIS fields with the contact section they belong to.
 *
 * A comment line ("% ...") mentioning Registrar, Registrant, Admin or
 * Technical starts a section; its text, minus the first and last character,
 * trimmed and lowercased, becomes the prefix of every following
 * "label: value" line. A leading "registrar" label is renamed to "id", so
 * inside the registrar section "registrar: x" becomes "registrar id: x".
 *
 * @param {string[]} lines - WHOIS lines; modified in place.
 * @returns {string[]} The same array.
 */
const handleDotUa = (lines) => {
  const types = ['Registrar', 'Registrant', 'Admin', 'Technical'];
  let flag = '';

  lines.forEach((line, index) => {
    if (line.startsWith('%')) {
      // comment line: only section headers are of interest
      if (types.some((type) => line.includes(type))) {
        flag = line
          .substring(1, line.length - 1)
          .trim()
          .toLowerCase();
      }
      return;
    }

    if (!line.includes(': ')) {
      return;
    }

    // Rename only the label. Assigning the bare string 'id' (as before) threw
    // away the value and left a line without a colon.
    const entry = line.startsWith('registrar') ? line.replace('registrar', 'id') : line;
    lines[index] = `${flag} ${entry}`;
  });

  return lines;
};
/**
 * Flatten the indented .it WHOIS layout into "label: value" lines.
 *
 * - Unindented lines with ":" are copied (whitespace collapsed).
 * - Unindented lines without ":" open a section; "Nameservers" becomes "Name Server:".
 * - Lines indented by exactly two whitespace characters are prefixed with the section.
 * - Lines indented by four or more continue the previous output line, joined with ", ".
 * - Comment lines ("*...") and empty / whitespace-only lines are ignored.
 *
 * @param {string[]} lines - WHOIS lines.
 * @returns {string[]} New array of normalised lines.
 */
const handleDotIt = (lines) => {
  const replacement = [];
  let section = '';

  for (const line of lines) {
    // Ignore comments and empty lines
    if (line.startsWith('*') || line === '') {
      continue;
    }

    // Collapse whitespace
    const collapsed = line.replace(/\s+/g, ' ').trim();

    // Whitespace-only lines carry no data; without this guard a line of 4+
    // blanks appended a dangling ", " to the previous entry
    if (collapsed === '') {
      continue;
    }

    // Top-level values and new section indicators
    if (/^\S/.test(line)) {
      if (line.includes(':')) {
        replacement.push(collapsed);
      } else {
        // Special handling for "Nameservers" section
        section = line === 'Nameservers' ? 'Name Server:' : collapsed;
      }
      continue;
    }

    if (/^\s{2}\S/.test(line)) {
      // Sub-section line: label it with the current section
      replacement.push(`${section} ${collapsed}`);
    } else if (/^\s{4}/.test(line)) {
      // Continuation of previous line
      if (replacement.length === 0) {
        // Nothing to continue yet: keep the text instead of writing
        // "undefined, ..." to replacement[-1]
        replacement.push(collapsed);
      } else {
        replacement[replacement.length - 1] += `, ${collapsed}`;
      }
    }
  }

  return replacement;
};
/**
 * Fix the "label:\n value" format, where a label sits alone on its line and
 * its value(s) follow on the next lines.
 *
 * For every non-comment line ending with ":", up to 8 following lines (stopping
 * at the first missing or blank one) are examined. A following line is prefixed
 * with the label when it is an indented "key: value" pair or when it does not
 * itself end with ":". If the following line contains a colon, the label's own
 * trailing colon is dropped first. A label used at least once is blanked.
 *
 * @param {string[]} lines - WHOIS lines; modified in place.
 * @returns {string[]} The same array.
 */
const handleMultiLines = (lines) => {
  const maxLookahead = 8;

  for (let index = 0; index < lines.length; index += 1) {
    // read the current content: earlier iterations may already have rewritten it
    const current = lines[index];
    const heading = current.trim();
    const isComment = current.startsWith('*') || current.startsWith('%');

    // only a WHOIS label standing alone (ending with ":") is of interest
    if (isComment || !heading.endsWith(':')) {
      continue;
    }

    let usedAsPrefix = false;

    for (let offset = 1; offset <= maxLookahead; offset += 1) {
      const next = lines[index + offset];

      // stop at the end of the input or at an empty line
      if (!next || next.trim().length === 0) {
        break;
      }

      // leave other bare labels alone, unless they are an indented "key: value" pair
      const isIndentedPair = next.startsWith(' ') && next.includes(': ');
      if (!isIndentedPair && next.endsWith(':')) {
        continue;
      }

      // when the value line brings its own colon, the heading's colon is dropped
      const prefix = next.includes(':') ? heading.slice(0, -1) : heading;
      lines[index + offset] = `${prefix} ${next.replace('\t', ' ').trim()}`;
      usedAsPrefix = true;
    }

    // remove this line if it was just a label for other lines
    if (usedAsPrefix) {
      lines[index] = '';
    }
  }

  return lines;
};
/**
 * Normalise .jp WHOIS lines of the form
 *
 *   [Name Server] ns1.jprs.jp
 *   a. [Domain Name] EXAMPLE.CO.JP
 *
 * into "Name Server: ns1.jprs.jp" style lines.
 *
 * - A leading "<letter>. " before the bracket is removed.
 * - Lines starting with "[ " are skipped.
 * - Lines starting with a space continue the previous kept line; they are
 *   joined to it with "\n". A continuation with no previous line is skipped.
 * - Every other line is dropped.
 *
 * @param {string[]} lines - WHOIS lines (left untouched).
 * @returns {string[]} New array of "label: value" lines.
 */
const handleJpLines = (lines) => {
  const merged = [];

  // Iterate instead of shift()-ing: the caller's array is no longer emptied
  // and the loop stays linear
  for (const rawLine of lines) {
    // handle lines that start with "a. [label]" (the dot is matched literally)
    const line = rawLine.replace(/^[a-z]\. \[/, '[');

    if (line.startsWith('[ ')) {
      continue;
    }

    if (line.startsWith('[')) {
      merged.push(line);
      continue;
    }

    // continuation line: only meaningful when there is a line to continue,
    // otherwise the result used to start with the text "undefined"
    if (line.startsWith(' ') && merged.length > 0) {
      merged[merged.length - 1] += `\n${line.trim()}`;
    }
  }

  // "[Label] value" -> "Label: value"
  return merged.map((line) => line.replace(/\[(.*?)\]/g, '$1:'));
};
/**
 * Normalize WHOIS data for .fr ccTld, make it look more like gTLDs.
 *
 * The response is split into groups separated by blank lines ("source" lines
 * are dropped, "%" comment lines are copied through first). Then:
 * - the group starting with "domain" is copied, minus its "status" line;
 * - a group starting with "registrar" gets every other label prefixed with "registrar";
 * - a group starting with "nic-hdl" is a contact. Its role (admin / registrant /
 *   technical) is taken from the first already emitted line that mentions the
 *   handle ("admin-c", "holder-c", "tech-c"). The role prefix stays empty when
 *   the handle is not referenced or the referencing label is unknown;
 * - any other group is copied unchanged.
 *
 * @param {string[]} lines - WHOIS lines.
 * @returns {string[]} New array of normalised lines.
 */
function handleDotFr(lines) {
  // label prefix of the referencing line -> contact role
  const contactRoles = [
    ['admin-c', 'admin'],
    ['holder-c', 'registrant'],
    ['tech-c', 'technical'],
  ];
  const groups = [];
  const finalLines = [];
  let group = [];

  // split data in groups
  lines.forEach((line) => {
    const isBlank = !line.trim().length;
    if (line.startsWith('%')) {
      finalLines.push(line);
    } else if (isBlank && group.length) {
      // start new group
      groups.push(group);
      group = [];
    } else if (!isBlank && !line.startsWith('source')) {
      group.push(splitStringBy(line, line.indexOf(':')).map((part) => part.trim()));
    }
  });
  if (group.length) {
    groups.push(group);
  }

  groups.forEach((gr) => {
    const [firstLabel, firstValue] = gr[0];

    if (firstLabel === 'domain') {
      // group with domain info
      gr.forEach((pair) => {
        if (pair[0] !== 'status') {
          finalLines.push(pair.join(': '));
        }
      });
    } else if (firstLabel === 'registrar') {
      // group with Registrar info
      gr.forEach(([label, value]) => {
        finalLines.push(label === 'registrar' ? `${label}: ${value}` : `registrar ${label}: ${value}`);
      });
    } else if (firstLabel === 'nic-hdl') {
      // group with contact info
      const reference = finalLines.find((line) => line.includes(firstValue));
      // the handle may not be referenced by any earlier line: fall back to an
      // empty role instead of calling startsWith on undefined
      const role =
        reference === undefined
          ? undefined
          : contactRoles.find(([prefix]) => reference.startsWith(prefix));
      const contactType = role ? role[1] : '';

      gr.forEach(([label, value]) => {
        if (label === 'nic-hdl') {
          finalLines.push(`${contactType} registry id: ${value}`);
        } else {
          finalLines.push(`${contactType} ${label}: ${value}`);
        }
      });
    } else {
      gr.forEach((pair) => {
        finalLines.push(pair.join(': '));
      });
    }
  });

  return finalLines;
}
/**
 * Add the missing colon to lines of the form
 *
 *   Registrar Gandi SAS
 *
 * so they read "Registrar: Gandi SAS". Only lines starting with "Registrar "
 * are touched.
 *
 * @param {string[]} lines - WHOIS lines; modified in place.
 * @returns {string[]} The same array.
 */
const handleMissingColons = (lines) => {
  for (const [position, entry] of lines.entries()) {
    if (!entry.startsWith('Registrar ')) {
      continue;
    }
    lines[position] = entry.replace('Registrar ', 'Registrar: ');
  }
  return lines;
};
// ----- v2
/**
 * Split raw WHOIS text into comment lines and groups of "label: value" fields.
 *
 * - Lines starting with "%" or "#" are comments. A comment containing "# end"
 *   closes the current WHOIS result; one containing "# start" is dropped.
 * - Only the first WHOIS result is parsed; everything after the first "# end" is ignored.
 * - An empty line closes the current group (if it has any field).
 * - A label repeated inside a group has its values joined with "\n".
 * - A line without ":" continues the value of the previous label of the same
 *   group; if that label has no value in the group, the line is ignored.
 * - Values containing "---" are skipped.
 *
 * @param {string} whois - Raw WHOIS text (LF or CRLF line endings).
 * @returns {{comments: string[], groups: object[]}} Comments and non-empty groups.
 */
export function whoisDataToGroups(whois) {
  // labels are untrusted input: never let "constructor", "toString", ... hit Object.prototype
  const hasOwn = (object, key) => Object.prototype.hasOwnProperty.call(object, key);

  const comments = [];
  const groups = [{}];
  let resultNum = 0;
  let lastLabel;

  // split on CRLF as well: with a trailing "\r" blank lines are not empty and
  // groups would never be separated
  whois.split(/\r?\n/).forEach((line) => {
    // catch comment lines
    if (line.startsWith('%') || line.startsWith('#')) {
      // detect if an ASN or IP has multiple WHOIS results
      if (line.includes('# start')) {
        // nothing
      } else if (line.includes('# end')) {
        resultNum++;
      } else {
        comments.push(line);
      }
      return;
    }

    // for the moment, parse only first WHOIS result
    if (resultNum !== 0) {
      return;
    }

    const current = groups[groups.length - 1];

    if (!line) {
      // if empty line, means another info group starts
      if (Object.keys(current).length) {
        groups.push({});
      }
      return;
    }

    if (!line.includes(':')) {
      // continuation of the previous field; a continuation without a field to
      // extend is ignored (it used to create a value starting with "undefined")
      if (lastLabel !== undefined && hasOwn(current, lastLabel)) {
        current[lastLabel] += `\n${line.trim()}`;
      }
      return;
    }

    const [label, value] = splitStringBy(line, line.indexOf(':')).map((info) => info.trim());
    lastLabel = label;

    // 1) Filter out unnecessary info, 2) then detect if the label is already added to group
    if (value.includes('---')) {
      // do nothing with useless data
      return;
    }

    if (hasOwn(current, label) && current[label]) {
      current[label] += `\n${value}`;
    } else {
      current[label] = value;
    }
  });

  return {
    comments,
    groups: groups.filter((group) => Object.keys(group).length > 0),
  };
}