geoip-lite2
Version:
A lightweight native JavaScript implementation of the GeoIP API from MaxMind. An improved and faster version by Sefinek.
609 lines (510 loc) • 17.9 kB
JavaScript
// Fetches and converts MaxMind lite databases
;
const { name, version } = require('../package.json');
const UserAgent = `Mozilla/5.0 (compatible; ${name}/${version}; +https://github.com/sefinek/geoip-lite2)`;
const fs = require('fs');
const http = require('http');
const https = require('https');
const path = require('path');
const zlib = require('zlib');
const readline = require('readline');
const async = require('async');
const { decodeStream } = require('iconv-lite');
const rimraf = require('rimraf').sync;
const AdmZip = require('adm-zip');
const utils = require('../lib/utils.js');
const { Address6, Address4 } = require('ip-address');
// Command-line arguments that follow `node <script>`.
const args = process.argv.slice(2);

// License key in its `license_key=<value>` form (it is appended verbatim to the
// download URLs). The CLI argument wins; the LICENSE_KEY environment variable is the fallback.
let license_key = args.find((arg) => /^license_key=[a-zA-Z0-9]+/.test(arg));
if (license_key === undefined && process.env.LICENSE_KEY !== undefined) {
  license_key = `license_key=${process.env.LICENSE_KEY}`;
}

// Optional output directory, also kept as `geoDataDir=<value>`; CLI first, then GEODATADIR.
let geoDataDir = args.find((arg) => /^geoDataDir=[\w./]+/.test(arg));
if (geoDataDir === undefined && process.env.GEODATADIR !== undefined) {
  geoDataDir = `geoDataDir=${process.env.GEODATADIR}`;
}

// Directory the generated .dat files and checksum files are written to.
// A user-supplied directory is resolved against the current working directory and must already exist.
let dataPath = path.resolve(__dirname, '..', 'geoip-data');
if (geoDataDir !== undefined) {
  const [, requestedDir] = geoDataDir.split('=');
  dataPath = path.resolve(process.cwd(), requestedDir);
  if (!fs.existsSync(dataPath)) {
    console.log(`ERROR: Directory doesn't exist: ${dataPath}`);
    process.exit(1);
  }
}

// Scratch directory for downloaded archives and extracted CSV files.
const tmpPath = process.env.GEOTMPDIR || path.resolve(__dirname, '..', 'tmp');

// geoname_id -> country ISO code, filled by processLookupCountry().
const countryLookup = {};
// geoname_id -> record index in the city-names file, filled by processCityDataNames().
// The NaN key maps an unparsable/empty geoname_id to -1.
const cityLookup = { NaN: -1 };
// Descriptors of the MaxMind GeoLite2 CSV archives to process, in processing order.
// `src[i]` is converted into `dest[i]`; the country locations file (index 0) is only
// used as an in-memory lookup, hence its empty destination name.
const databases = [
  { edition: 'Country', namesFile: '' },
  { edition: 'City', namesFile: 'geoip-city-names.dat' },
].map(({ edition, namesFile }) => {
  const type = edition.toLowerCase();
  const downloadUrl = (suffix) => `https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-${edition}-CSV&suffix=${suffix}&${license_key}`;

  return {
    type,
    url: downloadUrl('zip'),
    checksum: downloadUrl('zip.sha256'),
    fileName: `GeoLite2-${edition}-CSV.zip`,
    src: [
      `GeoLite2-${edition}-Locations-en.csv`,
      `GeoLite2-${edition}-Blocks-IPv4.csv`,
      `GeoLite2-${edition}-Blocks-IPv6.csv`,
    ],
    dest: [namesFile, `geoip-${type}.dat`, `geoip-${type}6.dat`],
  };
});
/**
 * Make sure the directory that CONTAINS `dirName` exists.
 *
 * Note that it is the parent (`path.dirname(dirName)`) that is created, not
 * `dirName` itself, so callers pass the path of a file they are about to write.
 * Missing intermediate directories are created as well.
 *
 * @param {string} dirName - Path whose parent directory must exist.
 */
function mkdir(dirName) {
  const dir = path.dirname(dirName);
  // `recursive` lets a nested, not-yet-existing path (e.g. a custom GEOTMPDIR) be
  // created in one go instead of failing with ENOENT on the first missing ancestor.
  if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
}
// CSV parsing helpers (tryFixingLine, CSVtoArray).
// Ref: http://stackoverflow.com/questions/8493195/how-can-i-parse-a-csv-string-with-javascript
// CSVtoArray returns an array of string values, or null if the CSV string is not well-formed.
/**
 * Best-effort repair of a CSV line that failed validation.
 *
 * The first `""` pair becomes `\"` and every `'` becomes `\'`. Then each
 * comma-delimited field that contains a single quote, and is not already
 * delimited by double quotes, is wrapped in double quotes.
 *
 * @param {string} line - Raw CSV line.
 * @returns {string} The rewritten line (validity is not guaranteed).
 */
function tryFixingLine(line) {
  let fixed = line.replace(/""/, '\\"').replace(/'/g, '\\\'');

  // `fieldStart` is the index of the comma preceding the current field (-1 for the first field),
  // `fieldEnd` the index of the comma following it (or the string length for the last field).
  let fieldStart = 0;
  let fieldEnd = -1;

  while (fieldStart < fixed.length && fieldEnd < fixed.length) {
    fieldStart = fieldEnd;
    fieldEnd = fixed.indexOf(',', fieldStart + 1);
    if (fieldEnd < 0) fieldEnd = fixed.length;

    const quoteAt = fixed.indexOf('\'', fieldStart);
    const hasQuoteInField = quoteAt > -1 && quoteAt < fieldEnd;
    const alreadyDelimited = fixed[fieldStart + 1] === '"' || fixed[fieldEnd - 1] === '"';

    if (hasQuoteInField && !alreadyDelimited) {
      const head = fixed.slice(0, fieldStart + 1);
      const field = fixed.slice(fieldStart + 1, fieldEnd);
      const tail = fixed.slice(fieldEnd);
      fixed = `${head}"${field}"${tail}`;

      // Two characters were inserted, so look for the field's trailing comma again.
      fieldEnd = fixed.indexOf(',', fieldEnd + 1);
      if (fieldEnd < 0) fieldEnd = fixed.length;
    }
  }

  return fixed;
}
// Matches a whole line made only of well-formed values: single-quoted, double-quoted or bare.
const re_valid = /^\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*(?:,\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*)*$/;
// Matches one value at a time; groups: 1 = single-quoted, 2 = double-quoted, 3 = bare.
const re_value = /(?!\s*$)\s*(?:'([^'\\]*(?:\\[\S\s][^'\\]*)*)'|"([^"\\]*(?:\\[\S\s][^"\\]*)*)"|([^,'"\s\\]*(?:\s+[^,'"\s\\]+)*))\s*(?:,|$)/g;

/**
 * Split one CSV line into its values.
 *
 * A line that does not validate is passed through tryFixingLine() once and re-validated.
 *
 * @param {string} text - A single CSV line.
 * @returns {string[]|null} The values, or null when the line is not well-formed CSV.
 */
function CSVtoArray(text) {
  let csv = text;
  if (!re_valid.test(csv)) {
    csv = tryFixingLine(csv);
    if (!re_valid.test(csv)) return null;
  }

  const values = [];
  for (const [, singleQuoted, doubleQuoted, bare] of csv.matchAll(re_value)) {
    if (singleQuoted !== undefined) {
      // Drop the backslash from \' inside single-quoted values.
      values.push(singleQuoted.replace(/\\'/g, '\''));
    } else if (doubleQuoted !== undefined) {
      // Drop the backslash from \" and \' inside double-quoted values.
      values.push(doubleQuoted.replace(/\\"/g, '"').replace(/\\'/g, '\''));
    } else if (bare !== undefined) {
      values.push(bare);
    }
  }

  // A trailing comma means the last value is empty.
  if (/,\s*$/.test(csv)) values.push('');

  return values;
}
function getHTTPOptions(downloadUrl) {
const parsedUrl = new URL(downloadUrl);
const options = {
protocol: parsedUrl.protocol,
host: parsedUrl.host,
path: parsedUrl.pathname + parsedUrl.search,
headers: { 'User-Agent': UserAgent },
};
if (process.env.http_proxy || process.env.https_proxy) {
try {
const HttpsProxyAgent = require('https-proxy-agent');
options.agent = new HttpsProxyAgent(process.env.http_proxy || process.env.https_proxy);
} catch (err) {
console.error(`Install https-proxy-agent to use an HTTP/HTTPS proxy. ${err.message}`);
process.exit(-1);
}
}
return options;
}
/**
 * Decide whether a database needs to be downloaded by comparing the remote checksum
 * with the one stored in `<dataPath>/<type>.checksum`.
 *
 * Sets `database.skip = true` when the checksums match, otherwise stores the new value in
 * `database.checkValue`. With the `force` argument, or without a checksum URL, nothing is
 * checked. A non-200 response or an empty checksum terminates the process with exit code 1;
 * network errors are reported through `cb(err)`.
 *
 * @param {object} database - Database descriptor (`type`, `fileName`, `checksum`).
 * @param {function(Error|null, object=)} cb - Called with `(null, database)` on success.
 */
function check(database, cb) {
  // Forced upgrade: checksums are not consulted at all.
  if (args.includes('force')) return cb(null, database);

  const checksumUrl = database.checksum;
  if (typeof checksumUrl === 'undefined') return cb(null, database); // No checksum url to check, skipping

  // Read the checksum stored by the previous successful update (if any).
  fs.readFile(path.join(dataPath, `${database.type}.checksum`), { encoding: 'utf8' }, (err, data) => {
    if (!err && data && data.length) database.checkValue = data;
    console.log('Checking', database.fileName);

    // Guarantees the callback fires at most once even if several error events arrive.
    let settled = false;
    const finish = (error) => {
      if (settled) return;
      settled = true;
      if (error) cb(error);
      else cb(null, database);
    };

    const request = (url) => {
      https.get(getHTTPOptions(url), (response) => onResponse(response, url)).on('error', finish);
    };

    function onResponse(response, requestedUrl) {
      const status = response.statusCode;
      if ([301, 302, 303, 307, 308].includes(status)) {
        response.resume(); // Discard the redirect body so the socket is released.
        // The Location header may be relative; resolve it against the URL just requested.
        return request(new URL(response.headers.location, requestedUrl).href);
      }
      if (status !== 200) {
        console.error('ERROR: HTTP Request Failed [%d %s]', status, http.STATUS_CODES[status]);
        process.exit(1);
      }

      let str = '';
      response.on('error', finish);
      response.on('data', (chunk) => {
        str += chunk;
      });
      response.on('end', () => {
        if (!str.length) {
          console.error(`ERROR: Could not retrieve checksum for ${database.type}. Aborting.`);
          console.error('Run with "force" to update without checksum');
          process.exit(1);
        }
        if (str === database.checkValue) {
          console.log(`Database "${database.type}" is up to date`);
          database.skip = true;
        } else {
          console.log(`Database "${database.type}" has new data`);
          database.checkValue = str;
        }
        finish();
      });
    }

    request(checksumUrl);
  });
}
/**
 * Download a database archive into the temporary directory.
 *
 * Nothing is downloaded when the database is marked `skip` or when the target file is
 * already present in `tmpPath`. A `.gz` file is gunzipped on the fly and stored without
 * the extension. A non-200 response terminates the process with exit code 1; network and
 * stream errors are reported through `cb(err)`.
 *
 * @param {object} database - Database descriptor (`url`, `fileName`, `skip`).
 * @param {function(Error|null, ?string=, ?string=, object=)} cb - Called with
 *   `(null, tmpFile, fileName, database)`; `tmpFile` and `fileName` are null when skipped.
 */
function fetch(database, cb) {
  if (database.skip) return cb(null, null, null, database);

  const downloadUrl = database.url;
  let fileName = database.fileName;
  const gzip = path.extname(fileName) === '.gz';
  // Strip the trailing extension only, not the first ".gz" that happens to occur in the name.
  if (gzip) fileName = fileName.slice(0, -'.gz'.length);

  const tmpFile = path.join(tmpPath, fileName);
  if (fs.existsSync(tmpFile)) return cb(null, tmpFile, fileName, database);

  console.log('Fetching', fileName);

  // Guarantees the callback fires at most once (an errored stream may still emit 'close').
  let settled = false;
  const finish = (error) => {
    if (settled) return;
    settled = true;
    if (error) return cb(error);
    console.log(' DONE');
    cb(null, tmpFile, fileName, database);
  };

  const request = (url) => {
    https.get(getHTTPOptions(url), (response) => onResponse(response, url)).on('error', finish);
  };

  function onResponse(response, requestedUrl) {
    const status = response.statusCode;
    if ([301, 302, 303, 307, 308].includes(status)) {
      response.resume(); // Discard the redirect body so the socket is released.
      // The Location header may be relative; resolve it against the URL just requested.
      return request(new URL(response.headers.location, requestedUrl).href);
    }
    if (status !== 200) {
      console.error('ERROR: HTTP Request Failed [%d %s]', status, http.STATUS_CODES[status]);
      process.exit(1);
    }

    const tmpFileStream = fs.createWriteStream(tmpFile);
    tmpFileStream.on('error', finish);
    tmpFileStream.on('close', () => finish());
    response.on('error', finish);

    if (gzip) {
      const gunzip = zlib.createGunzip();
      gunzip.on('error', finish);
      response.pipe(gunzip).pipe(tmpFileStream);
    } else {
      response.pipe(tmpFileStream);
    }
  }

  mkdir(tmpFile);
  request(downloadUrl);
  process.stdout.write(`Retrieving ${fileName}...`);
}
/**
 * Unpack a downloaded `.zip` archive into `tmpPath`.
 *
 * Every file entry is written directly into `tmpPath` under its base name (the directory
 * structure inside the archive is flattened). Skipped databases and non-zip files are
 * passed through untouched. Archive or write failures are reported through `cb(err)`.
 *
 * @param {?string} tmpFile - Path of the downloaded file.
 * @param {?string} tmpFileName - Name of the downloaded file (decides whether it is a zip).
 * @param {object} database - Database descriptor.
 * @param {function(Error|null, object=)} cb - Called with `(null, database)` on success.
 */
function extract(tmpFile, tmpFileName, database, cb) {
  if (database.skip) return cb(null, database);
  if (path.extname(tmpFileName) !== '.zip') return cb(null, database);

  process.stdout.write(`Extracting ${tmpFileName}...`);

  try {
    const zip = new AdmZip(tmpFile);
    for (const entry of zip.getEntries()) {
      if (entry.isDirectory) continue; // Skip directory entries

      // Keep only the last path segment so nothing is written outside tmpPath's top level.
      const fileName = entry.entryName.split('/').pop();
      fs.writeFileSync(path.join(tmpPath, fileName), entry.getData());
    }
  } catch (err) {
    // A corrupt archive or a failed write must fail the pipeline, not crash the process.
    return cb(err);
  }

  console.log(' DONE');
  cb(null, database);
}
/**
 * Load the country locations CSV into `countryLookup`.
 *
 * For every data row, column 0 becomes the key and column 4 the value. The first line
 * (header) is ignored; rows that cannot be parsed or have fewer than 6 columns are
 * logged and skipped.
 *
 * @param {string} src - File name of the locations CSV inside `tmpPath`.
 * @param {function()} cb - Called without arguments once the whole file has been read.
 */
function processLookupCountry(src, cb) {
  const lookupFile = path.join(tmpPath, src);
  process.stdout.write('Processing lookup data (may take a moment)...');

  const reader = readline.createInterface({
    input: fs.createReadStream(lookupFile).pipe(decodeStream('latin1')),
    output: process.stdout,
    terminal: false,
  });

  let isHeader = true;
  reader.on('line', (line) => {
    if (isHeader) {
      isHeader = false;
      return;
    }

    const fields = CSVtoArray(line);
    if (!fields || fields.length < 6) {
      console.log('Weird line: %s::', line);
      return;
    }

    const [geonameId, , , , countryCode] = fields;
    countryLookup[geonameId] = countryCode;
  });

  reader.on('close', () => {
    console.log(' DONE');
    cb();
  });
}
/**
 * Convert a country "blocks" CSV into the binary country data file.
 *
 * Record layout (big-endian):
 *   IPv4 - 10 bytes: start address (4), end address (4), country code (2)
 *   IPv6 - 34 bytes: start address (16), end address (16), country code (2)
 * Rows whose geoname id (column 1) has no entry in `countryLookup` are skipped.
 *
 * The returned promise settles only after all records have been flushed to the file,
 * and rejects if the output stream reports an error.
 *
 * @param {string} src - File name of the blocks CSV inside `tmpPath`.
 * @param {string} dest - File name of the output file inside `dataPath`.
 * @returns {Promise<void>}
 */
async function processCountryData(src, dest) {
  const dataFile = path.join(dataPath, dest);
  const tmpDataFile = path.join(tmpPath, src);

  rimraf(dataFile);
  mkdir(dataFile);

  process.stdout.write('\nProcessing data (may take a moment)...');

  let lines = 0;
  let tstart = Date.now();
  const datFile = fs.createWriteStream(dataFile);

  // Remember stream failures so they surface as a rejection instead of an uncaught 'error' event.
  let streamError = null;
  datFile.on('error', (err) => {
    streamError = err;
  });

  // Resolves on `event`, rejects if the stream errors first.
  const waitFor = (event) => new Promise((resolve, reject) => {
    const onEvent = () => {
      datFile.removeListener('error', onError);
      resolve();
    };
    const onError = (err) => {
      datFile.removeListener(event, onEvent);
      reject(err);
    };
    datFile.once(event, onEvent);
    datFile.once('error', onError);
  });

  // Builds the binary record for one network range.
  function encodeRecord(network, cc) {
    let b;
    let i;
    if (network.includes(':')) {
      // IPv6
      const rngip = new Address6(network);
      const sip = utils.aton6(rngip.startAddress().correctForm());
      const eip = utils.aton6(rngip.endAddress().correctForm());
      b = Buffer.alloc(34);
      for (i = 0; i < sip.length; i++) {
        b.writeUInt32BE(sip[i], i * 4);
      }
      for (i = 0; i < eip.length; i++) {
        b.writeUInt32BE(eip[i], 16 + (i * 4));
      }
    } else {
      // IPv4
      const rngip = new Address4(network);
      b = Buffer.alloc(10);
      b.writeUInt32BE(parseInt(rngip.startAddress().bigInt(), 10), 0);
      b.writeUInt32BE(parseInt(rngip.endAddress().bigInt(), 10), 4);
    }
    b.write(cc, b.length - 2); // Country code occupies the last two bytes.
    return b;
  }

  const rl = readline.createInterface({ input: fs.createReadStream(tmpDataFile), crlfDelay: Infinity });

  let isHeader = true;
  for await (const line of rl) {
    if (isHeader) {
      isHeader = false;
      continue;
    }
    if (streamError) throw streamError;

    const fields = CSVtoArray(line);
    if (!fields || fields.length < 6) {
      console.warn('weird line: %s::', line);
      continue;
    }
    lines++;

    const cc = countryLookup[fields[1]];
    if (!cc) continue;

    const record = encodeRecord(fields[0], cc);

    if (Date.now() - tstart > 5000) {
      tstart = Date.now();
      process.stdout.write(`\nStill working (${lines})...`);
    }

    // Respect back-pressure: pause reading until the stream has drained.
    if (!datFile.write(record)) await waitFor('drain');
  }

  if (streamError) throw streamError;

  // Wait until everything is flushed; the caller may exit the process right after.
  const finished = waitFor('finish');
  datFile.end();
  await finished;

  console.log(' DONE');
}
/**
 * Convert a city "blocks" CSV into the binary city data file.
 *
 * Record layout (big-endian):
 *   IPv4 - 24 bytes: start (4), end (4), location index (4), lat (4), lon (4), area (4)
 *   IPv6 - 48 bytes: start (16), end (16), location index (4), lat (4), lon (4), area (4)
 * The location index is `cityLookup[geoname_id]` (column 1) stored as an unsigned 32-bit
 * value; latitude and longitude (columns 7 and 8) are stored multiplied by 10000 and
 * rounded; area is column 9.
 *
 * The returned promise settles only after all records have been flushed to the file,
 * and rejects if the output stream reports an error.
 *
 * @param {string} src - File name of the blocks CSV inside `tmpPath`.
 * @param {string} dest - File name of the output file inside `dataPath`.
 * @returns {Promise<void>}
 */
async function processCityData(src, dest) {
  const dataFile = path.join(dataPath, dest);
  const tmpDataFile = path.join(tmpPath, src);

  rimraf(dataFile);

  process.stdout.write('\nProcessing data (may take a moment)...');

  let lines = 0;
  let tstart = Date.now();
  const datFile = fs.createWriteStream(dataFile);

  // Remember stream failures so they surface as a rejection instead of an uncaught 'error' event.
  let streamError = null;
  datFile.on('error', (err) => {
    streamError = err;
  });

  // Resolves on `event`, rejects if the stream errors first.
  const waitFor = (event) => new Promise((resolve, reject) => {
    const onEvent = () => {
      datFile.removeListener('error', onError);
      resolve();
    };
    const onError = (err) => {
      datFile.removeListener(event, onEvent);
      reject(err);
    };
    datFile.once(event, onEvent);
    datFile.once('error', onError);
  });

  // Builds the binary record for one parsed CSV row.
  function encodeRecord(fields) {
    const isIPv6 = fields[0].includes(':');
    const b = Buffer.alloc(isIPv6 ? 48 : 24);
    let tail; // Offset of the location index; lat/lon/area follow it.

    if (isIPv6) {
      const rngip = new Address6(fields[0]);
      const sip = utils.aton6(rngip.startAddress().correctForm());
      const eip = utils.aton6(rngip.endAddress().correctForm());
      let offset = 0;
      let i;
      for (i = 0; i < sip.length; i++) {
        b.writeUInt32BE(sip[i], offset);
        offset += 4;
      }
      for (i = 0; i < eip.length; i++) {
        b.writeUInt32BE(eip[i], offset);
        offset += 4;
      }
      tail = 32;
    } else {
      const rngip = new Address4(fields[0]);
      const sip = parseInt(rngip.startAddress().bigInt(), 10);
      const eip = parseInt(rngip.endAddress().bigInt(), 10);
      b.writeUInt32BE(sip >>> 0, 0);
      b.writeUInt32BE(eip >>> 0, 4);
      tail = 8;
    }

    const locId = cityLookup[parseInt(fields[1], 10)];
    const lat = Math.round(parseFloat(fields[7]) * 10000);
    const lon = Math.round(parseFloat(fields[8]) * 10000);
    const area = parseInt(fields[9], 10);

    b.writeUInt32BE(locId >>> 0, tail);
    b.writeInt32BE(lat, tail + 4);
    b.writeInt32BE(lon, tail + 8);
    b.writeInt32BE(area, tail + 12);
    return b;
  }

  const rl = readline.createInterface({ input: fs.createReadStream(tmpDataFile), crlfDelay: Infinity });

  let isHeader = true;
  for await (const line of rl) {
    if (isHeader) {
      isHeader = false;
      continue;
    }
    if (streamError) throw streamError;

    // Ignore copyright notices and lines without any digit.
    if (line.match(/^Copyright/) || !line.match(/\d/)) continue;

    const fields = CSVtoArray(line);
    if (!fields) {
      console.warn('Weird line: %s::', line);
      continue;
    }
    lines++;

    const record = encodeRecord(fields);

    if (Date.now() - tstart > 5000) {
      tstart = Date.now();
      process.stdout.write('\nStill working (' + lines + ')...');
    }

    // Respect back-pressure: pause reading until the stream has drained.
    if (!datFile.write(record)) await waitFor('drain');
  }

  if (streamError) throw streamError;

  // Wait until everything is flushed; the caller may exit the process right after.
  const finished = waitFor('finish');
  datFile.end();
  await finished;
}
/**
 * Convert the city locations CSV into the fixed-width city names file and fill `cityLookup`.
 *
 * Each accepted row produces one 88-byte record and maps its geoname id (column 0) to the
 * zero-based index of that record. Fields are written at these offsets:
 *   0 country code (column 4), 2 region (column 6), 5 metro code (column 11, int32 BE,
 *   only when non-zero), 9 EU flag (column 13), 10 time zone (column 12), 42 city (column 10).
 * The header line, copyright lines, lines without digits, unparsable rows and rows with
 * fewer than 14 columns are skipped.
 *
 * @param {string} src - File name of the locations CSV inside `tmpPath`.
 * @param {string} dest - File name of the output file inside `dataPath`.
 * @param {function()} cb - Called without arguments once the file has been written and closed.
 */
function processCityDataNames(src, dest, cb) {
  const REQUIRED_COLUMNS = 14; // Highest column read below is index 13.
  let linesCount = 0;

  const dataFile = path.join(dataPath, dest);
  const tmpDataFile = path.join(tmpPath, src);

  rimraf(dataFile);
  const datFile = fs.openSync(dataFile, 'w');

  function processLine(line) {
    if (line.match(/^Copyright/) || !line.match(/\d/)) return;

    const fields = CSVtoArray(line);
    // Short rows are rejected before anything is stored: they would otherwise overwrite
    // lookup entries and make Buffer.write() throw on an undefined value.
    if (!fields || fields.length < REQUIRED_COLUMNS) {
      // Lots of cities contain ` or ' in the name and can't be parsed correctly with current method
      console.warn('Weird line: %s::', line);
      return;
    }

    const locId = parseInt(fields[0], 10);
    cityLookup[locId] = linesCount;

    const cc = fields[4];
    const rg = fields[6];
    const city = fields[10];
    const metro = parseInt(fields[11], 10);
    const tz = fields[12];
    const eu = fields[13];

    const b = Buffer.alloc(88);
    b.write(cc, 0); // Country code
    b.write(rg, 2); // Region
    if (metro) b.writeInt32BE(metro, 5);
    b.write(eu, 9); // Is in eu
    b.write(tz, 10); // Timezone
    b.write(city, 42); // City name

    fs.writeSync(datFile, b, 0, b.length, null);
    linesCount++;
  }

  const rl = readline.createInterface({
    input: fs.createReadStream(tmpDataFile).pipe(decodeStream('utf-8')),
    output: process.stdout,
    terminal: false,
  });

  let lineCount = 0;
  rl.on('line', (line) => {
    if (lineCount > 0) processLine(line);
    lineCount++;
  });

  rl.on('close', () => {
    // Release the descriptor opened above before handing control back.
    fs.closeSync(datFile);
    cb();
  });
}
/**
 * Convert the extracted CSV files of one database into the binary data files.
 *
 * Country: the locations file (src[0]) is loaded as a lookup, then the IPv4 (src[1]) and
 * IPv6 (src[2]) block files are converted. When `src` is a single file name it is
 * converted directly. City: the names file is written first, then both block files.
 * Any conversion failure, and an unknown `type`, is reported through `cb(err)`.
 *
 * @param {object} database - Database descriptor (`type`, `src`, `dest`, `skip`).
 * @param {function(Error|null, object=)} cb - Called with `(null, database)` on success.
 */
function processData(database, cb) {
  if (database.skip) return cb(null, database);

  const { type, src, dest } = database;
  const done = () => cb(null, database);
  const fail = (err) => cb(err);

  if (type === 'country') {
    if (Array.isArray(src)) {
      processLookupCountry(src[0], () => {
        processCountryData(src[1], dest[1])
          .then(() => processCountryData(src[2], dest[2]))
          .then(done, fail);
      });
    } else {
      // processCountryData() returns a promise and takes no callback.
      processCountryData(src, dest).then(done, fail);
    }
  } else if (type === 'city') {
    processCityDataNames(src[0], dest[0], () => {
      processCityData(src[1], dest[1])
        .then(() => {
          console.log('\nCity data processed');
          return processCityData(src[2], dest[2]);
        })
        .then(() => {
          console.log(' DONE');
          done();
        }, fail);
    });
  } else {
    // Without this the pipeline would stall silently on a misconfigured descriptor.
    cb(new Error(`Unknown database type: ${type}`));
  }
}
/**
 * Persist the checksum of a freshly processed database to `<dataPath>/<type>.checksum`.
 *
 * Nothing is written when the database was skipped or has no checksum value. A write
 * failure is only logged; the callback is always invoked without arguments.
 *
 * @param {object} database - Database descriptor (`type`, `checkValue`, `skip`).
 * @param {function()} cb - Called once the step is finished.
 */
function updateChecksum(database, cb) {
  const nothingToStore = database.skip || !database.checkValue;
  if (nothingToStore) return cb();

  const checksumFile = path.join(dataPath, `${database.type}.checksum`);
  fs.writeFile(checksumFile, database.checkValue, 'utf8', (err) => {
    if (err) {
      console.log('Failed to Update checksums! Database:', database.type);
    }
    cb();
  });
}
// Entry point: a license key is mandatory for every download.
if (!license_key) {
  console.error('ERROR: Missing license_key');
  process.exit(1);
}

// Start from a clean temporary directory.
rimraf(tmpPath);
mkdir(tmpPath);

// Databases are handled one after another; each goes through the
// check -> fetch -> extract -> processData -> updateChecksum steps in order.
async.eachSeries(
  databases,
  (database, nextDatabase) => {
    const updateDatabase = async.seq(check, fetch, extract, processData, updateChecksum);
    updateDatabase(database, nextDatabase);
  },
  (err) => {
    if (err) {
      console.error('Failed to update databases from MaxMind!', err);
      process.exit(1);
    }

    console.log('Successfully updated databases from MaxMind');

    const keepTemporaryFiles = args.indexOf('debug') !== -1;
    if (keepTemporaryFiles) {
      console.debug('Notice: temporary files are not deleted for debug purposes');
    } else {
      rimraf(tmpPath);
    }

    process.exit(0);
  },
);