// geoip2-lite
// Version:
// MaxMind's GeoIP2 API implementation on Native NodeJS
// 661 lines (567 loc) • 18.1 kB
// JavaScript
// fetches and converts maxmind lite databases
;
var user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.36 Safari/537.36';
var fs = require('fs');
var http = require('http');
var https = require('https');
var path = require('path');
var url = require('url');
var zlib = require('zlib');
fs.existsSync = fs.existsSync || path.existsSync;
var async = require('async');
var chalk = require('chalk');
var iconv = require('iconv-lite');
var lazy = require('lazy');
var rimraf = require('rimraf').sync;
var yauzl = require('yauzl');
var utils = require('../lib/utils');
var Address6 = require('ip-address').Address6;
var Address4 = require('ip-address').Address4;
// Command-line arguments that follow the node binary and the script path.
var args = process.argv.slice(2);

// MaxMind license key, kept in its full "license_key=<value>" form because it
// is appended verbatim to the download URLs. The command line takes
// precedence; the LICENSE_KEY environment variable is the fallback.
var license_key = args.find(function (arg) {
    return arg.match(/^license_key=[a-zA-Z0-9]+/) !== null;
});
if (typeof license_key === 'undefined' && typeof process.env.LICENSE_KEY !== 'undefined') {
    license_key = 'license_key=' + process.env.LICENSE_KEY;
}

// Optional output directory, kept in its full "geodatadir=<dir>" form.
// The command line takes precedence; GEODATADIR is the fallback.
var geodatadir = args.find(function (arg) {
    return arg.match(/^geodatadir=[\w./]+/) !== null;
});
if (typeof geodatadir === 'undefined' && typeof process.env.GEODATADIR !== 'undefined') {
    geodatadir = 'geodatadir=' + process.env.GEODATADIR;
}

// Directory that receives the generated .dat and .checksum files.
var dataPath = path.join(__dirname, '..', 'data');
if (typeof geodatadir !== 'undefined') {
    // Take everything after the FIRST '=' so that a directory name which
    // itself contains '=' is not truncated. The value is joined onto the
    // current working directory.
    dataPath = path.join(process.cwd(), geodatadir.slice(geodatadir.indexOf('=') + 1));
    if (!fs.existsSync(dataPath)) {
        console.log(chalk.red('ERROR') + ': Directory doesn\'t exist: ' + dataPath);
        process.exit(1);
    }
}

// Scratch directory for downloaded archives and extracted CSV files.
var tmpPath = path.join(__dirname, '..', 'tmp');
// Filled by processLookupCountry: first CSV column -> fifth CSV column.
var countryLookup = {};
// Filled by processCityDataNames: location id -> record index in the names file.
var cityLookup = {};
// Databases to update, processed one after another by the main loop.
// Per entry:
//   type     - selects the conversion branch in processData ('country' or 'city')
//   url      - download URL of the CSV archive (license_key is appended verbatim)
//   checksum - URL of the archive checksum; check() compares its body with the
//              stored "<type>.checksum" file to decide whether to skip the update
//   fileName - name of the downloaded archive inside the tmp directory
//   src      - CSV file names expected in the tmp directory after extraction
//   dest     - output file names inside the data directory; dest[i] pairs with src[i]
var databases = [
{
type: 'country',
url: 'https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-Country-CSV&suffix=zip&' + license_key,
checksum: 'https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-Country-CSV&suffix=zip.sha256&' + license_key,
fileName: 'GeoLite2-Country-CSV.zip',
src: [
'GeoLite2-Country-Locations-en.csv',
'GeoLite2-Country-Blocks-IPv4.csv',
'GeoLite2-Country-Blocks-IPv6.csv'
],
// dest[0] is empty: for 'country' the locations CSV only feeds the in-memory
// countryLookup table (processLookupCountry) and produces no output file.
dest: [
'',
'geoip2-country.dat',
'geoip2-country6.dat'
]
},
{
type: 'city',
url: 'https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-City-CSV&suffix=zip&' + license_key,
checksum: 'https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-City-CSV&suffix=zip.sha256&' + license_key,
fileName: 'GeoLite2-City-CSV.zip',
src: [
'GeoLite2-City-Locations-en.csv',
'GeoLite2-City-Blocks-IPv4.csv',
'GeoLite2-City-Blocks-IPv6.csv'
],
// For 'city' all three sources produce a file: names, IPv4 ranges, IPv6 ranges.
dest: [
'geoip2-city-names.dat',
'geoip2-city.dat',
'geoip2-city6.dat'
]
}
];
/**
 * Ensure that the directory which will CONTAIN `name` exists.
 *
 * Note that the directory created is `path.dirname(name)`, not `name`
 * itself: callers pass the path of a file they are about to create.
 *
 * @param {string} name - Path of a file (or entry) whose parent directory is needed.
 */
function mkdir(name) {
    var dir = path.dirname(name);
    if (!fs.existsSync(dir)) {
        // recursive: create missing ancestors too instead of failing with
        // ENOENT when more than one level of the path does not exist yet.
        fs.mkdirSync(dir, { recursive: true });
    }
}
// CSV helpers. CSVtoArray (below) is adapted from:
// Ref: http://stackoverflow.com/questions/8493195/how-can-i-parse-a-csv-string-with-javascript
// It returns an array of string values, or null if the CSV string is not well formed.
/**
 * Best-effort repair of a CSV line that the strict CSV validator rejected.
 *
 * Two rewrites are applied:
 *  1. Every doubled double quote (`""`) becomes `\"` and every single quote
 *     becomes `\'`, i.e. quotes are turned into backslash escapes.
 *  2. Every comma-delimited field that contains a single quote and is not
 *     already wrapped in double quotes gets wrapped in double quotes.
 *
 * The result is not guaranteed to be valid; the caller has to validate again.
 *
 * @param {string} line - Raw CSV line.
 * @returns {string} The rewritten line.
 */
function try_fixing_line(line) {
    var pos1 = 0;
    var pos2 = -1;
    // Escape quotes. The `g` flag matters: doubled quotes normally come in
    // pairs ("a ""b"" c"), and escaping only the first one leaves the line broken.
    line = line.replace(/""/g, '\\"').replace(/'/g, "\\'");
    // Walk the line field by field: pos1 is the comma before the current
    // field (-1 for the first field), pos2 the comma (or end of line) after it.
    while (pos1 < line.length && pos2 < line.length) {
        pos1 = pos2;
        pos2 = line.indexOf(',', pos1 + 1);
        if (pos2 < 0) pos2 = line.length;
        // A negative start index is treated as 0 by indexOf, so pos1 === -1 is fine.
        var quoteAt = line.indexOf("'", pos1);
        if (quoteAt > -1 && quoteAt < pos2 && line[pos1 + 1] !== '"' && line[pos2 - 1] !== '"') {
            // Wrap the field in double quotes.
            line = line.slice(0, pos1 + 1) + '"' + line.slice(pos1 + 1, pos2) + '"' + line.slice(pos2);
            // The line grew by two characters: locate the field's trailing comma again.
            pos2 = line.indexOf(',', pos2 + 1);
            if (pos2 < 0) pos2 = line.length;
        }
    }
    return line;
}
/**
 * Split one CSV line into its values.
 *
 * Values may be bare, single-quoted or double-quoted; inside quoted values
 * a backslash escapes the next character. A line that does not validate is
 * passed through try_fixing_line once and validated again.
 *
 * @param {string} text - One CSV line.
 * @returns {string[]|null} The values, or null when the line is still not
 *     well formed after the repair attempt.
 */
function CSVtoArray(text) {
    // Whole-line validator.
    var wellFormed = /^\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*(?:,\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*)*$/;
    // Single-value matcher: group 1 = single-quoted, 2 = double-quoted, 3 = bare.
    var nextValue = /(?!\s*$)\s*(?:'([^'\\]*(?:\\[\S\s][^'\\]*)*)'|"([^"\\]*(?:\\[\S\s][^"\\]*)*)"|([^,'"\s\\]*(?:\s+[^,'"\s\\]+)*))\s*(?:,|$)/g;

    if (!wellFormed.test(text)) {
        text = try_fixing_line(text);
        if (!wellFormed.test(text)) {
            return null;
        }
    }

    var values = [];

    // Called once per matched value; stores the unescaped value.
    function collect(whole, singleQuoted, doubleQuoted, bare) {
        if (singleQuoted !== undefined) {
            // \' -> '
            values.push(singleQuoted.replace(/\\'/g, "'"));
        } else if (doubleQuoted !== undefined) {
            // \" -> " and \' -> '
            values.push(doubleQuoted.replace(/\\"/g, '"').replace(/\\'/g, "'"));
        } else if (bare !== undefined) {
            values.push(bare);
        }
        return '';
    }

    // replace() is used only to walk every match; its result is discarded.
    text.replace(nextValue, collect);

    // A trailing comma means there is one more, empty, value.
    if (/,\s*$/.test(text)) {
        values.push('');
    }
    return values;
}
/**
 * Build the request options for an HTTP(S) GET of `downloadUrl`.
 *
 * The URL is parsed with url.parse and the script's User-Agent header is
 * attached. When the http_proxy or https_proxy environment variable is set,
 * an agent from the optional "https-proxy-agent" package is attached as
 * well; if that fails, an install hint is printed and the process exits
 * with status -1.
 *
 * @param {string} downloadUrl - URL to request.
 * @returns {object} Options object for http.get / https.get.
 */
function getHTTPOptions(downloadUrl) {
    var requestOptions = url.parse(downloadUrl);
    requestOptions.headers = {
        'User-Agent': user_agent
    };

    // http_proxy wins over https_proxy when both are set.
    var proxyUrl = process.env.http_proxy || process.env.https_proxy;
    if (!proxyUrl) {
        return requestOptions;
    }

    try {
        // Optional dependency, loaded only when a proxy is configured.
        var ProxyAgent = require('https-proxy-agent');
        requestOptions.agent = new ProxyAgent(proxyUrl);
    }
    catch (e) {
        console.error("Install https-proxy-agent to use an HTTP/HTTPS proxy");
        process.exit(-1);
    }
    return requestOptions;
}
/**
 * Decide whether `database` needs to be downloaded again.
 *
 * The remote checksum is downloaded and compared with the content of the
 * locally stored "<type>.checksum" file. When both are equal,
 * `database.skip` is set to true; otherwise the new checksum is stored in
 * `database.checkValue` so that updateChecksum can persist it later.
 *
 * The check is bypassed when "force" was given on the command line or when
 * the database has no checksum URL.
 *
 * Any failure to obtain the checksum terminates the process with exit
 * status 1, so a failed update is not reported as success to the caller
 * of the script.
 *
 * @param {object} database - Entry of the `databases` table (mutated).
 * @param {function} cb - Called as cb(null, database).
 */
function check(database, cb) {
    if (args.indexOf("force") !== -1) {
        // A forced upgrade does not look at checksums at all.
        return cb(null, database);
    }
    var checksumUrl = database.checksum;
    if (typeof checksumUrl === "undefined") {
        // No checksum URL to compare against.
        return cb(null, database);
    }
    // Read the checksum stored by the previous successful update (if any).
    fs.readFile(path.join(dataPath, database.type + ".checksum"), { encoding: 'utf8' }, function (err, data) {
        if (!err && data && data.length) {
            database.checkValue = data;
        }
        console.log('Checking ', database.fileName);
        function onResponse(response) {
            var status = response.statusCode;
            if (status !== 200) {
                console.log(chalk.red('ERROR') + ': HTTP Request Failed [%d %s]', status, http.STATUS_CODES[status]);
                client.abort();
                process.exit(1);
            }
            var str = "";
            // Decode as text so that chunks are concatenated as strings.
            response.setEncoding('utf8');
            response.on("data", function (chunk) {
                str += chunk;
            });
            response.on("end", function () {
                if (str && str.length) {
                    if (str === database.checkValue) {
                        console.log(chalk.green('Database "' + database.type + '" is up to date'));
                        database.skip = true;
                    }
                    else {
                        console.log(chalk.green('Database ' + database.type + ' has new data'));
                        database.checkValue = str;
                    }
                }
                else {
                    console.log(chalk.red('ERROR') + ': Could not retrieve checksum for', database.type, chalk.red('Aborting'));
                    console.log('Run with "force" to update without checksum');
                    client.abort();
                    process.exit(1);
                }
                cb(null, database);
            });
        }
        var client = https.get(getHTTPOptions(checksumUrl), onResponse);
        // Without this handler a DNS or connection failure surfaces as an
        // uncaught 'error' event.
        client.on('error', function (requestError) {
            console.log(chalk.red('ERROR') + ': Could not retrieve checksum for', database.type, '(' + requestError.message + ')');
            process.exit(1);
        });
    });
}
/**
 * Download the archive of `database` into the tmp directory.
 *
 * A ".gz" download is gunzipped while it is written, and the stored file
 * name then has the ".gz" extension removed. If the target file already
 * exists in the tmp directory the download is not repeated.
 *
 * A failed HTTP request or a stream error terminates the process with exit
 * status 1, so a failed update is not reported as success to the caller
 * of the script.
 *
 * @param {object} database - Entry of the `databases` table.
 * @param {function} cb - Called as cb(null, tmpFile, fileName, database);
 *     tmpFile and fileName are null when the database is skipped.
 */
function fetch(database, cb) {
    if (database.skip) {
        return cb(null, null, null, database);
    }
    var downloadUrl = database.url;
    var fileName = database.fileName;
    var gzip = path.extname(fileName) === '.gz';
    if (gzip) {
        // Strip the trailing extension only, not the first ".gz" anywhere in the name.
        fileName = fileName.slice(0, -'.gz'.length);
    }
    var tmpFile = path.join(tmpPath, fileName);
    if (fs.existsSync(tmpFile)) {
        return cb(null, tmpFile, fileName, database);
    }
    console.log('Fetching ', fileName);

    // Report a transport or stream failure and stop with a failing exit status.
    function fail(error) {
        console.log(chalk.red('ERROR') + ': Could not retrieve ' + fileName + ' (' + error.message + ')');
        process.exit(1);
    }

    function onResponse(response) {
        var status = response.statusCode;
        if (status !== 200) {
            console.log(chalk.red('ERROR') + ': HTTP Request Failed [%d %s]', status, http.STATUS_CODES[status]);
            client.abort();
            process.exit(1);
        }
        var tmpFilePipe;
        var tmpFileStream = fs.createWriteStream(tmpFile);
        // pipe() does not forward errors, so every stage gets its own handler.
        response.on('error', fail);
        tmpFileStream.on('error', fail);
        if (gzip) {
            var gunzip = zlib.createGunzip();
            gunzip.on('error', fail);
            tmpFilePipe = response.pipe(gunzip).pipe(tmpFileStream);
        } else {
            tmpFilePipe = response.pipe(tmpFileStream);
        }
        // 'close' on the file stream: everything has been written out.
        tmpFilePipe.on('close', function () {
            console.log(chalk.green(' DONE'));
            cb(null, tmpFile, fileName, database);
        });
    }
    // Make sure the tmp directory (the parent of tmpFile) exists.
    mkdir(tmpFile);
    var client = https.get(getHTTPOptions(downloadUrl), onResponse);
    // Without this handler a DNS or connection failure surfaces as an
    // uncaught 'error' event.
    client.on('error', fail);
    process.stdout.write('Retrieving ' + fileName + ' ...');
}
/**
 * Unpack a downloaded ".zip" archive into the tmp directory.
 *
 * Every file entry is written directly into the tmp directory under its
 * base name (directory parts of the entry name are dropped). Files that
 * are not ".zip" archives and skipped databases are passed through.
 *
 * Entries are extracted one at a time. The next entry is requested only
 * after the previous output file has been closed, so every extracted file
 * is completely on disk before `cb` runs and later steps start reading it.
 *
 * @param {string|null} tmpFile - Path of the downloaded archive.
 * @param {string|null} tmpFileName - File name of the archive.
 * @param {object} database - Entry of the `databases` table.
 * @param {function} cb - Called as cb(null, database) on success or
 *     cb(err) when the archive cannot be opened, read or written.
 */
function extract(tmpFile, tmpFileName, database, cb) {
    if (database.skip) {
        return cb(null, database);
    }
    if (path.extname(tmpFileName) !== '.zip') {
        return cb(null, database);
    }
    process.stdout.write('Extracting ' + tmpFileName + ' ...');

    // Several streams can report a failure; make sure cb runs exactly once.
    var finished = false;
    function finish(err) {
        if (finished) {
            return;
        }
        finished = true;
        if (err) {
            return cb(err);
        }
        console.log(chalk.green(' DONE'));
        cb(null, database);
    }

    yauzl.open(tmpFile, { autoClose: true, lazyEntries: true }, function (err, zipfile) {
        if (err) {
            return finish(err);
        }
        zipfile.on("error", finish);
        zipfile.on("entry", function (entry) {
            if (/\/$/.test(entry.fileName)) {
                // Directory file names end with '/'.
                // Note that entries for directories themselves are optional.
                // An entry's fileName implicitly requires its parent directories to exist.
                zipfile.readEntry();
                return;
            }
            // file entry
            zipfile.openReadStream(entry, function (err, readStream) {
                if (err) {
                    return finish(err);
                }
                var filePath = entry.fileName.split("/");
                // filePath will always have length >= 1, as split() always returns an array of at least one string
                var fileName = filePath[filePath.length - 1];
                var writeStream = fs.createWriteStream(path.join(tmpPath, fileName));
                readStream.on("error", finish);
                writeStream.on("error", finish);
                // Advance on 'close' of the OUTPUT file, not on 'end' of the
                // input: at 'end' the data may still sit in the write buffer.
                writeStream.on("close", function () {
                    zipfile.readEntry();
                });
                readStream.pipe(writeStream);
            });
        });
        zipfile.once("end", function () {
            finish();
        });
        // With lazyEntries entries are only delivered on request.
        zipfile.readEntry();
    });
}
/**
 * Load the country locations CSV into the in-memory `countryLookup` table.
 *
 * For every data line (the header line is skipped) the first column becomes
 * the key and the fifth column the value. Lines that cannot be parsed or
 * have fewer than six columns are reported and ignored.
 *
 * @param {string} src - CSV file name inside the tmp directory.
 * @param {function} cb - Called without arguments when the file has been read.
 */
function processLookupCountry(src, cb) {
    var lookupFile = path.join(tmpPath, src);

    // Store one CSV line in countryLookup.
    var recordLine = function (line) {
        var columns = CSVtoArray(line);
        if (columns && columns.length >= 6) {
            countryLookup[columns[0]] = columns[4];
            return;
        }
        console.log("weird line: %s::", line);
    };

    var decodeLine = function (bytes) {
        return iconv.decode(bytes, 'latin1');
    };

    process.stdout.write('Processing Lookup Data (may take a moment) ...');

    var decodedLines = lazy(fs.createReadStream(lookupFile)).lines.map(decodeLine);
    // skip(1) drops the CSV header. 'pipe' is the event this script relies on
    // as the end-of-input signal from lazy.
    decodedLines
        .skip(1)
        .map(recordLine)
        .on('pipe', function () {
            console.log(chalk.green(' DONE'));
            cb();
        });
}
/**
 * Convert a country "Blocks" CSV into a binary range file.
 *
 * The header line is skipped. For every remaining line whose second column
 * has an entry in `countryLookup` one fixed-size big-endian record is
 * appended to the output file:
 *   IPv4 (10 bytes): start address (uint32), end address (uint32), country code
 *   IPv6 (34 bytes): start address (16 bytes), end address (16 bytes), country code
 * The country code occupies the last two bytes of the record. Lines without
 * a known country are counted but produce no record.
 *
 * The output file is removed first, written synchronously and closed once
 * the whole input has been processed.
 *
 * @param {string} src - CSV file name inside the tmp directory.
 * @param {string} dest - Output file name inside the data directory.
 * @param {function} cb - Called without arguments when done.
 */
function processCountryData(src, dest, cb) {
    var lines = 0;
    function processLine(line) {
        var fields = CSVtoArray(line);
        if (!fields || fields.length < 6) {
            console.log("weird line: %s::", line);
            return;
        }
        lines++;
        var sip;
        var eip;
        var rngip;
        var cc = countryLookup[fields[1]];
        var b;
        var bsz;
        var i;
        if (cc) {
            if (fields[0].match(/:/)) {
                // IPv6
                bsz = 34;
                rngip = new Address6(fields[0]);
                sip = utils.aton6(rngip.startAddress().correctForm());
                eip = utils.aton6(rngip.endAddress().correctForm());
                b = Buffer.alloc(bsz);
                for (i = 0; i < sip.length; i++) {
                    b.writeUInt32BE(sip[i], i * 4);
                }
                for (i = 0; i < eip.length; i++) {
                    b.writeUInt32BE(eip[i], 16 + (i * 4));
                }
            } else {
                // IPv4
                bsz = 10;
                rngip = new Address4(fields[0]);
                sip = parseInt(rngip.startAddress().bigInteger(), 10);
                eip = parseInt(rngip.endAddress().bigInteger(), 10);
                // Buffer.alloc already zero-fills the buffer.
                b = Buffer.alloc(bsz);
                b.writeUInt32BE(sip, 0);
                b.writeUInt32BE(eip, 4);
            }
            // Country code goes into the last two bytes of the record.
            b.write(cc, bsz - 2);
            fs.writeSync(datFile, b, 0, bsz, null);
            // Progress output at most every five seconds.
            if (Date.now() - tstart > 5000) {
                tstart = Date.now();
                process.stdout.write('\nStill working (' + lines + ') ...');
            }
        }
    }
    var dataFile = path.join(dataPath, dest);
    var tmpDataFile = path.join(tmpPath, src);
    rimraf(dataFile);
    mkdir(dataFile);
    process.stdout.write('Processing Data (may take a moment) ...');
    var tstart = Date.now();
    var datFile = fs.openSync(dataFile, "w");
    lazy(fs.createReadStream(tmpDataFile))
        .lines
        .map(function (byteArray) {
            return iconv.decode(byteArray, 'latin1');
        })
        .skip(1)
        .map(processLine)
        // 'pipe' is the event this script relies on as lazy's end-of-input signal.
        .on('pipe', function () {
            // Release the descriptor opened above; it was never closed before.
            fs.closeSync(datFile);
            console.log(chalk.green(' DONE'));
            cb();
        });
}
/**
 * Convert a city "Blocks" CSV into a binary range file.
 *
 * The header line, "Copyright" lines and lines without any digit are
 * skipped. Every remaining parseable line appends one fixed-size
 * big-endian record to the output file:
 *   IPv4 (24 bytes): start (uint32) | end (uint32) | tail
 *   IPv6 (48 bytes): start (16 bytes) | end (16 bytes) | tail
 *   tail (16 bytes): location index (uint32) | latitude * 10000 (int32)
 *                    | longitude * 10000 (int32) | area (int32)
 * The location index is the `cityLookup` entry for the second CSV column;
 * latitude, longitude and area come from columns 8, 9 and 10.
 *
 * NOTE(review): a location id missing from cityLookup is written as index 0
 * (`undefined >>> 0`), which is also the index of the first names record;
 * confirm that the reader copes with this.
 *
 * The output file is removed first, written synchronously and closed once
 * the whole input has been processed.
 *
 * @param {string} src - CSV file name inside the tmp directory.
 * @param {string} dest - Output file name inside the data directory.
 * @param {function} cb - Called without arguments when done.
 */
function processCityData(src, dest, cb) {
    var lines = 0;
    function processLine(line) {
        if (line.match(/^Copyright/) || !line.match(/\d/)) {
            return;
        }
        var fields = CSVtoArray(line);
        if (!fields) {
            console.log("weird line: %s::", line);
            return;
        }
        var sip;
        var eip;
        var rngip;
        var b;
        var i;
        var offset = 0;
        // Byte offset at which the shared 16-byte tail starts.
        var tail;
        lines++;
        if (fields[0].match(/:/)) {
            // IPv6
            rngip = new Address6(fields[0]);
            sip = utils.aton6(rngip.startAddress().correctForm());
            eip = utils.aton6(rngip.endAddress().correctForm());
            b = Buffer.alloc(48);
            for (i = 0; i < sip.length; i++) {
                b.writeUInt32BE(sip[i], offset);
                offset += 4;
            }
            for (i = 0; i < eip.length; i++) {
                b.writeUInt32BE(eip[i], offset);
                offset += 4;
            }
            tail = 32;
        } else {
            // IPv4
            rngip = new Address4(fields[0]);
            sip = parseInt(rngip.startAddress().bigInteger(), 10);
            eip = parseInt(rngip.endAddress().bigInteger(), 10);
            b = Buffer.alloc(24);
            b.writeUInt32BE(sip >>> 0, 0);
            b.writeUInt32BE(eip >>> 0, 4);
            tail = 8;
        }
        // The tail is identical for both address families.
        var locId = cityLookup[parseInt(fields[1], 10)];
        var lat = Math.round(parseFloat(fields[7]) * 10000);
        var lon = Math.round(parseFloat(fields[8]) * 10000);
        var area = parseInt(fields[9], 10);
        b.writeUInt32BE(locId >>> 0, tail);
        b.writeInt32BE(lat, tail + 4);
        b.writeInt32BE(lon, tail + 8);
        b.writeInt32BE(area, tail + 12);
        fs.writeSync(datFile, b, 0, b.length, null);
        // Progress output at most every five seconds.
        if (Date.now() - tstart > 5000) {
            tstart = Date.now();
            process.stdout.write('\nStill working (' + lines + ') ...');
        }
    }
    var dataFile = path.join(dataPath, dest);
    var tmpDataFile = path.join(tmpPath, src);
    rimraf(dataFile);
    process.stdout.write('Processing Data (may take a moment) ...');
    var tstart = Date.now();
    var datFile = fs.openSync(dataFile, "w");
    lazy(fs.createReadStream(tmpDataFile))
        .lines
        .map(function (byteArray) {
            return iconv.decode(byteArray, 'latin1');
        })
        .skip(1)
        .map(processLine)
        // 'pipe' is the event this script relies on as lazy's end-of-input signal.
        .on('pipe', function () {
            // Release the descriptor opened above; it was never closed before.
            fs.closeSync(datFile);
            cb();
        });
}
/**
 * Convert the city locations CSV into the binary names file and fill
 * `cityLookup`.
 *
 * The header line, "Copyright" lines and lines without any digit are
 * skipped. Every remaining parseable line appends one zero-initialised
 * 88-byte record, written at these byte offsets:
 *    0  country code (column 5)
 *    2  region (column 7)
 *    5  metro code as int32, big-endian (column 12; only when non-zero)
 *    9  "is in EU" flag (column 14)
 *   10  time zone (column 13)
 *   42  city name (column 11)
 * `cityLookup[locationId]` is set to the zero-based index of the record,
 * which processCityData later stores in the range files.
 *
 * The output file is removed first, written synchronously and closed once
 * the whole input has been processed.
 *
 * @param {string} src - CSV file name inside the tmp directory.
 * @param {string} dest - Output file name inside the data directory.
 * @param {function} cb - Called without arguments when done.
 */
function processCityDataNames(src, dest, cb) {
    var locId = null;
    var linesCount = 0;
    function processLine(line) {
        if (line.match(/^Copyright/) || !line.match(/\d/)) {
            return;
        }
        var b;
        var sz = 88;
        var fields = CSVtoArray(line);
        if (!fields) {
            // Lots of cities contain ` or ' in the name and can't be parsed correctly with the current method.
            console.log("weird line: %s::", line);
            return;
        }
        // Always parse ids as decimal.
        locId = parseInt(fields[0], 10);
        cityLookup[locId] = linesCount;
        var cc = fields[4];
        var rg = fields[6];
        var city = fields[10];
        var metro = parseInt(fields[11], 10);
        // other possible fields to include
        var tz = fields[12];
        var eu = fields[13];
        // Buffer.alloc already zero-fills the buffer.
        b = Buffer.alloc(sz);
        b.write(cc, 0); // country code
        b.write(rg, 2); // region
        if (metro) {
            b.writeInt32BE(metro, 5);
        }
        b.write(eu, 9); // is in eu
        b.write(tz, 10); // timezone
        b.write(city, 42); // cityname
        fs.writeSync(datFile, b, 0, b.length, null);
        linesCount++;
    }
    var dataFile = path.join(dataPath, dest);
    var tmpDataFile = path.join(tmpPath, src);
    rimraf(dataFile);
    var datFile = fs.openSync(dataFile, "w");
    lazy(fs.createReadStream(tmpDataFile))
        .lines
        .map(function (byteArray) {
            return iconv.decode(byteArray, 'utf-8');
        })
        .skip(1)
        .map(processLine)
        // 'pipe' is the event this script relies on as lazy's end-of-input signal.
        .on('pipe', function () {
            // Release the descriptor opened above; it was never closed before.
            fs.closeSync(datFile);
            cb();
        });
}
/**
 * Run the CSV-to-binary conversion that matches `database.type`.
 *
 * 'country': when `src` is an array, the lookup table is loaded from src[0]
 *            and src[1] / src[2] are converted into dest[1] / dest[2];
 *            otherwise `src` is converted into `dest` directly.
 * 'city':    src[0] is converted into the names file dest[0], then src[1]
 *            and src[2] into the range files dest[1] and dest[2].
 * The steps run strictly one after another. Skipped databases are passed
 * through untouched; for any other type `cb` is not called.
 *
 * @param {object} database - Entry of the `databases` table.
 * @param {function} cb - Called as cb(null, database) when done.
 */
function processData(database, cb) {
    if (database.skip) {
        return cb(null, database);
    }

    var sources = database.src;
    var targets = database.dest;

    // Hand the database on to the next step of the pipeline.
    function finish() {
        cb(null, database);
    }

    switch (database.type) {
    case 'country':
        if (!Array.isArray(sources)) {
            processCountryData(sources, targets, finish);
            break;
        }
        processLookupCountry(sources[0], function () {
            processCountryData(sources[1], targets[1], function () {
                processCountryData(sources[2], targets[2], finish);
            });
        });
        break;
    case 'city':
        processCityDataNames(sources[0], targets[0], function () {
            processCityData(sources[1], targets[1], function () {
                console.log("city data processed");
                processCityData(sources[2], targets[2], function () {
                    console.log(chalk.green(' DONE'));
                    finish();
                });
            });
        });
        break;
    }
}
/**
 * Persist the checksum of a freshly converted database.
 *
 * The value of `database.checkValue` is written to "<type>.checksum" in the
 * data directory. Nothing is written for skipped databases or when no
 * checksum is known. A write failure is only reported; `cb` is still
 * called without an error.
 *
 * @param {object} database - Entry of the `databases` table.
 * @param {function} cb - Called without arguments when done.
 */
function updateChecksum(database, cb) {
    // Not fetched, or no checksum known: there is nothing to store.
    var nothingToStore = database.skip || !database.checkValue;
    if (nothingToStore) {
        return cb();
    }

    var checksumFile = path.join(dataPath, database.type + ".checksum");
    fs.writeFile(checksumFile, database.checkValue, 'utf8', function (writeError) {
        if (writeError) {
            console.log(chalk.red('Failed to Update checksums.'), "Database:", database.type);
        }
        cb();
    });
}
// ---- Script entry point ----

// A license key is mandatory for downloading.
if (!license_key) {
    console.log(chalk.red('ERROR') + ': Missing license_key');
    process.exit(1);
}

// Start from a clean tmp directory. mkdir() creates the PARENT of the path
// it is given, so this only ensures that the directory containing tmp exists.
rimraf(tmpPath);
mkdir(tmpPath);

// Update the databases one after another; each one runs through
// check -> fetch -> extract -> processData -> updateChecksum.
async.eachSeries(databases, function (database, nextDatabase) {
    var pipeline = async.seq(check, fetch, extract, processData, updateChecksum);
    pipeline(database, nextDatabase);
}, function (err) {
    if (err) {
        console.log(chalk.red('Failed to Update Databases from MaxMind.'), err);
        process.exit(1);
        return;
    }

    console.log(chalk.green('Successfully Updated Databases from MaxMind.'));
    // With "debug" on the command line the tmp directory is kept for inspection.
    var keepTmpFiles = args.indexOf("debug") !== -1;
    if (keepTmpFiles) {
        console.log(chalk.yellow.bold('Notice: temporary files are not deleted for debug purposes.'));
    } else {
        rimraf(tmpPath);
    }
    process.exit(0);
});