// pelias-openaddresses
// Version:
// Pelias import pipeline for OpenAddresses.
// 133 lines (112 loc) • 3.67 kB
// JavaScript
const fs = require('fs');
const util = require('util');
const glob = require('glob');
const path = require('path');
const _ = require('lodash');
const minimist = require('minimist');
const peliasConfig = require('pelias-config').generate();
const OpenAddressesAPI = require('../utils/OpenAddressesAPI');
/**
 * Interprets the command-line arguments passed to the script.
 *
 * @param {array} argv Should be `process.argv.slice( 2 )`.
 * @param {object} [config] Configuration object whose
 *    `imports.openaddresses.datapath` is used when no directory is given on
 *    the command line. Defaults to the module-level `peliasConfig`.
 * @return {object} If arguments were successfully parsed, an options object:
 *
 *  {
 *    dirPath: <string>,          // normalized path of an existing directory
 *    'parallel-count': <any>,    // value of --parallel-count (undefined if absent)
 *    'parallel-id': <any>        // value of --parallel-id (undefined if absent)
 *  }
 *
 *  Otherwise, an error object:
 *
 *  {
 *    exitCode: <number>,   // 0 for --help, 1 for an unknown flag,
 *                          // 2 for a missing/invalid directory
 *    errMessage: <string>
 *  }
 */
function interpretUserArgs( argv, config ){
  config = config || peliasConfig;

  const usageMessage = [
    'A tool for importing OpenAddresses data into Pelias. Usage:',
    '',
    '\tnode import.js --help | [--parallel-count N --parallel-id ID] [OPENADDRESSES_DIR]',
    '',
    '',
    '\t--help: Print this help message.',
    '',
    '\t--parallel-count N, --parallel-id ID: Split the list of files between',
    '\t\tN processes; only files whose index modulo N equals ID are used.',
    '',
    '\tOPENADDRESSES_DIR: A directory containing OpenAddresses CSV files.',
    '\t\tIf none is specified, the path from your PELIAS_CONFIG\'s',
    '\t\t`imports.openaddresses.datapath` will be used.',
  ].join( '\n' );

  argv = minimist( argv, {} );

  // reject anything that is not an explicitly supported flag
  const validArgs = [ 'help', '_', 'parallel-count', 'parallel-id' ];
  for( const arg in argv ){
    if( validArgs.indexOf( arg ) === -1 ){
      return {
        errMessage: util.format( '`%s` is not a recognized argument.', arg ),
        exitCode: 1
      };
    }
  }

  if( argv.help ){
    return { errMessage: usageMessage, exitCode: 0 };
  }

  let dirPath;
  if( argv._.length > 0 ){
    // minimist converts numeric-looking positionals (e.g. `2024`) to numbers,
    // which `path.normalize` would reject with a TypeError
    dirPath = String( argv._[ 0 ] );
  }
  else {
    const imports = config.imports || {};
    const openaddresses = imports.openaddresses || {};
    dirPath = openaddresses.datapath;
  }

  // no positional argument and no usable configured path: report it as an
  // error object instead of letting `path.normalize` throw
  if( typeof dirPath !== 'string' ){
    return {
      errMessage: 'No OpenAddresses directory was specified and ' +
        '`imports.openaddresses.datapath` is not set.',
      exitCode: 2
    };
  }

  dirPath = path.normalize( dirPath );

  if( !fs.existsSync( dirPath ) ){
    return {
      errMessage: util.format( 'Directory `%s` does not exist.', dirPath ),
      exitCode: 2
    };
  }

  if( !fs.statSync( dirPath ).isDirectory() ){
    return {
      errMessage: util.format( '`%s` is not a directory.', dirPath ),
      exitCode: 2
    };
  }

  return {
    'parallel-count': argv['parallel-count'],
    'parallel-id': argv['parallel-id'],
    dirPath: dirPath
  };
}
/**
 * Builds the complete list of data files to import.
 *
 * If `peliasConfig.imports.openaddresses.files` is empty or absent, every
 * `.csv`, `.geojson`, `.csv.gz` and `.geojson.gz` file found recursively
 * under `args.dirPath` is returned.
 *
 * Otherwise one path is returned per configured entry: the entry is
 * normalized with `OpenAddressesAPI.normalize` and looked up under
 * `args.dirPath` with a `.csv` or `.geojson` extension, preferring
 * `.geojson` when both exist. If nothing matches, the entry joined onto
 * `args.dirPath` is returned unchanged, whether or not such a file exists.
 *
 * @param {object} peliasConfig configuration containing `imports.openaddresses`
 * @param {object} args options object with a `dirPath` string
 * @return {string[]} file paths
 */
function getFullFileList(peliasConfig, args) {
  // get the files to process
  const files = _.get(peliasConfig.imports.openaddresses, 'files', []);

  if (_.isEmpty(files)) {
    // no specific files listed, so return all .csv and .geojson files
    return glob.sync( args.dirPath + '/**/*.{csv,geojson,geojson.gz,csv.gz}' );
  }

  // otherwise return the requested files with full path
  return files.map(file => {
    // normalize source
    const source = OpenAddressesAPI.normalize(file);

    // search for files matching this source id, ending in either .geojson or .csv
    const found = glob.sync(`${source}.{csv,geojson}`, { cwd: args.dirPath, absolute: true });

    if (_.isEmpty(found)) {
      // no matching files were found, return a non-matching absolute path
      return path.join(args.dirPath, file);
    }

    // prefer .geojson explicitly: the order of glob results is not
    // guaranteed to be sorted across glob versions
    const geojson = found.filter(match => match.endsWith('.geojson'));
    return _.last(_.isEmpty(geojson) ? found : geojson);
  });
}
/**
 * Returns the files this process should import.
 *
 * Starts from the full list produced by `getFullFileList`. When
 * `args['parallel-count']` is greater than zero and `args['parallel-id']` is
 * zero or more, only the files whose position in that list, modulo
 * `parallel-count`, strictly equals `parallel-id` are kept. Otherwise the
 * full list is returned.
 *
 * @param {object} peliasConfig passed through to `getFullFileList`
 * @param {object} args options object; passed through to `getFullFileList`
 * @return {array} the (possibly filtered) file list
 */
function getFileList(peliasConfig, args) {
  const allFiles = getFullFileList(peliasConfig, args);

  const workerCount = args['parallel-count'];
  const workerId = args['parallel-id'];
  const isSplit = workerCount > 0 && workerId >= 0;

  // without valid parallel settings every file belongs to this process
  if (!isSplit) {
    return allFiles;
  }

  return allFiles.filter((file, position) => position % workerCount === workerId);
}
// Public API: command-line argument parsing and selection of the files to import.
module.exports = { interpretUserArgs, getFileList };