datapackage
Version:
Utilities to work with Data Packages as defined on specs.frictionlessdata.io
563 lines (448 loc) • 17.8 kB
JavaScript
'use strict';
/**
 * Babel helper behind array destructuring.
 *
 * An actual Array is returned as-is (same reference). Any other value that
 * exposes Symbol.iterator is drained into a fresh array, stopping once `i`
 * items were collected when `i` is truthy. Everything else raises a TypeError.
 */
var _slicedToArray = function () {
  function drainIterable(iterable, limit) {
    var collected = [];
    // Stays true unless we leave the loop while the iterator is still open.
    var finishedCleanly = true;
    var caught = false;
    var caughtError = undefined;
    var iterator;
    try {
      iterator = iterable[Symbol.iterator]();
      while (true) {
        var step = iterator.next();
        finishedCleanly = step.done;
        if (finishedCleanly) {
          break;
        }
        collected.push(step.value);
        if (limit && collected.length === limit) {
          // Early exit: finishedCleanly is falsy, so the iterator gets closed below.
          break;
        }
        finishedCleanly = true;
      }
    } catch (err) {
      caught = true;
      caughtError = err;
    } finally {
      try {
        // Give an unfinished iterator the chance to release its resources.
        if (!finishedCleanly && iterator["return"]) {
          iterator["return"]();
        }
      } finally {
        // An error raised while iterating wins over anything thrown on close.
        if (caught) {
          throw caughtError;
        }
      }
    }
    return collected;
  }

  return function (arr, i) {
    if (Array.isArray(arr)) {
      return arr;
    }
    if (Symbol.iterator in Object(arr)) {
      return drainIterable(arr, i);
    }
    throw new TypeError("Invalid attempt to destructure non-iterable instance");
  };
}();
// Retrieve descriptor
/**
 * Load a descriptor from one of its supported sources (compiled async function).
 *
 * - plain object: a deep clone (cloneDeep) is returned
 * - string accepted by isRemotePath: fetched with axios.get, `response.data` is returned
 * - any other string: read synchronously from disk as UTF-8 and parsed with JSON.parse
 *   (rejected when config.IS_BROWSER is truthy)
 *
 * @param {Object|string} descriptor - descriptor object, URL or local file path
 * @returns {Promise<*>} resolves with the cloned, fetched or parsed descriptor
 * @throws {DataPackageError} when the remote fetch fails, the local read/parse fails,
 *   a local path is used in the browser, or the input is neither string nor plain object
 */
var retrieveDescriptor = function () {
var _ref = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee(descriptor) {
var response, message, _message, contents, _message2;
// Regenerator state machine: every `case` label is a resume point and
// `_context.next` selects the label executed on the next loop turn.
return regeneratorRuntime.wrap(function _callee$(_context) {
while (1) {
switch (_context.prev = _context.next) {
case 0:
// Inline descriptor: hand back a deep copy of the object
if (!isPlainObject(descriptor)) {
_context.next = 2;
break;
}
return _context.abrupt('return', cloneDeep(descriptor));
case 2:
// Anything that is not a string ends at the final throw (case 29)
if (!isString(descriptor)) {
_context.next = 29;
break;
}
// Non-remote strings are handled as local paths (case 17)
if (!isRemotePath(descriptor)) {
_context.next = 17;
break;
}
// Remote descriptor: try region starts here, its catch is case 11
_context.prev = 4;
_context.next = 7;
return axios.get(descriptor);
case 7:
// Resumed with the resolved axios response
response = _context.sent;
return _context.abrupt('return', response.data);
case 11:
// Any failure of the request is replaced by a DataPackageError;
// the caught error (_context.t0) is not part of the new message
_context.prev = 11;
_context.t0 = _context['catch'](4);
message = 'Can not retrieve remote descriptor "' + descriptor + '"';
throw new DataPackageError(message);
case 15:
_context.next = 29;
break;
case 17:
// Local descriptor: refused when running in a browser
if (!config.IS_BROWSER) {
_context.next = 20;
break;
}
_message = 'Local descriptor "' + descriptor + '" in browser is not supported';
throw new DataPackageError(_message);
case 20:
// Try region for the local read, its catch is case 25
_context.prev = 20;
// TODO: rebase on promisified fs.readFile (async)
contents = fs.readFileSync(descriptor, 'utf-8');
return _context.abrupt('return', JSON.parse(contents));
case 25:
// Read errors and JSON parse errors both end up here
_context.prev = 25;
_context.t1 = _context['catch'](20);
_message2 = 'Can not retrieve local descriptor "' + descriptor + '"';
throw new DataPackageError(_message2);
case 29:
throw new DataPackageError('Descriptor must be String or Object');
case 30:
case 'end':
return _context.stop();
}
}
// Try-region table: [start label, catch label] for the remote and local branches
}, _callee, this, [[4, 11], [20, 25]]);
}));
return function retrieveDescriptor(_x) {
return _ref.apply(this, arguments);
};
}();
// Dereference descriptor
/**
 * Dereference every resource of a package descriptor (compiled async function).
 *
 * Works on a deep clone of `descriptor`. Each entry of `descriptor.resources`
 * (an empty list is used when the property is falsy) is passed, one after
 * another, to dereferenceResourceDescriptor together with `basePath` and the
 * cloned package descriptor; the awaited result replaces the entry.
 *
 * @param {Object} descriptor - package descriptor; the argument itself is not modified
 * @param {string} basePath - forwarded unchanged to dereferenceResourceDescriptor
 * @returns {Promise<Object>} resolves with the dereferenced clone
 */
var dereferencePackageDescriptor = function () {
var _ref2 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee2(descriptor, basePath) {
var _iteratorNormalCompletion, _didIteratorError, _iteratorError, _iterator, _step, _step$value, index, resource;
return regeneratorRuntime.wrap(function _callee2$(_context2) {
while (1) {
switch (_context2.prev = _context2.next) {
case 0:
// All writes below go to this clone
descriptor = cloneDeep(descriptor);
// Bookkeeping of the compiled for-of loop
_iteratorNormalCompletion = true;
_didIteratorError = false;
_iteratorError = undefined;
_context2.prev = 4;
// Iterate [index, resource] pairs
_iterator = (descriptor.resources || []).entries()[Symbol.iterator]();
case 6:
// Loop condition: leave for case 14 once the iterator is exhausted
if (_iteratorNormalCompletion = (_step = _iterator.next()).done) {
_context2.next = 14;
break;
}
_step$value = _slicedToArray(_step.value, 2), index = _step$value[0], resource = _step$value[1];
_context2.next = 10;
// Suspends until this resource is dereferenced; resources are handled sequentially
return dereferenceResourceDescriptor(resource, basePath, descriptor);
case 10:
descriptor.resources[index] = _context2.sent;
case 11:
_iteratorNormalCompletion = true;
_context2.next = 6;
break;
case 14:
_context2.next = 20;
break;
case 16:
// catch: remember the error so it can be rethrown after cleanup
_context2.prev = 16;
_context2.t0 = _context2['catch'](4);
_didIteratorError = true;
_iteratorError = _context2.t0;
case 20:
// finally: close the iterator if the loop was left early
_context2.prev = 20;
_context2.prev = 21;
if (!_iteratorNormalCompletion && _iterator.return) {
_iterator.return();
}
case 23:
_context2.prev = 23;
// Rethrow whatever the loop body raised
if (!_didIteratorError) {
_context2.next = 26;
break;
}
throw _iteratorError;
case 26:
return _context2.finish(23);
case 27:
return _context2.finish(20);
case 28:
return _context2.abrupt('return', descriptor);
case 29:
case 'end':
return _context2.stop();
}
}
}, _callee2, this, [[4, 16, 20, 28], [21,, 23, 27]]);
}));
return function dereferencePackageDescriptor(_x2, _x3) {
return _ref2.apply(this, arguments);
};
}();
/**
 * Resolve the `schema` and `dialect` references of a resource descriptor
 * (compiled async function).
 *
 * Works on a deep clone of `descriptor`. A property is only touched when its
 * value is a string:
 * - "#..."      : JSON Pointer looked up in `baseDescriptor` with jsonpointer.get
 * - remote URL  : fetched with axios.get, replaced by `response.data`
 * - local path  : must pass isSafePath and needs `basePath`; the file
 *                 `basePath/value` is read as UTF-8 and parsed as JSON
 *
 * A relative value is resolved against a remote `basePath`; a value that is
 * already a fully qualified URL is fetched as-is (it used to be prefixed with
 * the base path, which produced an unusable URL).
 *
 * @param {Object} descriptor - resource descriptor; the argument itself is not modified
 * @param {string} [basePath] - directory or URL that relative references are resolved against
 * @param {Object} [baseDescriptor] - document used for pointer lookups; defaults to the clone
 * @returns {Promise<Object>} resolves with the dereferenced clone
 * @throws {DataPackageError} when a reference cannot be resolved, a local path is unsafe,
 *   a local path has no base path, or local dereferencing is attempted in the browser
 */
var dereferenceResourceDescriptor = function () {
var _ref3 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee3(descriptor, basePath, baseDescriptor) {
var PROPERTIES, _iteratorNormalCompletion2, _didIteratorError2, _iteratorError2, _iterator2, _step2, property, value, message, response, _message3, _message4, _message5, _message6, fullPath, contents, _message7;
return regeneratorRuntime.wrap(function _callee3$(_context3) {
while (1) {
switch (_context3.prev = _context3.next) {
case 0:
descriptor = cloneDeep(descriptor);
// Without an explicit base document, pointers resolve inside the clone itself
baseDescriptor = baseDescriptor || descriptor;
PROPERTIES = ['schema', 'dialect'];
// Bookkeeping of the compiled for-of loop
_iteratorNormalCompletion2 = true;
_didIteratorError2 = false;
_iteratorError2 = undefined;
_context3.prev = 6;
_iterator2 = PROPERTIES[Symbol.iterator]();
case 8:
// Loop condition: leave for case 63 once both properties were handled
if (_iteratorNormalCompletion2 = (_step2 = _iterator2.next()).done) {
_context3.next = 63;
break;
}
property = _step2.value;
value = descriptor[property];
// URI -> No
if (isString(value)) {
_context3.next = 15;
break;
}
// Not a reference (object or missing): continue with the next property
return _context3.abrupt('continue', 60);
case 15:
// URI -> Pointer
if (!value.startsWith('#')) {
_context3.next = 26;
break;
}
_context3.prev = 16;
// Drop the leading "#" before handing the pointer to json-pointer
descriptor[property] = jsonpointer.get(baseDescriptor, value.slice(1));
_context3.next = 24;
break;
case 20:
_context3.prev = 20;
_context3.t0 = _context3['catch'](16);
message = 'Not resolved Pointer URI "' + value + '" for resource.' + property;
throw new DataPackageError(message);
case 24:
_context3.next = 60;
break;
case 26:
// URI -> Remote
// Only relative references are resolved against a remote base path;
// an absolute URL must not be prefixed.
if (basePath && isRemotePath(basePath) && !isRemotePath(value)) {
// TODO: support other than Unix OS
value = [basePath, value].join('/');
}
if (!isRemotePath(value)) {
_context3.next = 41;
break;
}
_context3.prev = 28;
_context3.next = 31;
return axios.get(value);
case 31:
// Resumed with the resolved axios response
response = _context3.sent;
descriptor[property] = response.data;
_context3.next = 39;
break;
case 35:
_context3.prev = 35;
_context3.t1 = _context3['catch'](28);
_message3 = 'Not resolved Remote URI "' + value + '" for resource.' + property;
throw new DataPackageError(_message3);
case 39:
_context3.next = 60;
break;
case 41:
// URI -> Local: not available in the browser
if (!config.IS_BROWSER) {
_context3.next = 44;
break;
}
_message4 = 'Local URI dereferencing in browser is not supported';
throw new DataPackageError(_message4);
case 44:
// Refuse paths rejected by isSafePath
if (isSafePath(value)) {
_context3.next = 47;
break;
}
_message5 = 'Not safe path in Local URI "' + value + '" for resource.' + property;
throw new DataPackageError(_message5);
case 47:
// A local reference is always resolved relative to a base path
if (basePath) {
_context3.next = 50;
break;
}
_message6 = 'Local URI "' + value + '" requires base path for resource.' + property;
throw new DataPackageError(_message6);
case 50:
_context3.prev = 50;
// TODO: support other than Unix OS
fullPath = [basePath, value].join('/');
// TODO: rebase on promisified fs.readFile (async)
contents = fs.readFileSync(fullPath, 'utf-8');
descriptor[property] = JSON.parse(contents);
_context3.next = 60;
break;
case 56:
// Read errors and JSON parse errors both end up here
_context3.prev = 56;
_context3.t2 = _context3['catch'](50);
_message7 = 'Not resolved Local URI "' + value + '" for resource.' + property;
throw new DataPackageError(_message7);
case 60:
// Loop continuation point
_iteratorNormalCompletion2 = true;
_context3.next = 8;
break;
case 63:
_context3.next = 69;
break;
case 65:
// catch: remember the error so it can be rethrown after cleanup
_context3.prev = 65;
_context3.t3 = _context3['catch'](6);
_didIteratorError2 = true;
_iteratorError2 = _context3.t3;
case 69:
// finally: close the iterator if the loop was left early
_context3.prev = 69;
_context3.prev = 70;
if (!_iteratorNormalCompletion2 && _iterator2.return) {
_iterator2.return();
}
case 72:
_context3.prev = 72;
// Rethrow whatever the loop body raised
if (!_didIteratorError2) {
_context3.next = 75;
break;
}
throw _iteratorError2;
case 75:
return _context3.finish(72);
case 76:
return _context3.finish(69);
case 77:
return _context3.abrupt('return', descriptor);
case 78:
case 'end':
return _context3.stop();
}
}
}, _callee3, this, [[6, 65, 69, 77], [16, 20], [28, 35], [50, 56], [70,, 72, 76]]);
}));
return function dereferenceResourceDescriptor(_x4, _x5, _x6) {
return _ref3.apply(this, arguments);
};
}();
// Expand descriptor
/**
 * Build the list of [key, value] pairs for the own enumerable string keys of
 * `obj`, in Object.keys order.
 */
function _objectEntries(obj) {
  return Object.keys(obj).map(function (key) {
    return [key, obj[key]];
  });
}
/**
 * Babel helper that turns a generator function into a promise-returning one.
 *
 * The generator is created with the caller's `this` and arguments and is
 * driven to completion: each yielded value is wrapped with Promise.resolve,
 * its fulfilment is sent back through `next`, its rejection is raised inside
 * the generator through `throw`. The returned promise resolves with the
 * generator's return value and rejects with any error that escapes it.
 */
function _asyncToGenerator(fn) {
  return function () {
    var generator = fn.apply(this, arguments);

    return new Promise(function (resolve, reject) {
      function advance(method, input) {
        var result;
        var payload;
        try {
          result = generator[method](input);
          payload = result.value;
        } catch (error) {
          // The generator threw (or reading its result did): settle as rejected.
          reject(error);
          return;
        }

        if (result.done) {
          resolve(payload);
          return;
        }

        // Not finished yet: wait for the yielded value, then resume the generator.
        return Promise.resolve(payload).then(function (fulfilled) {
          advance("next", fulfilled);
        }, function (err) {
          advance("throw", err);
        });
      }

      return advance("next");
    });
  };
}
var fs = require('fs');
var axios = require('axios');
var pathModule = require('path');
var isString = require('lodash/isString');
var cloneDeep = require('lodash/cloneDeep');
var isPlainObject = require('lodash/isPlainObject');
var jsonpointer = require('json-pointer');
var _require = require('./errors'),
DataPackageError = _require.DataPackageError;
var config = require('./config');
var omit = require('lodash/omit');
// Locate descriptor
/**
 * Work out the base path that belongs to a descriptor.
 *
 * For a string, everything before the last "/" is returned; when nothing is
 * left (no "/" at all, or only a leading one as in "/datapackage.json") the
 * result is ".". Every non-string descriptor yields ".".
 *
 * @param {*} descriptor - descriptor path/URL, or any other descriptor value
 * @returns {string} inferred base path
 */
function locateDescriptor(descriptor) {
  // Current dir by default
  if (!isString(descriptor)) {
    return '.';
  }
  // Infer from path/url: drop the last segment
  var segments = descriptor.split('/');
  segments.pop();
  return segments.join('/') || '.';
}

/**
 * Return a deep clone of a package descriptor with defaults filled in.
 *
 * `profile` falls back to config.DEFAULT_DATA_PACKAGE_PROFILE when falsy and
 * each entry of `resources` is replaced by expandResourceDescriptor(entry).
 *
 * @param {Object} descriptor - package descriptor; the argument itself is not modified
 * @returns {Object} expanded clone
 */
function expandPackageDescriptor(descriptor) {
  var expanded = cloneDeep(descriptor);
  expanded.profile = expanded.profile || config.DEFAULT_DATA_PACKAGE_PROFILE;

  // Walk the [index, resource] pairs of the resource list (empty when absent)
  var pairs = (expanded.resources || []).entries();
  for (var step = pairs.next(); !step.done; step = pairs.next()) {
    var index = step.value[0];
    var resource = step.value[1];
    expanded.resources[index] = expandResourceDescriptor(resource);
  }

  return expanded;
}
/**
 * Return a deep clone of a resource descriptor with defaults filled in.
 *
 * `profile` and `encoding` fall back to the config defaults when falsy. For
 * the 'tabular-data-resource' profile only:
 * - every schema field gets a default `type` and `format`, and the schema a
 *   default `missingValues`, whenever the existing value is falsy
 * - every default dialect option the dialect does not own yet is added; the
 *   dialect is first checked by validateDialect and the defaults are narrowed
 *   by filterDefaultDialect
 *
 * @param {Object} descriptor - resource descriptor; the argument itself is not modified
 * @returns {Object} expanded clone
 */
function expandResourceDescriptor(descriptor) {
  var expanded = cloneDeep(descriptor);
  expanded.profile = expanded.profile || config.DEFAULT_RESOURCE_PROFILE;
  expanded.encoding = expanded.encoding || config.DEFAULT_RESOURCE_ENCODING;

  // Schema and dialect defaults only apply to tabular resources
  if (expanded.profile !== 'tabular-data-resource') {
    return expanded;
  }

  // Schema
  var schema = expanded.schema;
  if (schema !== undefined) {
    var fieldIterator = (schema.fields || [])[Symbol.iterator]();
    for (var fieldStep = fieldIterator.next(); !fieldStep.done; fieldStep = fieldIterator.next()) {
      var field = fieldStep.value;
      field.type = field.type || config.DEFAULT_FIELD_TYPE;
      field.format = field.format || config.DEFAULT_FIELD_FORMAT;
    }
    schema.missingValues = schema.missingValues || config.DEFAULT_MISSING_VALUES;
  }

  // Dialect
  var dialect = expanded.dialect;
  if (dialect !== undefined) {
    var defaultOptions = _objectEntries(filterDefaultDialect(validateDialect(dialect)));
    defaultOptions.forEach(function (option) {
      var key = option[0];
      // Options already present on the dialect are left alone
      if (!dialect.hasOwnProperty(key)) {
        dialect[key] = option[1];
      }
    });
  }

  return expanded;
}
// Miscellaneous
// quoteChar and escapeChar are mutually exclusive: https://frictionlessdata.io/specs/csv-dialect/#specification
/**
 * Pick the set of default dialect options that applies to `dialect`.
 *
 * When the dialect owns an `escapeChar`, the defaults are returned without
 * `quoteChar` (via omit); otherwise config.DEFAULT_DIALECT itself is returned.
 *
 * @param {Object} [dialect={}] - dialect being expanded
 * @returns {Object} default options to merge into the dialect
 */
function filterDefaultDialect() {
  var dialect = arguments[0] === undefined ? {} : arguments[0];
  if (dialect.hasOwnProperty('escapeChar')) {
    return omit(config.DEFAULT_DIALECT, 'quoteChar');
  }
  return config.DEFAULT_DIALECT;
}
// quoteChar and escapeChar are mutually exclusive: https://frictionlessdata.io/specs/csv-dialect/#specification
/**
 * Reject a dialect that declares both `escapeChar` and `quoteChar`.
 *
 * @param {Object} [dialect={}] - dialect to check
 * @returns {Object} the dialect that was passed in (or the empty default)
 * @throws {DataPackageError} when both options are own properties of the dialect
 */
function validateDialect() {
  var dialect = arguments[0] === undefined ? {} : arguments[0];
  // Fine as long as at least one of the two exclusive options is missing
  if (!dialect.hasOwnProperty('escapeChar') || !dialect.hasOwnProperty('quoteChar')) {
    return dialect;
  }
  throw new DataPackageError('Resource.table dialect options quoteChar and escapeChar are mutually exclusive.');
}
/**
 * Tell whether `path` is an HTTP(S) URL.
 *
 * Only strings starting with "http://" or "https://" qualify; the scheme is
 * matched case-insensitively. A bare "http" prefix is not enough, so local
 * names such as "httpd.json" or "http-archive/schema.json" stay local.
 *
 * @param {*} path - candidate path or URL
 * @returns {boolean} true for an http/https URL, false for anything else
 *   (including non-string input)
 */
function isRemotePath(path) {
  return typeof path === 'string' && /^https?:\/\//i.test(path);
}
/**
 * Tell whether a local path may be read relative to a base path.
 *
 * A path is unsafe when it
 * - is absolute (per the platform's path.isAbsolute),
 * - contains ".." followed by "/" or "\", or ends in a ".." segment
 *   (both separators are checked on every platform, because Windows accepts
 *   "/" as well as "\"),
 * - starts with "~",
 * - contains a Windows style variable (%NAME%) or a "$" followed by anything.
 *
 * @param {string} path - path taken from a descriptor
 * @returns {boolean} true when none of the conditions above applies
 */
function isSafePath(path) {
  var isAbsolute = pathModule.isAbsolute(path);
  // Parent-directory traversal, independent of the platform separator
  var climbsUp = /\.\.[\\/]/.test(path) || /(^|[\\/])\.\.$/.test(path);
  var startsAtHome = path.startsWith('~');
  var containsWindowsVar = /%.+%/.test(path);
  var containsPosixVar = /\$.+/.test(path);
  // Safety checks
  var unsafenessConditions = [isAbsolute, climbsUp, startsAtHome, containsWindowsVar, containsPosixVar];
  return !unsafenessConditions.some(Boolean);
}
// System
// Names made available to other modules. filterDefaultDialect is not listed
// here, so it can only be used from inside this file.
module.exports = {
locateDescriptor: locateDescriptor,
retrieveDescriptor: retrieveDescriptor,
dereferencePackageDescriptor: dereferencePackageDescriptor,
dereferenceResourceDescriptor: dereferenceResourceDescriptor,
expandPackageDescriptor: expandPackageDescriptor,
expandResourceDescriptor: expandResourceDescriptor,
validateDialect: validateDialect,
isRemotePath: isRemotePath,
isSafePath: isSafePath
};