UNPKG

datapackage

Version:

Utilities to work with Data Packages as defined on specs.frictionlessdata.io

563 lines (448 loc) 17.8 kB
'use strict'; var _slicedToArray = function () { function sliceIterator(arr, i) { var _arr = []; var _n = true; var _d = false; var _e = undefined; try { for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { _arr.push(_s.value); if (i && _arr.length === i) break; } } catch (err) { _d = true; _e = err; } finally { try { if (!_n && _i["return"]) _i["return"](); } finally { if (_d) throw _e; } } return _arr; } return function (arr, i) { if (Array.isArray(arr)) { return arr; } else if (Symbol.iterator in Object(arr)) { return sliceIterator(arr, i); } else { throw new TypeError("Invalid attempt to destructure non-iterable instance"); } }; }(); // Retrieve descriptor var retrieveDescriptor = function () { var _ref = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee(descriptor) { var response, message, _message, contents, _message2; return regeneratorRuntime.wrap(function _callee$(_context) { while (1) { switch (_context.prev = _context.next) { case 0: if (!isPlainObject(descriptor)) { _context.next = 2; break; } return _context.abrupt('return', cloneDeep(descriptor)); case 2: if (!isString(descriptor)) { _context.next = 29; break; } if (!isRemotePath(descriptor)) { _context.next = 17; break; } _context.prev = 4; _context.next = 7; return axios.get(descriptor); case 7: response = _context.sent; return _context.abrupt('return', response.data); case 11: _context.prev = 11; _context.t0 = _context['catch'](4); message = 'Can not retrieve remote descriptor "' + descriptor + '"'; throw new DataPackageError(message); case 15: _context.next = 29; break; case 17: if (!config.IS_BROWSER) { _context.next = 20; break; } _message = 'Local descriptor "' + descriptor + '" in browser is not supported'; throw new DataPackageError(_message); case 20: _context.prev = 20; // TODO: rebase on promisified fs.readFile (async) contents = fs.readFileSync(descriptor, 'utf-8'); return _context.abrupt('return', JSON.parse(contents)); case 25: _context.prev = 25; _context.t1 = _context['catch'](20); _message2 = 'Can not retrieve local descriptor "' + descriptor + '"'; throw new DataPackageError(_message2); case 29: throw new DataPackageError('Descriptor must be String or Object'); case 30: case 'end': return _context.stop(); } } }, _callee, this, [[4, 11], [20, 25]]); })); return function retrieveDescriptor(_x) { return _ref.apply(this, arguments); }; }(); // Dereference descriptor var dereferencePackageDescriptor = function () { var _ref2 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee2(descriptor, basePath) { var _iteratorNormalCompletion, _didIteratorError, _iteratorError, _iterator, _step, _step$value, index, resource; return regeneratorRuntime.wrap(function _callee2$(_context2) { while (1) { switch (_context2.prev = _context2.next) { case 0: descriptor = cloneDeep(descriptor); _iteratorNormalCompletion = true; _didIteratorError = false; _iteratorError = undefined; _context2.prev = 4; _iterator = (descriptor.resources || []).entries()[Symbol.iterator](); case 6: if (_iteratorNormalCompletion = (_step = _iterator.next()).done) { _context2.next = 14; break; } _step$value = _slicedToArray(_step.value, 2), index = _step$value[0], resource = _step$value[1]; _context2.next = 10; return dereferenceResourceDescriptor(resource, basePath, descriptor); case 10: descriptor.resources[index] = _context2.sent; case 11: _iteratorNormalCompletion = true; _context2.next = 6; break; case 14: _context2.next = 20; break; case 16: _context2.prev = 16; _context2.t0 = _context2['catch'](4); _didIteratorError = true; _iteratorError = _context2.t0; case 20: _context2.prev = 20; _context2.prev = 21; if (!_iteratorNormalCompletion && _iterator.return) { _iterator.return(); } case 23: _context2.prev = 23; if (!_didIteratorError) { _context2.next = 26; break; } throw _iteratorError; case 26: return _context2.finish(23); case 27: return _context2.finish(20); case 28: return _context2.abrupt('return', descriptor); case 29: case 'end': return _context2.stop(); } } }, _callee2, this, [[4, 16, 20, 28], [21,, 23, 27]]); })); return function dereferencePackageDescriptor(_x2, _x3) { return _ref2.apply(this, arguments); }; }(); var dereferenceResourceDescriptor = function () { var _ref3 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee3(descriptor, basePath, baseDescriptor) { var PROPERTIES, _iteratorNormalCompletion2, _didIteratorError2, _iteratorError2, _iterator2, _step2, property, value, message, response, _message3, _message4, _message5, _message6, fullPath, contents, _message7; return regeneratorRuntime.wrap(function _callee3$(_context3) { while (1) { switch (_context3.prev = _context3.next) { case 0: descriptor = cloneDeep(descriptor); baseDescriptor = baseDescriptor || descriptor; PROPERTIES = ['schema', 'dialect']; _iteratorNormalCompletion2 = true; _didIteratorError2 = false; _iteratorError2 = undefined; _context3.prev = 6; _iterator2 = PROPERTIES[Symbol.iterator](); case 8: if (_iteratorNormalCompletion2 = (_step2 = _iterator2.next()).done) { _context3.next = 63; break; } property = _step2.value; value = descriptor[property]; // URI -> No if (isString(value)) { _context3.next = 15; break; } return _context3.abrupt('continue', 60); case 15: if (!value.startsWith('#')) { _context3.next = 26; break; } _context3.prev = 16; descriptor[property] = jsonpointer.get(baseDescriptor, value.slice(1)); _context3.next = 24; break; case 20: _context3.prev = 20; _context3.t0 = _context3['catch'](16); message = 'Not resolved Pointer URI "' + value + '" for resource.' + property; throw new DataPackageError(message); case 24: _context3.next = 60; break; case 26: if (basePath && isRemotePath(basePath)) { // TODO: support other that Unix OS value = [basePath, value].join('/'); } if (!isRemotePath(value)) { _context3.next = 41; break; } _context3.prev = 28; _context3.next = 31; return axios.get(value); case 31: response = _context3.sent; descriptor[property] = response.data; _context3.next = 39; break; case 35: _context3.prev = 35; _context3.t1 = _context3['catch'](28); _message3 = 'Not resolved Remote URI "' + value + '" for resource.' + property; throw new DataPackageError(_message3); case 39: _context3.next = 60; break; case 41: if (!config.IS_BROWSER) { _context3.next = 44; break; } _message4 = 'Local URI dereferencing in browser is not supported'; throw new DataPackageError(_message4); case 44: if (isSafePath(value)) { _context3.next = 47; break; } _message5 = 'Not safe path in Local URI "' + value + '" for resource.' + property; throw new DataPackageError(_message5); case 47: if (basePath) { _context3.next = 50; break; } _message6 = 'Local URI "' + value + '" requires base path for resource.' + property; throw new DataPackageError(_message6); case 50: _context3.prev = 50; // TODO: support other that Unix OS fullPath = [basePath, value].join('/'); // TODO: rebase on promisified fs.readFile (async) contents = fs.readFileSync(fullPath, 'utf-8'); descriptor[property] = JSON.parse(contents); _context3.next = 60; break; case 56: _context3.prev = 56; _context3.t2 = _context3['catch'](50); _message7 = 'Not resolved Local URI "' + value + '" for resource.' + property; throw new DataPackageError(_message7); case 60: _iteratorNormalCompletion2 = true; _context3.next = 8; break; case 63: _context3.next = 69; break; case 65: _context3.prev = 65; _context3.t3 = _context3['catch'](6); _didIteratorError2 = true; _iteratorError2 = _context3.t3; case 69: _context3.prev = 69; _context3.prev = 70; if (!_iteratorNormalCompletion2 && _iterator2.return) { _iterator2.return(); } case 72: _context3.prev = 72; if (!_didIteratorError2) { _context3.next = 75; break; } throw _iteratorError2; case 75: return _context3.finish(72); case 76: return _context3.finish(69); case 77: return _context3.abrupt('return', descriptor); case 78: case 'end': return _context3.stop(); } } }, _callee3, this, [[6, 65, 69, 77], [16, 20], [28, 35], [50, 56], [70,, 72, 76]]); })); return function dereferenceResourceDescriptor(_x4, _x5, _x6) { return _ref3.apply(this, arguments); }; }(); // Expand descriptor function _objectEntries(obj) { var entries = []; var keys = Object.keys(obj); for (var k = 0; k < keys.length; ++k) entries.push([keys[k], obj[keys[k]]]); return entries; } function _asyncToGenerator(fn) { return function () { var gen = fn.apply(this, arguments); return new Promise(function (resolve, reject) { function step(key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { return Promise.resolve(value).then(function (value) { step("next", value); }, function (err) { step("throw", err); }); } } return step("next"); }); }; } var fs = require('fs'); var axios = require('axios'); var pathModule = require('path'); var isString = require('lodash/isString'); var cloneDeep = require('lodash/cloneDeep'); var isPlainObject = require('lodash/isPlainObject'); var jsonpointer = require('json-pointer'); var _require = require('./errors'), DataPackageError = _require.DataPackageError; var config = require('./config'); var omit = require('lodash/omit'); // Locate descriptor function locateDescriptor(descriptor) { var basePath = void 0; // Infer from path/url if (isString(descriptor)) { basePath = descriptor.split('/').slice(0, -1).join('/') || '.'; // Current dir by default } else { basePath = '.'; } return basePath; }function expandPackageDescriptor(descriptor) { descriptor = cloneDeep(descriptor); descriptor.profile = descriptor.profile || config.DEFAULT_DATA_PACKAGE_PROFILE; var _iteratorNormalCompletion3 = true; var _didIteratorError3 = false; var _iteratorError3 = undefined; try { for (var _iterator3 = (descriptor.resources || []).entries()[Symbol.iterator](), _step3; !(_iteratorNormalCompletion3 = (_step3 = _iterator3.next()).done); _iteratorNormalCompletion3 = true) { var _step3$value = _slicedToArray(_step3.value, 2), index = _step3$value[0], resource = _step3$value[1]; descriptor.resources[index] = expandResourceDescriptor(resource); } } catch (err) { _didIteratorError3 = true; _iteratorError3 = err; } finally { try { if (!_iteratorNormalCompletion3 && _iterator3.return) { _iterator3.return(); } } finally { if (_didIteratorError3) { throw _iteratorError3; } } } return descriptor; } function expandResourceDescriptor(descriptor) { descriptor = cloneDeep(descriptor); descriptor.profile = descriptor.profile || config.DEFAULT_RESOURCE_PROFILE; descriptor.encoding = descriptor.encoding || config.DEFAULT_RESOURCE_ENCODING; if (descriptor.profile === 'tabular-data-resource') { // Schema var schema = descriptor.schema; if (schema !== undefined) { var _iteratorNormalCompletion4 = true; var _didIteratorError4 = false; var _iteratorError4 = undefined; try { for (var _iterator4 = (schema.fields || [])[Symbol.iterator](), _step4; !(_iteratorNormalCompletion4 = (_step4 = _iterator4.next()).done); _iteratorNormalCompletion4 = true) { var field = _step4.value; field.type = field.type || config.DEFAULT_FIELD_TYPE; field.format = field.format || config.DEFAULT_FIELD_FORMAT; } } catch (err) { _didIteratorError4 = true; _iteratorError4 = err; } finally { try { if (!_iteratorNormalCompletion4 && _iterator4.return) { _iterator4.return(); } } finally { if (_didIteratorError4) { throw _iteratorError4; } } } schema.missingValues = schema.missingValues || config.DEFAULT_MISSING_VALUES; } // Dialect var dialect = descriptor.dialect; if (dialect !== undefined) { var _iteratorNormalCompletion5 = true; var _didIteratorError5 = false; var _iteratorError5 = undefined; try { for (var _iterator5 = _objectEntries(filterDefaultDialect(validateDialect(dialect)))[Symbol.iterator](), _step5; !(_iteratorNormalCompletion5 = (_step5 = _iterator5.next()).done); _iteratorNormalCompletion5 = true) { var _step5$value = _slicedToArray(_step5.value, 2), key = _step5$value[0], value = _step5$value[1]; if (!dialect.hasOwnProperty(key)) { dialect[key] = value; } } } catch (err) { _didIteratorError5 = true; _iteratorError5 = err; } finally { try { if (!_iteratorNormalCompletion5 && _iterator5.return) { _iterator5.return(); } } finally { if (_didIteratorError5) { throw _iteratorError5; } } } } } return descriptor; } // Miscellaneous // quoteChar and escapeChar are mutually exclusive: https://frictionlessdata.io/specs/csv-dialect/#specification function filterDefaultDialect() { var dialect = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; var defaultDialects = dialect.hasOwnProperty('escapeChar') ? omit(config.DEFAULT_DIALECT, 'quoteChar') : config.DEFAULT_DIALECT; return defaultDialects; } // quoteChar and escapeChar are mutually exclusive: https://frictionlessdata.io/specs/csv-dialect/#specification function validateDialect() { var dialect = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; if (dialect.hasOwnProperty('escapeChar') && dialect.hasOwnProperty('quoteChar')) { throw new DataPackageError('Resource.table dialect options quoteChar and escapeChar are mutually exclusive.'); } return dialect; } function isRemotePath(path) { // TODO: improve implementation return path.startsWith('http'); } function isSafePath(path) { var containsWindowsVar = function containsWindowsVar(path) { return path.match(/%.+%/); }; var containsPosixVar = function containsPosixVar(path) { return path.match(/\$.+/); }; // Safety checks var unsafenessConditions = [pathModule.isAbsolute(path), path.includes('..' + pathModule.sep), path.startsWith('~'), containsWindowsVar(path), containsPosixVar(path)]; return !unsafenessConditions.some(Boolean); } // System module.exports = { locateDescriptor: locateDescriptor, retrieveDescriptor: retrieveDescriptor, dereferencePackageDescriptor: dereferencePackageDescriptor, dereferenceResourceDescriptor: dereferenceResourceDescriptor, expandPackageDescriptor: expandPackageDescriptor, expandResourceDescriptor: expandResourceDescriptor, validateDialect: validateDialect, isRemotePath: isRemotePath, isSafePath: isSafePath };