UNPKG

datapackage

Version:

Utilities to work with Data Packages as defined on specs.frictionlessdata.io

239 lines (209 loc) 7.21 kB
const fs = require('fs') const axios = require('axios') const pathModule = require('path') const isString = require('lodash/isString') const cloneDeep = require('lodash/cloneDeep') const isPlainObject = require('lodash/isPlainObject') const jsonpointer = require('json-pointer') const { DataPackageError } = require('./errors') const config = require('./config') const omit = require('lodash/omit') // Locate descriptor function locateDescriptor(descriptor) { let basePath // Infer from path/url if (isString(descriptor)) { basePath = descriptor.split('/').slice(0, -1).join('/') || '.' // Current dir by default } else { basePath = '.' } return basePath } // Retrieve descriptor async function retrieveDescriptor(descriptor) { if (isPlainObject(descriptor)) { return cloneDeep(descriptor) } if (isString(descriptor)) { // Remote if (isRemotePath(descriptor)) { try { const response = await axios.get(descriptor) return response.data } catch (error) { const message = `Can not retrieve remote descriptor "${descriptor}"` throw new DataPackageError(message) } // Local } else { if (config.IS_BROWSER) { const message = `Local descriptor "${descriptor}" in browser is not supported` throw new DataPackageError(message) } try { // TODO: rebase on promisified fs.readFile (async) const contents = fs.readFileSync(descriptor, 'utf-8') return JSON.parse(contents) } catch (error) { const message = `Can not retrieve local descriptor "${descriptor}"` throw new DataPackageError(message) } } } throw new DataPackageError('Descriptor must be String or Object') } // Dereference descriptor async function dereferencePackageDescriptor(descriptor, basePath) { descriptor = cloneDeep(descriptor) for (const [index, resource] of (descriptor.resources || []).entries()) { // TODO: May be we should use Promise.all here descriptor.resources[index] = await dereferenceResourceDescriptor( resource, basePath, descriptor ) } return descriptor } async function dereferenceResourceDescriptor(descriptor, basePath, baseDescriptor) { descriptor = cloneDeep(descriptor) baseDescriptor = baseDescriptor || descriptor const PROPERTIES = ['schema', 'dialect'] for (const property of PROPERTIES) { let value = descriptor[property] // URI -> No if (!isString(value)) { continue // URI -> Pointer } else if (value.startsWith('#')) { try { descriptor[property] = jsonpointer.get(baseDescriptor, value.slice(1)) } catch (error) { const message = `Not resolved Pointer URI "${value}" for resource.${property}` throw new DataPackageError(message) } // URI -> Remote } else { if (basePath && isRemotePath(basePath)) { // TODO: support other that Unix OS value = [basePath, value].join('/') } if (isRemotePath(value)) { try { const response = await axios.get(value) descriptor[property] = response.data } catch (error) { const message = `Not resolved Remote URI "${value}" for resource.${property}` throw new DataPackageError(message) } // URI -> Local } else { if (config.IS_BROWSER) { const message = 'Local URI dereferencing in browser is not supported' throw new DataPackageError(message) } if (!isSafePath(value)) { const message = `Not safe path in Local URI "${value}" for resource.${property}` throw new DataPackageError(message) } if (!basePath) { const message = `Local URI "${value}" requires base path for resource.${property}` throw new DataPackageError(message) } try { // TODO: support other that Unix OS const fullPath = [basePath, value].join('/') // TODO: rebase on promisified fs.readFile (async) const contents = fs.readFileSync(fullPath, 'utf-8') descriptor[property] = JSON.parse(contents) } catch (error) { const message = `Not resolved Local URI "${value}" for resource.${property}` throw new DataPackageError(message) } } } } return descriptor } // Expand descriptor function expandPackageDescriptor(descriptor) { descriptor = cloneDeep(descriptor) descriptor.profile = descriptor.profile || config.DEFAULT_DATA_PACKAGE_PROFILE for (const [index, resource] of (descriptor.resources || []).entries()) { descriptor.resources[index] = expandResourceDescriptor(resource) } return descriptor } function expandResourceDescriptor(descriptor) { descriptor = cloneDeep(descriptor) descriptor.profile = descriptor.profile || config.DEFAULT_RESOURCE_PROFILE descriptor.encoding = descriptor.encoding || config.DEFAULT_RESOURCE_ENCODING if (descriptor.profile === 'tabular-data-resource') { // Schema const schema = descriptor.schema if (schema !== undefined) { for (const field of schema.fields || []) { field.type = field.type || config.DEFAULT_FIELD_TYPE field.format = field.format || config.DEFAULT_FIELD_FORMAT } schema.missingValues = schema.missingValues || config.DEFAULT_MISSING_VALUES } // Dialect const dialect = descriptor.dialect if (dialect !== undefined) { for (const [key, value] of Object.entries(filterDefaultDialect(validateDialect(dialect)))) { if (!dialect.hasOwnProperty(key)) { dialect[key] = value } } } } return descriptor } // Miscellaneous // quoteChar and escapeChar are mutually exclusive: https://frictionlessdata.io/specs/csv-dialect/#specification function filterDefaultDialect(dialect = {}) { const defaultDialects = dialect.hasOwnProperty('escapeChar') ? omit(config.DEFAULT_DIALECT, 'quoteChar') : config.DEFAULT_DIALECT return defaultDialects } // quoteChar and escapeChar are mutually exclusive: https://frictionlessdata.io/specs/csv-dialect/#specification function validateDialect(dialect = {}) { if (dialect.hasOwnProperty('escapeChar') && dialect.hasOwnProperty('quoteChar')) { throw new DataPackageError( 'Resource.table dialect options quoteChar and escapeChar are mutually exclusive.' ) } return dialect } function isRemotePath(path) { // TODO: improve implementation return path.startsWith('http') } function isSafePath(path) { const containsWindowsVar = (path) => path.match(/%.+%/) const containsPosixVar = (path) => path.match(/\$.+/) // Safety checks const unsafenessConditions = [ pathModule.isAbsolute(path), path.includes(`..${pathModule.sep}`), path.startsWith('~'), containsWindowsVar(path), containsPosixVar(path), ] return !unsafenessConditions.some(Boolean) } // System module.exports = { locateDescriptor, retrieveDescriptor, dereferencePackageDescriptor, dereferenceResourceDescriptor, expandPackageDescriptor, expandResourceDescriptor, validateDialect, isRemotePath, isSafePath, }