UNPKG

spec-url

Version:

URL library that implements a reference resolution algorithm for WHATWG URLs

381 lines (282 loc) 9.96 kB
import { utf8, pct, _encodeSets as S } from './characters.js' import { parse, parsePath, isDottedSegment } from './parser.js' import { URLIPv6Address, URLDomainName, URLIPv4Address, parseAuth, parsePort, parseHost, parseDomainOrIPv4Address, printHost, isLocalHost, ipv6, ipv4 } from './authority.js' import { options as opts, modes, modeFor, componentTypes, ord, upto, pureRebase, schemesAreEquivalent, isFragmentOnlyURL, normaliseScheme, _attributeNames, _firstNonEmptySegment, _removePrecedingSegments, } from './model.js' const { setPrototypeOf:setProto, assign } = Object const log = console.log.bind (console) // Rebase // ------ // The `rebase` function is a generalisation of the URL resolution behaviour // that is implicitly specified by the WHATWG. It makes the same distinctions // between file-, web- and opaque-path URLs as the WHATWG standard does, but // also supports schemeless URLs. // It uses what RFC3986 calls the 'non-strict' transformation of // references (section 5.2) for 'special' URLs: If the input has a scheme // and the scheme is equivalent to the scheme of the base URL, then it is // removed. It then continues with the 'strict' behaviour as implemented // by the `pureRebase` function. function rebase (url, base) { base = typeof base === 'string' ? parse (base) : (base ?? { }) if (typeof url === 'string') url = parse (url, modeFor (base)) if (modeFor (url) & opts.nonStrict && schemesAreEquivalent (url.scheme, base.scheme)) url = setProto ({ scheme:null }, url) if (url.scheme || isFragmentOnlyURL (url) || !hasOpaquePath (base)) return pureRebase (url, base) else throw new RebaseError (url, base) } function goto (base, url) { return rebase (url, base) } class RebaseError extends Error { constructor (url1, url2) { super (`Cannot rebase <${print(url1)}> onto <${print(url2)}>`) } } // Note: opaque-paths are currently not modeled by using a // separate *opaque-path* component-type. Instead they are // detected by looking at the shape of the URL as follows. const hasOpaquePath = url => !(modeFor (url) & opts.hierPart) && url.root == null && url.host == null // Reference Resolution // -------------------- function resolve (input, base, _encodeOptions = {}) { const result = rebase (input, base) if (result.scheme == null) throw new ResolveError (input, base) const mode = modeFor (result) if (mode & opts.stealAuth) { if (result.host == null || result.host === '') { const match = _firstNonEmptySegment (result) if (!match) throw new ResolveError (result) _removePrecedingSegments (result, match) assign (result, parseAuth (match.value)) } } if (mode & opts.plainAuth) { const { user, pass, port } = result if (user != null || pass != null || port != null) throw new Error (`Cannot resolve <${print(result)}>\n\t -A file URL must not have a username, password, or port\n`) } if (mode & opts.hierPart) { if (result.host == null) result.host = '' if (result.drive == null) result.root = '/' } // Convert opaque host to domain or IPv4 address if (mode & opts.parseDomain && typeof result.host === 'string' && result.host.length) result.host = parseDomainOrIPv4Address (result.host) const { fixup = false, strict = false, unicode = false } = _encodeOptions return percentEncodeMut (normaliseMut (result), { fixup, strict, unicode }) } class ResolveError extends TypeError { constructor (url1, url2) { if (url2 != null) super (`Cannot resolve <${print(url1)}> against <${print(url2)}>`) else super (`Cannot resolve <${print(url1)}>`) } } // Normalisation // ------------- const defaultPorts = { http: 80, ws: 80, https: 443, wss: 443, ftp: 21 } const normalise = (url, coded = true) => normaliseMut (assign ({}, url), coded) const validateOpaqueHost = input => { return pct.encode (input, S.opaqueHost, { fixup:false, strict:false }) } function normaliseMut (r, coded = true) { // ### Scheme normalisation if (r.scheme) r.scheme = normaliseScheme (r.scheme) // ### Authority normalisation if (r.pass === '') delete r.pass if (!r.pass && r.user === '') delete r.user if (r.port === '') delete r.port // ### Drive letter normalisation if (r.drive) r.drive = r.drive[0] + ':' // ### Path segement normalisation if (hasOpaquePath (r) && r.dirs) r.dirs = r.dirs.slice () else { const dirs = [] for (const x of r.dirs ?? []) { switch (isDottedSegment (x, coded)) { case 0: dirs.push (x) case 1: continue case 2: if (dirs.length && dirs[dirs.length-1] !== '..') dirs.pop () else if (!r.root) dirs.push ('..') } } if (dirs.length) r.dirs = dirs else if (ord (r) === componentTypes.dir) r.dirs = ['.'] else delete r.dirs } // ### Scheme-based authority normalisation if (r.scheme === 'file' && isLocalHost (r.host)) r.host = '' else if (r.port === defaultPorts [r.scheme]) delete r.port // for (const k in _attributeNames) // if (r[k] == null) delete r[k] return r } // Percent Coding URLs // ------------------- // NB: has no effect on URLDomainNames; // Doman toASCII is deferred to a serialisation option. const percentEncode = (url, settings) => percentEncodeMut (assign ({}, url), settings) function percentEncodeMut (r, settings) { if (r.user != null) r.user = pct.encode (r.user, S.user, settings) if (r.pass != null) r.pass = pct.encode (r.pass, S.pass, settings) if (r.host != null) { r.host = typeof r.host === 'string' ? pct.encode (r.host, S.opaqueHost, settings) : r.host } // opaque paths vs hierarchical paths const encodeSeg = hasOpaquePath (r) ? S.opaquePath : S.pathSegment if (r.dirs) r.dirs = r.dirs.map (x => pct.encode (x, encodeSeg, settings)) if (r.file != null) r.file = pct.encode (r.file, encodeSeg, settings) if (r.query != null) { const config = modeFor (r) const querySet = config & opts.specialQuery ? S.specialQuery : S.query r.query = pct.encode (r.query, querySet, settings) } if (r.hash != null) r.hash = pct.encode (r.hash, S.fragment, settings) return r } // Percent decoding // TODO consider doing puny decoding as well // NB this is a bit dangerous to expose like this as the reuslt looks like // an URL object, but its components are not percent coded strings anymore const _dont = { scheme:1, port:1, drive:1, root:1 } const percentDecode = url => { const r = { } for (let k in _attributeNames) if (url[k] != null) r[k] = _dont [k] ? url[k] : k === 'dirs' ? url[k] .map (pct.decode) : typeof url[k] === 'string' ? pct.decode (url[k]) : url[k] return r } // URL Printing // ------------ const isSchemeLike = /^([a-zA-Z][a-zA-Z+\-.]*):(.*)$/ const isDriveLike = /^[a-zA-Z][:|]$/ // function isDriveLike (str) { // const a = str.charCodeAt(0) | 32 // return (str[1] === ':' || str[1] === '|') // && 97 <= a && a <= 122 // } function print (url, options) { const url_ = normaliseForPrinting (url, options) return unsafePrint (url_, options) } function normaliseForPrinting (url, options) { url = percentEncode (url, options) // prevent accidentally producing an authority or a path-root const authNorDrive = url.host == null && url.drive == null const emptyFirstDir = url.dirs && url.dirs[0] === '' if (authNorDrive && emptyFirstDir) url.dirs.unshift ('.') // prevent accidentally producing a scheme let match, position const o = ord (url) if (o === componentTypes.dir && (match = isSchemeLike.exec (url.dirs[0]))) url.dirs[0] = match[1] + '%3A' + match[2] else if (o === componentTypes.file && (match = isSchemeLike.exec (url.file))) url.file = match[1] + '%3A' + match[2] // prevent accidentally producing a drive // TODO this still misses some cases. else if (modeFor (url) & o.winDrive && (position = _firstNonEmptySegment (url)) && isDriveLike.exec (position.value)) { const value = position.value[0] + (position.value[1] === ':' ? '%3A' : '%7C') if (position.ord === componentTypes.file) url.file = value else url.dirs[position.index] = value } return url } // ### Printing the path of an URL const pathname = ({ drive, root, dirs, file }, spec) => print ({ drive, root, dirs, file }, spec) const filePath = ({ drive, root, dirs, file }) => unsafePrint (percentDecode ({ drive, root, dirs, file })) // TODO consider throwing an error if a dir or a file contains '/' // ### Printing prepared URLs function unsafePrint (url, options) { let result = '' const hasCredentials = url.user != null for (const k in _attributeNames) if (url[k] != null) { const v = url[k] result += k === 'scheme' ? ( v + ':') : k === 'user' ? ('//' + v) : k === 'pass' ? ( ':' + v) : k === 'host' ? ((hasCredentials ? '@' : '//') + printHost (v, options)) : k === 'port' ? (':' + v) : k === 'drive' ? ('/' + v) : k === 'root' ? ('/' ) : k === 'dirs' ? (v.join ('/') + '/') : k === 'file' ? (v) : k === 'query' ? ('?' + v) : k === 'hash' ? ('#' + v) : '' } return result } // Exports // ======= const version = '2.5.0-dev' const unstable = { utf8, pct, percentEncodeMut, normaliseMut, encodeSets:S } export { version, modes, modeFor, componentTypes, componentTypes as ords, ord, upto, pureRebase, hasOpaquePath, rebase, rebase as parseRebase, goto, resolve, resolve as parseResolve, resolve as WHATWGResolve, normalise, normalise as normalize, percentEncode, percentDecode, parse, parsePath, parseAuth, parseHost, printHost, URLIPv4Address, URLIPv6Address, URLDomainName, validateOpaqueHost, print, pathname, filePath, unsafePrint, ipv4, ipv6, unstable }