UNPKG

@hyperjump/uri

Version:

A small and fast library for validating, parsing, and resolving URIs and IRIs

378 lines (310 loc) 13.4 kB
/**
 * Components of a full identifier (URI or IRI), as captured by the named
 * groups of the parsing expressions below.
 *
 * @typedef {{
 *   scheme: string;
 *   authority: string;
 *   userinfo?: string;
 *   host: string;
 *   port?: string;
 *   path: string;
 *   query?: string;
 *   fragment?: string;
 * }} IdentifierComponents
 */

/**
 * Components of an absolute identifier (an identifier without a fragment).
 *
 * @typedef {{
 *   scheme: string;
 *   authority: string;
 *   userinfo?: string;
 *   host: string;
 *   port?: string;
 *   path: string;
 *   query?: string;
 * }} AbsoluteIdentifierComponents
 */

/**
 * Components of a reference, where everything except the path may be absent.
 *
 * @typedef {{
 *   scheme?: string;
 *   authority?: string;
 *   userinfo?: string;
 *   host?: string;
 *   port?: string;
 *   path: string;
 *   query?: string;
 *   fragment?: string;
 * }} RelativeIdentifierComponents
 */

/**
 * @template A
 * @typedef {(value: string) => A} Parser
 */

/**
 * @typedef {(value: string) => string} Normalizer
 */

/**
 * The set of parsers and normalizers that distinguishes URI handling from IRI
 * handling. Every algorithm below is written once against this interface.
 *
 * @typedef {{
 *   parseAbsolute: Parser<AbsoluteIdentifierComponents>;
 *   parseReference: Parser<RelativeIdentifierComponents>;
 *   parse: Parser<IdentifierComponents>;
 *   normalizePath: Normalizer;
 *   normalizeQuery: Normalizer;
 *   normalizeFragment: Normalizer;
 * }} Strategy
 */

// Common
//
// These fragments are spliced into both plain and unicode ("u" flag) regular
// expressions, so literal "-" is always placed last in a character class where
// it is unambiguous in both modes.
const hexdig = `[a-fA-F0-9]`;
const unreserved = `[a-zA-Z0-9._~-]`;
const subDelims = `[!$&'()*+,;=]`;
const pctEncoded = `%${hexdig}${hexdig}`;

const decOctet = `(?:\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])`;
const ipV4Address = `${decOctet}\\.${decOctet}\\.${decOctet}\\.${decOctet}`;
const h16 = `${hexdig}{1,4}`;
const ls32 = `(?:${h16}:${h16}|${ipV4Address})`;
// One alternative per line of the IPv6address rule in RFC 3986 section 3.2.2.
const ipV6Address = "(?:" + [
  `(?:${h16}:){6}${ls32}`,
  `::(?:${h16}:){5}${ls32}`,
  `(?:${h16})?::(?:${h16}:){4}${ls32}`,
  `(?:(?:${h16}:){0,1}${h16})?::(?:${h16}:){3}${ls32}`,
  `(?:(?:${h16}:){0,2}${h16})?::(?:${h16}:){2}${ls32}`,
  `(?:(?:${h16}:){0,3}${h16})?::(?:${h16}:){1}${ls32}`,
  `(?:(?:${h16}:){0,4}${h16})?::${ls32}`,
  `(?:(?:${h16}:){0,5}${h16})?::${h16}`,
  `(?:(?:${h16}:){0,6}${h16})?::`
].join("|") + ")";
const ipVFuture = `v${hexdig}+\\.(?:${unreserved}|${subDelims}|:)+`;
const ipLiteral = `\\[(?:${ipV6Address}|${ipVFuture})\\]`;

const scheme = `(?<scheme>[a-zA-Z][a-zA-Z0-9+.-]*)`;
const port = `:(?<port>\\d*)`;

// URI
const regName = `(?:${unreserved}|${pctEncoded}|${subDelims})*?`;
const host = `(?<host>${ipLiteral}|${ipV4Address}|${regName})`;
const userinfo = `(?<userinfo>(?:${unreserved}|${pctEncoded}|${subDelims}|:)*)`;
const pchar = `(?:${unreserved}|${pctEncoded}|${subDelims}|:|@)`;
const segment = `${pchar}*?`;
const pathAbEmpty = `(?:/${segment})*`;
const authority = `(?<authority>(?:${userinfo}@)?${host}(?:${port})?)`;
const path = `(?<path>${pathAbEmpty})`;
// Captured as "path2" because a group name may only appear once per pattern;
// createParser folds it back into "path".
const pathWithoutAuthority = `(?<path2>${segment}${pathAbEmpty})`;
const query = `(?:\\?(?<query>(?:${pchar}|/|\\?)*))?`;
const fragment = `(?:#(?<fragment>(?:${pchar}|/|\\?)*))?`;

const uri = `^${scheme}:(?://${authority}${path}|${pathWithoutAuthority})${query}${fragment}$`;
const uriReference = `^(?:${scheme}:|)(?://${authority}${path}|${pathWithoutAuthority})${query}${fragment}$`;
const absoluteUri = `^${scheme}:(?://${authority}${path}|${pathWithoutAuthority})${query}$`;

// IRI
//
// The ucschar and iprivate ranges reach beyond the Basic Multilingual Plane.
// Code points above U+FFFF can only be written as `\u{...}` and only match as
// single characters when the expression is compiled with the "u" flag, so every
// expression built from these fragments MUST be created with that flag.
const ucschar = [
  `\\u00A0-\\uD7FF`,
  `\\uF900-\\uFDCF`,
  `\\uFDF0-\\uFFEF`,
  `\\u{10000}-\\u{1FFFD}`,
  `\\u{20000}-\\u{2FFFD}`,
  `\\u{30000}-\\u{3FFFD}`,
  `\\u{40000}-\\u{4FFFD}`,
  `\\u{50000}-\\u{5FFFD}`,
  `\\u{60000}-\\u{6FFFD}`,
  `\\u{70000}-\\u{7FFFD}`,
  `\\u{80000}-\\u{8FFFD}`,
  `\\u{90000}-\\u{9FFFD}`,
  `\\u{A0000}-\\u{AFFFD}`,
  `\\u{B0000}-\\u{BFFFD}`,
  `\\u{C0000}-\\u{CFFFD}`,
  `\\u{D0000}-\\u{DFFFD}`,
  `\\u{E1000}-\\u{EFFFD}`
].join("");
const iunreserved = `[a-zA-Z0-9._~${ucschar}-]`;
const iprivate = `[\\uE000-\\uF8FF\\u{F0000}-\\u{FFFFD}\\u{100000}-\\u{10FFFD}]`;

const iregName = `(?:${iunreserved}|${pctEncoded}|${subDelims})*?`;
const ihost = `(?<host>${ipLiteral}|${ipV4Address}|${iregName})`;
const iuserinfo = `(?<userinfo>(?:${iunreserved}|${pctEncoded}|${subDelims}|:)*)`;
const ipchar = `(?:${iunreserved}|${pctEncoded}|${subDelims}|:|@)`;
const isegment = `${ipchar}*?`;
const ipathAbEmpty = `(?:/${isegment})*`;
const iauthority = `(?<authority>(?:${iuserinfo}@)?${ihost}(?:${port})?)`;
const ipath = `(?<path>${ipathAbEmpty})`;
const ipathWithoutAuthority = `(?<path2>${isegment}${ipathAbEmpty})`;
const iquery = `(?:\\?(?<query>(?:${ipchar}|${iprivate}|/|\\?)*))?`;
const ifragment = `(?:#(?<fragment>(?:${ipchar}|/|\\?)*))?`;

const iri = `^${scheme}:(?://${iauthority}${ipath}|${ipathWithoutAuthority})${iquery}${ifragment}$`;
const iriReference = `^(?:${scheme}:|)(?://${iauthority}${ipath}|${ipathWithoutAuthority})${iquery}${ifragment}$`;
const absoluteIri = `^${scheme}:(?://${iauthority}${ipath}|${ipathWithoutAuthority})${iquery}$`;

/** Flags required by every expression built from the IRI fragments. */
const iriFlags = "u";

// Components

/**
 * Builds a resolver that resolves `reference` against `base` following the
 * "Transform References" algorithm of RFC 3986 section 5.2.2 and returns the
 * composed, normalized result.
 *
 * The base is only parsed when the reference has no scheme. It is parsed with
 * `strategy.parseAbsolute`, which throws if the base is not a valid absolute
 * identifier (for example, if it carries a fragment).
 *
 * @type (strategy: Strategy) => (reference: string, base: string) => string
 */
const resolveReference = (strategy) => (reference, base) => {
  const resolvedComponents = /** @type IdentifierComponents */ (strategy.parseReference(reference));

  if (resolvedComponents.scheme === undefined) {
    const baseComponents = strategy.parseAbsolute(base);
    resolvedComponents.scheme = baseComponents.scheme;

    if (resolvedComponents.authority === undefined) {
      resolvedComponents.authority = baseComponents.authority;

      if (resolvedComponents.path === "") {
        // An empty path means "same document": inherit the base path, and the
        // base query too unless the reference supplies its own.
        resolvedComponents.path = baseComponents.path;
        if (resolvedComponents.query === undefined) {
          resolvedComponents.query = baseComponents.query;
        }
      } else if (!resolvedComponents.path.startsWith("/")) {
        resolvedComponents.path = mergePaths(resolvedComponents.path, baseComponents);
      }
    }
  }

  return composeIdentifier(strategy, resolvedComponents);
};

/**
 * Merges a relative-path reference with the base path (RFC 3986 section 5.2.3).
 *
 * @type (path: string, base: IdentifierComponents) => string
 */
const mergePaths = (path, base) => {
  // The authority is compared against undefined rather than tested for
  // truthiness: an authority can be present but empty (e.g. "file://"), and in
  // that case the merged path still has to be rooted so it is not later read
  // back as the authority.
  if (base.authority !== undefined && base.path === "") {
    return "/" + path;
  }

  const lastSlash = base.path.lastIndexOf("/");
  return lastSlash === -1 ? path : base.path.slice(0, lastSlash + 1) + path;
};

// A leading "./" or "../", or a path that is exactly "." or "..".
const isNoOpSegment = /^\.?\.\/|^\.\.?$/;
const noOpPrefix = /^\.?\.(?:\/|$)/;
// A leading "/./", or a path that is exactly "/.".
const isSlashDotSegment = /^\/\.(?:\/|$)/;
// A leading "/../", or a path that is exactly "/..".
const isUpSegment = /^\/\.\.(?:\/|$)/;

/**
 * Removes "." and ".." segments from a path (RFC 3986 section 5.2.4).
 *
 * @type (path: string) => string
 */
const removeDotSegments = (path) => {
  let output = "";

  while (path.length > 0) {
    if (isNoOpSegment.test(path)) {
      // Step 2A/2D: drop the prefix itself. Nothing is put in its place, so a
      // relative path stays relative ("./b" becomes "b", not "/b").
      path = path.replace(noOpPrefix, "");
    } else if (isSlashDotSegment.test(path)) {
      // Step 2B.
      path = replaceSegmentWithSlash(path);
    } else if (isUpSegment.test(path)) {
      // Step 2C.
      path = replaceSegmentWithSlash(path);
      output = removeLastSegment(output);
    } else {
      // Step 2E: move the first segment (with its leading "/", if any) over.
      output += getSegment(path);
      path = removeSegment(path);
    }
  }

  return output;
};

/**
 * Drops the first segment of `path`, leaving the remainder rooted at "/".
 *
 * @type (path: string) => string
 */
const removeSegment = (path) => {
  const nextSlash = path.indexOf("/", 1);
  return nextSlash === -1 ? "" : "/" + path.slice(nextSlash + 1);
};

/**
 * Replaces the first segment of `path` with "/".
 *
 * @type (path: string) => string
 */
const replaceSegmentWithSlash = (path) => {
  const nextSlash = path.indexOf("/", 1);
  return nextSlash === -1 ? "/" : "/" + path.slice(nextSlash + 1);
};

/**
 * Removes the last segment of `path` together with its preceding "/", if any.
 * When `path` contains no "/" the whole of it is that last segment, so the
 * result is empty (RFC 3986 section 5.2.4, step 2C).
 *
 * @type (path: string) => string
 */
const removeLastSegment = (path) => {
  const lastSlash = path.lastIndexOf("/");
  return lastSlash === -1 ? "" : path.slice(0, lastSlash);
};

/**
 * Returns the first segment of `path`, including its leading "/" if present.
 *
 * @type (path: string) => string
 */
const getSegment = (path) => {
  const nextSlash = path.indexOf("/", 1);
  return nextSlash === -1 ? path : path.slice(0, nextSlash);
};

/**
 * Lowercases the case-insensitive part of an authority (host and port) while
 * leaving the userinfo, which is case-sensitive, untouched. Neither userinfo
 * nor host can contain a literal "@", so the last "@" is the separator.
 *
 * @type (authority: string) => string
 */
const normalizeAuthority = (authority) => {
  const hostStart = authority.lastIndexOf("@") + 1;
  return authority.slice(0, hostStart) + authority.slice(hostStart).toLowerCase();
};

/**
 * Recomposes components into an identifier string (RFC 3986 section 5.3),
 * lowercasing the scheme and host and running the strategy's normalizers over
 * the path, query, and fragment. Only `authority` is consulted for the
 * authority part; `userinfo`, `host`, and `port` are ignored.
 *
 * @type (strategy: Strategy, components: IdentifierComponents) => string
 */
const composeIdentifier = (strategy, components) => {
  let resolved = components.scheme.toLowerCase() + ":";

  if (components.authority !== undefined) {
    resolved += "//" + normalizeAuthority(components.authority);
  }

  resolved += strategy.normalizePath(components.path);

  if (components.query !== undefined) {
    resolved += "?" + strategy.normalizeQuery(components.query);
  }

  if (components.fragment !== undefined) {
    resolved += "#" + strategy.normalizeFragment(components.fragment);
  }

  return resolved;
};

const percentEncoded = new RegExp(pctEncoded, "g");

/**
 * Builds a replacer for a single percent-encoded octet: the octet is replaced
 * by its character when `isAllowed` accepts that character, and is otherwise
 * kept with its hex digits uppercased.
 *
 * Octets of 0x80 and above are never decoded. Each "%XX" is one byte, so such
 * an octet is at best part of a multi-byte sequence; turning it into the
 * character with the same code unit would corrupt the text ("%C3%A9" would
 * become two unrelated Latin-1 characters).
 *
 * NOTE(review): the `isAllowed` sets below also accept sub-delims, ":", "@"
 * and (for queries) "/" and "?". RFC 3986 section 6.2.2.2 only calls for
 * decoding unreserved characters -- confirm decoding reserved characters is
 * intended before relying on the output for equivalence checks.
 *
 * @type (isAllowed: (value: string) => boolean) => (match: string) => string
 */
const percentEncodedToChar = (isAllowed) => (match) => {
  const charCode = parseInt(match.slice(1), 16);
  if (charCode >= 0x80) {
    return match.toUpperCase();
  }

  const char = String.fromCharCode(charCode);
  return isAllowed(char) ? char : match.toUpperCase();
};

const isAllowedUnescapedInPath = RegExp.prototype.test.bind(new RegExp(`${unreserved}|${subDelims}|[:@]`));
const isAllowedUnescapedInIPath = RegExp.prototype.test.bind(new RegExp(`${iunreserved}|${subDelims}|[:@]`, iriFlags));

/**
 * Builds a path normalizer. Percent-encoding is normalized before dot segments
 * are removed so that an encoded dot ("%2E") takes part in dot-segment removal
 * and normalizing twice gives the same result as normalizing once. Decoding
 * can never introduce a new "/" because "/" is not in the allowed set.
 *
 * @type (isAllowed: (value: string) => boolean) => (segment: string) => string
 */
const normalizePath = (isAllowed) => {
  const decode = percentEncodedToChar(isAllowed);
  return (rawPath) => removeDotSegments(rawPath.replace(percentEncoded, decode));
};

const isAllowedUnescapedInQuery = RegExp.prototype.test.bind(new RegExp(`${unreserved}|${subDelims}|[:@/?]`));
const isAllowedUnescapedInIQuery = RegExp.prototype.test.bind(new RegExp(`${iunreserved}|${subDelims}|[:@/?]`, iriFlags));

/**
 * Builds a query (or fragment) normalizer that only normalizes
 * percent-encoding.
 *
 * @type (isAllowed: (value: string) => boolean) => (segment: string) => string
 */
const normalizeQuery = (isAllowed) => {
  const decode = percentEncodedToChar(isAllowed);
  return (rawQuery) => rawQuery.replace(percentEncoded, decode);
};

// API

/** Tests whether a string is a URI. */
export const isUri = RegExp.prototype.test.bind(new RegExp(uri));
/** Tests whether a string is a URI or a relative reference. */
export const isUriReference = RegExp.prototype.test.bind(new RegExp(uriReference));
/** Tests whether a string is a URI without a fragment. */
export const isAbsoluteUri = RegExp.prototype.test.bind(new RegExp(absoluteUri));

/** Tests whether a string is an IRI. */
export const isIri = RegExp.prototype.test.bind(new RegExp(iri, iriFlags));
/** Tests whether a string is an IRI or a relative reference. */
export const isIriReference = RegExp.prototype.test.bind(new RegExp(iriReference, iriFlags));
/** Tests whether a string is an IRI without a fragment. */
export const isAbsoluteIri = RegExp.prototype.test.bind(new RegExp(absoluteIri, iriFlags));

/**
 * Builds a parser from a pattern with named groups. The returned parser throws
 * `Error("Invalid <type>: <value>")` when the value does not match. Groups
 * that did not take part in the match are present with the value `undefined`.
 *
 * @template A
 * @type (pattern: RegExp, type: string) => Parser<A>
 */
const createParser = (pattern, type) => (value) => {
  const match = pattern.exec(value);
  if (match === null) {
    throw Error(`Invalid ${type}: ${value}`);
  }

  const groups = /** @type Record<string, string> */ (match.groups);
  // Without an authority the path was captured by the "path2" alternative.
  if (groups.authority === undefined) {
    groups.path = groups.path2;
  }
  delete groups.path2;

  return /** @type A */ (groups);
};

export const parseUri = /** @type Parser<IdentifierComponents> */ (
  createParser(new RegExp(uri), "URI")
);
export const parseUriReference = /** @type Parser<RelativeIdentifierComponents> */ (
  createParser(new RegExp(uriReference), "URI-reference")
);
export const parseAbsoluteUri = /** @type Parser<AbsoluteIdentifierComponents> */ (
  createParser(new RegExp(absoluteUri), "absolute-URI")
);

export const parseIri = /** @type Parser<IdentifierComponents> */ (
  createParser(new RegExp(iri, iriFlags), "IRI")
);
export const parseIriReference = /** @type Parser<RelativeIdentifierComponents> */ (
  createParser(new RegExp(iriReference, iriFlags), "IRI-reference")
);
export const parseAbsoluteIri = /** @type Parser<AbsoluteIdentifierComponents> */ (
  createParser(new RegExp(absoluteIri, iriFlags), "absolute-IRI")
);

/** @type Record<string, Strategy> */
const strategies = {
  uri: {
    parseAbsolute: parseAbsoluteUri,
    parseReference: parseUriReference,
    parse: parseUri,
    normalizePath: normalizePath(isAllowedUnescapedInPath),
    normalizeQuery: normalizeQuery(isAllowedUnescapedInQuery),
    normalizeFragment: normalizeQuery(isAllowedUnescapedInQuery)
  },
  iri: {
    parseAbsolute: parseAbsoluteIri,
    parseReference: parseIriReference,
    parse: parseIri,
    normalizePath: normalizePath(isAllowedUnescapedInIPath),
    normalizeQuery: normalizeQuery(isAllowedUnescapedInIQuery),
    normalizeFragment: normalizeQuery(isAllowedUnescapedInIQuery)
  }
};

/**
 * Builds a function that strips the fragment from an identifier and returns
 * the normalized remainder. Throws if the identifier does not parse.
 *
 * @type (strategy: Strategy) => (identifier: string) => string
 */
const toAbsolute = (strategy) => (identifier) => {
  const components = strategy.parse(identifier);
  delete components.fragment;

  return composeIdentifier(strategy, components);
};

export const toAbsoluteUri = toAbsolute(strategies.uri);
export const toAbsoluteIri = toAbsolute(strategies.iri);

/**
 * Builds a function that parses an identifier and recomposes it in normalized
 * form. Throws if the identifier does not parse.
 *
 * @type (strategy: Strategy) => (identifier: string) => string
 */
const normalize = (strategy) => (identifier) => {
  const components = strategy.parse(identifier);
  return composeIdentifier(strategy, components);
};

export const normalizeUri = normalize(strategies.uri);
export const normalizeIri = normalize(strategies.iri);

export const resolveUri = resolveReference(strategies.uri);
export const resolveIri = resolveReference(strategies.iri);

/**
 * Builds a function that expresses the identifier `relativeTo` as a reference
 * relative to the base `uri`. Note the argument order: the first argument is
 * the base, the second is the target.
 *
 * When the scheme or authority differ, `relativeTo` is returned unchanged.
 * Throws if `uri` is not a valid absolute identifier or `relativeTo` is not a
 * valid identifier.
 *
 * @type (strategy: Strategy) => (uri: string, relativeTo: string) => string
 */
const toRelative = (strategy) => (uri, relativeTo) => {
  const fromUri = strategy.parseAbsolute(uri);
  const toUri = strategy.parse(relativeTo);

  if (toUri.scheme !== fromUri.scheme || toUri.authority !== fromUri.authority) {
    return relativeTo;
  }

  // A reference with an empty path and no query inherits the base's query when
  // resolved, so the empty-path shortcut is only valid if that cannot change
  // the target.
  const wouldInheritBaseQuery = toUri.query === undefined && fromUri.query !== undefined;

  let result;
  if (fromUri.path === toUri.path && !wouldInheritBaseQuery) {
    result = "";
  } else {
    const fromSegments = fromUri.path.split("/");
    const toSegments = toUri.path.split("/");

    // Length of the shared directory prefix. The final segment of each path is
    // never counted: it names the document, not a directory.
    let position = 0;
    while (
      fromSegments[position] === toSegments[position]
      && position < fromSegments.length - 1
      && position < toSegments.length - 1
    ) {
      position++;
    }

    const segments = [];
    for (let index = position + 1; index < fromSegments.length; index++) {
      segments.push("..");
    }
    for (let index = position; index < toSegments.length; index++) {
      segments.push(toSegments[index]);
    }

    result = segments.join("/");

    if (result === "") {
      // An empty reference means "the base itself"; "./" is the spelling of
      // "the base's directory".
      result = "./";
    } else if (segments[0].includes(":")) {
      // A first segment containing ":" would be read back as a scheme
      // (RFC 3986 section 4.2).
      result = "./" + result;
    }
  }

  if (toUri.query !== undefined) {
    result += `?${toUri.query}`;
  }

  if (toUri.fragment !== undefined) {
    result += `#${toUri.fragment}`;
  }

  return result;
};

export const toRelativeUri = toRelative(strategies.uri);
export const toRelativeIri = toRelative(strategies.iri);