UNPKG

@hyperjump/uri

Version:

A small and fast library for validating parsing and resolving URIs and IRIs

351 lines (289 loc) 13.5 kB
/** * @import * as API from "./index.d.ts" */ /** * @template A * @typedef {(value: string) => A} Parser */ /** * @typedef {(value: string) => string} Normalizer */ /** * @typedef {{ * parseAbsolute: Parser<API.AbsoluteIdentifierComponents>; * parseReference: Parser<API.RelativeIdentifierComponents>; * parse: Parser<API.IdentifierComponents>; * normalizePath: Normalizer; * normalizeQuery: Normalizer; * normalizeFragment: Normalizer; * }} Strategy */ // Common const hexdig = `[a-fA-F0-9]`; const unreserved = `[a-zA-Z0-9-._~]`; const subDelims = `[!$&'()*+,;=]`; const pctEncoded = `%${hexdig}${hexdig}`; const decOctet = `(?:\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])`; const ipV4Address = `${decOctet}\\.${decOctet}\\.${decOctet}\\.${decOctet}`; const h16 = `${hexdig}{1,4}`; const ls32 = `(?:${h16}:${h16}|${ipV4Address})`; const ipV6Address = `(?:(?:${h16}:){6}${ls32}|::(?:${h16}:){5}${ls32}|(?:${h16})?::(?:${h16}:){4}${ls32}|(?:(?:${h16}:){0,1}${h16})?::(?:${h16}:){3}${ls32}|(?:(?:${h16}:){0,2}${h16})?::(?:${h16}:){2}${ls32}|(?:(?:${h16}:){0,3}${h16})?::(?:${h16}:){1}${ls32}|(?:(?:${h16}:){0,4}${h16})?::${ls32}|(?:(?:${h16}:){0,5}${h16})?::${h16}|(?:(?:${h16}:){0,6}${h16})?::)`; const ipVFuture = `[vV]${hexdig}+\\.(?:${unreserved}|${subDelims}|:)+`; const ipLiteral = `\\[(?:${ipV6Address}|${ipVFuture})\\]`; const scheme = `(?<scheme>[a-zA-Z][a-zA-Z0-9-+.]*)`; const port = `:(?<port>\\d*)`; // URI const regName = `(?:${unreserved}|${pctEncoded}|${subDelims})*?`; const host = `(?<host>${ipLiteral}|${ipV4Address}|${regName})`; const userinfo = `(?<userinfo>(?:${unreserved}|${pctEncoded}|${subDelims}|:)*)`; const pchar = `(?:${unreserved}|${pctEncoded}|${subDelims}|:|@)`; const segment = `${pchar}*?`; const pathAbEmpty = `(?:/${segment})*`; const authority = `(?<authority>(?:${userinfo}@)?${host}(?:${port})?)`; const path = `(?<path>${pathAbEmpty})`; const pathWithoutAuthority = `(?<path2>(?!//)${segment}${pathAbEmpty})`; const query = `(?:\\?(?<query>(?:${pchar}|/|\\?)*))?`; const fragment = `(?:#(?<fragment>(?:${pchar}|/|\\?)*))?`; const uri = `^${scheme}:(?://${authority}${path}|${pathWithoutAuthority})${query}${fragment}$`; const uriReference = `^(?:${scheme}:|)(?://${authority}${path}|${pathWithoutAuthority})${query}${fragment}$`; const absoluteUri = `^${scheme}:(?://${authority}${path}|${pathWithoutAuthority})${query}$`; // IRI const iunreserved = `[a-zA-Z0-9\\-._~\\u{A0}-\\u{D7FF}\\u{F900}-\\u{FDCF}\\u{FDF0}-\\u{FFEF}\\u{10000}-\\u{1FFFD}\\u{20000}-\\u{2FFFD}\\u{30000}-\\u{3FFFD}\\u{40000}-\\u{4FFFD}\\u{50000}-\\u{5FFFD}\\u{60000}-\\u{6FFFD}\\u{70000}-\\u{7FFFD}\\u{80000}-\\u{8FFFD}\\u{90000}-\\u{9FFFD}\\u{A0000}-\\u{AFFFD}\\u{B0000}-\\u{BFFFD}\\u{C0000}-\\u{CFFFD}\\u{D0000}-\\u{DFFFD}\\u{E1000}-\\u{EFFFD}]`; const iprivate = `[\\u{E000}-\\u{F8FF}\\u{F0000}-\\u{FFFFD}\\u{100000}-\\u{10FFFD}]`; const iregName = `(?:${iunreserved}|${pctEncoded}|${subDelims})*?`; const ihost = `(?<host>${ipLiteral}|${ipV4Address}|${iregName})`; const iuserinfo = `(?<userinfo>(?:${iunreserved}|${pctEncoded}|${subDelims}|:)*)`; const ipchar = `(?:${iunreserved}|${pctEncoded}|${subDelims}|:|@)`; const isegment = `${ipchar}*?`; const ipathAbEmpty = `(?:/${isegment})*`; const iauthority = `(?<authority>(?:${iuserinfo}@)?${ihost}(?:${port})?)`; const ipath = `(?<path>${ipathAbEmpty})`; const ipathWithoutAuthority = `(?<path2>(?!//)${isegment}${ipathAbEmpty})`; const iquery = `(?:\\?(?<query>(?:${ipchar}|${iprivate}|/|\\?)*))?`; const ifragment = `(?:#(?<fragment>(?:${ipchar}|/|\\?)*))?`; const iri = `^${scheme}:(?://${iauthority}${ipath}|${ipathWithoutAuthority})${iquery}${ifragment}$`; const iriReference = `^(?:${scheme}:|)(?://${iauthority}${ipath}|${ipathWithoutAuthority})${iquery}${ifragment}$`; const absoluteIri = `^${scheme}:(?://${iauthority}${ipath}|${ipathWithoutAuthority})${iquery}$`; // Components /** @type (strategy: Strategy) => (reference: string, base: string) => string */ const resolveReference = (strategy) => (reference, base) => { const resolvedComponents = /** @type API.IdentifierComponents */ (strategy.parseReference(reference)); if (resolvedComponents.scheme === undefined) { const baseComponents = strategy.parseAbsolute(base); resolvedComponents.scheme = baseComponents.scheme; if (resolvedComponents.authority === undefined) { resolvedComponents.authority = baseComponents.authority; resolvedComponents.userinfo = baseComponents.userinfo; resolvedComponents.host = baseComponents.host; resolvedComponents.port = baseComponents.port; if (resolvedComponents.path === "") { resolvedComponents.path = baseComponents.path; resolvedComponents.query ??= baseComponents.query; } else if (!resolvedComponents.path.startsWith("/")) { resolvedComponents.path = mergePaths(resolvedComponents.path, baseComponents); } } } return composeIdentifier(strategy, resolvedComponents); }; /** @type (path: string, base: API.IdentifierComponents) => string */ const mergePaths = (path, base) => { if (base.authority && base.path === "") { return "/" + path; } else { const position = base.path.lastIndexOf("/"); return position === -1 ? path : base.path.slice(0, position + 1) + path; } }; const isNoOpSegment = /^\.?\.\/|^\.\.?$/; const isSlashDotSegment = /^\/\.(?:\/|$)/; const isUpSegment = /^\/\.\.(?:\/|$)/; /** @type (path: string) => string */ const removeDotSegments = (path) => { let output = ""; while (path.length > 0) { if (isNoOpSegment.test(path)) { path = removeSegment(path); } else if (isSlashDotSegment.test(path)) { path = replaceSegmentWithSlash(path); } else if (isUpSegment.test(path)) { path = replaceSegmentWithSlash(path); output = removeLastSegment(output); } else { const segment = getSegment(path); path = removeSegment(path); output += segment; } } return output; }; /** @type (path: string) => string */ const removeSegment = (path) => { const position = path.indexOf("/", 1); return position === -1 ? "" : "/" + path.slice(position + 1); }; /** @type (path: string) => string */ const replaceSegmentWithSlash = (path) => { const position = path.indexOf("/", 1); return position === -1 ? "/" : "/" + path.slice(position + 1); }; /** @type (path: string) => string */ const removeLastSegment = (path) => { const position = path.lastIndexOf("/"); return position === -1 ? path : path.slice(0, position); }; /** @type (path: string) => string */ const getSegment = (path) => { const position = path.indexOf("/", 1); return position === -1 ? path : path.slice(0, position); }; /** @type (strategy: Strategy, components: API.IdentifierComponents) => string */ const composeIdentifier = (strategy, components) => { let resolved = components.scheme.toLowerCase() + ":"; resolved += components.authority === undefined ? "" : "//" + (components.userinfo === undefined ? "" : components.userinfo + "@") + components.host.toLowerCase() + (components.port === undefined ? "" : ":" + components.port); resolved += strategy.normalizePath(components.path); resolved += components.query === undefined ? "" : "?" + strategy.normalizeQuery(components.query); resolved += components.fragment === undefined ? "" : "#" + strategy.normalizeFragment(components.fragment); return resolved; }; const percentEncoded = new RegExp(pctEncoded, "g"); /** @type (isAllowed: (value: string) => boolean) => (match: string) => string */ const percentEncodedToChar = (isAllowed) => (match) => { const charCode = parseInt(match.slice(1), 16); const char = String.fromCharCode(charCode); return isAllowed(char) ? char : match.toUpperCase(); }; const isAllowedUnescapedInPath = RegExp.prototype.test.bind(new RegExp(`${unreserved}|${subDelims}|[:@]`)); const isAllowedUnescapedInIPath = RegExp.prototype.test.bind(new RegExp(`${iunreserved}|${subDelims}|[:@]`, "u")); /** @type (isAllowed: (value: string) => boolean) => (segment: string) => string */ const normalizePath = (isAllowed) => (segment) => removeDotSegments(segment).replaceAll(percentEncoded, percentEncodedToChar(isAllowed)); const isAllowedUnescapedInQuery = RegExp.prototype.test.bind(new RegExp(`${unreserved}|${subDelims}|[:@/?]`)); const isAllowedUnescapedInIQuery = RegExp.prototype.test.bind(new RegExp(`${iunreserved}|${subDelims}|[:@/?]`, "u")); /** @type (isAllowed: (value: string) => boolean) => (segment: string) => string */ const normalizeQuery = (isAllowed) => (query) => query.replaceAll(percentEncoded, percentEncodedToChar(isAllowed)); // API /** @type API.isUri */ export const isUri = RegExp.prototype.test.bind(new RegExp(uri)); /** @type API.isUriReference */ export const isUriReference = RegExp.prototype.test.bind(new RegExp(uriReference)); /** @type API.isAbsoluteUri */ export const isAbsoluteUri = RegExp.prototype.test.bind(new RegExp(absoluteUri)); /** @type API.isIri */ export const isIri = RegExp.prototype.test.bind(new RegExp(iri, "u")); /** @type API.isIriReference */ export const isIriReference = RegExp.prototype.test.bind(new RegExp(iriReference, "u")); /** @type API.isAbsoluteIri */ export const isAbsoluteIri = RegExp.prototype.test.bind(new RegExp(absoluteIri, "u")); /** * @type (pattern: RegExp, type: string) => (value: string) => any */ const createParser = (pattern, type) => (value) => { const match = pattern.exec(value); if (match === null) { throw Error(`Invalid ${type}: ${value}`); } const groups = /** @type Record<string, string> */ (match.groups); if (groups.authority === undefined) { groups.path = groups.path2; } delete groups.path2; return groups; }; /** @type API.parseUri */ export const parseUri = createParser(new RegExp(uri), "URI"); /** @type API.parseUriReference */ export const parseUriReference = createParser(new RegExp(uriReference), "URI-reference"); /** @type API.parseAbsoluteUri */ export const parseAbsoluteUri = createParser(new RegExp(absoluteUri), "absolute-URI"); /** @type API.parseIri */ export const parseIri = createParser(new RegExp(iri, "u"), "IRI"); /** @type API.parseIriReference */ export const parseIriReference = createParser(new RegExp(iriReference, "u"), "IRI-reference"); /** @type API.parseAbsoluteIri */ export const parseAbsoluteIri = createParser(new RegExp(absoluteIri, "u"), "absolute-IRI"); /** @type Record<string, Strategy> */ const strategies = { uri: { parseAbsolute: parseAbsoluteUri, parseReference: parseUriReference, parse: parseUri, normalizePath: normalizePath(isAllowedUnescapedInPath), normalizeQuery: normalizeQuery(isAllowedUnescapedInQuery), normalizeFragment: normalizeQuery(isAllowedUnescapedInQuery) }, iri: { parseAbsolute: parseAbsoluteIri, parseReference: parseIriReference, parse: parseIri, normalizePath: normalizePath(isAllowedUnescapedInIPath), normalizeQuery: normalizeQuery(isAllowedUnescapedInIQuery), normalizeFragment: normalizeQuery(isAllowedUnescapedInIQuery) } }; /** @type (strategy: Strategy) => (identifier: string) => string */ const toAbsolute = (strategy) => (identifier) => { const components = strategy.parse(identifier); delete components.fragment; return composeIdentifier(strategy, components); }; /** @type API.toAbsoluteUri */ export const toAbsoluteUri = toAbsolute(strategies.uri); /** @type API.toAbsoluteIri */ export const toAbsoluteIri = toAbsolute(strategies.iri); /** @type (strategy: Strategy) => (identifier: string) => string */ const normalize = (strategy) => (identifier) => { const components = strategy.parse(identifier); return composeIdentifier(strategy, components); }; /** @type API.normalizeUri */ export const normalizeUri = normalize(strategies.uri); /** @type API.normalizeIri */ export const normalizeIri = normalize(strategies.iri); /** @type API.resolveUri */ export const resolveUri = resolveReference(strategies.uri); /** @type API.resolveIri */ export const resolveIri = resolveReference(strategies.iri); /** @type (strategy: Strategy) => (uri: string, relativeTo: string) => string */ const toRelative = (strategy) => (uri, relativeTo) => { const fromUri = strategy.parseAbsolute(uri); const toUri = strategy.parse(relativeTo); if (toUri.scheme !== fromUri.scheme) { return relativeTo; } if (toUri.authority !== fromUri.authority) { return relativeTo; } let result; if (fromUri.path === toUri.path) { result = ""; } else { const fromSegments = fromUri.path.split("/"); const toSegments = toUri.path.split("/"); let position = 0; while (fromSegments[position] === toSegments[position] && position < fromSegments.length - 1 && position < toSegments.length - 1) { position++; } const segments = []; for (let index = position + 1; index < fromSegments.length; index++) { segments.push(".."); } for (let index = position; index < toSegments.length; index++) { segments.push(toSegments[index]); } result = segments.join("/"); } if (toUri.query !== undefined) { result += `?${toUri.query}`; } if (toUri.fragment !== undefined) { result += `#${toUri.fragment}`; } return result; }; /** @type API.toRelativeUri */ export const toRelativeUri = toRelative(strategies.uri); /** @type API.toRelativeIri */ export const toRelativeIri = toRelative(strategies.iri);