@fastly/as-url
Version:
 
432 lines (368 loc) • 14.3 kB
text/typescript
// Copyright 2021 Fastly, Inc.
import { SPECIAL_SCHEMES, throwInvalidUrlError } from "./util";
import { URLProperties } from "./url-properties";
// Collection of static helpers that split a URL string into its components
// (protocol, auth, host, path, search, hash) and write each component onto the
// URLProperties instance that is passed in. The class holds no state of its own.
export class URLParser {
  // Returns true when `url` starts with a scheme (e.g. "https:", "mailto:").
  // Protocol relative URLs ("//host/path") are reported as not absolute, and a
  // ":" that only shows up inside the path, query or fragment
  // (e.g. "/path?time=12:30") does not count as a scheme delimiter.
  static isAbsoluteUrl(url: string): boolean {
    if (url.startsWith("//")) {
      // Protocol Relative URL
      return false;
    }
    let schemeEnd = url.indexOf(":");
    if (schemeEnd <= 0) {
      return false;
    }
    // Everything before the first ":" must be free of path, query and
    // fragment delimiters for it to be a scheme.
    let scheme = url.substring(0, schemeEnd);
    return (
      !scheme.includes("/") && !scheme.includes("?") && !scheme.includes("#")
    );
  }

  // Applies a scheme relative ("//host/path") or path relative URL on top of
  // `urlProps`, which must already hold the parsed absolute (base) URL.
  // See https://url.spec.whatwg.org/#relative-url-string
  static applySchemeOrPathRelativeUrl(
    relativeUrl: string,
    urlProps: URLProperties
  ): void {
    if (relativeUrl.startsWith("//")) {
      // Protocol (scheme) relative URL: keep the protocol and re-parse
      // everything that follows it.
      let afterAuth = URLParser.parseAuth(relativeUrl.substring(2), urlProps);
      let afterHost = URLParser.parseHost(afterAuth, urlProps);
      let afterPath = URLParser.parsePath(afterHost, urlProps);
      let afterSearch = URLParser.parseSearch(afterPath, urlProps);
      URLParser.parseHash(afterSearch, urlProps);
      return;
    }

    // Only relative URLs that start with "." build on the existing pathname;
    // anything else starts over from an empty path, search and hash.
    if (!relativeUrl.startsWith(".")) {
      urlProps.pathname = "";
      urlProps.search = "";
      urlProps.hash = "";
    }

    // Must be a path relative URL
    URLParser.applyPathRelativeUrl(relativeUrl, urlProps);
  }

  // Resolves a path relative ("./a", "../a", "a/b") or path absolute ("/a/b")
  // URL against urlProps.pathname, then stores the resulting pathname, search
  // and hash on `urlProps`.
  //
  // Throws an Error when ".." navigation would climb above the root.
  static applyPathRelativeUrl(
    relativeUrl: string,
    urlProps: URLProperties
  ): void {
    // Path absolute URLs replace the current pathname, everything else is
    // appended to it with exactly one "/" in between.
    let combined = urlProps.pathname;
    if (relativeUrl.startsWith("/")) {
      combined = relativeUrl;
    } else if (combined.endsWith("/")) {
      combined += relativeUrl;
    } else {
      combined += "/" + relativeUrl;
    }

    // Dot segments only have a meaning inside the path, so split off the
    // query / fragment before normalizing to leave values such as
    // "?next=/a/../b" untouched.
    let pathEnd = combined.length;
    let queryStart = combined.indexOf("?");
    if (queryStart > -1 && queryStart < pathEnd) {
      pathEnd = queryStart;
    }
    let fragmentStart = combined.indexOf("#");
    if (fragmentStart > -1 && fragmentStart < pathEnd) {
      pathEnd = fragmentStart;
    }
    let path = combined.substring(0, pathEnd);
    let suffix = combined.substring(pathEnd);

    // A trailing "." or ".." segment needs a closing slash so the loops below
    // can see it. A file name that merely ends in a dot ("/v1.") is left alone.
    if (path.endsWith("/.") || path.endsWith("/..")) {
      path += "/";
    }

    // Remove any filler navigation (e.g ./)
    while (path.includes("/./")) {
      path = path.replace("/./", "/");
    }

    // Do any parent navigation: drop each "/../" together with the segment
    // in front of it. Searching for "/../" (not just "../") keeps segments
    // that merely end in ".." (e.g. "/foo../") intact.
    let parentMarker = path.indexOf("/../");
    while (parentMarker > -1) {
      if (parentMarker == 0) {
        // There is no segment left to remove
        throw new Error(
          "Relative url " +
            relativeUrl +
            " cannot be applied to the url " +
            urlProps.toString()
        );
      }
      let segmentStart = path.lastIndexOf("/", parentMarker - 1);
      if (segmentStart < 0) {
        segmentStart = 0;
      }
      path =
        path.substring(0, segmentStart) +
        "/" +
        path.substring(parentMarker + 4);
      parentMarker = path.indexOf("/../");
    }

    if (!path.startsWith("/")) {
      path = "/" + path;
    }

    // Re-parse the normalized path together with its own query / fragment and
    // whatever search / hash is still stored on the properties.
    let urlAfterHost = path + suffix + urlProps.search + urlProps.hash;
    let urlAfterPath = URLParser.parsePath(urlAfterHost, urlProps);
    let urlAfterSearch = URLParser.parseSearch(urlAfterPath, urlProps);
    URLParser.parseHash(urlAfterSearch, urlProps);

    // parsePath strips a single trailing slash, so re-add it when the
    // normalized path (not the query or fragment) ended with one.
    if (path.endsWith("/") && !urlProps.pathname.endsWith("/")) {
      urlProps.pathname += "/";
    }
  }

  // Parses a complete absolute URL and stores every component on `urlProps`.
  static parseAbsoluteUrl(absoluteUrl: string, urlProps: URLProperties): void {
    // For file URLs, we should replace | with :
    let normalizedUrl = absoluteUrl;
    if (normalizedUrl.startsWith("file:")) {
      normalizedUrl = normalizedUrl.replaceAll("|", ":");
    }

    // Chain our independent parsing functions
    let urlAfterProtocol = URLParser.parseProtocol(normalizedUrl, urlProps);
    let urlAfterAuth = URLParser.parseAuth(urlAfterProtocol, urlProps);
    let urlAfterHost = URLParser.parseHost(urlAfterAuth, urlProps);

    // Nothing after the host: leave pathname, search and hash as they are
    if (urlAfterHost.length == 0) {
      return;
    }

    // Now we are at the path, apply it on top of an empty path
    urlProps.pathname = "";
    urlProps.search = "";
    urlProps.hash = "";

    // Leading parent navigation is ignored on absolute URLs
    while (urlAfterHost.startsWith("/../")) {
      urlAfterHost = urlAfterHost.replace("/../", "/");
    }
    URLParser.applyPathRelativeUrl(urlAfterHost, urlProps);
  }

  // Takes in an absolute URL,
  // applies the URL protocol (including the trailing ":") to `urlProps`
  // and returns a partial url with everything after the protocol
  // (auth, host, pathname, search, hash).
  // The input is returned unchanged when it contains no ":".
  static parseProtocol(absoluteUrl: string, urlProps: URLProperties): string {
    let colonIndex = absoluteUrl.indexOf(":");
    if (colonIndex < 0) {
      // Did not have a protocol
      return absoluteUrl;
    }

    urlProps.protocol = absoluteUrl.substring(0, colonIndex + 1);

    // Files need to have :// , and the path starts at the third slash.
    // Anything in between is ignored.
    if (urlProps.protocol == "file:") {
      let withoutScheme = absoluteUrl.replace(urlProps.protocol + "//", "");
      let firstSlash = withoutScheme.indexOf("/");
      return firstSlash > -1 ? withoutScheme.substring(firstSlash) : "";
    }

    // Skip the slashes that follow the ":". The scan stops before the final
    // character, so a URL that ends in a slash keeps that last slash.
    let cursor = colonIndex + 1;
    while (
      absoluteUrl.charAt(cursor) == "/" &&
      cursor < absoluteUrl.length - 1
    ) {
      cursor++;
    }
    return absoluteUrl.substring(cursor);
  }

  // Takes in a partial URL without the protocol.
  // Applies the username / password from the partial URL (if there are any)
  // and returns a partial URL with everything after auth
  // (host, pathname, search, hash).
  static parseAuth(urlAfterProtocol: string, urlProps: URLProperties): string {
    // Credentials live in the authority, which ends at the first "/", "?" or
    // "#". We cannot rely on the host to find that boundary, because a host
    // does not necessarily have a '.' (localhost) or a ':' for the port.
    let authorityEnd = urlAfterProtocol.length;
    let pathIndex = urlAfterProtocol.indexOf("/");
    if (pathIndex > -1 && pathIndex < authorityEnd) {
      authorityEnd = pathIndex;
    }
    let searchIndex = urlAfterProtocol.indexOf("?");
    if (searchIndex > -1 && searchIndex < authorityEnd) {
      authorityEnd = searchIndex;
    }
    let hashIndex = urlAfterProtocol.indexOf("#");
    if (hashIndex > -1 && hashIndex < authorityEnd) {
      authorityEnd = hashIndex;
    }

    // The "@" must precede the path, query and fragment, otherwise it is
    // ordinary data (e.g. "?email=a@b.c") and there is no auth to apply.
    let authIndex = urlAfterProtocol.indexOf("@");
    if (authIndex <= 0 || authIndex >= authorityEnd) {
      return urlAfterProtocol;
    }

    let auth = urlAfterProtocol.substring(0, authIndex);
    // Only the first ":" separates username and password, so the password
    // itself may contain further colons.
    let separatorIndex = auth.indexOf(":");
    if (separatorIndex > -1) {
      urlProps.username = auth.substring(0, separatorIndex);
      urlProps.password = auth.substring(separatorIndex + 1);
    } else {
      urlProps.username = auth;
    }

    // Return the remaining url
    return urlAfterProtocol.substring(authIndex + 1);
  }

  // Takes in a partial URL without the protocol or auth.
  // Applies the hostname and port from the partial url
  // and returns a partial URL with everything after the host
  // (pathname, search, hash).
  // Calls throwInvalidUrlError() for a malformed IPv6 literal or port.
  static parseHost(urlAfterAuth: string, urlProps: URLProperties): string {
    // The host ends at whichever of "/", "?" or "#" comes first
    let hostEnd = urlAfterAuth.length;
    let pathIndex = urlAfterAuth.indexOf("/");
    if (pathIndex > -1 && pathIndex < hostEnd) {
      hostEnd = pathIndex;
    }
    let searchIndex = urlAfterAuth.indexOf("?");
    if (searchIndex > -1 && searchIndex < hostEnd) {
      hostEnd = searchIndex;
    }
    let hashIndex = urlAfterAuth.indexOf("#");
    if (hashIndex > -1 && hashIndex < hostEnd) {
      hostEnd = hashIndex;
    }
    let hostnameAndPort = urlAfterAuth.substring(0, hostEnd);
    let urlAfterHost = urlAfterAuth.substring(hostEnd);

    let hostname = "";
    let port = "";
    if (hostnameAndPort.includes("[")) {
      // This could be an ipv6 address
      // https://url.spec.whatwg.org/#host-writing
      if (!hostnameAndPort.startsWith("[") || !hostnameAndPort.endsWith("]")) {
        throwInvalidUrlError();
      }
      // NOTE(review): only the full 8 group form passes this check; compressed
      // addresses such as "[::1]" and bracketed hosts followed by a port are
      // rejected. Confirm whether that is intended.
      let splitAddress = hostnameAndPort.split(":");
      if (splitAddress.length != 8) {
        throwInvalidUrlError();
      }
      hostname = hostnameAndPort;
    } else if (hostnameAndPort.includes(":")) {
      let hostnameAndPortSplit = hostnameAndPort.split(":");
      hostname = hostnameAndPortSplit[0];
      // The port must be a number in the range 1..65535, so try to parse it
      let portOrNaN = F32.parseInt(hostnameAndPortSplit[1], 10);
      if (isNaN(portOrNaN) || portOrNaN <= 0 || portOrNaN >= 65536) {
        throwInvalidUrlError();
      }
      // Round trip through an integer to normalize the port string
      port = I32.parseInt(hostnameAndPortSplit[1], 10).toString();
    } else {
      hostname = hostnameAndPort;
    }

    // Ensure that the port is empty if it is the default port of the protocol
    // https://url.spec.whatwg.org/#default-port
    if (
      port.length > 0 &&
      ((urlProps.protocol == "ftp:" && port == "21") ||
        (urlProps.protocol == "http:" && port == "80") ||
        (urlProps.protocol == "https:" && port == "443") ||
        (urlProps.protocol == "ws:" && port == "80") ||
        (urlProps.protocol == "wss:" && port == "443"))
    ) {
      port = "";
    }

    urlProps.hostname = hostname;
    urlProps.port = port;

    // return our resulting URL
    return urlAfterHost;
  }

  // Takes in a partial URL without the protocol, auth, or host.
  // Applies the path from the partial url
  // and returns a partial URL with everything after the path (search, hash).
  // The input is returned unchanged when it contains no "/".
  static parsePath(urlAfterHost: string, urlProps: URLProperties): string {
    if (urlAfterHost.length == 0) {
      return "";
    }
    if (urlAfterHost.indexOf("/") == -1) {
      // Just return the string if there was no path
      return urlAfterHost;
    }

    // The path ends at whichever of "?" or "#" comes first; a "?" that
    // follows the "#" is part of the fragment, not the start of a query.
    let pathEnd = urlAfterHost.length;
    let searchIndex = urlAfterHost.indexOf("?");
    if (searchIndex > -1 && searchIndex < pathEnd) {
      pathEnd = searchIndex;
    }
    let hashIndex = urlAfterHost.indexOf("#");
    if (hashIndex > -1 && hashIndex < pathEnd) {
      pathEnd = hashIndex;
    }
    urlProps.pathname = urlAfterHost.substring(0, pathEnd);

    // Remove any trailing slash, if the character before is not a slash
    if (
      urlProps.pathname.endsWith("/") &&
      !urlProps.pathname.endsWith("//")
    ) {
      urlProps.pathname = urlProps.pathname.slice(
        0,
        urlProps.pathname.length - 1
      );
    }

    // Return the remaining string without the path
    return urlAfterHost.substring(pathEnd);
  }

  // Takes in a partial URL without the protocol, auth, host, or path.
  // Applies the search from the partial url
  // and returns a partial URL with everything after the search (hash).
  // The input is returned unchanged when it contains no "?".
  static parseSearch(urlAfterPath: string, urlProps: URLProperties): string {
    if (urlAfterPath.length == 0) {
      return "";
    }
    let searchIndex = urlAfterPath.indexOf("?");
    if (searchIndex == -1) {
      // Just return the string if there was no search
      return urlAfterPath;
    }

    let hashIndex = urlAfterPath.indexOf("#");
    if (hashIndex == -1) {
      // No fragment, the whole remainder is the search
      urlProps.search = urlAfterPath;
      return "";
    }
    urlProps.search = urlAfterPath.substring(0, hashIndex);
    return urlAfterPath.substring(hashIndex);
  }

  // Takes in a partial URL without the protocol, auth, host, path, or search
  // (only a hash) and applies the hash, including the leading "#", to
  // `urlProps`. Does nothing when there is no "#".
  static parseHash(urlAfterSearch: string, urlProps: URLProperties): void {
    let hashIndex = urlAfterSearch.indexOf("#");
    if (hashIndex > -1) {
      urlProps.hash = urlAfterSearch.substring(hashIndex);
    }
  }

  // Checks the hostname / protocol combination stored on `urlProps` and calls
  // throwInvalidUrlError() when the combination is not allowed.
  static validateUrl(urlProps: URLProperties): void {
    let hostname = urlProps.hostname;

    if (hostname.includes(".")) {
      // The hostname is a domain or IPv4 address.
      // Pairing such a host with a non-special scheme is an invalid URL
      // according to the spec:
      // https://url.spec.whatwg.org/#url-representation
      // However, this is supported by node and chrome:
      // https://nodejs.org/api/url.html#url_special_schemes
      // So nothing is rejected here.
      return;
    }

    let isSpecialScheme = SPECIAL_SCHEMES.includes(urlProps.protocol);

    if (hostname == "") {
      // An empty host is only acceptable for file: and non-special schemes
      if (isSpecialScheme && urlProps.protocol != "file:") {
        throwInvalidUrlError();
      }
      return;
    }

    // Must be an opaque host (e.g localhost), or ipv6
    if (hostname.includes("[")) {
      // This url is ipv6, we are good!
      return;
    }

    // We do not want to allow special schemes for opaque hosts,
    // but for opaque hosts like localhost, http: and https: is valid in v8.
    // Thus, we should allow those, but not other special schemes.
    if (
      isSpecialScheme &&
      urlProps.protocol != "http:" &&
      urlProps.protocol != "https:"
    ) {
      throwInvalidUrlError();
    }
    // The Url is valid!
  }
}