UNPKG

shaka-player

Version:
136 lines (127 loc) 4.38 kB
/*! @license
 * Copyright 2008 The Closure Library Authors
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @fileoverview Simple utilities for splitting URI strings.
 *
 * Uses features of RFC 3986 for parsing/formatting URIs:
 *   http://www.ietf.org/rfc/rfc3986.txt
 *
 * @author gboyer@google.com (Garrett Boyer) - The "lightened" design.
 * @author msamuel@google.com (Mike Samuel) - Domain knowledge and regexes.
 */

goog.provide('goog.uri.utils');
goog.provide('goog.uri.utils.ComponentIndex');


/**
 * A regular expression for breaking a URI into its component parts.
 *
 * {@link http://www.ietf.org/rfc/rfc3986.txt} says in Appendix B
 * As the "first-match-wins" algorithm is identical to the "greedy"
 * disambiguation method used by POSIX regular expressions, it is natural and
 * commonplace to use a regular expression for parsing the potential five
 * components of a URI reference.
 *
 * The following line is the regular expression for breaking-down a
 * well-formed URI reference into its components.
 *
 * <pre>
 *      ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *       12            3  4          5       6  7        8 9
 * </pre>
 *
 * The numbers in the second line above are only to assist readability; they
 * indicate the reference points for each subexpression (i.e., each paired
 * parenthesis). We refer to the value matched for subexpression <n> as $<n>.
 * For example, matching the above expression to
 * <pre>
 *     http://www.ics.uci.edu/pub/ietf/uri/#Related
 * </pre>
 * results in the following subexpression matches:
 * <pre>
 *    $1 = http:
 *    $2 = http
 *    $3 = //www.ics.uci.edu
 *    $4 = www.ics.uci.edu
 *    $5 = /pub/ietf/uri/
 *    $6 = <undefined>
 *    $7 = <undefined>
 *    $8 = #Related
 *    $9 = Related
 * </pre>
 * where <undefined> indicates that the component is not present, as is the
 * case for the query component in the above example. Therefore, we can
 * determine the value of the five components as
 * <pre>
 *    scheme    = $2
 *    authority = $4
 *    path      = $5
 *    query     = $7
 *    fragment  = $9
 * </pre>
 *
 * The regular expression has been modified slightly to expose the
 * userInfo, domain, and port separately from the authority.
 * The modified version yields
 * <pre>
 *    $1 = http              scheme
 *    $2 = <undefined>       userInfo -\
 *    $3 = www.ics.uci.edu   domain     | authority
 *    $4 = <undefined>       port     -/
 *    $5 = /pub/ietf/uri/    path
 *    $6 = <undefined>       query without ?
 *    $7 = Related           fragment without #
 * </pre>
 *
 * Every group is optional, and the path, query and fragment groups together
 * can consume any remaining characters, so the expression matches every
 * string (including the empty string).
 *
 * @type {!RegExp}
 * @private
 */
goog.uri.utils.splitRe_ = new RegExp(
    '^' +
    '(?:' +
    '([^:/?#.]+)' +        // scheme - ignore special characters
                           // used by other URL parts such as :,
                           // ?, /, #, and .
    ':)?' +
    '(?://' +
    '(?:([^/?#]*)@)?' +    // userInfo
    '([^/#?]*?)' +         // domain
    '(?::([0-9]+))?' +     // port
    '(?=[/#?]|$)' +        // authority-terminating character
    ')?' +
    '([^?#]+)?' +          // path
    '(?:\\?([^#]*))?' +    // query
    '(?:#([\\s\\S]*))?' +  // fragment - [\s\S] instead of '.', because '.'
                           // does not match line terminators and a fragment
                           // containing one would make the whole match fail
    '$');


/**
 * The index of each URI component in the return value of goog.uri.utils.split.
 * @enum {number}
 */
goog.uri.utils.ComponentIndex = {
  SCHEME: 1,
  USER_INFO: 2,
  DOMAIN: 3,
  PORT: 4,
  PATH: 5,
  QUERY_DATA: 6,
  FRAGMENT: 7
};


/**
 * Splits a URI into its component parts.
 *
 * Each component can be accessed via the component indices; for example:
 * <pre>
 * goog.uri.utils.split(someStr)[goog.uri.utils.ComponentIndex.QUERY_DATA];
 * </pre>
 *
 * @param {string} uri The URI string to examine.
 * @return {!Array<string|undefined>} Each component still URI-encoded.
 *     Each component that is present will contain the encoded value, whereas
 *     components that are not present will be undefined or empty, depending
 *     on the browser's regular expression implementation.  Never null, since
 *     arbitrary strings may still look like path names.
 */
goog.uri.utils.split = function(uri) {
  // See @return comment -- never null.
  return /** @type {!Array<string|undefined>} */ (
      uri.match(goog.uri.utils.splitRe_));
};