linkifyjs
Version:
Find URLs, email addresses, #hashtags and @mentions in plain-text strings, then convert them into HTML <a> links.
1,427 lines (1,333 loc) • 66.1 kB
JavaScript
var linkify = (function (exports) {
'use strict';
// THIS FILE IS AUTOMATICALLY GENERATED DO NOT EDIT DIRECTLY
// See update-tlds.js for encoding/decoding format
// https://data.iana.org/TLD/tlds-alpha-by-domain.txt
const encodedTlds = 'aaa1rp3bb0ott3vie4c1le2ogado5udhabi7c0ademy5centure6ountant0s9o1tor4d0s1ult4e0g1ro2tna4f0l1rica5g0akhan5ency5i0g1rbus3force5tel5kdn3l0ibaba4pay4lfinanz6state5y2sace3tom5m0azon4ericanexpress7family11x2fam3ica3sterdam8nalytics7droid5quan4z2o0l2partments8p0le4q0uarelle8r0ab1mco4chi3my2pa2t0e3s0da2ia2sociates9t0hleta5torney7u0ction5di0ble3o3spost5thor3o0s4w0s2x0a2z0ure5ba0by2idu3namex4d1k2r0celona5laycard4s5efoot5gains6seball5ketball8uhaus5yern5b0c1t1va3cg1n2d1e0ats2uty4er2ntley5rlin4st0buy5t2f1g1h0arti5i0ble3d1ke2ng0o3o1z2j1lack0friday9ockbuster8g1omberg7ue3m0s1w2n0pparibas9o0ats3ehringer8fa2m1nd2o0k0ing5sch2tik2on4t1utique6x2r0adesco6idgestone9oadway5ker3ther5ussels7s1t1uild0ers6siness6y1zz3v1w1y1z0h3ca0b1fe2l0l1vinklein9m0era3p2non3petown5ital0one8r0avan4ds2e0er0s4s2sa1e1h1ino4t0ering5holic7ba1n1re3c1d1enter4o1rn3f0a1d2g1h0anel2nel4rity4se2t2eap3intai5ristmas6ome4urch5i0priani6rcle4sco3tadel4i0c2y3k1l0aims4eaning6ick2nic1que6othing5ud3ub0med6m1n1o0ach3des3ffee4llege4ogne5m0mbank4unity6pany2re3uter5sec4ndos3struction8ulting7tact3ractors9oking4l1p2rsica5untry4pon0s4rses6pa2r0edit0card4union9icket5own3s1uise0s6u0isinella9v1w1x1y0mru3ou3z2dad1nce3ta1e1ing3sun4y2clk3ds2e0al0er2s3gree4livery5l1oitte5ta3mocrat6ntal2ist5si0gn4v2hl2iamonds6et2gital5rect0ory7scount3ver5h2y2j1k1m1np2o0cs1tor4g1mains5t1wnload7rive4tv2ubai3nlop4pont4rban5vag2r2z2earth3t2c0o2deka3u0cation8e1g1mail3erck5nergy4gineer0ing9terprises10pson4quipment8r0icsson6ni3s0q1tate5t1u0rovision8s2vents5xchange6pert3osed4ress5traspace10fage2il1rwinds6th3mily4n0s2rm0ers5shion4t3edex3edback6rrari3ero6i0delity5o2lm2nal1nce1ial7re0stone6mdale6sh0ing5t0ness6j1k1lickr3ghts4r2orist4wers5y2m1o0o0d1tball6rd1ex2sale4um3undation8x2r0ee1senius7l1ogans4ntier7tr2ujitsu5n0d2rniture7tbol5yi3ga0l0lery3o1up4me0s3p1rden4y2b0iz3d0n2e0a1nt0ing5orge5f1g0ee3h1i0ft0s3ves2ing5l0ass3e1obal2o4m0ail3bh2o1x2n1odaddy5ld0point6f2o0dyear5g0le4p1t1v2p1q1r0ainger5phics5tis4een3ipe3ocery4up4s1t1u0cci3ge2ide2tars5ru3w1y2hair2mburg5ngout5us3bo2dfc0bank7ealth0care8lp1sinki6re1mes5iphop4samitsu7tachi5v2k0t2m1n1ockey4ldings5iday5medepot5goods5s0ense7nda3rse3spital5t0ing5t0els3mail5use3w2r1sbc3t1u0ghes5yatt3undai7ibm2cbc2e1u2d1e0ee3fm2kano4l1m0amat4db2mo0bilien9n0c1dustries8finiti5o2g1k1stitute6urance4e4t0ernational10uit4vestments10o1piranga7q1r0ish4s0maili5t0anbul7t0au2v3jaguar4va3cb2e0ep2tzt3welry6io2ll2m0p2nj2o0bs1urg4t1y2p0morgan6rs3uegos4niper7kaufen5ddi3e0rryhotels6logistics9properties14fh2g1h1i0a1ds2m1ndle4tchen5wi3m1n1oeln3matsu5sher5p0mg2n2r0d1ed3uokgroup8w1y0oto4z2la0caixa5mborghini8er3ncaster6d0rover6xess5salle5t0ino3robe5w0yer5b1c1ds2ease3clerc5frak4gal2o2xus4gbt3i0dl2fe0insurance9style7ghting6ke2lly3mited4o2ncoln4k2psy3ve1ing5k1lc1p2oan0s3cker3us3l1ndon4tte1o3ve3pl0financial11r1s1t0d0a3u0ndbeck6xe1ury5v1y2ma0drid4if1son4keup4n0agement7go3p1rket0ing3s4riott5shalls7ttel5ba2c0kinsey7d1e0d0ia3et2lbourne7me1orial6n0u2rckmsd7g1h1iami3crosoft7l1ni1t2t0subishi9k1l0b1s2m0a2n1o0bi0le4da2e1i1m1nash3ey2ster5rmon3tgage6scow4to0rcycles9v0ie4p1q1r1s0d2t0n1r2u0seum3ic4v1w1x1y1z2na0b1goya4me2vy3ba2c1e0c1t0bank4flix4work5ustar5w0s2xt0direct7us4f0l2g0o2hk2i0co2ke1on3nja3ssan1y5l1o0kia3rton4w0ruz3tv4p1r0a1w2tt2u1yc2z2obi1server7ffice5kinawa6layan0group9lo3m0ega4ne1g1l0ine5oo2pen3racle3nge4g0anic5igins6saka4tsuka4t2vh3pa0ge2nasonic7ris2s1tners4s1y3y2ccw3e0t2f0izer5g1h0armacy6d1ilips5one2to0graphy6s4ysio5ics1tet2ures6d1n0g1k2oneer5zza4k1l0ace2y0station9umbing5s3m1n0c2ohl2ker3litie5rn2st3r0america6xi3ess3ime3o0d0uctions8f1gressive8mo2perties3y5tection8u0dential9s1t1ub2w0c2y2qa1pon3uebec3st5racing4dio4e0ad1lestate6tor2y4cipes5d0stone5umbrella9hab3ise0n3t2liance6n0t0als5pair3ort3ublican8st0aurant8view0s5xroth6ich0ardli6oh3l1o1p2o0cks3deo3gers4om3s0vp3u0gby3hr2n2w0e2yukyu6sa0arland6fe0ty4kura4le1on3msclub4ung5ndvik0coromant12ofi4p1rl2s1ve2xo3b0i1s2c0b1haeffler7midt4olarships8ol3ule3warz5ience5ot3d1e0arch3t2cure1ity6ek2lect4ner3rvices6ven3w1x0y3fr2g1h0angrila6rp3ell3ia1ksha5oes2p0ping5uji3w3i0lk2na1gles5te3j1k0i0n2y0pe4l0ing4m0art3ile4n0cf3o0ccer3ial4ftbank4ware6hu2lar2utions7ng1y2y2pa0ce3ort2t3r0l2s1t0ada2ples4r1tebank4farm7c0group6ockholm6rage3e3ream4udio2y3yle4u0cks3pplies3y2ort5rf1gery5zuki5v1watch4iss4x1y0dney4stems6z2tab1ipei4lk2obao4rget4tamotors6r2too4x0i3c0i2d0k2eam2ch0nology8l1masek5nnis4va3f1g1h0d1eater2re6iaa2ckets5enda4ps2res2ol4j0maxx4x2k0maxx5l1m0all4n1o0day3kyo3ols3p1ray3shiba5tal3urs3wn2yota3s3r0ade1ing4ining5vel0ers0insurance16ust3v2t1ube2i1nes3shu4v0s2w1z2ua1bank3s2g1k1nicom3versity8o2ol2ps2s1y1z2va0cations7na1guard7c1e0gas3ntures6risign5mögensberater2ung14sicherung10t2g1i0ajes4deo3g1king4llas4n1p1rgin4sa1ion4va1o3laanderen9n1odka3lvo3te1ing3o2yage5u2wales2mart4ter4ng0gou5tch0es6eather0channel12bcam3er2site5d0ding5ibo2r3f1hoswho6ien2ki2lliamhill9n0dows4e1ners6me2olterskluwer11odside6rk0s2ld3w2s1tc1f3xbox3erox4ihuan4n2xx2yz3yachts4hoo3maxun5ndex5e1odobashi7ga2kohama6u0tube6t1un3za0ppos4ra3ero3ip2m1one3uerich6w2';
// Internationalized domain names containing non-ASCII
const encodedUtlds = 'ελ1υ2бг1ел3дети4ею2католик6ом3мкд2он1сква6онлайн5рг3рус2ф2сайт3рб3укр3қаз3հայ3ישראל5קום3ابوظبي5رامكو5لاردن4بحرين5جزائر5سعودية6عليان5مغرب5مارات5یران5بارت2زار4يتك3ھارت5تونس4سودان3رية5شبكة4عراق2ب2مان4فلسطين6قطر3كاثوليك6وم3مصر2ليسيا5وريتانيا7قع4همراه5پاکستان7ڀارت4कॉम3नेट3भारत0म्3ोत5संगठन5বাংলা5ভারত2ৰত4ਭਾਰਤ4ભારત4ଭାରତ4இந்தியா6லங்கை6சிங்கப்பூர்11భారత్5ಭಾರತ4ഭാരതം5ලංකා4คอม3ไทย3ລາວ3გე2みんな3アマゾン4クラウド4グーグル4コム2ストア3セール3ファッション6ポイント4世界2中信1国1國1文网3亚马逊3企业2佛山2信息2健康2八卦2公司1益2台湾1灣2商城1店1标2嘉里0大酒店5在线2大拿2天主教3娱乐2家電2广东2微博2慈善2我爱你3手机2招聘2政务1府2新加坡2闻2时尚2書籍2机构2淡马锡3游戏2澳門2点看2移动2组织机构4网址1店1站1络2联通2谷歌2购物2通販2集团2電訊盈科4飞利浦3食品2餐厅2香格里拉3港2닷넷1컴2삼성2한국2';
/**
* @template A
* @template B
* @param {A} target
* @param {B} properties
* @return {A & B}
*/
const assign = (target, properties) => {
for (const key in properties) {
target[key] = properties[key];
}
return target;
};
/**
* Finite State Machine generation utilities
*/
/**
* @template T
* @typedef {{ [group: string]: T[] }} Collections
*/
/**
* @typedef {{ [group: string]: true }} Flags
*/
// Keys in scanner Collections instances
const numeric = 'numeric';
const ascii = 'ascii';
const alpha = 'alpha';
const asciinumeric = 'asciinumeric';
const alphanumeric = 'alphanumeric';
const domain = 'domain';
const emoji = 'emoji';
const scheme = 'scheme';
const slashscheme = 'slashscheme';
const whitespace = 'whitespace';
/**
* @template T
* @param {string} name
* @param {Collections<T>} groups to register in
* @returns {T[]} Current list of tokens in the given collection
*/
function registerGroup(name, groups) {
if (!(name in groups)) {
groups[name] = [];
}
return groups[name];
}
/**
* @template T
* @param {T} t token to add
* @param {Collections<T>} groups
* @param {Flags} flags
*/
function addToGroups(t, flags, groups) {
if (flags[numeric]) {
flags[asciinumeric] = true;
flags[alphanumeric] = true;
}
if (flags[ascii]) {
flags[asciinumeric] = true;
flags[alpha] = true;
}
if (flags[asciinumeric]) {
flags[alphanumeric] = true;
}
if (flags[alpha]) {
flags[alphanumeric] = true;
}
if (flags[alphanumeric]) {
flags[domain] = true;
}
if (flags[emoji]) {
flags[domain] = true;
}
for (const k in flags) {
const group = registerGroup(k, groups);
if (group.indexOf(t) < 0) {
group.push(t);
}
}
}
/**
* @template T
* @param {T} t token to check
* @param {Collections<T>} groups
* @returns {Flags} group flags that contain this token
*/
function flagsForToken(t, groups) {
const result = {};
for (const c in groups) {
if (groups[c].indexOf(t) >= 0) {
result[c] = true;
}
}
return result;
}
/**
* @template T
* @typedef {null | T } Transition
*/
/**
* Define a basic state machine state. j is the list of character transitions,
* jr is the list of regex-match transitions, jd is the default state to
* transition to t is the accepting token type, if any. If this is the terminal
* state, then it does not emit a token.
*
* The template type T represents the type of the token this state accepts. This
* should be a string (such as of the token exports in `text.js`) or a
* MultiToken subclass (from `multi.js`)
*
* @template T
* @param {T} [token] Token that this state emits
*/
function State(token = null) {
// this.n = null; // DEBUG: State name
/** @type {{ [input: string]: State<T> }} j */
this.j = {}; // IMPLEMENTATION 1
// this.j = []; // IMPLEMENTATION 2
/** @type {[RegExp, State<T>][]} jr */
this.jr = [];
/** @type {?State<T>} jd */
this.jd = null;
/** @type {?T} t */
this.t = token;
}
/**
* Scanner token groups
* @type Collections<string>
*/
State.groups = {};
State.prototype = {
accepts() {
return !!this.t;
},
/**
* Follow an existing transition from the given input to the next state.
* Does not mutate.
* @param {string} input character or token type to transition on
* @returns {?State<T>} the next state, if any
*/
go(input) {
const state = this;
const nextState = state.j[input];
if (nextState) {
return nextState;
}
for (let i = 0; i < state.jr.length; i++) {
const regex = state.jr[i][0];
const nextState = state.jr[i][1]; // note: might be empty to prevent default jump
if (nextState && regex.test(input)) {
return nextState;
}
}
// Nowhere left to jump! Return default, if any
return state.jd;
},
/**
* Whether the state has a transition for the given input. Set the second
* argument to true to only look for an exact match (and not a default or
* regular-expression-based transition)
* @param {string} input
* @param {boolean} exactOnly
*/
has(input, exactOnly = false) {
return exactOnly ? input in this.j : !!this.go(input);
},
/**
* Short for "transition all"; create a transition from the array of items
* in the given list to the same final resulting state.
* @param {string | string[]} inputs Group of inputs to transition on
* @param {Transition<T> | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of token groups
*/
ta(inputs, next, flags, groups) {
for (let i = 0; i < inputs.length; i++) {
this.tt(inputs[i], next, flags, groups);
}
},
/**
* Short for "take regexp transition"; defines a transition for this state
* when it encounters a token which matches the given regular expression
* @param {RegExp} regexp Regular expression transition (populate first)
* @param {T | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of token groups
* @returns {State<T>} taken after the given input
*/
tr(regexp, next, flags, groups) {
groups = groups || State.groups;
let nextState;
if (next && next.j) {
nextState = next;
} else {
// Token with maybe token groups
nextState = new State(next);
if (flags && groups) {
addToGroups(next, flags, groups);
}
}
this.jr.push([regexp, nextState]);
return nextState;
},
/**
* Short for "take transitions", will take as many sequential transitions as
* the length of the given input and returns the
* resulting final state.
* @param {string | string[]} input
* @param {T | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of token groups
* @returns {State<T>} taken after the given input
*/
ts(input, next, flags, groups) {
let state = this;
const len = input.length;
if (!len) {
return state;
}
for (let i = 0; i < len - 1; i++) {
state = state.tt(input[i]);
}
return state.tt(input[len - 1], next, flags, groups);
},
/**
* Short for "take transition", this is a method for building/working with
* state machines.
*
* If a state already exists for the given input, returns it.
*
* If a token is specified, that state will emit that token when reached by
* the linkify engine.
*
* If no state exists, it will be initialized with some default transitions
* that resemble existing default transitions.
*
* If a state is given for the second argument, that state will be
* transitioned to on the given input regardless of what that input
* previously did.
*
* Specify a token group flags to define groups that this token belongs to.
* The token will be added to corresponding entires in the given groups
* object.
*
* @param {string} input character, token type to transition on
* @param {T | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of groups
* @returns {State<T>} taken after the given input
*/
tt(input, next, flags, groups) {
groups = groups || State.groups;
const state = this;
// Check if existing state given, just a basic transition
if (next && next.j) {
state.j[input] = next;
return next;
}
const t = next;
// Take the transition with the usual default mechanisms and use that as
// a template for creating the next state
let nextState,
templateState = state.go(input);
if (templateState) {
nextState = new State();
assign(nextState.j, templateState.j);
nextState.jr.push.apply(nextState.jr, templateState.jr);
nextState.jd = templateState.jd;
nextState.t = templateState.t;
} else {
nextState = new State();
}
if (t) {
// Ensure newly token is in the same groups as the old token
if (groups) {
if (nextState.t && typeof nextState.t === 'string') {
const allFlags = assign(flagsForToken(nextState.t, groups), flags);
addToGroups(t, allFlags, groups);
} else if (flags) {
addToGroups(t, flags, groups);
}
}
nextState.t = t; // overwrite anything that was previously there
}
state.j[input] = nextState;
return nextState;
}
};
// Helper functions to improve minification (not exported outside linkifyjs module)
/**
* @template T
* @param {State<T>} state
* @param {string | string[]} input
* @param {Flags} [flags]
* @param {Collections<T>} [groups]
*/
const ta = (state, input, next, flags, groups) => state.ta(input, next, flags, groups);
/**
* @template T
* @param {State<T>} state
* @param {RegExp} regexp
* @param {T | State<T>} [next]
* @param {Flags} [flags]
* @param {Collections<T>} [groups]
*/
const tr = (state, regexp, next, flags, groups) => state.tr(regexp, next, flags, groups);
/**
* @template T
* @param {State<T>} state
* @param {string | string[]} input
* @param {T | State<T>} [next]
* @param {Flags} [flags]
* @param {Collections<T>} [groups]
*/
const ts = (state, input, next, flags, groups) => state.ts(input, next, flags, groups);
/**
* @template T
* @param {State<T>} state
* @param {string} input
* @param {T | State<T>} [next]
* @param {Collections<T>} [groups]
* @param {Flags} [flags]
*/
const tt = (state, input, next, flags, groups) => state.tt(input, next, flags, groups);
/******************************************************************************
Text Tokens
Identifiers for token outputs from the regexp scanner
******************************************************************************/
// A valid web domain token
const WORD = 'WORD'; // only contains a-z
const UWORD = 'UWORD'; // contains letters other than a-z, used for IDN
const ASCIINUMERICAL = 'ASCIINUMERICAL'; // contains a-z, 0-9
const ALPHANUMERICAL = 'ALPHANUMERICAL'; // contains numbers and letters other than a-z, used for IDN
// Special case of word
const LOCALHOST = 'LOCALHOST';
// Valid top-level domain, special case of WORD (see tlds.js)
const TLD = 'TLD';
// Valid IDN TLD, special case of UWORD (see tlds.js)
const UTLD = 'UTLD';
// The scheme portion of a web URI protocol. Supported types include: `mailto`,
// `file`, and user-defined custom protocols. Limited to schemes that contain
// only letters
const SCHEME = 'SCHEME';
// Similar to SCHEME, except makes distinction for schemes that must always be
// followed by `://`, not just `:`. Supported types include `http`, `https`,
// `ftp`, `ftps`
const SLASH_SCHEME = 'SLASH_SCHEME';
// Any sequence of digits 0-9
const NUM = 'NUM';
// Any number of consecutive whitespace characters that are not newline
const WS = 'WS';
// New line (unix style)
const NL = 'NL'; // \n
// Opening/closing bracket classes
// TODO: Rename OPEN -> LEFT and CLOSE -> RIGHT in v5 to fit with Unicode names
// Also rename angle brackes to LESSTHAN and GREATER THAN
const OPENBRACE = 'OPENBRACE'; // {
const CLOSEBRACE = 'CLOSEBRACE'; // }
const OPENBRACKET = 'OPENBRACKET'; // [
const CLOSEBRACKET = 'CLOSEBRACKET'; // ]
const OPENPAREN = 'OPENPAREN'; // (
const CLOSEPAREN = 'CLOSEPAREN'; // )
const OPENANGLEBRACKET = 'OPENANGLEBRACKET'; // <
const CLOSEANGLEBRACKET = 'CLOSEANGLEBRACKET'; // >
const FULLWIDTHLEFTPAREN = 'FULLWIDTHLEFTPAREN'; // (
const FULLWIDTHRIGHTPAREN = 'FULLWIDTHRIGHTPAREN'; // )
const LEFTCORNERBRACKET = 'LEFTCORNERBRACKET'; // 「
const RIGHTCORNERBRACKET = 'RIGHTCORNERBRACKET'; // 」
const LEFTWHITECORNERBRACKET = 'LEFTWHITECORNERBRACKET'; // 『
const RIGHTWHITECORNERBRACKET = 'RIGHTWHITECORNERBRACKET'; // 』
const FULLWIDTHLESSTHAN = 'FULLWIDTHLESSTHAN'; // <
const FULLWIDTHGREATERTHAN = 'FULLWIDTHGREATERTHAN'; // >
// Various symbols
const AMPERSAND = 'AMPERSAND'; // &
const APOSTROPHE = 'APOSTROPHE'; // '
const ASTERISK = 'ASTERISK'; // *
const AT = 'AT'; // @
const BACKSLASH = 'BACKSLASH'; // \
const BACKTICK = 'BACKTICK'; // `
const CARET = 'CARET'; // ^
const COLON = 'COLON'; // :
const COMMA = 'COMMA'; // ,
const DOLLAR = 'DOLLAR'; // $
const DOT = 'DOT'; // .
const EQUALS = 'EQUALS'; // =
const EXCLAMATION = 'EXCLAMATION'; // !
const HYPHEN = 'HYPHEN'; // -
const PERCENT = 'PERCENT'; // %
const PIPE = 'PIPE'; // |
const PLUS = 'PLUS'; // +
const POUND = 'POUND'; // #
const QUERY = 'QUERY'; // ?
const QUOTE = 'QUOTE'; // "
const FULLWIDTHMIDDLEDOT = 'FULLWIDTHMIDDLEDOT'; // ・
const SEMI = 'SEMI'; // ;
const SLASH = 'SLASH'; // /
const TILDE = 'TILDE'; // ~
const UNDERSCORE = 'UNDERSCORE'; // _
// Emoji symbol
const EMOJI$1 = 'EMOJI';
// Default token - anything that is not one of the above
const SYM = 'SYM';
var tk = /*#__PURE__*/Object.freeze({
__proto__: null,
WORD: WORD,
UWORD: UWORD,
ASCIINUMERICAL: ASCIINUMERICAL,
ALPHANUMERICAL: ALPHANUMERICAL,
LOCALHOST: LOCALHOST,
TLD: TLD,
UTLD: UTLD,
SCHEME: SCHEME,
SLASH_SCHEME: SLASH_SCHEME,
NUM: NUM,
WS: WS,
NL: NL,
OPENBRACE: OPENBRACE,
CLOSEBRACE: CLOSEBRACE,
OPENBRACKET: OPENBRACKET,
CLOSEBRACKET: CLOSEBRACKET,
OPENPAREN: OPENPAREN,
CLOSEPAREN: CLOSEPAREN,
OPENANGLEBRACKET: OPENANGLEBRACKET,
CLOSEANGLEBRACKET: CLOSEANGLEBRACKET,
FULLWIDTHLEFTPAREN: FULLWIDTHLEFTPAREN,
FULLWIDTHRIGHTPAREN: FULLWIDTHRIGHTPAREN,
LEFTCORNERBRACKET: LEFTCORNERBRACKET,
RIGHTCORNERBRACKET: RIGHTCORNERBRACKET,
LEFTWHITECORNERBRACKET: LEFTWHITECORNERBRACKET,
RIGHTWHITECORNERBRACKET: RIGHTWHITECORNERBRACKET,
FULLWIDTHLESSTHAN: FULLWIDTHLESSTHAN,
FULLWIDTHGREATERTHAN: FULLWIDTHGREATERTHAN,
AMPERSAND: AMPERSAND,
APOSTROPHE: APOSTROPHE,
ASTERISK: ASTERISK,
AT: AT,
BACKSLASH: BACKSLASH,
BACKTICK: BACKTICK,
CARET: CARET,
COLON: COLON,
COMMA: COMMA,
DOLLAR: DOLLAR,
DOT: DOT,
EQUALS: EQUALS,
EXCLAMATION: EXCLAMATION,
HYPHEN: HYPHEN,
PERCENT: PERCENT,
PIPE: PIPE,
PLUS: PLUS,
POUND: POUND,
QUERY: QUERY,
QUOTE: QUOTE,
FULLWIDTHMIDDLEDOT: FULLWIDTHMIDDLEDOT,
SEMI: SEMI,
SLASH: SLASH,
TILDE: TILDE,
UNDERSCORE: UNDERSCORE,
EMOJI: EMOJI$1,
SYM: SYM
});
// Note that these two Unicode ones expand into a really big one with Babel
const ASCII_LETTER = /[a-z]/;
const LETTER = /\p{L}/u; // Any Unicode character with letter data type
const EMOJI = /\p{Emoji}/u; // Any Unicode emoji character
const EMOJI_VARIATION$1 = /\ufe0f/;
const DIGIT = /\d/;
const SPACE = /\s/;
var regexp = /*#__PURE__*/Object.freeze({
__proto__: null,
ASCII_LETTER: ASCII_LETTER,
LETTER: LETTER,
EMOJI: EMOJI,
EMOJI_VARIATION: EMOJI_VARIATION$1,
DIGIT: DIGIT,
SPACE: SPACE
});
/**
The scanner provides an interface that takes a string of text as input, and
outputs an array of tokens instances that can be used for easy URL parsing.
*/
const CR = '\r'; // carriage-return character
const LF = '\n'; // line-feed character
const EMOJI_VARIATION = '\ufe0f'; // Variation selector, follows heart and others
const EMOJI_JOINER = '\u200d'; // zero-width joiner
const OBJECT_REPLACEMENT = '\ufffc'; // whitespace placeholder that sometimes appears in rich text editors
let tlds = null,
utlds = null; // don't change so only have to be computed once
/**
* Scanner output token:
* - `t` is the token name (e.g., 'NUM', 'EMOJI', 'TLD')
* - `v` is the value of the token (e.g., '123', '❤️', 'com')
* - `s` is the start index of the token in the original string
* - `e` is the end index of the token in the original string
* @typedef {{t: string, v: string, s: number, e: number}} Token
*/
/**
* @template T
* @typedef {{ [collection: string]: T[] }} Collections
*/
/**
* Initialize the scanner character-based state machine for the given start
* state
* @param {[string, boolean][]} customSchemes List of custom schemes, where each
* item is a length-2 tuple with the first element set to the string scheme, and
* the second element set to `true` if the `://` after the scheme is optional
*/
function init$2(customSchemes = []) {
// Frequently used states (name argument removed during minification)
/** @type Collections<string> */
const groups = {}; // of tokens
State.groups = groups;
/** @type State<string> */
const Start = new State();
if (tlds == null) {
tlds = decodeTlds(encodedTlds);
}
if (utlds == null) {
utlds = decodeTlds(encodedUtlds);
}
// States for special URL symbols that accept immediately after start
tt(Start, "'", APOSTROPHE);
tt(Start, '{', OPENBRACE);
tt(Start, '}', CLOSEBRACE);
tt(Start, '[', OPENBRACKET);
tt(Start, ']', CLOSEBRACKET);
tt(Start, '(', OPENPAREN);
tt(Start, ')', CLOSEPAREN);
tt(Start, '<', OPENANGLEBRACKET);
tt(Start, '>', CLOSEANGLEBRACKET);
tt(Start, '(', FULLWIDTHLEFTPAREN);
tt(Start, ')', FULLWIDTHRIGHTPAREN);
tt(Start, '「', LEFTCORNERBRACKET);
tt(Start, '」', RIGHTCORNERBRACKET);
tt(Start, '『', LEFTWHITECORNERBRACKET);
tt(Start, '』', RIGHTWHITECORNERBRACKET);
tt(Start, '<', FULLWIDTHLESSTHAN);
tt(Start, '>', FULLWIDTHGREATERTHAN);
tt(Start, '&', AMPERSAND);
tt(Start, '*', ASTERISK);
tt(Start, '@', AT);
tt(Start, '`', BACKTICK);
tt(Start, '^', CARET);
tt(Start, ':', COLON);
tt(Start, ',', COMMA);
tt(Start, '$', DOLLAR);
tt(Start, '.', DOT);
tt(Start, '=', EQUALS);
tt(Start, '!', EXCLAMATION);
tt(Start, '-', HYPHEN);
tt(Start, '%', PERCENT);
tt(Start, '|', PIPE);
tt(Start, '+', PLUS);
tt(Start, '#', POUND);
tt(Start, '?', QUERY);
tt(Start, '"', QUOTE);
tt(Start, '/', SLASH);
tt(Start, ';', SEMI);
tt(Start, '~', TILDE);
tt(Start, '_', UNDERSCORE);
tt(Start, '\\', BACKSLASH);
tt(Start, '・', FULLWIDTHMIDDLEDOT);
const Num = tr(Start, DIGIT, NUM, {
[numeric]: true
});
tr(Num, DIGIT, Num);
const Asciinumeric = tr(Num, ASCII_LETTER, ASCIINUMERICAL, {
[asciinumeric]: true
});
const Alphanumeric = tr(Num, LETTER, ALPHANUMERICAL, {
[alphanumeric]: true
});
// State which emits a word token
const Word = tr(Start, ASCII_LETTER, WORD, {
[ascii]: true
});
tr(Word, DIGIT, Asciinumeric);
tr(Word, ASCII_LETTER, Word);
tr(Asciinumeric, DIGIT, Asciinumeric);
tr(Asciinumeric, ASCII_LETTER, Asciinumeric);
// Same as previous, but specific to non-fsm.ascii alphabet words
const UWord = tr(Start, LETTER, UWORD, {
[alpha]: true
});
tr(UWord, ASCII_LETTER); // Non-accepting
tr(UWord, DIGIT, Alphanumeric);
tr(UWord, LETTER, UWord);
tr(Alphanumeric, DIGIT, Alphanumeric);
tr(Alphanumeric, ASCII_LETTER); // Non-accepting
tr(Alphanumeric, LETTER, Alphanumeric); // Non-accepting
// Whitespace jumps
// Tokens of only non-newline whitespace are arbitrarily long
// If any whitespace except newline, more whitespace!
const Nl = tt(Start, LF, NL, {
[whitespace]: true
});
const Cr = tt(Start, CR, WS, {
[whitespace]: true
});
const Ws = tr(Start, SPACE, WS, {
[whitespace]: true
});
tt(Start, OBJECT_REPLACEMENT, Ws);
tt(Cr, LF, Nl); // \r\n
tt(Cr, OBJECT_REPLACEMENT, Ws);
tr(Cr, SPACE, Ws);
tt(Ws, CR); // non-accepting state to avoid mixing whitespaces
tt(Ws, LF); // non-accepting state to avoid mixing whitespaces
tr(Ws, SPACE, Ws);
tt(Ws, OBJECT_REPLACEMENT, Ws);
// Emoji tokens. They are not grouped by the scanner except in cases where a
// zero-width joiner is present
const Emoji = tr(Start, EMOJI, EMOJI$1, {
[emoji]: true
});
tt(Emoji, '#'); // no transition, emoji regex seems to match #
tr(Emoji, EMOJI, Emoji);
tt(Emoji, EMOJI_VARIATION, Emoji);
// tt(Start, EMOJI_VARIATION, Emoji); // This one is sketchy
const EmojiJoiner = tt(Emoji, EMOJI_JOINER);
tt(EmojiJoiner, '#');
tr(EmojiJoiner, EMOJI, Emoji);
// tt(EmojiJoiner, EMOJI_VARIATION, Emoji); // also sketchy
// Generates states for top-level domains
// Note that this is most accurate when tlds are in alphabetical order
const wordjr = [[ASCII_LETTER, Word], [DIGIT, Asciinumeric]];
const uwordjr = [[ASCII_LETTER, null], [LETTER, UWord], [DIGIT, Alphanumeric]];
for (let i = 0; i < tlds.length; i++) {
fastts(Start, tlds[i], TLD, WORD, wordjr);
}
for (let i = 0; i < utlds.length; i++) {
fastts(Start, utlds[i], UTLD, UWORD, uwordjr);
}
addToGroups(TLD, {
tld: true,
ascii: true
}, groups);
addToGroups(UTLD, {
utld: true,
alpha: true
}, groups);
// Collect the states generated by different protocols. NOTE: If any new TLDs
// get added that are also protocols, set the token to be the same as the
// protocol to ensure parsing works as expected.
fastts(Start, 'file', SCHEME, WORD, wordjr);
fastts(Start, 'mailto', SCHEME, WORD, wordjr);
fastts(Start, 'http', SLASH_SCHEME, WORD, wordjr);
fastts(Start, 'https', SLASH_SCHEME, WORD, wordjr);
fastts(Start, 'ftp', SLASH_SCHEME, WORD, wordjr);
fastts(Start, 'ftps', SLASH_SCHEME, WORD, wordjr);
addToGroups(SCHEME, {
scheme: true,
ascii: true
}, groups);
addToGroups(SLASH_SCHEME, {
slashscheme: true,
ascii: true
}, groups);
// Register custom schemes. Assumes each scheme is asciinumeric with hyphens
customSchemes = customSchemes.sort((a, b) => a[0] > b[0] ? 1 : -1);
for (let i = 0; i < customSchemes.length; i++) {
const sch = customSchemes[i][0];
const optionalSlashSlash = customSchemes[i][1];
const flags = optionalSlashSlash ? {
[scheme]: true
} : {
[slashscheme]: true
};
if (sch.indexOf('-') >= 0) {
flags[domain] = true;
} else if (!ASCII_LETTER.test(sch)) {
flags[numeric] = true; // numbers only
} else if (DIGIT.test(sch)) {
flags[asciinumeric] = true;
} else {
flags[ascii] = true;
}
ts(Start, sch, sch, flags);
}
// Localhost token
ts(Start, 'localhost', LOCALHOST, {
ascii: true
});
// Set default transition for start state (some symbol)
Start.jd = new State(SYM);
return {
start: Start,
tokens: assign({
groups
}, tk)
};
}
/**
Given a string, returns an array of TOKEN instances representing the
composition of that string.
@method run
@param {State<string>} start scanner starting state
@param {string} str input string to scan
@return {Token[]} list of tokens, each with a type and value
*/
function run$1(start, str) {
// State machine is not case sensitive, so input is tokenized in lowercased
// form (still returns regular case). Uses selective `toLowerCase` because
// lowercasing the entire string causes the length and character position to
// vary in some non-English strings with V8-based runtimes.
const iterable = stringToArray(str.replace(/[A-Z]/g, c => c.toLowerCase()));
const charCount = iterable.length; // <= len if there are emojis, etc
const tokens = []; // return value
// cursor through the string itself, accounting for characters that have
// width with length 2 such as emojis
let cursor = 0;
// Cursor through the array-representation of the string
let charCursor = 0;
// Tokenize the string
while (charCursor < charCount) {
let state = start;
let nextState = null;
let tokenLength = 0;
let latestAccepting = null;
let sinceAccepts = -1;
let charsSinceAccepts = -1;
while (charCursor < charCount && (nextState = state.go(iterable[charCursor]))) {
state = nextState;
// Keep track of the latest accepting state
if (state.accepts()) {
sinceAccepts = 0;
charsSinceAccepts = 0;
latestAccepting = state;
} else if (sinceAccepts >= 0) {
sinceAccepts += iterable[charCursor].length;
charsSinceAccepts++;
}
tokenLength += iterable[charCursor].length;
cursor += iterable[charCursor].length;
charCursor++;
}
// Roll back to the latest accepting state
cursor -= sinceAccepts;
charCursor -= charsSinceAccepts;
tokenLength -= sinceAccepts;
// No more jumps, just make a new token from the last accepting one
tokens.push({
t: latestAccepting.t,
// token type/name
v: str.slice(cursor - tokenLength, cursor),
// string value
s: cursor - tokenLength,
// start index
e: cursor // end index (excluding)
});
}
return tokens;
}
/**
* Convert a String to an Array of characters, taking into account that some
* characters like emojis take up two string indexes.
*
* Adapted from core-js (MIT license)
* https://github.com/zloirock/core-js/blob/2d69cf5f99ab3ea3463c395df81e5a15b68f49d9/packages/core-js/internals/string-multibyte.js
*
* @function stringToArray
* @param {string} str
* @returns {string[]}
*/
function stringToArray(str) {
const result = [];
const len = str.length;
let index = 0;
while (index < len) {
let first = str.charCodeAt(index);
let second;
let char = first < 0xd800 || first > 0xdbff || index + 1 === len || (second = str.charCodeAt(index + 1)) < 0xdc00 || second > 0xdfff ? str[index] // single character
: str.slice(index, index + 2); // two-index characters
result.push(char);
index += char.length;
}
return result;
}
/**
* Fast version of ts function for when transition defaults are well known
* @param {State<string>} state
* @param {string} input
* @param {string} t
* @param {string} defaultt
* @param {[RegExp, State<string>][]} jr
* @returns {State<string>}
*/
function fastts(state, input, t, defaultt, jr) {
let next;
const len = input.length;
for (let i = 0; i < len - 1; i++) {
const char = input[i];
if (state.j[char]) {
next = state.j[char];
} else {
next = new State(defaultt);
next.jr = jr.slice();
state.j[char] = next;
}
state = next;
}
next = new State(t);
next.jr = jr.slice();
state.j[input[len - 1]] = next;
return next;
}
/**
* Converts a string of Top-Level Domain names encoded in update-tlds.js back
* into a list of strings.
* @param {str} encoded encoded TLDs string
* @returns {str[]} original TLDs list
*/
function decodeTlds(encoded) {
const words = [];
const stack = [];
let i = 0;
let digits = '0123456789';
while (i < encoded.length) {
let popDigitCount = 0;
while (digits.indexOf(encoded[i + popDigitCount]) >= 0) {
popDigitCount++; // encountered some digits, have to pop to go one level up trie
}
if (popDigitCount > 0) {
words.push(stack.join('')); // whatever preceded the pop digits must be a word
for (let popCount = parseInt(encoded.substring(i, i + popDigitCount), 10); popCount > 0; popCount--) {
stack.pop();
}
i += popDigitCount;
} else {
stack.push(encoded[i]); // drop down a level into the trie
i++;
}
}
return words;
}
/**
* An object where each key is a valid DOM Event Name such as `click` or `focus`
* and each value is an event handler function.
*
* https://developer.mozilla.org/en-US/docs/Web/API/Element#events
* @typedef {?{ [event: string]: Function }} EventListeners
*/
/**
* All formatted properties required to render a link, including `tagName`,
* `attributes`, `content` and `eventListeners`.
* @typedef {{ tagName: any, attributes: {[attr: string]: any}, content: string,
* eventListeners: EventListeners }} IntermediateRepresentation
*/
/**
* Specify either an object described by the template type `O` or a function.
*
* The function takes a string value (usually the link's href attribute), the
* link type (`'url'`, `'hashtag`', etc.) and an internal token representation
* of the link. It should return an object of the template type `O`
* @template O
* @typedef {O | ((value: string, type: string, token: MultiToken) => O)} OptObj
*/
/**
* Specify either a function described by template type `F` or an object.
*
* Each key in the object should be a link type (`'url'`, `'hashtag`', etc.). Each
* value should be a function with template type `F` that is called when the
* corresponding link type is encountered.
* @template F
* @typedef {F | { [type: string]: F}} OptFn
*/
/**
* Specify either a value with template type `V`, a function that returns `V` or
* an object where each value resolves to `V`.
*
* The function takes a string value (usually the link's href attribute), the
* link type (`'url'`, `'hashtag`', etc.) and an internal token representation
* of the link. It should return an object of the template type `V`
*
* For the object, each key should be a link type (`'url'`, `'hashtag`', etc.).
* Each value should either have type `V` or a function that returns V. This
* function similarly takes a string value and a token.
*
* Example valid types for `Opt<string>`:
*
* ```js
* 'hello'
* (value, type, token) => 'world'
* { url: 'hello', email: (value, token) => 'world'}
* ```
* @template V
* @typedef {V | ((value: string, type: string, token: MultiToken) => V) | { [type: string]: V | ((value: string, token: MultiToken) => V) }} Opt
*/
/**
* See available options: https://linkify.js.org/docs/options.html
* @typedef {{
* defaultProtocol?: string,
* events?: OptObj<EventListeners>,
* format?: Opt<string>,
* formatHref?: Opt<string>,
* nl2br?: boolean,
* tagName?: Opt<any>,
* target?: Opt<string>,
* rel?: Opt<string>,
* validate?: Opt<boolean>,
* truncate?: Opt<number>,
* className?: Opt<string>,
* attributes?: OptObj<({ [attr: string]: any })>,
* ignoreTags?: string[],
* render?: OptFn<((ir: IntermediateRepresentation) => any)>
* }} Opts
*/
/**
* @type Required<Opts>
*/
const defaults = {
defaultProtocol: 'http',
events: null,
format: noop,
formatHref: noop,
nl2br: false,
tagName: 'a',
target: null,
rel: null,
validate: true,
truncate: Infinity,
className: null,
attributes: null,
ignoreTags: [],
render: null
};
/**
* Utility class for linkify interfaces to apply specified
* {@link Opts formatting and rendering options}.
*
* @param {Opts | Options} [opts] Option value overrides.
* @param {(ir: IntermediateRepresentation) => any} [defaultRender] (For
* internal use) default render function that determines how to generate an
* HTML element based on a link token's derived tagName, attributes and HTML.
* Similar to render option
*/
function Options(opts, defaultRender = null) {
let o = assign({}, defaults);
if (opts) {
o = assign(o, opts instanceof Options ? opts.o : opts);
}
// Ensure all ignored tags are uppercase
const ignoredTags = o.ignoreTags;
const uppercaseIgnoredTags = [];
for (let i = 0; i < ignoredTags.length; i++) {
uppercaseIgnoredTags.push(ignoredTags[i].toUpperCase());
}
/** @protected */
this.o = o;
if (defaultRender) {
this.defaultRender = defaultRender;
}
this.ignoreTags = uppercaseIgnoredTags;
}
Options.prototype = {
o: defaults,
/**
* @type string[]
*/
ignoreTags: [],
/**
* @param {IntermediateRepresentation} ir
* @returns {any}
*/
defaultRender(ir) {
return ir;
},
/**
* Returns true or false based on whether a token should be displayed as a
* link based on the user options.
* @param {MultiToken} token
* @returns {boolean}
*/
check(token) {
return this.get('validate', token.toString(), token);
},
// Private methods
/**
* Resolve an option's value based on the value of the option and the given
* params. If operator and token are specified and the target option is
* callable, automatically calls the function with the given argument.
* @template {keyof Opts} K
* @param {K} key Name of option to use
* @param {string} [operator] will be passed to the target option if it's a
* function. If not specified, RAW function value gets returned
* @param {MultiToken} [token] The token from linkify.tokenize
* @returns {Opts[K] | any}
*/
get(key, operator, token) {
const isCallable = operator != null;
let option = this.o[key];
if (!option) {
return option;
}
if (typeof option === 'object') {
option = token.t in option ? option[token.t] : defaults[key];
if (typeof option === 'function' && isCallable) {
option = option(operator, token);
}
} else if (typeof option === 'function' && isCallable) {
option = option(operator, token.t, token);
}
return option;
},
/**
* @template {keyof Opts} L
* @param {L} key Name of options object to use
* @param {string} [operator]
* @param {MultiToken} [token]
* @returns {Opts[L] | any}
*/
getObj(key, operator, token) {
let obj = this.o[key];
if (typeof obj === 'function' && operator != null) {
obj = obj(operator, token.t, token);
}
return obj;
},
/**
* Convert the given token to a rendered element that may be added to the
* calling-interface's DOM
* @param {MultiToken} token Token to render to an HTML element
* @returns {any} Render result; e.g., HTML string, DOM element, React
* Component, etc.
*/
render(token) {
const ir = token.render(this); // intermediate representation
const renderFn = this.get('render', null, token) || this.defaultRender;
return renderFn(ir, token.t, token);
}
};
function noop(val) {
return val;
}
var options = /*#__PURE__*/Object.freeze({
__proto__: null,
defaults: defaults,
Options: Options,
assign: assign
});
/******************************************************************************
Multi-Tokens
Tokens composed of arrays of TextTokens
******************************************************************************/
/**
* @param {string} value
* @param {Token[]} tokens
*/
function MultiToken(value, tokens) {
this.t = 'token';
this.v = value;
this.tk = tokens;
}
/**
* Abstract class used for manufacturing tokens of text tokens. That is rather
* than the value for a token being a small string of text, it's value an array
* of text tokens.
*
* Used for grouping together URLs, emails, hashtags, and other potential
* creations.
* @class MultiToken
* @property {string} t
* @property {string} v
* @property {Token[]} tk
* @abstract
*/
MultiToken.prototype = {
isLink: false,
/**
* Return the string this token represents.
* @return {string}
*/
toString() {
return this.v;
},
/**
* What should the value for this token be in the `href` HTML attribute?
* Returns the `.toString` value by default.
* @param {string} [scheme]
* @return {string}
*/
toHref(scheme) {
return this.toString();
},
/**
* @param {Options} options Formatting options
* @returns {string}
*/
toFormattedString(options) {
const val = this.toString();
const truncate = options.get('truncate', val, this);
const formatted = options.get('format', val, this);
return truncate && formatted.length > truncate ? formatted.substring(0, truncate) + '…' : formatted;
},
/**
*
* @param {Options} options
* @returns {string}
*/
toFormattedHref(options) {
return options.get('formatHref', this.toHref(options.get('defaultProtocol')), this);
},
/**
* The start index of this token in the original input string
* @returns {number}
*/
startIndex() {
return this.tk[0].s;
},
/**
* The end index of this token in the original input string (up to this
* index but not including it)
* @returns {number}
*/
endIndex() {
return this.tk[this.tk.length - 1].e;
},
/**
Returns an object of relevant values for this token, which includes keys
* type - Kind of token ('url', 'email', etc.)
* value - Original text
* href - The value that should be added to the anchor tag's href
attribute
@method toObject
@param {string} [protocol] `'http'` by default
*/
toObject(protocol = defaults.defaultProtocol) {
return {
type: this.t,
value: this.toString(),
isLink: this.isLink,
href: this.toHref(protocol),
start: this.startIndex(),
end: this.endIndex()
};
},
/**
*
* @param {Options} options Formatting option
*/
toFormattedObject(options) {
return {
type: this.t,
value: this.toFormattedString(options),
isLink: this.isLink,
href: this.toFormattedHref(options),
start: this.startIndex(),
end: this.endIndex()
};
},
/**
* Whether this token should be rendered as a link according to the given options
* @param {Options} options
* @returns {boolean}
*/
validate(options) {
return options.get('validate', this.toString(), this);
},
/**
* Return an object that represents how this link should be rendered.
* @param {Options} options Formattinng options
*/
render(options) {
const token = this;
const href = this.toHref(options.get('defaultProtocol'));
const formattedHref = options.get('formatHref', href, this);
const tagName = options.get('tagName', href, token);
const content = this.toFormattedString(options);
const attributes = {};
const className = options.get('className', href, token);
const target = options.get('target', href, token);
const rel = options.get('rel', href, token);
const attrs = options.getObj('attributes', href, token);
const eventListeners = options.getObj('events', href, token);
attributes.href = formattedHref;
if (className) {
attributes.class = className;
}
if (target) {
attributes.target = target;
}
if (rel) {
attributes.rel = rel;
}
if (attrs) {
assign(attributes, attrs);
}
return {
tagName,
attributes,
content,
eventListeners
};
}
};
/**
* Create a new token that can be emitted by the parser state machine
* @param {string} type readable type of the token
* @param {object} props properties to assign or override, including isLink = true or false
* @returns {new (value: string, tokens: Token[]) => MultiToken} new token class
*/
function createTokenClass(type, props) {
class Token extends MultiToken {
constructor(value, tokens) {
super(value, tokens);
this.t = type;
}
}
for (const p in props) {
Token.prototype[p] = props[p];
}
Token.t = type;
return Token;
}
/**
Represents a list of tokens making up a valid email address
*/
const Email = createTokenClass('email', {
isLink: true,
toHref() {
return 'mailto:' + this.toString();
}
});
/**
Represents some plain text
*/
const Text = createTokenClass('text');
/**
Multi-linebreak token - represents a line break
@class Nl
*/
const Nl = createTokenClass('nl');
/**
Represents a list of text tokens making up a valid URL
@class Url
*/
const Url = createTokenClass('url', {
isLink: true,
/**
Lowercases relevant parts of the domain and adds the protocol if
required. Note that this will not escape unsafe HTML characters in the
URL.
@param {string} [scheme] default scheme (e.g., 'https')
@return {string} the full href
*/
toHref(scheme = defaults.defaultProtocol) {
// Check if already has a prefix scheme
return this.hasProtocol() ? this.v : `${scheme}://${this.v}`;
},
/**
* Check whether this URL token has a protocol
* @return {boolean}
*/
hasProtocol() {
const tokens = this.tk;
return tokens.length >= 2 && tokens[0].t !== LOCALHOST && tokens[1].t === COLON;
}
});
var multi = /*#__PURE__*/Object.freeze({
__proto__: null,
MultiToken: MultiToken,
Base: MultiToken,
createTokenClass: createTokenClass,
Email: Email,
Text: Text,
Nl: Nl,
Url: Url
});
/**
Not exactly parser, more like the second-stage scanner (although we can
theoretically hotswap the code here with a real parser in the future... but
for a little URL-finding utility abstract syntax trees may be a little
overkill).
URL format: http://en.wikipedia.org/wiki/URI_scheme
Email format: http://en.wikipedia.org/wiki/EmailAddress (links to RFC in
reference)
@module linkify
@submodule parser
@main run
*/
const makeState = arg => new State(arg);
/**
* Generate the parser multi token-based state machine
* @param {{ groups: Collections<string> }} tokens
*/
function init$1({
groups
}) {
// Types of characters the URL can definitely end in
const qsAccepting = groups.domain.concat([AMPERSAND, ASTERISK, AT, BACKSLASH, BACKTICK, CARET, DOLLAR, EQUALS, HYPHEN, NUM, PERCENT, PIPE, PLUS, POUND, SLASH, SYM, TILDE, UNDERSCORE]);
// Types of tokens that can follow a URL and be part of the query string
// but cannot be the very last characters
// Characters that cannot appear in the URL at all should be excluded
const qsNonAccepting = [COLON, COMMA, DOT, EXCLAMATION, PERCENT, QUERY, QUOTE, SEMI, OPENANGLEBRACKET, CLOSEANGLEBRACKET, OPENBRACE, CLOSEBRACE, CLOSEBRACKET, OPENBRACKET, OPENPAREN, CLOSEPAREN, FULLWIDTHLEFTPAREN, FULLWIDTHRIGHTPAREN, LEFTCORNERBRACKET, RIGHTCORNERBRACKET, LEFTWHITECORNERBRACKET, RIGHTWHITECORNERBRACKET, FULLWIDTHLESSTHAN, FULLWIDTHGREATERTHAN];
// For addresses without the mailto prefix
// Tokens allowed in the localpart of the email
const localpartAccepting = [AMPERSAND, APOSTROPHE, ASTERISK, BACKSLASH, BACKTICK, CARET, DOLLAR, EQUALS, HYPHEN, OPENBRACE, CLOSEBRACE, PERCENT, PIPE, PLUS, POUND, QUERY, SLASH, SYM, TILDE, UNDERSCORE];
// The universal starting state.
/**
* @type State<Token>
*/
const Start = makeState();
const Localpart = tt(Start, TILDE); // Local part of the email address
ta(Localpart, localpartAccepting, Localpart);
ta(Localpart, groups.domain, Localpart);
const Domain = makeState(),
Scheme = makeState(),
SlashScheme = makeState();
ta(Start, groups.domain, Domain); // parsed string ends with a potential domain name (A)
ta(Start, groups.scheme, Scheme); // e.g., 'mailto'
ta(Start, groups.slashscheme, SlashScheme); // e.g., 'http'
ta(Domain, localpartAccepting, Localpart);
ta(Domain, groups.domain, Domain);
const LocalpartAt = tt(Domain, AT); // Local part of the email address plus @
tt(Localpart, AT, LocalpartAt); // close to an email address now
// Local part of an email address can be e.g. 'http' or 'mailto'
tt(Scheme, AT, LocalpartAt);
tt(SlashScheme, AT, LocalpartAt);
const LocalpartDot = tt(Localpart, DOT); // Local part of the email address plus '.' (localpart cannot end in .)
ta(LocalpartDot, localpartAccepting, Localpart);
ta(LocalpartDot, groups.domain, Localpart);
const EmailDomain = makeState();
ta(LocalpartAt, groups.domain, EmailDomain); // parsed string starts with local email info + @ with a potential domain name
ta(EmailDomain, groups.domain, EmailDomain);
const EmailDomainDot = tt(EmailDomain, DOT); // domain followed by DOT
ta(EmailDomainDot, groups.domain, EmailDomain);
const Email$1 = makeState(Email