UNPKG

delta-detect-links

Version:

Detect links in a delta object.

github.com/holidaycheck/delta-detect-links

holidaycheck/delta-detect-links

1,196 lines (1,037 loc) • 91.9 kB

JavaScript

'use strict'; var regex = /[\0-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/; var regex$2 = /[\0-\x1F\x7F-\x9F]/; var regex$4 = /[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000]/; var regex$6 = /[!-#%-\*,-/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E44\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDC4B-\uDC4F\uDC5B\uDC5D\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDE60-\uDE6C\uDF3C-\uDF3E]|\uD807[\uDC41-\uDC45\uDC70\uDC71]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]|\uD83A[\uDD5E\uDD5F]/; var re = function re(opts) { var re = {}; // Use direct extract instead of `regenerate` to reduse browserified size re.src_Any = regex.source; re.src_Cc = regex$2.source; re.src_Z = regex$4.source; re.src_P = regex$6.source; // \p{\Z\P\Cc\CF} (white spaces + control + format + punctuation) re.src_ZPCc = [re.src_Z, re.src_P, re.src_Cc].join('|'); // \p{\Z\Cc} (white spaces + control) re.src_ZCc = [re.src_Z, re.src_Cc].join('|'); // All possible word characters (everything without punctuation, spaces & controls) // Defined via punctuation & spaces to save space // Should be something like \p{\L\N\S\M} (\w but without `_`) re.src_pseudo_letter = '(?:(?!>|<|' + re.src_ZPCc + ')' + re.src_Any + ')'; // The same as abothe but without [0-9] // var src_pseudo_letter_non_d = '(?:(?![0-9]|' + src_ZPCc + ')' + src_Any + ')'; //////////////////////////////////////////////////////////////////////////////// re.src_ip4 = '(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'; // Prohibit any of "@/[]()" in user/pass to avoid wrong domain fetch. re.src_auth = '(?:(?:(?!' + re.src_ZCc + '|[@/\\[\\]()]).)+@)?'; re.src_port = '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?'; re.src_host_terminator = '(?=$|>|<|' + re.src_ZPCc + ')(?!-|_|:\\d|\\.-|\\.(?!$|' + re.src_ZPCc + '))'; re.src_path = '(?:' + '[/?#]' + '(?:' + '(?!' + re.src_ZCc + '|[()[\\]{}.,"\'?!\\-<>]).|' + '\\[(?:(?!' + re.src_ZCc + '|\\]).)*\\]|' + '\\((?:(?!' + re.src_ZCc + '|[)]).)*\\)|' + '\\{(?:(?!' + re.src_ZCc + '|[}]).)*\\}|' + '\\"(?:(?!' + re.src_ZCc + '|["]).)+\\"|' + "\\'(?:(?!" + re.src_ZCc + "|[']).)+\\'|" + "\\'(?=" + re.src_pseudo_letter + '|[-]).|' + // allow `I'm_king` if no pair found '\\.{2,3}[a-zA-Z0-9%/]|' + // github has ... in commit range links. Restrict to // - english // - percent-encoded // - parts of file path // until more examples found. '\\.(?!' + re.src_ZCc + '|[.]).|' + (opts && opts['---'] ? '\\-(?!--(?:[^-]|$))(?:-*)|' // `---` => long dash, terminate : '\\-+|') + '\\,(?!' + re.src_ZCc + ').|' + // allow `,,,` in paths '\\!(?!' + re.src_ZCc + '|[!]).|' + '\\?(?!' + re.src_ZCc + '|[?]).' + ')+' + '|\\/' + ')?'; re.src_email_name = '[\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]+'; re.src_xn = 'xn--[a-z0-9\\-]{1,59}'; // More to read about domain names // http://serverfault.com/questions/638260/ re.src_domain_root = // Allow letters & digits (http://test1) '(?:' + re.src_xn + '|' + re.src_pseudo_letter + '{1,63}' + ')'; re.src_domain = '(?:' + re.src_xn + '|' + '(?:' + re.src_pseudo_letter + ')' + '|' + // don't allow `--` in domain names, because: // - that can conflict with markdown — / – // - nobody use those anyway '(?:' + re.src_pseudo_letter + '(?:-(?!-)|' + re.src_pseudo_letter + '){0,61}' + re.src_pseudo_letter + ')' + ')'; re.src_host = '(?:' + // Don't need IP check, because digits are already allowed in normal domain names // src_ip4 + // '|' + '(?:(?:(?:' + re.src_domain + ')\\.)*' + re.src_domain /*_root*/ + ')' + ')'; re.tpl_host_fuzzy = '(?:' + re.src_ip4 + '|' + '(?:(?:(?:' + re.src_domain + ')\\.)+(?:%TLDS%))' + ')'; re.tpl_host_no_ip_fuzzy = '(?:(?:(?:' + re.src_domain + ')\\.)+(?:%TLDS%))'; re.src_host_strict = re.src_host + re.src_host_terminator; re.tpl_host_fuzzy_strict = re.tpl_host_fuzzy + re.src_host_terminator; re.src_host_port_strict = re.src_host + re.src_port + re.src_host_terminator; re.tpl_host_port_fuzzy_strict = re.tpl_host_fuzzy + re.src_port + re.src_host_terminator; re.tpl_host_port_no_ip_fuzzy_strict = re.tpl_host_no_ip_fuzzy + re.src_port + re.src_host_terminator; //////////////////////////////////////////////////////////////////////////////// // Main rules // Rude test fuzzy links by host, for quick deny re.tpl_host_fuzzy_test = 'localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' + re.src_ZPCc + '|>|$))'; re.tpl_email_fuzzy = '(^|<|>|\\(|' + re.src_ZCc + ')(' + re.src_email_name + '@' + re.tpl_host_fuzzy_strict + ')'; re.tpl_link_fuzzy = // Fuzzy link can't be prepended with .:/\- and non punctuation. // but can start with > (markdown blockquote) '(^|(?![.:/\\-_@])(?:[$+<=>^`|]|' + re.src_ZPCc + '))' + '((?![$+<=>^`|])' + re.tpl_host_port_fuzzy_strict + re.src_path + ')'; re.tpl_link_no_ip_fuzzy = // Fuzzy link can't be prepended with .:/\- and non punctuation. // but can start with > (markdown blockquote) '(^|(?![.:/\\-_@])(?:[$+<=>^`|]|' + re.src_ZPCc + '))' + '((?![$+<=>^`|])' + re.tpl_host_port_no_ip_fuzzy_strict + re.src_path + ')'; return re; }; //////////////////////////////////////////////////////////////////////////////// // Helpers // Merge objects // function assign(obj /*from1, from2, from3, ...*/) { var sources = Array.prototype.slice.call(arguments, 1); sources.forEach(function (source) { if (!source) { return; } Object.keys(source).forEach(function (key) { obj[key] = source[key]; }); }); return obj; } function _class(obj) { return Object.prototype.toString.call(obj); } function isString(obj) { return _class(obj) === '[object String]'; } function isObject(obj) { return _class(obj) === '[object Object]'; } function isRegExp(obj) { return _class(obj) === '[object RegExp]'; } function isFunction(obj) { return _class(obj) === '[object Function]'; } function escapeRE(str) { return str.replace(/[.?*+^$[\]\\(){}|-]/g, '\\$&'); } //////////////////////////////////////////////////////////////////////////////// var defaultOptions = { fuzzyLink: true, fuzzyEmail: true, fuzzyIP: false }; function isOptionsObj(obj) { return Object.keys(obj || {}).reduce(function (acc, k) { return acc || defaultOptions.hasOwnProperty(k); }, false); } var defaultSchemas = { 'http:': { validate: function validate(text, pos, self) { var tail = text.slice(pos); if (!self.re.http) { // compile lazily, because "host"-containing variables can change on tlds update. self.re.http = new RegExp('^\\/\\/' + self.re.src_auth + self.re.src_host_port_strict + self.re.src_path, 'i'); } if (self.re.http.test(tail)) { return tail.match(self.re.http)[0].length; } return 0; } }, 'https:': 'http:', 'ftp:': 'http:', '//': { validate: function validate(text, pos, self) { var tail = text.slice(pos); if (!self.re.no_http) { // compile lazily, because "host"-containing variables can change on tlds update. self.re.no_http = new RegExp('^' + self.re.src_auth + // Don't allow single-level domains, because of false positives like '//test' // with code comments '(?:localhost|(?:(?:' + self.re.src_domain + ')\\.)+' + self.re.src_domain_root + ')' + self.re.src_port + self.re.src_host_terminator + self.re.src_path, 'i'); } if (self.re.no_http.test(tail)) { // should not be `://` & `///`, that protects from errors in protocol name if (pos >= 3 && text[pos - 3] === ':') { return 0; } if (pos >= 3 && text[pos - 3] === '/') { return 0; } return tail.match(self.re.no_http)[0].length; } return 0; } }, 'mailto:': { validate: function validate(text, pos, self) { var tail = text.slice(pos); if (!self.re.mailto) { self.re.mailto = new RegExp('^' + self.re.src_email_name + '@' + self.re.src_host_strict, 'i'); } if (self.re.mailto.test(tail)) { return tail.match(self.re.mailto)[0].length; } return 0; } } }; /*eslint-disable max-len*/ // RE pattern for 2-character tlds (autogenerated by ./support/tlds_2char_gen.js) var tlds_2ch_src_re = 'a[cdefgilmnoqrstuwxz]|b[abdefghijmnorstvwyz]|c[acdfghiklmnoruvwxyz]|d[ejkmoz]|e[cegrstu]|f[ijkmor]|g[abdefghilmnpqrstuwy]|h[kmnrtu]|i[delmnoqrst]|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]|m[acdeghklmnopqrstuvwxyz]|n[acefgilopruz]|om|p[aefghklmnrstwy]|qa|r[eosuw]|s[abcdeghijklmnortuvxyz]|t[cdfghjklmnortvwz]|u[agksyz]|v[aceginu]|w[fs]|y[et]|z[amw]'; // DON'T try to make PRs with changes. Extend TLDs with LinkifyIt.tlds() instead var tlds_default = 'biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф'.split('|'); /*eslint-enable max-len*/ //////////////////////////////////////////////////////////////////////////////// function resetScanCache(self) { self.__index__ = -1; self.__text_cache__ = ''; } function createValidator(re$$1) { return function (text, pos) { var tail = text.slice(pos); if (re$$1.test(tail)) { return tail.match(re$$1)[0].length; } return 0; }; } function createNormalizer() { return function (match, self) { self.normalize(match); }; } // Schemas compiler. Build regexps. // function compile(self) { // Load & clone RE patterns. var re$$1 = self.re = re(self.__opts__); // Define dynamic patterns var tlds = self.__tlds__.slice(); self.onCompile(); if (!self.__tlds_replaced__) { tlds.push(tlds_2ch_src_re); } tlds.push(re$$1.src_xn); re$$1.src_tlds = tlds.join('|'); function untpl(tpl) { return tpl.replace('%TLDS%', re$$1.src_tlds); } re$$1.email_fuzzy = RegExp(untpl(re$$1.tpl_email_fuzzy), 'i'); re$$1.link_fuzzy = RegExp(untpl(re$$1.tpl_link_fuzzy), 'i'); re$$1.link_no_ip_fuzzy = RegExp(untpl(re$$1.tpl_link_no_ip_fuzzy), 'i'); re$$1.host_fuzzy_test = RegExp(untpl(re$$1.tpl_host_fuzzy_test), 'i'); // // Compile each schema // var aliases = []; self.__compiled__ = {}; // Reset compiled data function schemaError(name, val) { throw new Error('(LinkifyIt) Invalid schema "' + name + '": ' + val); } Object.keys(self.__schemas__).forEach(function (name) { var val = self.__schemas__[name]; // skip disabled methods if (val === null) { return; } var compiled = { validate: null, link: null }; self.__compiled__[name] = compiled; if (isObject(val)) { if (isRegExp(val.validate)) { compiled.validate = createValidator(val.validate); } else if (isFunction(val.validate)) { compiled.validate = val.validate; } else { schemaError(name, val); } if (isFunction(val.normalize)) { compiled.normalize = val.normalize; } else if (!val.normalize) { compiled.normalize = createNormalizer(); } else { schemaError(name, val); } return; } if (isString(val)) { aliases.push(name); return; } schemaError(name, val); }); // // Compile postponed aliases // aliases.forEach(function (alias) { if (!self.__compiled__[self.__schemas__[alias]]) { // Silently fail on missed schemas to avoid errons on disable. // schemaError(alias, self.__schemas__[alias]); return; } self.__compiled__[alias].validate = self.__compiled__[self.__schemas__[alias]].validate; self.__compiled__[alias].normalize = self.__compiled__[self.__schemas__[alias]].normalize; }); // // Fake record for guessed links // self.__compiled__[''] = { validate: null, normalize: createNormalizer() }; // // Build schema condition // var slist = Object.keys(self.__compiled__).filter(function (name) { // Filter disabled & fake schemas return name.length > 0 && self.__compiled__[name]; }).map(escapeRE).join('|'); // (?!_) cause 1.5x slowdown self.re.schema_test = RegExp('(^|(?!_)(?:[><]|' + re$$1.src_ZPCc + '))(' + slist + ')', 'i'); self.re.schema_search = RegExp('(^|(?!_)(?:[><]|' + re$$1.src_ZPCc + '))(' + slist + ')', 'ig'); self.re.pretest = RegExp('(' + self.re.schema_test.source + ')|' + '(' + self.re.host_fuzzy_test.source + ')|' + '@', 'i'); // // Cleanup // resetScanCache(self); } /** * class Match * * Match result. Single element of array, returned by [[LinkifyIt#match]] **/ function Match(self, shift) { var start = self.__index__, end = self.__last_index__, text = self.__text_cache__.slice(start, end); /** * Match#schema -> String * * Prefix (protocol) for matched string. **/ this.schema = self.__schema__.toLowerCase(); /** * Match#index -> Number * * First position of matched string. **/ this.index = start + shift; /** * Match#lastIndex -> Number * * Next position after matched string. **/ this.lastIndex = end + shift; /** * Match#raw -> String * * Matched string. **/ this.raw = text; /** * Match#text -> String * * Notmalized text of matched string. **/ this.text = text; /** * Match#url -> String * * Normalized url of matched string. **/ this.url = text; } function createMatch(self, shift) { var match = new Match(self, shift); self.__compiled__[match.schema].normalize(match, self); return match; } /** * class LinkifyIt **/ /** * new LinkifyIt(schemas, options) * - schemas (Object): Optional. Additional schemas to validate (prefix/validator) * - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false } * * Creates new linkifier instance with optional additional schemas. * Can be called without `new` keyword for convenience. * * By default understands: * * - `http(s)://...` , `ftp://...`, `mailto:...` & `//...` links * - "fuzzy" links and emails (example.com, foo@bar.com). * * `schemas` is an object, where each key/value describes protocol/rule: * * - __key__ - link prefix (usually, protocol name with `:` at the end, `skype:` * for example). `linkify-it` makes shure that prefix is not preceeded with * alphanumeric char and symbols. Only whitespaces and punctuation allowed. * - __value__ - rule to check tail after link prefix * - _String_ - just alias to existing rule * - _Object_ * - _validate_ - validator function (should return matched length on success), * or `RegExp`. * - _normalize_ - optional function to normalize text & url of matched result * (for example, for @twitter mentions). * * `options`: * * - __fuzzyLink__ - recognige URL-s without `http(s):` prefix. Default `true`. * - __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts * like version numbers. Default `false`. * - __fuzzyEmail__ - recognize emails without `mailto:` prefix. * **/ function LinkifyIt(schemas, options) { if (!(this instanceof LinkifyIt)) { return new LinkifyIt(schemas, options); } if (!options) { if (isOptionsObj(schemas)) { options = schemas; schemas = {}; } } this.__opts__ = assign({}, defaultOptions, options); // Cache last tested result. Used to skip repeating steps on next `match` call. this.__index__ = -1; this.__last_index__ = -1; // Next scan position this.__schema__ = ''; this.__text_cache__ = ''; this.__schemas__ = assign({}, defaultSchemas, schemas); this.__compiled__ = {}; this.__tlds__ = tlds_default; this.__tlds_replaced__ = false; this.re = {}; compile(this); } /** chainable * LinkifyIt#add(schema, definition) * - schema (String): rule name (fixed pattern prefix) * - definition (String|RegExp|Object): schema definition * * Add new rule definition. See constructor description for details. **/ LinkifyIt.prototype.add = function add(schema, definition) { this.__schemas__[schema] = definition; compile(this); return this; }; /** chainable * LinkifyIt#set(options) * - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false } * * Set recognition options for links without schema. **/ LinkifyIt.prototype.set = function set(options) { this.__opts__ = assign(this.__opts__, options); return this; }; /** * LinkifyIt#test(text) -> Boolean * * Searches linkifiable pattern and returns `true` on success or `false` on fail. **/ LinkifyIt.prototype.test = function test(text) { // Reset scan cache this.__text_cache__ = text; this.__index__ = -1; if (!text.length) { return false; } var m, ml, me, len, shift, next, re$$1, tld_pos, at_pos; // try to scan for link with schema - that's the most simple rule if (this.re.schema_test.test(text)) { re$$1 = this.re.schema_search; re$$1.lastIndex = 0; while ((m = re$$1.exec(text)) !== null) { len = this.testSchemaAt(text, m[2], re$$1.lastIndex); if (len) { this.__schema__ = m[2]; this.__index__ = m.index + m[1].length; this.__last_index__ = m.index + m[0].length + len; break; } } } if (this.__opts__.fuzzyLink && this.__compiled__['http:']) { // guess schemaless links tld_pos = text.search(this.re.host_fuzzy_test); if (tld_pos >= 0) { // if tld is located after found link - no need to check fuzzy pattern if (this.__index__ < 0 || tld_pos < this.__index__) { if ((ml = text.match(this.__opts__.fuzzyIP ? this.re.link_fuzzy : this.re.link_no_ip_fuzzy)) !== null) { shift = ml.index + ml[1].length; if (this.__index__ < 0 || shift < this.__index__) { this.__schema__ = ''; this.__index__ = shift; this.__last_index__ = ml.index + ml[0].length; } } } } } if (this.__opts__.fuzzyEmail && this.__compiled__['mailto:']) { // guess schemaless emails at_pos = text.indexOf('@'); if (at_pos >= 0) { // We can't skip this check, because this cases are possible: // 192.168.1.1@gmail.com, my.in@example.com if ((me = text.match(this.re.email_fuzzy)) !== null) { shift = me.index + me[1].length; next = me.index + me[0].length; if (this.__index__ < 0 || shift < this.__index__ || shift === this.__index__ && next > this.__last_index__) { this.__schema__ = 'mailto:'; this.__index__ = shift; this.__last_index__ = next; } } } } return this.__index__ >= 0; }; /** * LinkifyIt#pretest(text) -> Boolean * * Very quick check, that can give false positives. Returns true if link MAY BE * can exists. Can be used for speed optimization, when you need to check that * link NOT exists. **/ LinkifyIt.prototype.pretest = function pretest(text) { return this.re.pretest.test(text); }; /** * LinkifyIt#testSchemaAt(text, name, position) -> Number * - text (String): text to scan * - name (String): rule (schema) name * - position (Number): text offset to check from * * Similar to [[LinkifyIt#test]] but checks only specific protocol tail exactly * at given position. Returns length of found pattern (0 on fail). **/ LinkifyIt.prototype.testSchemaAt = function testSchemaAt(text, schema, pos) { // If not supported schema check requested - terminate if (!this.__compiled__[schema.toLowerCase()]) { return 0; } return this.__compiled__[schema.toLowerCase()].validate(text, pos, this); }; /** * LinkifyIt#match(text) -> Array|null * * Returns array of found link descriptions or `null` on fail. We strongly * recommend to use [[LinkifyIt#test]] first, for best speed. * * ##### Result match description * * - __schema__ - link schema, can be empty for fuzzy links, or `//` for * protocol-neutral links. * - __index__ - offset of matched text * - __lastIndex__ - index of next char after mathch end * - __raw__ - matched text * - __text__ - normalized text * - __url__ - link, generated from matched text **/ LinkifyIt.prototype.match = function match(text) { var shift = 0, result = []; // Try to take previous element from cache, if .test() called before if (this.__index__ >= 0 && this.__text_cache__ === text) { result.push(createMatch(this, shift)); shift = this.__last_index__; } // Cut head if cache was used var tail = shift ? text.slice(shift) : text; // Scan string until end reached while (this.test(tail)) { result.push(createMatch(this, shift)); tail = tail.slice(this.__last_index__); shift += this.__last_index__; } if (result.length) { return result; } return null; }; /** chainable * LinkifyIt#tlds(list [, keepOld]) -> this * - list (Array): list of tlds * - keepOld (Boolean): merge with current list if `true` (`false` by default) * * Load (or merge) new tlds list. Those are user for fuzzy links (without prefix) * to avoid false positives. By default this algorythm used: * * - hostname with any 2-letter root zones are ok. * - biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф * are ok. * - encoded (`xn--...`) root zones are ok. * * If list is replaced, then exact match for 2-chars root zones will be checked. **/ LinkifyIt.prototype.tlds = function tlds(list, keepOld) { list = Array.isArray(list) ? list : [list]; if (!keepOld) { this.__tlds__ = list.slice(); this.__tlds_replaced__ = true; compile(this); return this; } this.__tlds__ = this.__tlds__.concat(list).sort().filter(function (el, idx, arr) { return el !== arr[idx - 1]; }).reverse(); compile(this); return this; }; /** * LinkifyIt#normalize(match) * * Default normalizer (if schema does not define it's own). **/ LinkifyIt.prototype.normalize = function normalize(match) { // Do minimal possible changes by default. Need to collect feedback prior // to move forward https://github.com/markdown-it/linkify-it/issues/1 if (!match.schema) { match.url = 'http://' + match.url; } if (match.schema === 'mailto:' && !/^mailto:/i.test(match.url)) { match.url = 'mailto:' + match.url; } }; /** * LinkifyIt#onCompile() * * Override to modify basic RegExp-s. **/ LinkifyIt.prototype.onCompile = function onCompile() {}; var index$1 = LinkifyIt; var index$2 = ["aaa", "aarp", "abarth", "abb", "abbott", "abbvie", "abc", "able", "abogado", "abudhabi", "ac", "academy", "accenture", "accountant", "accountants", "aco", "active", "actor", "ad", "adac", "ads", "adult", "ae", "aeg", "aero", "aetna", "af", "afamilycompany", "afl", "ag", "agakhan", "agency", "ai", "aig", "aigo", "airbus", "airforce", "airtel", "akdn", "al", "alfaromeo", "alibaba", "alipay", "allfinanz", "allstate", "ally", "alsace", "alstom", "am", "americanexpress", "americanfamily", "amex", "amfam", "amica", "amsterdam", "analytics", "android", "anquan", "anz", "ao", "apartments", "app", "apple", "aq", "aquarelle", "ar", "aramco", "archi", "army", "arpa", "art", "arte", "as", "asda", "asia", "associates", "at", "athleta", "attorney", "au", "auction", "audi", "audible", "audio", "auspost", "author", "auto", "autos", "avianca", "aw", "aws", "ax", "axa", "az", "azure", "ba", "baby", "baidu", "banamex", "bananarepublic", "band", "bank", "bar", "barcelona", "barclaycard", "barclays", "barefoot", "bargains", "baseball", "basketball", "bauhaus", "bayern", "bb", "bbc", "bbt", "bbva", "bcg", "bcn", "bd", "be", "beats", "beauty", "beer", "bentley", "berlin", "best", "bestbuy", "bet", "bf", "bg", "bh", "bharti", "bi", "bible", "bid", "bike", "bing", "bingo", "bio", "biz", "bj", "black", "blackfriday", "blanco", "blockbuster", "blog", "bloomberg", "blue", "bm", "bms", "bmw", "bn", "bnl", "bnpparibas", "bo", "boats", "boehringer", "bofa", "bom", "bond", "boo", "book", "booking", "boots", "bosch", "bostik", "bot", "boutique", "br", "bradesco", "bridgestone", "broadway", "broker", "brother", "brussels", "bs", "bt", "budapest", "bugatti", "build", "builders", "business", "buy", "buzz", "bv", "bw", "by", "bz", "bzh", "ca", "cab", "cafe", "cal", "call", "calvinklein", "cam", "camera", "camp", "cancerresearch", "canon", "capetown", "capital", "capitalone", "car", "caravan", "cards", "care", "career", "careers", "cars", "cartier", "casa", "case", "caseih", "cash", "casino", "cat", "catering", "cba", "cbn", "cbre", "cbs", "cc", "cd", "ceb", "center", "ceo", "cern", "cf", "cfa", "cfd", "cg", "ch", "chanel", "channel", "chase", "chat", "cheap", "chintai", "chloe", "christmas", "chrome", "chrysler", "church", "ci", "cipriani", "circle", "cisco", "citadel", "citi", "citic", "city", "cityeats", "ck", "cl", "claims", "cleaning", "click", "clinic", "clinique", "clothing", "cloud", "club", "clubmed", "cm", "cn", "co", "coach", "codes", "coffee", "college", "cologne", "com", "comcast", "commbank", "community", "company", "compare", "computer", "comsec", "condos", "construction", "consulting", "contact", "contractors", "cooking", "cookingchannel", "cool", "coop", "corsica", "country", "coupon", "coupons", "courses", "cr", "credit", "creditcard", "creditunion", "cricket", "crown", "crs", "cruises", "csc", "cu", "cuisinella", "cv", "cw", "cx", "cy", "cymru", "cyou", "cz", "dabur", "dad", "dance", "date", "dating", "datsun", "day", "dclk", "dds", "de", "deal", "dealer", "deals", "degree", "delivery", "dell", "deloitte", "delta", "democrat", "dental", "dentist", "desi", "design", "dev", "dhl", "diamonds", "diet", "digital", "direct", "directory", "discount", "discover", "dish", "diy", "dj", "dk", "dm", "dnp", "do", "docs", "doctor", "dodge", "dog", "doha", "domains", "dot", "download", "drive", "dtv", "dubai", "duck", "dunlop", "duns", "dupont", "durban", "dvag", "dvr", "dz", "earth", "eat", "ec", "eco", "edeka", "edu", "education", "ee", "eg", "email", "emerck", "energy", "engineer", "engineering", "enterprises", "epost", "epson", "equipment", "er", "ericsson", "erni", "es", "esq", "estate", "esurance", "et", "eu", "eurovision", "eus", "events", "everbank", "exchange", "expert", "exposed", "express", "extraspace", "fage", "fail", "fairwinds", "faith", "family", "fan", "fans", "farm", "farmers", "fashion", "fast", "fedex", "feedback", "ferrari", "ferrero", "fi", "fiat", "fidelity", "fido", "film", "final", "finance", "financial", "fire", "firestone", "firmdale", "fish", "fishing", "fit", "fitness", "fj", "fk", "flickr", "flights", "flir", "florist", "flowers", "fly", "fm", "fo", "foo", "foodnetwork", "football", "ford", "forex", "forsale", "forum", "foundation", "fox", "fr", "fresenius", "frl", "frogans", "frontdoor", "frontier", "ftr", "fujitsu", "fujixerox", "fund", "furniture", "futbol", "fyi", "ga", "gal", "gallery", "gallo", "gallup", "game", "games", "gap", "garden", "gb", "gbiz", "gd", "gdn", "ge", "gea", "gent", "genting", "george", "gf", "gg", "ggee", "gh", "gi", "gift", "gifts", "gives", "giving", "gl", "glade", "glass", "gle", "global", "globo", "gm", "gmail", "gmbh", "gmo", "gmx", "gn", "godaddy", "gold", "goldpoint", "golf", "goo", "goodhands", "goodyear", "goog", "google", "gop", "got", "gov", "gp", "gq", "gr", "grainger", "graphics", "gratis", "green", "gripe", "group", "gs", "gt", "gu", "guardian", "gucci", "guge", "guide", "guitars", "guru", "gw", "gy", "hamburg", "hangout", "haus", "hbo", "hdfc", "hdfcbank", "health", "healthcare", "help", "helsinki", "here", "hermes", "hgtv", "hiphop", "hisamitsu", "hitachi", "hiv", "hk", "hkt", "hm", "hn", "hockey", "holdings", "holiday", "homedepot", "homegoods", "homes", "homesense", "honda", "honeywell", "horse", "host", "hosting", "hot", "hoteles", "hotmail", "house", "how", "hr", "hsbc", "ht", "htc", "hu", "hughes", "hyatt", "hyundai", "ibm", "icbc", "ice", "icu", "id", "ie", "ieee", "ifm", "iinet", "ikano", "il", "im", "imamat", "imdb", "immo", "immobilien", "in", "industries", "infiniti", "info", "ing", "ink", "institute", "insurance", "insure", "int", "intel", "international", "intuit", "investments", "io", "ipiranga", "iq", "ir", "irish", "is", "iselect", "ismaili", "ist", "istanbul", "it", "itau", "itv", "iveco", "iwc", "jaguar", "java", "jcb", "jcp", "je", "jeep", "jetzt", "jewelry", "jlc", "jll", "jm", "jmp", "jnj", "jo", "jobs", "joburg", "jot", "joy", "jp", "jpmorgan", "jprs", "juegos", "juniper", "kaufen", "kddi", "ke", "kerryhotels", "kerrylogistics", "kerryproperties", "kfh", "kg", "kh", "ki", "kia", "kim", "kinder", "kindle", "kitchen", "kiwi", "km", "kn", "koeln", "komatsu", "kosher", "kp", "kpmg", "kpn", "kr", "krd", "kred", "kuokgroup", "kw", "ky", "kyoto", "kz", "la", "lacaixa", "ladbrokes", "lamborghini", "lamer", "lancaster", "lancia", "lancome", "land", "landrover", "lanxess", "lasalle", "lat", "latino", "latrobe", "law", "lawyer", "lb", "lc", "lds", "lease", "leclerc", "lefrak", "legal", "lego", "lexus", "lgbt", "li", "liaison", "lidl", "life", "lifeinsurance", "lifestyle", "lighting", "like", "lilly", "limited", "limo", "lincoln", "linde", "link", "lipsy", "live", "living", "lixil", "lk", "loan", "loans", "locker", "locus", "loft", "lol", "london", "lotte", "lotto", "love", "lpl", "lplfinancial", "lr", "ls", "lt", "ltd", "ltda", "lu", "lundbeck", "lupin", "luxe", "luxury", "lv", "ly", "ma", "macys", "madrid", "maif", "maison", "makeup", "man", "management", "mango", "market", "marketing", "markets", "marriott", "marshalls", "maserati", "mattel", "mba", "mc", "mcd", "mcdonalds", "mckinsey", "md", "me", "med", "media", "meet", "melbourne", "meme", "memorial", "men", "menu", "meo", "metlife", "mg", "mh", "miami", "microsoft", "mil", "mini", "mint", "mit", "mitsubishi", "mk", "ml", "mlb", "mls", "mm", "mma", "mn", "mo", "mobi", "mobily", "moda", "moe", "moi", "mom", "monash", "money", "monster", "montblanc", "mopar", "mormon", "mortgage", "moscow", "motorcycles", "mov", "movie", "movistar", "mp", "mq", "mr", "ms", "msd", "mt", "mtn", "mtpc", "mtr", "mu", "museum", "mutual", "mutuelle", "mv", "mw", "mx", "my", "mz", "na", "nab", "nadex", "nagoya", "name", "nationwide", "natura", "navy", "nba", "nc", "ne", "nec", "net", "netbank", "netflix", "network", "neustar", "new", "newholland", "news", "next", "nextdirect", "nexus", "nf", "nfl", "ng", "ngo", "nhk", "ni", "nico", "nike", "nikon", "ninja", "nissan", "nissay", "nl", "no", "nokia", "northwesternmutual", "norton", "now", "nowruz", "nowtv", "np", "nr", "nra", "nrw", "ntt", "nu", "nyc", "nz", "obi", "observer", "off", "office", "okinawa", "olayan", "olayangroup", "oldnavy", "ollo", "om", "omega", "one", "ong", "onl", "online", "onyourside", "ooo", "open", "oracle", "orange", "org", "organic", "orientexpress", "origins", "osaka", "otsuka", "ott", "ovh", "pa", "page", "pamperedchef", "panasonic", "panerai", "paris", "pars", "partners", "parts", "party", "passagens", "pay", "pccw", "pe", "pet", "pf", "pfizer", "pg", "ph", "pharmacy", "philips", "photo", "photography", "photos", "physio", "piaget", "pics", "pictet", "pictures", "pid", "pin", "ping", "pink", "pioneer", "pizza", "pk", "pl", "place", "play", "playstation", "plumbing", "plus", "pm", "pn", "pnc", "pohl", "poker", "politie", "porn", "post", "pr", "pramerica", "praxi", "press", "prime", "pro", "prod", "productions", "prof", "progressive", "promo", "properties", "property", "protection", "pru", "prudential", "ps", "pt", "pub", "pw", "pwc", "py", "qa", "qpon", "quebec", "quest", "qvc", "racing", "radio", "raid", "re", "read", "realestate", "realtor", "realty", "recipes", "red", "redstone", "redumbrella", "rehab", "reise", "reisen", "reit", "ren", "rent", "rentals", "repair", "report", "republican", "rest", "restaurant", "review", "reviews", "rexroth", "rich", "richardli", "ricoh", "rightathome", "rio", "rip", "ro", "rocher", "rocks", "rodeo", "rogers", "room", "rs", "rsvp", "ru", "ruhr", "run", "rw", "rwe", "ryukyu", "sa", "saarland", "safe", "safety", "sakura", "sale", "salon", "samsclub", "samsung", "sandvik", "sandvikcoromant", "sanofi", "sap", "sapo", "sarl", "sas", "save", "saxo", "sb", "sbi", "sbs", "sc", "sca", "scb", "schaeffler", "schmidt", "scholarships", "school", "schule", "schwarz", "science", "scjohnson", "scor", "scot", "sd", "se", "seat", "secure", "security", "seek", "select", "sener", "services", "ses", "seven", "sew", "sex", "sexy", "sfr", "sg", "sh", "shangrila", "sharp", "shaw", "shell", "shia", "shiksha", "shoes", "shop", "shopping", "shouji", "show", "showtime", "shriram", "si", "silk", "sina", "singles", "site", "sj", "sk", "ski", "skin", "sky", "skype", "sl", "sling", "sm", "smart", "smile", "sn", "sncf", "so", "soccer", "social", "softbank", "software", "sohu", "solar", "solutions", "song", "sony", "soy", "space", "spiegel", "spot", "spreadbetting", "sr", "srl", "srt", "st", "stada", "staples", "star", "starhub", "statebank", "statefarm", "statoil", "stc", "stcgroup", "stockholm", "storage", "store", "stream", "studio", "study", "style", "su", "sucks", "supplies", "supply", "support", "surf", "surgery", "suzuki", "sv", "swatch", "swiftcover", "swiss", "sx", "sy", "sydney", "symantec", "systems", "sz", "tab", "taipei", "talk", "taobao", "target", "tatamotors", "tatar", "tattoo", "tax", "taxi", "tc", "tci", "td", "tdk", "team", "tech", "technology", "tel", "telecity", "telefonica", "temasek", "tennis", "teva", "tf", "tg", "th", "thd", "theater", "theatre", "tiaa", "tickets", "tienda", "tiffany", "tips", "tires", "tirol", "tj", "tjmaxx", "tjx", "tk", "tkmaxx", "tl", "tm", "tmall", "tn", "to", "today", "tokyo", "tools", "top", "toray", "toshiba", "total", "tours", "town", "toyota", "toys", "tr", "trade", "trading", "training", "travel", "travelchannel", "travelers", "travelersinsurance", "trust", "trv", "tt", "tube", "tui", "tunes", "tushu", "tv", "tvs", "tw", "tz", "ua", "ubank", "ubs", "uconnect", "ug", "uk", "unicom", "university", "uno", "uol", "ups", "us", "uy", "uz", "va", "vacations", "vana", "vanguard", "vc", "ve", "vegas", "ventures", "verisign", "versicherung", "vet", "vg", "vi", "viajes", "video", "vig", "viking", "villas", "vin", "vip", "virgin", "visa", "vision", "vista", "vistaprint", "viva", "vivo", "vlaanderen", "vn", "vodka", "volkswagen", "volvo", "vote", "voting", "voto", "voyage", "vu", "vuelos", "wales", "walmart", "walter", "wang", "wanggou", "warman", "watch", "watches", "weather", "weatherchannel", "webcam", "weber", "website", "wed", "wedding", "weibo", "weir", "wf", "whoswho", "wien", "wiki", "williamhill", "win", "windows", "wine", "winners", "wme", "wolterskluwer", "woodside", "work", "works", "world", "wow", "ws", "wtc", "wtf", "xbox", "xerox", "xfinity", "xihuan", "xin", "कॉम", // xn--11b4c3d "セール", // xn--1ck2e1b "佛山", // xn--1qqw23a "慈善", // xn--30rr7y "集团", // xn--3bst00m "在线", // xn--3ds443g "한국", // xn--3e0b707e "大众汽车", // xn--3oq18vl8pn36a "点看", // xn--3pxu8k "คอม", // xn--42c2d9a "ভারত", // xn--45brj9c "八卦", // xn--45q11c "موقع", // xn--4gbrim "বাংলা", // xn--54b7fta0cc "公益", // xn--55qw42g "公司", // xn--55qx5d "香格里拉", // xn--5su34j936bgsg "网站", // xn--5tzm5g "移动", // xn--6frz82g "我爱你", // xn--6qq986b3xl "москва", // xn--80adxhks "қаз", // xn--80ao21a "онлайн", // xn--80asehdb "сайт", // xn--80aswg "联通", // xn--8y0a063a "срб", // xn--90a3ac "бг", // xn--90ae "бел", // xn--90ais "קום", // xn--9dbq2a "时尚", // xn--9et52u "微博", // xn--9krt00a "淡马锡", // xn--b4w605ferd "ファッション", // xn--bck1b9a5dre4c "орг", // xn--c1avg "नेट", // xn--c2br7g "ストア", // xn--cck2b3b "삼성", // xn--cg4bki "சிங்கப்பூர்", // xn--clchc0ea0b2g2a9gcd "商标", // xn--czr694b "商店", // xn--czrs0t "商城", // xn--czru2d "дети", // xn--d1acj3b "мкд", // xn--d1alf "ею", // xn--e1a4c "ポイント", // xn--eckvdtc9d "新闻", // xn--efvy88h "工行", // xn--estv75g "家電", // xn--fct429k "كوم", // xn--fhbei "中文网", // xn--fiq228c5hs "中信", // xn--fiq64b "中国", // xn--fiqs8s "中國", // xn--fiqz9s "娱乐", // xn--fjq720a "谷歌", // xn--flw351e "భారత్", // xn--fpcrj9c3d "ලංකා", // xn--fzc2c9e2c "電訊盈科", // xn--fzys8d69uvgm "购物", // xn--g2xx48c "クラウド", // xn--gckr3f0f "ભારત", // xn--gecrj9c "通販", // xn--gk3at1e "भारत", // xn--h2brj9c "网店", // xn--hxt814e "संगठन", // xn--i1b6b1a6a2e "餐厅", // xn--imr513n "网络", // xn--io0a7i "ком", // xn--j1aef "укр", // xn--j1amh "香港", // xn--j6w193g "诺基亚", // xn--jlq61u9w7b "食品", // xn--jvr189m "飞利浦", // xn--kcrx77d1x4a "台湾", // xn--kprw13d "台灣", // xn--kpry57d "手表", // xn--kpu716f "手机", // xn--kput3i "мон", // xn--l1acc "الجزائر", // xn--lgbbat1ad8j "عمان", // xn--mgb9awbf "ارامكو", // xn--mgba3a3ejt "ایران", // xn--mgba3a4f16a "العليان", // xn--mgba7c0bbn0a "امارات", // xn--mgbaam7a8h "بازار", // xn--mgbab2bd "الاردن", // xn--mgbayh7gpa "موبايلي", // xn--mgbb9fbpob "بھارت", // xn--mgbbh1a71e "المغرب", // xn--mgbc0a9azcg "ابوظبي", // xn--mgbca7dzdo "السعودية", // xn--mgberp4a5d4ar "سودان", // xn--mgbpl2fh "همراه", // xn--mgbt3dhd "عراق", // xn--mgbtx2b "مليسيا", // xn--mgbx4cd0ab "澳門", // xn--mix891f "닷컴", // xn--mk1bu44c "政府", // xn--mxtq1m "شبكة", // xn--ngbc5azd "بيتك", // xn--ngbe9e0a "გე", // xn--node "机构", // xn--nqv7f "组织机构", // xn--nqv7fs00ema "健康", // xn--nyqy26a "ไทย", // xn--o3cw4h "سورية", // xn--ogbpf8fl "рус", // xn--p1acf "рф", // xn--p1ai "珠宝", // xn--pbt977c "تونس", // xn--pgbs0dh "大拿", // xn--pssy2u "みんな", // xn--q9jyb4c "グーグル", // xn--qcka1pmc "ελ", // xn--qxam "世界", // xn--rhqv96g "書籍", // xn--rovu88b "ਭਾਰਤ", // xn--s9brj9c "网址", // xn--ses554g "닷넷", // xn--t60b56a "コム", // xn--tckwe "游戏", // xn--unup4y "vermögensberater", // xn--vermgensberater-ctb "vermögensberatung", // xn--vermgensberatung-pwb "企业", // xn--vhquv "信息", // xn--vuq861b "嘉里大酒店", // xn--w4r85el8fhu5dnra "嘉里", // xn--w4rs40l "مصر", // xn--wgbh1c "قطر", // xn--wgbl6a "广东", // xn--xhq521b "இலங்கை", // xn--xkc2al3hye2a "இந்தியா", // xn--xkc2dl3a5ee0h "հայ", // xn--y9a3aq "新加坡", // xn--yfro4i67o "فلسطين", // xn--ygbi2ammx "政务", // xn--zfr164b "xperia", "xxx", "xyz", "yachts", "yahoo", "yamaxun", "yandex", "ye", "yodobashi", "yoga", "yokohama", "you", "youtube", "yt", "yun", "za", "zappos", "zara", "zero", "zip", "zippo", "zm", "zone", "zuerich", "zw"]; /** * This library modifies the diff-patch-match library by Neil Fraser * by removing the patch and match functionality and certain advanced * options in the diff function. The original license is as follows: * * === * * Diff Match and Patch * * Copyright 2006 Google Inc. * http://code.google.com/p/google-diff-match-patch/ * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * The data structure representing a diff is an array of tuples: * [[DIFF_DELETE, 'Hello'], [DIFF_INSERT, 'Goodbye'], [DIFF_EQUAL, ' world.']] * which means: delete 'Hello', add 'Goodbye' and keep ' world.' */ var DIFF_DELETE = -1; var DIFF_INSERT = 1; var DIFF_EQUAL = 0; /** * Find the differences between two texts. Simplifies the problem by stripping * any common prefix or suffix off the texts before diffing. * @param {string} text1 Old string to be diffed. * @param {string} text2 New string to be diffed. * @param {Int} cursor_pos Expected edit position in text1 (optional) * @return {Array} Array of diff tuples. */ function diff_main(text1, text2, cursor_pos) { // Check for equality (speedup). if (text1 == text2) { if (text1) { return [[DIFF_EQUAL, text1]]; } return []; } // Check cursor_pos within bounds if (cursor_pos < 0 || text1.length < cursor_pos) { cursor_pos = null; } // Trim off common prefix (speedup). var commonlength = diff_commonPrefix(text1, text2); var commonprefix = text1.substring(0, commonlength); text1 = text1.substring(commonlength); text2 = text2.substring(commonlength); // Trim off common suffix (speedup). commonlength = diff_commonSuffix(text1, text2); var commonsuffix = text1.substring(text1.length - commonlength); text1 = text1.substring(0, text1.length - commonlength); text2 = text2.substring(0, text2.length - commonlength); // Compute the diff on the middle block. var diffs = diff_compute_(text1, text2); // Restore the prefix and suffix. if (commonprefix) { diffs.unshift([DIFF_EQUAL, commonprefix]); } if (commonsuffix) { diffs.push([DIFF_EQUAL, commonsuffix]); } diff_cleanupMerge(diffs); if (cursor_pos != null) { diffs = fix_cursor(diffs, cursor_pos); } return diffs; } /** * Find the differences between two texts. Assumes that the texts do not * have any common prefix or suffix. * @param {string} text1 Old string to be diffed. * @param {string} text2 New string to be diffed. * @return {Array} Array of diff tuples. */ function diff_compute_(text1, text2) { var diffs; if (!text1) { // Just add some text (speedup). return [[DIFF_INSERT, text2]]; } if (!text2) { // Just delete some text (speedup). return [[DIFF_DELETE, text1]]; } var longtext = text1.length > text2.length ? text1 : text2; var shorttext = text1.length > text2.length ? text2 : text1; var i = longtext.indexOf(shorttext); if (i != -1) { // Shorter text is inside the longer text (speedup). diffs = [[DIFF_INSERT, longtext.substring(0, i)], [DIFF_EQUAL, shorttext], [DIFF_INSERT, longtext.substring(i + shorttext.length)]]; // Swap insertions for deletions if diff is reversed. if (text1.length > text2.length) { diffs[0][0] = diffs[2][0] = DIFF_DELETE; } return diffs; } if (shorttext.length == 1) { // Single character string. // After the previous speedup, the character can't be an equality. return [[DIFF_DELETE, text1], [DIFF_INSERT, text2]]; } // Check to see if the problem can be split in two. var hm = diff_halfMatch_(text1, text2); if (hm) { // A half-match was found, sort out the return data. var text1_a = hm[0]; var text1_b = hm[1]; var text2_a = hm[2]; var text2_b = hm[3]; var mid_common = hm[4]; // Send both pairs off for separate processing. var diffs_a = diff_main(text1_a, text2_a); var diffs_b = diff_main(text1_b, text2_b); // Merge the results. return diffs_a.concat([[DIFF_EQUAL, mid_common]], diffs_b); } return diff_bisect_(text1, text2); } /** * Find the 'middle snake' of a diff, split the problem in two * and return the recursively constructed diff. * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. * @param {string} text1 Old string to be diffed. * @param {string} text2 New string to be diffed. * @return {Array} Array of diff tuples. * @private */ function diff_bisect_(text1, text2) { // Cache the text lengths to prevent multiple calls. var text1_length = text1.length; var text2_length = text2.length; var max_d = Math.ceil((text1_length + text2_length) / 2); var v_offset = max_d; var v_length = 2 * max_d; var v1 = new Array(v_length); var v2 = new Array(v_length); // Setting all elements to -1 is faster in Chrome & Firefox than mixing // integers and undefined. for (var x = 0; x < v_length; x++) { v1[x] = -1; v2[x] = -1; } v1[v_offset + 1] = 0; v2[v_offset + 1] = 0; var delta = text1_length - text2_length; // If the total number of characters is odd, then the front path will collide // with the reverse path. var front = delta % 2 != 0; // Offsets for start and end of k loop. // Prevents mapping of space beyond the grid. var k1start = 0; var k1end = 0; var k2start = 0; var k2end = 0; for (var d = 0; d < max_d; d++) { // Walk the front path one step. for (var k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { var k1_offset = v_offset + k1; var x1; if (k1 == -d || k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]) { x1 = v1[k1_offset + 1]; } else { x1 = v1[k1_offset - 1] + 1; } var y1 = x1 - k1; while (x1 < text1_length && y1 < text2_length && text1.charAt(x1) == text2.charAt(y1)) { x1++; y1++; } v1[k1_offset] = x1; if (x1 > text1_length) { // Ran off the right of the graph. k1end += 2; } else if (y1 > text2_length) { // Ran off the bottom of the graph. k1start += 2; } else if (front) { var k2_offset = v_offset + delta - k1; if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { // Mirror x2 onto top-left coordinate system. var x2 = text1_length - v2[k2_offset]; if (x1 >= x2) { // Overlap detected. return diff_bisectSplit_(text1, text2, x1, y1); } } } } // Walk the reverse path one step. for (var k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { var k2_offset = v_offset + k2; var x2; if (k2 == -d || k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) { x2 = v2[k2_offset + 1]; } else { x2 = v2[k2_offset - 1] + 1; } var y2 = x2 - k2; while (x2 < text1_length && y2 < text2_length && text1.charAt(text1_length - x2 - 1) == text2.charAt(text2_length - y2 - 1)) { x2++; y2++; } v2[k2_offset] = x2; if (x2 > text1_length) { // Ran off the left of the graph. k2end += 2; } else if (y2 > text2_length) { // Ran off the top of the graph. k2start += 2; } else if (!front) { var k1_offset = v_offset + delta - k2; if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { var x1 = v1[k1_offset]; var y1 = v_offset + x1 - k1_offset; // Mirror x2 onto top-left coordinate system. x2 = text1_length - x2; if (x1 >= x2) { // Overlap detected. return diff_bisectSplit_(text1, text2, x1, y1); } } } } } // Diff took too long and hit the deadline or // number of diffs equals number of characters, no commonality at all. return [[DIFF_DELETE, text1], [DIFF_INSERT, text2]]; } /** * Given the location of the 'middle snake', split the diff in two parts * and recurse. * @param {string} text1 Old string to be diffed. * @param {string} text2 New string to be diffed. * @param {number} x Index of split point in text1. * @param {number} y Index of split point in text2. * @return {Array} Array of diff tuples. */ function diff_bisectSplit_(text1, text2, x, y) { var text1a = text1.substring(0, x); var text2a = text2.substring(0, y); var text1b = text1.substring(x); var text2b = text2.substring(y); // Compute both diffs serially. var diffs = diff_main(text1a, text2a); var diffsb = diff_main(text1b, text2b); return diffs.concat(diffsb); } /** * Determine the common prefix of two strings. * @param {string} text1 First string. * @param {string} text2 Second string. * @return {number} The number of characters common to the start of each * string. */ function diff_commonPrefix(text1, text2) { // Quick check for common null cases. if (!text1 || !text2 || text1.charAt(0) != text2.charAt(0)) { return 0; } // Binary search. // Performance analysis: http://neil.fraser.name/news/2007/10/09/ var pointermin = 0; var pointermax = Math.min(text1.length, text2.length); var pointermid = pointermax; var pointerstart = 0; while (pointermin < pointermid) { if (text1.substring(pointerstart, pointermid) == text2.substring(pointerstart, pointermid)) { pointermin = pointermid; pointerstart = pointermin; } else { pointermax = pointermid; } pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin); } return pointermid; } /** * Determine the common suffix of two strings. * @param {string} text1 First string. * @param {string} text2 Second string. * @return {number} The number of characters common to the end of each string. */ function diff_commonSuffix(text1, text2) { // Quick check for common null cases. if (!text1 || !text2 || text1.charAt(text1.length - 1) != text2.charAt(text2.length - 1)) { return 0; } // Binary search. // Performance analysis: http://neil.fraser.name/news/2007/10/09/ var pointermin = 0; var pointermax = Math.min(text1.length, text2.length);