delta-detect-links
Version:
Detect links in a delta object.
1,196 lines (1,037 loc) • 91.9 kB
JavaScript
'use strict';
var regex = /[\0-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/;
var regex$2 = /[\0-\x1F\x7F-\x9F]/;
var regex$4 = /[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000]/;
var regex$6 = /[!-#%-\*,-/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E44\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDC4B-\uDC4F\uDC5B\uDC5D\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDE60-\uDE6C\uDF3C-\uDF3E]|\uD807[\uDC41-\uDC45\uDC70\uDC71]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]|\uD83A[\uDD5E\uDD5F]/;
var re = function re(opts) {
var re = {};
// Use direct extract instead of `regenerate` to reduse browserified size
re.src_Any = regex.source;
re.src_Cc = regex$2.source;
re.src_Z = regex$4.source;
re.src_P = regex$6.source;
// \p{\Z\P\Cc\CF} (white spaces + control + format + punctuation)
re.src_ZPCc = [re.src_Z, re.src_P, re.src_Cc].join('|');
// \p{\Z\Cc} (white spaces + control)
re.src_ZCc = [re.src_Z, re.src_Cc].join('|');
// All possible word characters (everything without punctuation, spaces & controls)
// Defined via punctuation & spaces to save space
// Should be something like \p{\L\N\S\M} (\w but without `_`)
re.src_pseudo_letter = '(?:(?!>|<|' + re.src_ZPCc + ')' + re.src_Any + ')';
// The same as abothe but without [0-9]
// var src_pseudo_letter_non_d = '(?:(?![0-9]|' + src_ZPCc + ')' + src_Any + ')';
////////////////////////////////////////////////////////////////////////////////
re.src_ip4 = '(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)';
// Prohibit any of "@/[]()" in user/pass to avoid wrong domain fetch.
re.src_auth = '(?:(?:(?!' + re.src_ZCc + '|[@/\\[\\]()]).)+@)?';
re.src_port = '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?';
re.src_host_terminator = '(?=$|>|<|' + re.src_ZPCc + ')(?!-|_|:\\d|\\.-|\\.(?!$|' + re.src_ZPCc + '))';
re.src_path = '(?:' + '[/?#]' + '(?:' + '(?!' + re.src_ZCc + '|[()[\\]{}.,"\'?!\\-<>]).|' + '\\[(?:(?!' + re.src_ZCc + '|\\]).)*\\]|' + '\\((?:(?!' + re.src_ZCc + '|[)]).)*\\)|' + '\\{(?:(?!' + re.src_ZCc + '|[}]).)*\\}|' + '\\"(?:(?!' + re.src_ZCc + '|["]).)+\\"|' + "\\'(?:(?!" + re.src_ZCc + "|[']).)+\\'|" + "\\'(?=" + re.src_pseudo_letter + '|[-]).|' + // allow `I'm_king` if no pair found
'\\.{2,3}[a-zA-Z0-9%/]|' + // github has ... in commit range links. Restrict to
// - english
// - percent-encoded
// - parts of file path
// until more examples found.
'\\.(?!' + re.src_ZCc + '|[.]).|' + (opts && opts['---'] ? '\\-(?!--(?:[^-]|$))(?:-*)|' // `---` => long dash, terminate
: '\\-+|') + '\\,(?!' + re.src_ZCc + ').|' + // allow `,,,` in paths
'\\!(?!' + re.src_ZCc + '|[!]).|' + '\\?(?!' + re.src_ZCc + '|[?]).' + ')+' + '|\\/' + ')?';
re.src_email_name = '[\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]+';
re.src_xn = 'xn--[a-z0-9\\-]{1,59}';
// More to read about domain names
// http://serverfault.com/questions/638260/
re.src_domain_root =
// Allow letters & digits (http://test1)
'(?:' + re.src_xn + '|' + re.src_pseudo_letter + '{1,63}' + ')';
re.src_domain = '(?:' + re.src_xn + '|' + '(?:' + re.src_pseudo_letter + ')' + '|' +
// don't allow `--` in domain names, because:
// - that can conflict with markdown — / –
// - nobody use those anyway
'(?:' + re.src_pseudo_letter + '(?:-(?!-)|' + re.src_pseudo_letter + '){0,61}' + re.src_pseudo_letter + ')' + ')';
re.src_host = '(?:' +
// Don't need IP check, because digits are already allowed in normal domain names
// src_ip4 +
// '|' +
'(?:(?:(?:' + re.src_domain + ')\\.)*' + re.src_domain /*_root*/ + ')' + ')';
re.tpl_host_fuzzy = '(?:' + re.src_ip4 + '|' + '(?:(?:(?:' + re.src_domain + ')\\.)+(?:%TLDS%))' + ')';
re.tpl_host_no_ip_fuzzy = '(?:(?:(?:' + re.src_domain + ')\\.)+(?:%TLDS%))';
re.src_host_strict = re.src_host + re.src_host_terminator;
re.tpl_host_fuzzy_strict = re.tpl_host_fuzzy + re.src_host_terminator;
re.src_host_port_strict = re.src_host + re.src_port + re.src_host_terminator;
re.tpl_host_port_fuzzy_strict = re.tpl_host_fuzzy + re.src_port + re.src_host_terminator;
re.tpl_host_port_no_ip_fuzzy_strict = re.tpl_host_no_ip_fuzzy + re.src_port + re.src_host_terminator;
////////////////////////////////////////////////////////////////////////////////
// Main rules
// Rude test fuzzy links by host, for quick deny
re.tpl_host_fuzzy_test = 'localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' + re.src_ZPCc + '|>|$))';
re.tpl_email_fuzzy = '(^|<|>|\\(|' + re.src_ZCc + ')(' + re.src_email_name + '@' + re.tpl_host_fuzzy_strict + ')';
re.tpl_link_fuzzy =
// Fuzzy link can't be prepended with .:/\- and non punctuation.
// but can start with > (markdown blockquote)
'(^|(?![.:/\\-_@])(?:[$+<=>^`|]|' + re.src_ZPCc + '))' + '((?![$+<=>^`|])' + re.tpl_host_port_fuzzy_strict + re.src_path + ')';
re.tpl_link_no_ip_fuzzy =
// Fuzzy link can't be prepended with .:/\- and non punctuation.
// but can start with > (markdown blockquote)
'(^|(?![.:/\\-_@])(?:[$+<=>^`|]|' + re.src_ZPCc + '))' + '((?![$+<=>^`|])' + re.tpl_host_port_no_ip_fuzzy_strict + re.src_path + ')';
return re;
};
////////////////////////////////////////////////////////////////////////////////
// Helpers
// Merge objects
//
function assign(obj /*from1, from2, from3, ...*/) {
var sources = Array.prototype.slice.call(arguments, 1);
sources.forEach(function (source) {
if (!source) {
return;
}
Object.keys(source).forEach(function (key) {
obj[key] = source[key];
});
});
return obj;
}
function _class(obj) {
return Object.prototype.toString.call(obj);
}
function isString(obj) {
return _class(obj) === '[object String]';
}
function isObject(obj) {
return _class(obj) === '[object Object]';
}
function isRegExp(obj) {
return _class(obj) === '[object RegExp]';
}
function isFunction(obj) {
return _class(obj) === '[object Function]';
}
function escapeRE(str) {
return str.replace(/[.?*+^$[\]\\(){}|-]/g, '\\$&');
}
////////////////////////////////////////////////////////////////////////////////
var defaultOptions = {
fuzzyLink: true,
fuzzyEmail: true,
fuzzyIP: false
};
function isOptionsObj(obj) {
return Object.keys(obj || {}).reduce(function (acc, k) {
return acc || defaultOptions.hasOwnProperty(k);
}, false);
}
var defaultSchemas = {
'http:': {
validate: function validate(text, pos, self) {
var tail = text.slice(pos);
if (!self.re.http) {
// compile lazily, because "host"-containing variables can change on tlds update.
self.re.http = new RegExp('^\\/\\/' + self.re.src_auth + self.re.src_host_port_strict + self.re.src_path, 'i');
}
if (self.re.http.test(tail)) {
return tail.match(self.re.http)[0].length;
}
return 0;
}
},
'https:': 'http:',
'ftp:': 'http:',
'//': {
validate: function validate(text, pos, self) {
var tail = text.slice(pos);
if (!self.re.no_http) {
// compile lazily, because "host"-containing variables can change on tlds update.
self.re.no_http = new RegExp('^' + self.re.src_auth +
// Don't allow single-level domains, because of false positives like '//test'
// with code comments
'(?:localhost|(?:(?:' + self.re.src_domain + ')\\.)+' + self.re.src_domain_root + ')' + self.re.src_port + self.re.src_host_terminator + self.re.src_path, 'i');
}
if (self.re.no_http.test(tail)) {
// should not be `://` & `///`, that protects from errors in protocol name
if (pos >= 3 && text[pos - 3] === ':') {
return 0;
}
if (pos >= 3 && text[pos - 3] === '/') {
return 0;
}
return tail.match(self.re.no_http)[0].length;
}
return 0;
}
},
'mailto:': {
validate: function validate(text, pos, self) {
var tail = text.slice(pos);
if (!self.re.mailto) {
self.re.mailto = new RegExp('^' + self.re.src_email_name + '@' + self.re.src_host_strict, 'i');
}
if (self.re.mailto.test(tail)) {
return tail.match(self.re.mailto)[0].length;
}
return 0;
}
}
};
/*eslint-disable max-len*/
// RE pattern for 2-character tlds (autogenerated by ./support/tlds_2char_gen.js)
var tlds_2ch_src_re = 'a[cdefgilmnoqrstuwxz]|b[abdefghijmnorstvwyz]|c[acdfghiklmnoruvwxyz]|d[ejkmoz]|e[cegrstu]|f[ijkmor]|g[abdefghilmnpqrstuwy]|h[kmnrtu]|i[delmnoqrst]|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]|m[acdeghklmnopqrstuvwxyz]|n[acefgilopruz]|om|p[aefghklmnrstwy]|qa|r[eosuw]|s[abcdeghijklmnortuvxyz]|t[cdfghjklmnortvwz]|u[agksyz]|v[aceginu]|w[fs]|y[et]|z[amw]';
// DON'T try to make PRs with changes. Extend TLDs with LinkifyIt.tlds() instead
var tlds_default = 'biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф'.split('|');
/*eslint-enable max-len*/
////////////////////////////////////////////////////////////////////////////////
function resetScanCache(self) {
self.__index__ = -1;
self.__text_cache__ = '';
}
function createValidator(re$$1) {
return function (text, pos) {
var tail = text.slice(pos);
if (re$$1.test(tail)) {
return tail.match(re$$1)[0].length;
}
return 0;
};
}
function createNormalizer() {
return function (match, self) {
self.normalize(match);
};
}
// Schemas compiler. Build regexps.
//
function compile(self) {
// Load & clone RE patterns.
var re$$1 = self.re = re(self.__opts__);
// Define dynamic patterns
var tlds = self.__tlds__.slice();
self.onCompile();
if (!self.__tlds_replaced__) {
tlds.push(tlds_2ch_src_re);
}
tlds.push(re$$1.src_xn);
re$$1.src_tlds = tlds.join('|');
function untpl(tpl) {
return tpl.replace('%TLDS%', re$$1.src_tlds);
}
re$$1.email_fuzzy = RegExp(untpl(re$$1.tpl_email_fuzzy), 'i');
re$$1.link_fuzzy = RegExp(untpl(re$$1.tpl_link_fuzzy), 'i');
re$$1.link_no_ip_fuzzy = RegExp(untpl(re$$1.tpl_link_no_ip_fuzzy), 'i');
re$$1.host_fuzzy_test = RegExp(untpl(re$$1.tpl_host_fuzzy_test), 'i');
//
// Compile each schema
//
var aliases = [];
self.__compiled__ = {}; // Reset compiled data
function schemaError(name, val) {
throw new Error('(LinkifyIt) Invalid schema "' + name + '": ' + val);
}
Object.keys(self.__schemas__).forEach(function (name) {
var val = self.__schemas__[name];
// skip disabled methods
if (val === null) {
return;
}
var compiled = { validate: null, link: null };
self.__compiled__[name] = compiled;
if (isObject(val)) {
if (isRegExp(val.validate)) {
compiled.validate = createValidator(val.validate);
} else if (isFunction(val.validate)) {
compiled.validate = val.validate;
} else {
schemaError(name, val);
}
if (isFunction(val.normalize)) {
compiled.normalize = val.normalize;
} else if (!val.normalize) {
compiled.normalize = createNormalizer();
} else {
schemaError(name, val);
}
return;
}
if (isString(val)) {
aliases.push(name);
return;
}
schemaError(name, val);
});
//
// Compile postponed aliases
//
aliases.forEach(function (alias) {
if (!self.__compiled__[self.__schemas__[alias]]) {
// Silently fail on missed schemas to avoid errons on disable.
// schemaError(alias, self.__schemas__[alias]);
return;
}
self.__compiled__[alias].validate = self.__compiled__[self.__schemas__[alias]].validate;
self.__compiled__[alias].normalize = self.__compiled__[self.__schemas__[alias]].normalize;
});
//
// Fake record for guessed links
//
self.__compiled__[''] = { validate: null, normalize: createNormalizer() };
//
// Build schema condition
//
var slist = Object.keys(self.__compiled__).filter(function (name) {
// Filter disabled & fake schemas
return name.length > 0 && self.__compiled__[name];
}).map(escapeRE).join('|');
// (?!_) cause 1.5x slowdown
self.re.schema_test = RegExp('(^|(?!_)(?:[><]|' + re$$1.src_ZPCc + '))(' + slist + ')', 'i');
self.re.schema_search = RegExp('(^|(?!_)(?:[><]|' + re$$1.src_ZPCc + '))(' + slist + ')', 'ig');
self.re.pretest = RegExp('(' + self.re.schema_test.source + ')|' + '(' + self.re.host_fuzzy_test.source + ')|' + '@', 'i');
//
// Cleanup
//
resetScanCache(self);
}
/**
* class Match
*
* Match result. Single element of array, returned by [[LinkifyIt#match]]
**/
function Match(self, shift) {
var start = self.__index__,
end = self.__last_index__,
text = self.__text_cache__.slice(start, end);
/**
* Match#schema -> String
*
* Prefix (protocol) for matched string.
**/
this.schema = self.__schema__.toLowerCase();
/**
* Match#index -> Number
*
* First position of matched string.
**/
this.index = start + shift;
/**
* Match#lastIndex -> Number
*
* Next position after matched string.
**/
this.lastIndex = end + shift;
/**
* Match#raw -> String
*
* Matched string.
**/
this.raw = text;
/**
* Match#text -> String
*
* Notmalized text of matched string.
**/
this.text = text;
/**
* Match#url -> String
*
* Normalized url of matched string.
**/
this.url = text;
}
function createMatch(self, shift) {
var match = new Match(self, shift);
self.__compiled__[match.schema].normalize(match, self);
return match;
}
/**
* class LinkifyIt
**/
/**
* new LinkifyIt(schemas, options)
* - schemas (Object): Optional. Additional schemas to validate (prefix/validator)
* - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
*
* Creates new linkifier instance with optional additional schemas.
* Can be called without `new` keyword for convenience.
*
* By default understands:
*
* - `http(s)://...` , `ftp://...`, `mailto:...` & `//...` links
* - "fuzzy" links and emails (example.com, foo@bar.com).
*
* `schemas` is an object, where each key/value describes protocol/rule:
*
* - __key__ - link prefix (usually, protocol name with `:` at the end, `skype:`
* for example). `linkify-it` makes shure that prefix is not preceeded with
* alphanumeric char and symbols. Only whitespaces and punctuation allowed.
* - __value__ - rule to check tail after link prefix
* - _String_ - just alias to existing rule
* - _Object_
* - _validate_ - validator function (should return matched length on success),
* or `RegExp`.
* - _normalize_ - optional function to normalize text & url of matched result
* (for example, for @twitter mentions).
*
* `options`:
*
* - __fuzzyLink__ - recognige URL-s without `http(s):` prefix. Default `true`.
* - __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts
* like version numbers. Default `false`.
* - __fuzzyEmail__ - recognize emails without `mailto:` prefix.
*
**/
function LinkifyIt(schemas, options) {
if (!(this instanceof LinkifyIt)) {
return new LinkifyIt(schemas, options);
}
if (!options) {
if (isOptionsObj(schemas)) {
options = schemas;
schemas = {};
}
}
this.__opts__ = assign({}, defaultOptions, options);
// Cache last tested result. Used to skip repeating steps on next `match` call.
this.__index__ = -1;
this.__last_index__ = -1; // Next scan position
this.__schema__ = '';
this.__text_cache__ = '';
this.__schemas__ = assign({}, defaultSchemas, schemas);
this.__compiled__ = {};
this.__tlds__ = tlds_default;
this.__tlds_replaced__ = false;
this.re = {};
compile(this);
}
/** chainable
* LinkifyIt#add(schema, definition)
* - schema (String): rule name (fixed pattern prefix)
* - definition (String|RegExp|Object): schema definition
*
* Add new rule definition. See constructor description for details.
**/
LinkifyIt.prototype.add = function add(schema, definition) {
this.__schemas__[schema] = definition;
compile(this);
return this;
};
/** chainable
* LinkifyIt#set(options)
* - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
*
* Set recognition options for links without schema.
**/
LinkifyIt.prototype.set = function set(options) {
this.__opts__ = assign(this.__opts__, options);
return this;
};
/**
* LinkifyIt#test(text) -> Boolean
*
* Searches linkifiable pattern and returns `true` on success or `false` on fail.
**/
LinkifyIt.prototype.test = function test(text) {
// Reset scan cache
this.__text_cache__ = text;
this.__index__ = -1;
if (!text.length) {
return false;
}
var m, ml, me, len, shift, next, re$$1, tld_pos, at_pos;
// try to scan for link with schema - that's the most simple rule
if (this.re.schema_test.test(text)) {
re$$1 = this.re.schema_search;
re$$1.lastIndex = 0;
while ((m = re$$1.exec(text)) !== null) {
len = this.testSchemaAt(text, m[2], re$$1.lastIndex);
if (len) {
this.__schema__ = m[2];
this.__index__ = m.index + m[1].length;
this.__last_index__ = m.index + m[0].length + len;
break;
}
}
}
if (this.__opts__.fuzzyLink && this.__compiled__['http:']) {
// guess schemaless links
tld_pos = text.search(this.re.host_fuzzy_test);
if (tld_pos >= 0) {
// if tld is located after found link - no need to check fuzzy pattern
if (this.__index__ < 0 || tld_pos < this.__index__) {
if ((ml = text.match(this.__opts__.fuzzyIP ? this.re.link_fuzzy : this.re.link_no_ip_fuzzy)) !== null) {
shift = ml.index + ml[1].length;
if (this.__index__ < 0 || shift < this.__index__) {
this.__schema__ = '';
this.__index__ = shift;
this.__last_index__ = ml.index + ml[0].length;
}
}
}
}
}
if (this.__opts__.fuzzyEmail && this.__compiled__['mailto:']) {
// guess schemaless emails
at_pos = text.indexOf('@');
if (at_pos >= 0) {
// We can't skip this check, because this cases are possible:
// 192.168.1.1@gmail.com, my.in@example.com
if ((me = text.match(this.re.email_fuzzy)) !== null) {
shift = me.index + me[1].length;
next = me.index + me[0].length;
if (this.__index__ < 0 || shift < this.__index__ || shift === this.__index__ && next > this.__last_index__) {
this.__schema__ = 'mailto:';
this.__index__ = shift;
this.__last_index__ = next;
}
}
}
}
return this.__index__ >= 0;
};
/**
* LinkifyIt#pretest(text) -> Boolean
*
* Very quick check, that can give false positives. Returns true if link MAY BE
* can exists. Can be used for speed optimization, when you need to check that
* link NOT exists.
**/
LinkifyIt.prototype.pretest = function pretest(text) {
return this.re.pretest.test(text);
};
/**
* LinkifyIt#testSchemaAt(text, name, position) -> Number
* - text (String): text to scan
* - name (String): rule (schema) name
* - position (Number): text offset to check from
*
* Similar to [[LinkifyIt#test]] but checks only specific protocol tail exactly
* at given position. Returns length of found pattern (0 on fail).
**/
LinkifyIt.prototype.testSchemaAt = function testSchemaAt(text, schema, pos) {
// If not supported schema check requested - terminate
if (!this.__compiled__[schema.toLowerCase()]) {
return 0;
}
return this.__compiled__[schema.toLowerCase()].validate(text, pos, this);
};
/**
* LinkifyIt#match(text) -> Array|null
*
* Returns array of found link descriptions or `null` on fail. We strongly
* recommend to use [[LinkifyIt#test]] first, for best speed.
*
* ##### Result match description
*
* - __schema__ - link schema, can be empty for fuzzy links, or `//` for
* protocol-neutral links.
* - __index__ - offset of matched text
* - __lastIndex__ - index of next char after mathch end
* - __raw__ - matched text
* - __text__ - normalized text
* - __url__ - link, generated from matched text
**/
LinkifyIt.prototype.match = function match(text) {
var shift = 0,
result = [];
// Try to take previous element from cache, if .test() called before
if (this.__index__ >= 0 && this.__text_cache__ === text) {
result.push(createMatch(this, shift));
shift = this.__last_index__;
}
// Cut head if cache was used
var tail = shift ? text.slice(shift) : text;
// Scan string until end reached
while (this.test(tail)) {
result.push(createMatch(this, shift));
tail = tail.slice(this.__last_index__);
shift += this.__last_index__;
}
if (result.length) {
return result;
}
return null;
};
/** chainable
* LinkifyIt#tlds(list [, keepOld]) -> this
* - list (Array): list of tlds
* - keepOld (Boolean): merge with current list if `true` (`false` by default)
*
* Load (or merge) new tlds list. Those are user for fuzzy links (without prefix)
* to avoid false positives. By default this algorythm used:
*
* - hostname with any 2-letter root zones are ok.
* - biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф
* are ok.
* - encoded (`xn--...`) root zones are ok.
*
* If list is replaced, then exact match for 2-chars root zones will be checked.
**/
LinkifyIt.prototype.tlds = function tlds(list, keepOld) {
list = Array.isArray(list) ? list : [list];
if (!keepOld) {
this.__tlds__ = list.slice();
this.__tlds_replaced__ = true;
compile(this);
return this;
}
this.__tlds__ = this.__tlds__.concat(list).sort().filter(function (el, idx, arr) {
return el !== arr[idx - 1];
}).reverse();
compile(this);
return this;
};
/**
* LinkifyIt#normalize(match)
*
* Default normalizer (if schema does not define it's own).
**/
LinkifyIt.prototype.normalize = function normalize(match) {
// Do minimal possible changes by default. Need to collect feedback prior
// to move forward https://github.com/markdown-it/linkify-it/issues/1
if (!match.schema) {
match.url = 'http://' + match.url;
}
if (match.schema === 'mailto:' && !/^mailto:/i.test(match.url)) {
match.url = 'mailto:' + match.url;
}
};
/**
* LinkifyIt#onCompile()
*
* Override to modify basic RegExp-s.
**/
LinkifyIt.prototype.onCompile = function onCompile() {};
var index$1 = LinkifyIt;
var index$2 = ["aaa", "aarp", "abarth", "abb", "abbott", "abbvie", "abc", "able", "abogado", "abudhabi", "ac", "academy", "accenture", "accountant", "accountants", "aco", "active", "actor", "ad", "adac", "ads", "adult", "ae", "aeg", "aero", "aetna", "af", "afamilycompany", "afl", "ag", "agakhan", "agency", "ai", "aig", "aigo", "airbus", "airforce", "airtel", "akdn", "al", "alfaromeo", "alibaba", "alipay", "allfinanz", "allstate", "ally", "alsace", "alstom", "am", "americanexpress", "americanfamily", "amex", "amfam", "amica", "amsterdam", "analytics", "android", "anquan", "anz", "ao", "apartments", "app", "apple", "aq", "aquarelle", "ar", "aramco", "archi", "army", "arpa", "art", "arte", "as", "asda", "asia", "associates", "at", "athleta", "attorney", "au", "auction", "audi", "audible", "audio", "auspost", "author", "auto", "autos", "avianca", "aw", "aws", "ax", "axa", "az", "azure", "ba", "baby", "baidu", "banamex", "bananarepublic", "band", "bank", "bar", "barcelona", "barclaycard", "barclays", "barefoot", "bargains", "baseball", "basketball", "bauhaus", "bayern", "bb", "bbc", "bbt", "bbva", "bcg", "bcn", "bd", "be", "beats", "beauty", "beer", "bentley", "berlin", "best", "bestbuy", "bet", "bf", "bg", "bh", "bharti", "bi", "bible", "bid", "bike", "bing", "bingo", "bio", "biz", "bj", "black", "blackfriday", "blanco", "blockbuster", "blog", "bloomberg", "blue", "bm", "bms", "bmw", "bn", "bnl", "bnpparibas", "bo", "boats", "boehringer", "bofa", "bom", "bond", "boo", "book", "booking", "boots", "bosch", "bostik", "bot", "boutique", "br", "bradesco", "bridgestone", "broadway", "broker", "brother", "brussels", "bs", "bt", "budapest", "bugatti", "build", "builders", "business", "buy", "buzz", "bv", "bw", "by", "bz", "bzh", "ca", "cab", "cafe", "cal", "call", "calvinklein", "cam", "camera", "camp", "cancerresearch", "canon", "capetown", "capital", "capitalone", "car", "caravan", "cards", "care", "career", "careers", "cars", "cartier", "casa", "case", "caseih", "cash", "casino", "cat", "catering", "cba", "cbn", "cbre", "cbs", "cc", "cd", "ceb", "center", "ceo", "cern", "cf", "cfa", "cfd", "cg", "ch", "chanel", "channel", "chase", "chat", "cheap", "chintai", "chloe", "christmas", "chrome", "chrysler", "church", "ci", "cipriani", "circle", "cisco", "citadel", "citi", "citic", "city", "cityeats", "ck", "cl", "claims", "cleaning", "click", "clinic", "clinique", "clothing", "cloud", "club", "clubmed", "cm", "cn", "co", "coach", "codes", "coffee", "college", "cologne", "com", "comcast", "commbank", "community", "company", "compare", "computer", "comsec", "condos", "construction", "consulting", "contact", "contractors", "cooking", "cookingchannel", "cool", "coop", "corsica", "country", "coupon", "coupons", "courses", "cr", "credit", "creditcard", "creditunion", "cricket", "crown", "crs", "cruises", "csc", "cu", "cuisinella", "cv", "cw", "cx", "cy", "cymru", "cyou", "cz", "dabur", "dad", "dance", "date", "dating", "datsun", "day", "dclk", "dds", "de", "deal", "dealer", "deals", "degree", "delivery", "dell", "deloitte", "delta", "democrat", "dental", "dentist", "desi", "design", "dev", "dhl", "diamonds", "diet", "digital", "direct", "directory", "discount", "discover", "dish", "diy", "dj", "dk", "dm", "dnp", "do", "docs", "doctor", "dodge", "dog", "doha", "domains", "dot", "download", "drive", "dtv", "dubai", "duck", "dunlop", "duns", "dupont", "durban", "dvag", "dvr", "dz", "earth", "eat", "ec", "eco", "edeka", "edu", "education", "ee", "eg", "email", "emerck", "energy", "engineer", "engineering", "enterprises", "epost", "epson", "equipment", "er", "ericsson", "erni", "es", "esq", "estate", "esurance", "et", "eu", "eurovision", "eus", "events", "everbank", "exchange", "expert", "exposed", "express", "extraspace", "fage", "fail", "fairwinds", "faith", "family", "fan", "fans", "farm", "farmers", "fashion", "fast", "fedex", "feedback", "ferrari", "ferrero", "fi", "fiat", "fidelity", "fido", "film", "final", "finance", "financial", "fire", "firestone", "firmdale", "fish", "fishing", "fit", "fitness", "fj", "fk", "flickr", "flights", "flir", "florist", "flowers", "fly", "fm", "fo", "foo", "foodnetwork", "football", "ford", "forex", "forsale", "forum", "foundation", "fox", "fr", "fresenius", "frl", "frogans", "frontdoor", "frontier", "ftr", "fujitsu", "fujixerox", "fund", "furniture", "futbol", "fyi", "ga", "gal", "gallery", "gallo", "gallup", "game", "games", "gap", "garden", "gb", "gbiz", "gd", "gdn", "ge", "gea", "gent", "genting", "george", "gf", "gg", "ggee", "gh", "gi", "gift", "gifts", "gives", "giving", "gl", "glade", "glass", "gle", "global", "globo", "gm", "gmail", "gmbh", "gmo", "gmx", "gn", "godaddy", "gold", "goldpoint", "golf", "goo", "goodhands", "goodyear", "goog", "google", "gop", "got", "gov", "gp", "gq", "gr", "grainger", "graphics", "gratis", "green", "gripe", "group", "gs", "gt", "gu", "guardian", "gucci", "guge", "guide", "guitars", "guru", "gw", "gy", "hamburg", "hangout", "haus", "hbo", "hdfc", "hdfcbank", "health", "healthcare", "help", "helsinki", "here", "hermes", "hgtv", "hiphop", "hisamitsu", "hitachi", "hiv", "hk", "hkt", "hm", "hn", "hockey", "holdings", "holiday", "homedepot", "homegoods", "homes", "homesense", "honda", "honeywell", "horse", "host", "hosting", "hot", "hoteles", "hotmail", "house", "how", "hr", "hsbc", "ht", "htc", "hu", "hughes", "hyatt", "hyundai", "ibm", "icbc", "ice", "icu", "id", "ie", "ieee", "ifm", "iinet", "ikano", "il", "im", "imamat", "imdb", "immo", "immobilien", "in", "industries", "infiniti", "info", "ing", "ink", "institute", "insurance", "insure", "int", "intel", "international", "intuit", "investments", "io", "ipiranga", "iq", "ir", "irish", "is", "iselect", "ismaili", "ist", "istanbul", "it", "itau", "itv", "iveco", "iwc", "jaguar", "java", "jcb", "jcp", "je", "jeep", "jetzt", "jewelry", "jlc", "jll", "jm", "jmp", "jnj", "jo", "jobs", "joburg", "jot", "joy", "jp", "jpmorgan", "jprs", "juegos", "juniper", "kaufen", "kddi", "ke", "kerryhotels", "kerrylogistics", "kerryproperties", "kfh", "kg", "kh", "ki", "kia", "kim", "kinder", "kindle", "kitchen", "kiwi", "km", "kn", "koeln", "komatsu", "kosher", "kp", "kpmg", "kpn", "kr", "krd", "kred", "kuokgroup", "kw", "ky", "kyoto", "kz", "la", "lacaixa", "ladbrokes", "lamborghini", "lamer", "lancaster", "lancia", "lancome", "land", "landrover", "lanxess", "lasalle", "lat", "latino", "latrobe", "law", "lawyer", "lb", "lc", "lds", "lease", "leclerc", "lefrak", "legal", "lego", "lexus", "lgbt", "li", "liaison", "lidl", "life", "lifeinsurance", "lifestyle", "lighting", "like", "lilly", "limited", "limo", "lincoln", "linde", "link", "lipsy", "live", "living", "lixil", "lk", "loan", "loans", "locker", "locus", "loft", "lol", "london", "lotte", "lotto", "love", "lpl", "lplfinancial", "lr", "ls", "lt", "ltd", "ltda", "lu", "lundbeck", "lupin", "luxe", "luxury", "lv", "ly", "ma", "macys", "madrid", "maif", "maison", "makeup", "man", "management", "mango", "market", "marketing", "markets", "marriott", "marshalls", "maserati", "mattel", "mba", "mc", "mcd", "mcdonalds", "mckinsey", "md", "me", "med", "media", "meet", "melbourne", "meme", "memorial", "men", "menu", "meo", "metlife", "mg", "mh", "miami", "microsoft", "mil", "mini", "mint", "mit", "mitsubishi", "mk", "ml", "mlb", "mls", "mm", "mma", "mn", "mo", "mobi", "mobily", "moda", "moe", "moi", "mom", "monash", "money", "monster", "montblanc", "mopar", "mormon", "mortgage", "moscow", "motorcycles", "mov", "movie", "movistar", "mp", "mq", "mr", "ms", "msd", "mt", "mtn", "mtpc", "mtr", "mu", "museum", "mutual", "mutuelle", "mv", "mw", "mx", "my", "mz", "na", "nab", "nadex", "nagoya", "name", "nationwide", "natura", "navy", "nba", "nc", "ne", "nec", "net", "netbank", "netflix", "network", "neustar", "new", "newholland", "news", "next", "nextdirect", "nexus", "nf", "nfl", "ng", "ngo", "nhk", "ni", "nico", "nike", "nikon", "ninja", "nissan", "nissay", "nl", "no", "nokia", "northwesternmutual", "norton", "now", "nowruz", "nowtv", "np", "nr", "nra", "nrw", "ntt", "nu", "nyc", "nz", "obi", "observer", "off", "office", "okinawa", "olayan", "olayangroup", "oldnavy", "ollo", "om", "omega", "one", "ong", "onl", "online", "onyourside", "ooo", "open", "oracle", "orange", "org", "organic", "orientexpress", "origins", "osaka", "otsuka", "ott", "ovh", "pa", "page", "pamperedchef", "panasonic", "panerai", "paris", "pars", "partners", "parts", "party", "passagens", "pay", "pccw", "pe", "pet", "pf", "pfizer", "pg", "ph", "pharmacy", "philips", "photo", "photography", "photos", "physio", "piaget", "pics", "pictet", "pictures", "pid", "pin", "ping", "pink", "pioneer", "pizza", "pk", "pl", "place", "play", "playstation", "plumbing", "plus", "pm", "pn", "pnc", "pohl", "poker", "politie", "porn", "post", "pr", "pramerica", "praxi", "press", "prime", "pro", "prod", "productions", "prof", "progressive", "promo", "properties", "property", "protection", "pru", "prudential", "ps", "pt", "pub", "pw", "pwc", "py", "qa", "qpon", "quebec", "quest", "qvc", "racing", "radio", "raid", "re", "read", "realestate", "realtor", "realty", "recipes", "red", "redstone", "redumbrella", "rehab", "reise", "reisen", "reit", "ren", "rent", "rentals", "repair", "report", "republican", "rest", "restaurant", "review", "reviews", "rexroth", "rich", "richardli", "ricoh", "rightathome", "rio", "rip", "ro", "rocher", "rocks", "rodeo", "rogers", "room", "rs", "rsvp", "ru", "ruhr", "run", "rw", "rwe", "ryukyu", "sa", "saarland", "safe", "safety", "sakura", "sale", "salon", "samsclub", "samsung", "sandvik", "sandvikcoromant", "sanofi", "sap", "sapo", "sarl", "sas", "save", "saxo", "sb", "sbi", "sbs", "sc", "sca", "scb", "schaeffler", "schmidt", "scholarships", "school", "schule", "schwarz", "science", "scjohnson", "scor", "scot", "sd", "se", "seat", "secure", "security", "seek", "select", "sener", "services", "ses", "seven", "sew", "sex", "sexy", "sfr", "sg", "sh", "shangrila", "sharp", "shaw", "shell", "shia", "shiksha", "shoes", "shop", "shopping", "shouji", "show", "showtime", "shriram", "si", "silk", "sina", "singles", "site", "sj", "sk", "ski", "skin", "sky", "skype", "sl", "sling", "sm", "smart", "smile", "sn", "sncf", "so", "soccer", "social", "softbank", "software", "sohu", "solar", "solutions", "song", "sony", "soy", "space", "spiegel", "spot", "spreadbetting", "sr", "srl", "srt", "st", "stada", "staples", "star", "starhub", "statebank", "statefarm", "statoil", "stc", "stcgroup", "stockholm", "storage", "store", "stream", "studio", "study", "style", "su", "sucks", "supplies", "supply", "support", "surf", "surgery", "suzuki", "sv", "swatch", "swiftcover", "swiss", "sx", "sy", "sydney", "symantec", "systems", "sz", "tab", "taipei", "talk", "taobao", "target", "tatamotors", "tatar", "tattoo", "tax", "taxi", "tc", "tci", "td", "tdk", "team", "tech", "technology", "tel", "telecity", "telefonica", "temasek", "tennis", "teva", "tf", "tg", "th", "thd", "theater", "theatre", "tiaa", "tickets", "tienda", "tiffany", "tips", "tires", "tirol", "tj", "tjmaxx", "tjx", "tk", "tkmaxx", "tl", "tm", "tmall", "tn", "to", "today", "tokyo", "tools", "top", "toray", "toshiba", "total", "tours", "town", "toyota", "toys", "tr", "trade", "trading", "training", "travel", "travelchannel", "travelers", "travelersinsurance", "trust", "trv", "tt", "tube", "tui", "tunes", "tushu", "tv", "tvs", "tw", "tz", "ua", "ubank", "ubs", "uconnect", "ug", "uk", "unicom", "university", "uno", "uol", "ups", "us", "uy", "uz", "va", "vacations", "vana", "vanguard", "vc", "ve", "vegas", "ventures", "verisign", "versicherung", "vet", "vg", "vi", "viajes", "video", "vig", "viking", "villas", "vin", "vip", "virgin", "visa", "vision", "vista", "vistaprint", "viva", "vivo", "vlaanderen", "vn", "vodka", "volkswagen", "volvo", "vote", "voting", "voto", "voyage", "vu", "vuelos", "wales", "walmart", "walter", "wang", "wanggou", "warman", "watch", "watches", "weather", "weatherchannel", "webcam", "weber", "website", "wed", "wedding", "weibo", "weir", "wf", "whoswho", "wien", "wiki", "williamhill", "win", "windows", "wine", "winners", "wme", "wolterskluwer", "woodside", "work", "works", "world", "wow", "ws", "wtc", "wtf", "xbox", "xerox", "xfinity", "xihuan", "xin", "कॉम", // xn--11b4c3d
"セール", // xn--1ck2e1b
"佛山", // xn--1qqw23a
"慈善", // xn--30rr7y
"集团", // xn--3bst00m
"在线", // xn--3ds443g
"한국", // xn--3e0b707e
"大众汽车", // xn--3oq18vl8pn36a
"点看", // xn--3pxu8k
"คอม", // xn--42c2d9a
"ভারত", // xn--45brj9c
"八卦", // xn--45q11c
"موقع", // xn--4gbrim
"বাংলা", // xn--54b7fta0cc
"公益", // xn--55qw42g
"公司", // xn--55qx5d
"香格里拉", // xn--5su34j936bgsg
"网站", // xn--5tzm5g
"移动", // xn--6frz82g
"我爱你", // xn--6qq986b3xl
"москва", // xn--80adxhks
"қаз", // xn--80ao21a
"онлайн", // xn--80asehdb
"сайт", // xn--80aswg
"联通", // xn--8y0a063a
"срб", // xn--90a3ac
"бг", // xn--90ae
"бел", // xn--90ais
"קום", // xn--9dbq2a
"时尚", // xn--9et52u
"微博", // xn--9krt00a
"淡马锡", // xn--b4w605ferd
"ファッション", // xn--bck1b9a5dre4c
"орг", // xn--c1avg
"नेट", // xn--c2br7g
"ストア", // xn--cck2b3b
"삼성", // xn--cg4bki
"சிங்கப்பூர்", // xn--clchc0ea0b2g2a9gcd
"商标", // xn--czr694b
"商店", // xn--czrs0t
"商城", // xn--czru2d
"дети", // xn--d1acj3b
"мкд", // xn--d1alf
"ею", // xn--e1a4c
"ポイント", // xn--eckvdtc9d
"新闻", // xn--efvy88h
"工行", // xn--estv75g
"家電", // xn--fct429k
"كوم", // xn--fhbei
"中文网", // xn--fiq228c5hs
"中信", // xn--fiq64b
"中国", // xn--fiqs8s
"中國", // xn--fiqz9s
"娱乐", // xn--fjq720a
"谷歌", // xn--flw351e
"భారత్", // xn--fpcrj9c3d
"ලංකා", // xn--fzc2c9e2c
"電訊盈科", // xn--fzys8d69uvgm
"购物", // xn--g2xx48c
"クラウド", // xn--gckr3f0f
"ભારત", // xn--gecrj9c
"通販", // xn--gk3at1e
"भारत", // xn--h2brj9c
"网店", // xn--hxt814e
"संगठन", // xn--i1b6b1a6a2e
"餐厅", // xn--imr513n
"网络", // xn--io0a7i
"ком", // xn--j1aef
"укр", // xn--j1amh
"香港", // xn--j6w193g
"诺基亚", // xn--jlq61u9w7b
"食品", // xn--jvr189m
"飞利浦", // xn--kcrx77d1x4a
"台湾", // xn--kprw13d
"台灣", // xn--kpry57d
"手表", // xn--kpu716f
"手机", // xn--kput3i
"мон", // xn--l1acc
"الجزائر", // xn--lgbbat1ad8j
"عمان", // xn--mgb9awbf
"ارامكو", // xn--mgba3a3ejt
"ایران", // xn--mgba3a4f16a
"العليان", // xn--mgba7c0bbn0a
"امارات", // xn--mgbaam7a8h
"بازار", // xn--mgbab2bd
"الاردن", // xn--mgbayh7gpa
"موبايلي", // xn--mgbb9fbpob
"بھارت", // xn--mgbbh1a71e
"المغرب", // xn--mgbc0a9azcg
"ابوظبي", // xn--mgbca7dzdo
"السعودية", // xn--mgberp4a5d4ar
"سودان", // xn--mgbpl2fh
"همراه", // xn--mgbt3dhd
"عراق", // xn--mgbtx2b
"مليسيا", // xn--mgbx4cd0ab
"澳門", // xn--mix891f
"닷컴", // xn--mk1bu44c
"政府", // xn--mxtq1m
"شبكة", // xn--ngbc5azd
"بيتك", // xn--ngbe9e0a
"გე", // xn--node
"机构", // xn--nqv7f
"组织机构", // xn--nqv7fs00ema
"健康", // xn--nyqy26a
"ไทย", // xn--o3cw4h
"سورية", // xn--ogbpf8fl
"рус", // xn--p1acf
"рф", // xn--p1ai
"珠宝", // xn--pbt977c
"تونس", // xn--pgbs0dh
"大拿", // xn--pssy2u
"みんな", // xn--q9jyb4c
"グーグル", // xn--qcka1pmc
"ελ", // xn--qxam
"世界", // xn--rhqv96g
"書籍", // xn--rovu88b
"ਭਾਰਤ", // xn--s9brj9c
"网址", // xn--ses554g
"닷넷", // xn--t60b56a
"コム", // xn--tckwe
"游戏", // xn--unup4y
"vermögensberater", // xn--vermgensberater-ctb
"vermögensberatung", // xn--vermgensberatung-pwb
"企业", // xn--vhquv
"信息", // xn--vuq861b
"嘉里大酒店", // xn--w4r85el8fhu5dnra
"嘉里", // xn--w4rs40l
"مصر", // xn--wgbh1c
"قطر", // xn--wgbl6a
"广东", // xn--xhq521b
"இலங்கை", // xn--xkc2al3hye2a
"இந்தியா", // xn--xkc2dl3a5ee0h
"հայ", // xn--y9a3aq
"新加坡", // xn--yfro4i67o
"فلسطين", // xn--ygbi2ammx
"政务", // xn--zfr164b
"xperia", "xxx", "xyz", "yachts", "yahoo", "yamaxun", "yandex", "ye", "yodobashi", "yoga", "yokohama", "you", "youtube", "yt", "yun", "za", "zappos", "zara", "zero", "zip", "zippo", "zm", "zone", "zuerich", "zw"];
/**
* This library modifies the diff-patch-match library by Neil Fraser
* by removing the patch and match functionality and certain advanced
* options in the diff function. The original license is as follows:
*
* ===
*
* Diff Match and Patch
*
* Copyright 2006 Google Inc.
* http://code.google.com/p/google-diff-match-patch/
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* The data structure representing a diff is an array of tuples:
* [[DIFF_DELETE, 'Hello'], [DIFF_INSERT, 'Goodbye'], [DIFF_EQUAL, ' world.']]
* which means: delete 'Hello', add 'Goodbye' and keep ' world.'
*/
var DIFF_DELETE = -1;
var DIFF_INSERT = 1;
var DIFF_EQUAL = 0;
/**
* Find the differences between two texts. Simplifies the problem by stripping
* any common prefix or suffix off the texts before diffing.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {Int} cursor_pos Expected edit position in text1 (optional)
* @return {Array} Array of diff tuples.
*/
function diff_main(text1, text2, cursor_pos) {
// Check for equality (speedup).
if (text1 == text2) {
if (text1) {
return [[DIFF_EQUAL, text1]];
}
return [];
}
// Check cursor_pos within bounds
if (cursor_pos < 0 || text1.length < cursor_pos) {
cursor_pos = null;
}
// Trim off common prefix (speedup).
var commonlength = diff_commonPrefix(text1, text2);
var commonprefix = text1.substring(0, commonlength);
text1 = text1.substring(commonlength);
text2 = text2.substring(commonlength);
// Trim off common suffix (speedup).
commonlength = diff_commonSuffix(text1, text2);
var commonsuffix = text1.substring(text1.length - commonlength);
text1 = text1.substring(0, text1.length - commonlength);
text2 = text2.substring(0, text2.length - commonlength);
// Compute the diff on the middle block.
var diffs = diff_compute_(text1, text2);
// Restore the prefix and suffix.
if (commonprefix) {
diffs.unshift([DIFF_EQUAL, commonprefix]);
}
if (commonsuffix) {
diffs.push([DIFF_EQUAL, commonsuffix]);
}
diff_cleanupMerge(diffs);
if (cursor_pos != null) {
diffs = fix_cursor(diffs, cursor_pos);
}
return diffs;
}
/**
* Find the differences between two texts. Assumes that the texts do not
* have any common prefix or suffix.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @return {Array} Array of diff tuples.
*/
function diff_compute_(text1, text2) {
var diffs;
if (!text1) {
// Just add some text (speedup).
return [[DIFF_INSERT, text2]];
}
if (!text2) {
// Just delete some text (speedup).
return [[DIFF_DELETE, text1]];
}
var longtext = text1.length > text2.length ? text1 : text2;
var shorttext = text1.length > text2.length ? text2 : text1;
var i = longtext.indexOf(shorttext);
if (i != -1) {
// Shorter text is inside the longer text (speedup).
diffs = [[DIFF_INSERT, longtext.substring(0, i)], [DIFF_EQUAL, shorttext], [DIFF_INSERT, longtext.substring(i + shorttext.length)]];
// Swap insertions for deletions if diff is reversed.
if (text1.length > text2.length) {
diffs[0][0] = diffs[2][0] = DIFF_DELETE;
}
return diffs;
}
if (shorttext.length == 1) {
// Single character string.
// After the previous speedup, the character can't be an equality.
return [[DIFF_DELETE, text1], [DIFF_INSERT, text2]];
}
// Check to see if the problem can be split in two.
var hm = diff_halfMatch_(text1, text2);
if (hm) {
// A half-match was found, sort out the return data.
var text1_a = hm[0];
var text1_b = hm[1];
var text2_a = hm[2];
var text2_b = hm[3];
var mid_common = hm[4];
// Send both pairs off for separate processing.
var diffs_a = diff_main(text1_a, text2_a);
var diffs_b = diff_main(text1_b, text2_b);
// Merge the results.
return diffs_a.concat([[DIFF_EQUAL, mid_common]], diffs_b);
}
return diff_bisect_(text1, text2);
}
/**
* Find the 'middle snake' of a diff, split the problem in two
* and return the recursively constructed diff.
* See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @return {Array} Array of diff tuples.
* @private
*/
function diff_bisect_(text1, text2) {
// Cache the text lengths to prevent multiple calls.
var text1_length = text1.length;
var text2_length = text2.length;
var max_d = Math.ceil((text1_length + text2_length) / 2);
var v_offset = max_d;
var v_length = 2 * max_d;
var v1 = new Array(v_length);
var v2 = new Array(v_length);
// Setting all elements to -1 is faster in Chrome & Firefox than mixing
// integers and undefined.
for (var x = 0; x < v_length; x++) {
v1[x] = -1;
v2[x] = -1;
}
v1[v_offset + 1] = 0;
v2[v_offset + 1] = 0;
var delta = text1_length - text2_length;
// If the total number of characters is odd, then the front path will collide
// with the reverse path.
var front = delta % 2 != 0;
// Offsets for start and end of k loop.
// Prevents mapping of space beyond the grid.
var k1start = 0;
var k1end = 0;
var k2start = 0;
var k2end = 0;
for (var d = 0; d < max_d; d++) {
// Walk the front path one step.
for (var k1 = -d + k1start; k1 <= d - k1end; k1 += 2) {
var k1_offset = v_offset + k1;
var x1;
if (k1 == -d || k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]) {
x1 = v1[k1_offset + 1];
} else {
x1 = v1[k1_offset - 1] + 1;
}
var y1 = x1 - k1;
while (x1 < text1_length && y1 < text2_length && text1.charAt(x1) == text2.charAt(y1)) {
x1++;
y1++;
}
v1[k1_offset] = x1;
if (x1 > text1_length) {
// Ran off the right of the graph.
k1end += 2;
} else if (y1 > text2_length) {
// Ran off the bottom of the graph.
k1start += 2;
} else if (front) {
var k2_offset = v_offset + delta - k1;
if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) {
// Mirror x2 onto top-left coordinate system.
var x2 = text1_length - v2[k2_offset];
if (x1 >= x2) {
// Overlap detected.
return diff_bisectSplit_(text1, text2, x1, y1);
}
}
}
}
// Walk the reverse path one step.
for (var k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
var k2_offset = v_offset + k2;
var x2;
if (k2 == -d || k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) {
x2 = v2[k2_offset + 1];
} else {
x2 = v2[k2_offset - 1] + 1;
}
var y2 = x2 - k2;
while (x2 < text1_length && y2 < text2_length && text1.charAt(text1_length - x2 - 1) == text2.charAt(text2_length - y2 - 1)) {
x2++;
y2++;
}
v2[k2_offset] = x2;
if (x2 > text1_length) {
// Ran off the left of the graph.
k2end += 2;
} else if (y2 > text2_length) {
// Ran off the top of the graph.
k2start += 2;
} else if (!front) {
var k1_offset = v_offset + delta - k2;
if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) {
var x1 = v1[k1_offset];
var y1 = v_offset + x1 - k1_offset;
// Mirror x2 onto top-left coordinate system.
x2 = text1_length - x2;
if (x1 >= x2) {
// Overlap detected.
return diff_bisectSplit_(text1, text2, x1, y1);
}
}
}
}
}
// Diff took too long and hit the deadline or
// number of diffs equals number of characters, no commonality at all.
return [[DIFF_DELETE, text1], [DIFF_INSERT, text2]];
}
/**
* Given the location of the 'middle snake', split the diff in two parts
* and recurse.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {number} x Index of split point in text1.
* @param {number} y Index of split point in text2.
* @return {Array} Array of diff tuples.
*/
function diff_bisectSplit_(text1, text2, x, y) {
var text1a = text1.substring(0, x);
var text2a = text2.substring(0, y);
var text1b = text1.substring(x);
var text2b = text2.substring(y);
// Compute both diffs serially.
var diffs = diff_main(text1a, text2a);
var diffsb = diff_main(text1b, text2b);
return diffs.concat(diffsb);
}
/**
* Determine the common prefix of two strings.
* @param {string} text1 First string.
* @param {string} text2 Second string.
* @return {number} The number of characters common to the start of each
* string.
*/
function diff_commonPrefix(text1, text2) {
// Quick check for common null cases.
if (!text1 || !text2 || text1.charAt(0) != text2.charAt(0)) {
return 0;
}
// Binary search.
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
var pointermin = 0;
var pointermax = Math.min(text1.length, text2.length);
var pointermid = pointermax;
var pointerstart = 0;
while (pointermin < pointermid) {
if (text1.substring(pointerstart, pointermid) == text2.substring(pointerstart, pointermid)) {
pointermin = pointermid;
pointerstart = pointermin;
} else {
pointermax = pointermid;
}
pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin);
}
return pointermid;
}
/**
* Determine the common suffix of two strings.
* @param {string} text1 First string.
* @param {string} text2 Second string.
* @return {number} The number of characters common to the end of each string.
*/
function diff_commonSuffix(text1, text2) {
// Quick check for common null cases.
if (!text1 || !text2 || text1.charAt(text1.length - 1) != text2.charAt(text2.length - 1)) {
return 0;
}
// Binary search.
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
var pointermin = 0;
var pointermax = Math.min(text1.length, text2.length);