UNPKG

@cao-mei-you-ren/postlight_parser

Version:

Postlight Parser transforms web pages into clean text. Publishers and programmers use it to make the web make sense, and readers use it to read any web article comfortably.

1,273 lines (1,260 loc) 273 kB
// ---------------------------------------------------------------------------
// Bundler runtime helpers (emitted by esbuild/tsup). Kept behaviorally as-is.
// ---------------------------------------------------------------------------
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Wraps an ESM-style initializer so that it runs at most once: after the first
// call `fn` is overwritten with 0 and the stored result is returned.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};

// Wraps a CommonJS module factory; the module object is created on first call
// and its `exports` are returned on every call.
var __commonJS = (cb, mod) => function __require() {
  return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
};

// Defines an enumerable getter on `target` for every entry of `all`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};

// Copies own properties of `from` onto `to` as getters, skipping keys that
// already exist on `to` and the single key named by `except`.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};

var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));

var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// node_modules/.pnpm/tsup@8.2.3_typescript@5.5.4/node_modules/tsup/assets/cjs_shims.js
var init_cjs_shims = __esm({
  "node_modules/.pnpm/tsup@8.2.3_typescript@5.5.4/node_modules/tsup/assets/cjs_shims.js"() {
  }
});

// node_modules/.pnpm/heap@0.2.7/node_modules/heap/lib/heap.js
var require_heap = __commonJS({
  "node_modules/.pnpm/heap@0.2.7/node_modules/heap/lib/heap.js"(exports2, module2) {
    init_cjs_shims();
    (function() {
      const floor = Math.floor;
      const min = Math.min;

      /** Default three-way comparator based on `<` and `>`. */
      function defaultCmp(x, y) {
        if (x < y) {
          return -1;
        }
        if (x > y) {
          return 1;
        }
        return 0;
      }

      /**
       * Insert `x` into the sorted array `a` (searching within [lo, hi)),
       * placing it after any existing entries that compare equal.
       * Returns `x`. Throws if `lo` is negative.
       */
      function insort(a, x, lo, hi, cmp) {
        if (lo == null) {
          lo = 0;
        }
        if (cmp == null) {
          cmp = defaultCmp;
        }
        if (lo < 0) {
          throw new Error("lo must be non-negative");
        }
        if (hi == null) {
          hi = a.length;
        }
        while (lo < hi) {
          const mid = floor((lo + hi) / 2);
          if (cmp(x, a[mid]) < 0) {
            hi = mid;
          } else {
            lo = mid + 1;
          }
        }
        // Insert `x` as ONE element. The previous `[lo, 0].concat(x)` form
        // flattened array-valued items into several separate elements.
        a.splice(lo, 0, x);
        return x;
      }

      /** Push `item` onto the heap stored in `array`. */
      function heappush(array, item, cmp) {
        if (cmp == null) {
          cmp = defaultCmp;
        }
        array.push(item);
        return _siftdown(array, 0, array.length - 1, cmp);
      }

      /** Remove and return the smallest item (undefined when empty). */
      function heappop(array, cmp) {
        if (cmp == null) {
          cmp = defaultCmp;
        }
        const lastelt = array.pop();
        if (!array.length) {
          return lastelt;
        }
        const returnitem = array[0];
        array[0] = lastelt;
        _siftup(array, 0, cmp);
        return returnitem;
      }

      /** Replace the root with `item`, returning the old root. */
      function heapreplace(array, item, cmp) {
        if (cmp == null) {
          cmp = defaultCmp;
        }
        const returnitem = array[0];
        array[0] = item;
        _siftup(array, 0, cmp);
        return returnitem;
      }

      /** Push `item`, then pop and return the smallest item. */
      function heappushpop(array, item, cmp) {
        if (cmp == null) {
          cmp = defaultCmp;
        }
        if (array.length && cmp(array[0], item) < 0) {
          const root = array[0];
          array[0] = item;
          item = root;
          _siftup(array, 0, cmp);
        }
        return item;
      }

      /**
       * Rearrange `array` in place into heap order. Returns the list of
       * values produced by each internal sift (kept for compatibility).
       */
      function heapify(array, cmp) {
        if (cmp == null) {
          cmp = defaultCmp;
        }
        const results = [];
        // Sift every internal node, from the last parent back to the root.
        for (let i = floor(array.length / 2) - 1; i >= 0; i--) {
          results.push(_siftup(array, i, cmp));
        }
        return results;
      }

      /** Restore heap order after `item` (found via indexOf) was changed. */
      function updateItem(array, item, cmp) {
        if (cmp == null) {
          cmp = defaultCmp;
        }
        const pos = array.indexOf(item);
        if (pos === -1) {
          return;
        }
        _siftdown(array, 0, pos, cmp);
        return _siftup(array, pos, cmp);
      }

      /** Return the `n` largest items of `array`, largest first. */
      function nlargest(array, n, cmp) {
        if (cmp == null) {
          cmp = defaultCmp;
        }
        const result2 = array.slice(0, n);
        if (!result2.length) {
          return result2;
        }
        heapify(result2, cmp);
        const rest = array.slice(n);
        for (let i = 0; i < rest.length; i++) {
          heappushpop(result2, rest[i], cmp);
        }
        return result2.sort(cmp).reverse();
      }

      /**
       * Return the `n` smallest items of `array`, smallest first.
       * Note: when `n * 10 > array.length` the input array is heapified and
       * popped from in place (pre-existing behavior, preserved).
       */
      function nsmallest(array, n, cmp) {
        if (cmp == null) {
          cmp = defaultCmp;
        }
        if (n * 10 <= array.length) {
          const result2 = array.slice(0, n).sort(cmp);
          if (!result2.length) {
            return result2;
          }
          let los = result2[result2.length - 1];
          const rest = array.slice(n);
          for (let i = 0; i < rest.length; i++) {
            const elem = rest[i];
            if (cmp(elem, los) < 0) {
              insort(result2, elem, 0, null, cmp);
              result2.pop();
              los = result2[result2.length - 1];
            }
          }
          return result2;
        }
        heapify(array, cmp);
        const results = [];
        // The count is fixed before popping starts.
        const count = min(n, array.length);
        for (let i = 0; i < count; i++) {
          results.push(heappop(array, cmp));
        }
        return results;
      }

      /** Move the item at `pos` towards the root until its parent is not larger. */
      function _siftdown(array, startpos, pos, cmp) {
        if (cmp == null) {
          cmp = defaultCmp;
        }
        const newitem = array[pos];
        while (pos > startpos) {
          const parentpos = (pos - 1) >> 1;
          const parent = array[parentpos];
          if (cmp(newitem, parent) < 0) {
            array[pos] = parent;
            pos = parentpos;
            continue;
          }
          break;
        }
        return array[pos] = newitem;
      }

      /** Bubble the smaller child up from `pos` to a leaf, then sift the item back. */
      function _siftup(array, pos, cmp) {
        if (cmp == null) {
          cmp = defaultCmp;
        }
        const endpos = array.length;
        const startpos = pos;
        const newitem = array[pos];
        let childpos = 2 * pos + 1;
        while (childpos < endpos) {
          const rightpos = childpos + 1;
          if (rightpos < endpos && !(cmp(array[childpos], array[rightpos]) < 0)) {
            childpos = rightpos;
          }
          array[pos] = array[childpos];
          pos = childpos;
          childpos = 2 * pos + 1;
        }
        array[pos] = newitem;
        return _siftdown(array, startpos, pos, cmp);
      }

      /**
       * Binary heap ordered by `cmp` (defaults to `defaultCmp`).
       * Kept as a constructor function so existing prototype aliases and
       * static helpers keep working.
       */
      function Heap(cmp) {
        this.cmp = cmp != null ? cmp : defaultCmp;
        this.nodes = [];
      }

      // Array-based static helpers.
      Heap.push = heappush;
      Heap.pop = heappop;
      Heap.replace = heapreplace;
      Heap.pushpop = heappushpop;
      Heap.heapify = heapify;
      Heap.updateItem = updateItem;
      Heap.nlargest = nlargest;
      Heap.nsmallest = nsmallest;

      Heap.prototype.push = function(x) {
        return heappush(this.nodes, x, this.cmp);
      };
      Heap.prototype.pop = function() {
        return heappop(this.nodes, this.cmp);
      };
      Heap.prototype.peek = function() {
        return this.nodes[0];
      };
      Heap.prototype.contains = function(x) {
        return this.nodes.indexOf(x) !== -1;
      };
      Heap.prototype.replace = function(x) {
        return heapreplace(this.nodes, x, this.cmp);
      };
      Heap.prototype.pushpop = function(x) {
        return heappushpop(this.nodes, x, this.cmp);
      };
      Heap.prototype.heapify = function() {
        return heapify(this.nodes, this.cmp);
      };
      Heap.prototype.updateItem = function(x) {
        return updateItem(this.nodes, x, this.cmp);
      };
      Heap.prototype.clear = function() {
        return this.nodes = [];
      };
      Heap.prototype.empty = function() {
        return this.nodes.length === 0;
      };
      Heap.prototype.size = function() {
        return this.nodes.length;
      };
      Heap.prototype.clone = function() {
        // Carry the comparator over; previously the clone silently fell back
        // to defaultCmp, breaking heaps built with a custom ordering.
        const heap = new Heap(this.cmp);
        heap.nodes = this.nodes.slice(0);
        return heap;
      };
      Heap.prototype.toArray = function() {
        return this.nodes.slice(0);
      };

      // Aliases.
      Heap.prototype.insert = Heap.prototype.push;
      Heap.prototype.top = Heap.prototype.peek;
      Heap.prototype.front = Heap.prototype.peek;
      Heap.prototype.has = Heap.prototype.contains;
      Heap.prototype.copy = Heap.prototype.clone;

      // UMD-style export: AMD, then CommonJS, then a global on `root`.
      (function(root, factory) {
        if (typeof define === "function" && define.amd) {
          return define([], factory);
        } else if (typeof exports2 === "object") {
          return module2.exports = factory();
        } else {
          return root.Heap = factory();
        }
      })(this, function() {
        return Heap;
      });
    }).call(exports2);
  }
});

// node_modules/.pnpm/heap@0.2.7/node_modules/heap/index.js
var require_heap2 = __commonJS({
  "node_modules/.pnpm/heap@0.2.7/node_modules/heap/index.js"(exports2, module2) {
    init_cjs_shims();
    module2.exports = require_heap();
  }
});

// node_modules/.pnpm/difflib@git+https+++git@github.com+postlight+difflib.js.git#32e8e38c7fcd935241b9baab71bb432fd9b166ed/node_modules/difflib/lib/difflib.js
var require_difflib = __commonJS({ "node_modules/.pnpm/difflib@git+https+++git@github.com+postlight+difflib.js.git#32e8e38c7fcd935241b9baab71bb432fd9b166ed/node_modules/difflib/lib/difflib.js"(exports2) { init_cjs_shims(); (function() { var Differ, Heap, IS_CHARACTER_JUNK, IS_LINE_JUNK, SequenceMatcher, contextDiff, floor, getCloseMatches, max, min, ndiff, restore, unifiedDiff, _any, _arrayCmp, _calculateRatio, _countLeading, _formatRangeContext, _formatRangeUnified, _has, __indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; }; floor = Math.floor, max = Math.max, min = Math.min; Heap = require_heap2(); _calculateRatio = function(matches, length) { if (length) { return 2 * matches / length; } else { return 1; } }; _arrayCmp = function(a, b) { var i, la, lb, _i, _ref, _ref1; _ref = [a.length, b.length], la = _ref[0], lb = _ref[1]; for (i = _i = 0, _ref1 = min(la, lb); 0 <= _ref1 ? _i < _ref1 : _i > _ref1; i = 0 <= _ref1 ? ++_i : --_i) { if (a[i] < b[i]) { return -1; } if (a[i] > b[i]) { return 1; } } return la - lb; }; _has = function(obj, key) { return Object.prototype.hasOwnProperty.call(obj, key); }; _any = function(items) { var item, _i, _len; for (_i = 0, _len = items.length; _i < _len; _i++) { item = items[_i]; if (item) { return true; } } return false; }; SequenceMatcher = function() { function SequenceMatcher2(isjunk, a, b, autojunk) { this.isjunk = isjunk; if (a == null) { a = ""; } if (b == null) { b = ""; } this.autojunk = autojunk != null ? 
autojunk : true; this.a = this.b = null; this.setSeqs(a, b); } SequenceMatcher2.prototype.setSeqs = function(a, b) { this.setSeq1(a); return this.setSeq2(b); }; SequenceMatcher2.prototype.setSeq1 = function(a) { if (a === this.a) { return; } this.a = a; return this.matchingBlocks = this.opcodes = null; }; SequenceMatcher2.prototype.setSeq2 = function(b) { if (b === this.b) { return; } this.b = b; this.matchingBlocks = this.opcodes = null; this.fullbcount = null; return this._chainB(); }; SequenceMatcher2.prototype._chainB = function() { var b, b2j, elt, i, idxs, indices, isjunk, junk, n, ntest, popular, _i, _j, _len, _len1, _ref; b = this.b; this.b2j = b2j = {}; for (i = _i = 0, _len = b.length; _i < _len; i = ++_i) { elt = b[i]; indices = _has(b2j, elt) ? b2j[elt] : b2j[elt] = []; indices.push(i); } junk = {}; isjunk = this.isjunk; if (isjunk) { _ref = Object.keys(b2j); for (_j = 0, _len1 = _ref.length; _j < _len1; _j++) { elt = _ref[_j]; if (isjunk(elt)) { junk[elt] = true; delete b2j[elt]; } } } popular = {}; n = b.length; if (this.autojunk && n >= 200) { ntest = floor(n / 100) + 1; for (elt in b2j) { idxs = b2j[elt]; if (idxs.length > ntest) { popular[elt] = true; delete b2j[elt]; } } } this.isbjunk = function(b2) { return _has(junk, b2); }; return this.isbpopular = function(b2) { return _has(popular, b2); }; }; SequenceMatcher2.prototype.findLongestMatch = function(alo, ahi, blo, bhi) { var a, b, b2j, besti, bestj, bestsize, i, isbjunk, j, j2len, k, newj2len, _i, _j, _len, _ref, _ref1, _ref2, _ref3, _ref4, _ref5; _ref = [this.a, this.b, this.b2j, this.isbjunk], a = _ref[0], b = _ref[1], b2j = _ref[2], isbjunk = _ref[3]; _ref1 = [alo, blo, 0], besti = _ref1[0], bestj = _ref1[1], bestsize = _ref1[2]; j2len = {}; for (i = _i = alo; alo <= ahi ? _i < ahi : _i > ahi; i = alo <= ahi ? ++_i : --_i) { newj2len = {}; _ref2 = _has(b2j, a[i]) ? 
b2j[a[i]] : []; for (_j = 0, _len = _ref2.length; _j < _len; _j++) { j = _ref2[_j]; if (j < blo) { continue; } if (j >= bhi) { break; } k = newj2len[j] = (j2len[j - 1] || 0) + 1; if (k > bestsize) { _ref3 = [i - k + 1, j - k + 1, k], besti = _ref3[0], bestj = _ref3[1], bestsize = _ref3[2]; } } j2len = newj2len; } while (besti > alo && bestj > blo && !isbjunk(b[bestj - 1]) && a[besti - 1] === b[bestj - 1]) { _ref4 = [besti - 1, bestj - 1, bestsize + 1], besti = _ref4[0], bestj = _ref4[1], bestsize = _ref4[2]; } while (besti + bestsize < ahi && bestj + bestsize < bhi && !isbjunk(b[bestj + bestsize]) && a[besti + bestsize] === b[bestj + bestsize]) { bestsize++; } while (besti > alo && bestj > blo && isbjunk(b[bestj - 1]) && a[besti - 1] === b[bestj - 1]) { _ref5 = [besti - 1, bestj - 1, bestsize + 1], besti = _ref5[0], bestj = _ref5[1], bestsize = _ref5[2]; } while (besti + bestsize < ahi && bestj + bestsize < bhi && isbjunk(b[bestj + bestsize]) && a[besti + bestsize] === b[bestj + bestsize]) { bestsize++; } return [besti, bestj, bestsize]; }; SequenceMatcher2.prototype.getMatchingBlocks = function() { var ahi, alo, bhi, blo, i, i1, i2, j, j1, j2, k, k1, k2, la, lb, matchingBlocks, nonAdjacent, queue, x, _i, _len, _ref, _ref1, _ref2, _ref3, _ref4; if (this.matchingBlocks) { return this.matchingBlocks; } _ref = [this.a.length, this.b.length], la = _ref[0], lb = _ref[1]; queue = [[0, la, 0, lb]]; matchingBlocks = []; while (queue.length) { _ref1 = queue.pop(), alo = _ref1[0], ahi = _ref1[1], blo = _ref1[2], bhi = _ref1[3]; _ref2 = x = this.findLongestMatch(alo, ahi, blo, bhi), i = _ref2[0], j = _ref2[1], k = _ref2[2]; if (k) { matchingBlocks.push(x); if (alo < i && blo < j) { queue.push([alo, i, blo, j]); } if (i + k < ahi && j + k < bhi) { queue.push([i + k, ahi, j + k, bhi]); } } } matchingBlocks.sort(_arrayCmp); i1 = j1 = k1 = 0; nonAdjacent = []; for (_i = 0, _len = matchingBlocks.length; _i < _len; _i++) { _ref3 = matchingBlocks[_i], i2 = _ref3[0], j2 = _ref3[1], 
k2 = _ref3[2]; if (i1 + k1 === i2 && j1 + k1 === j2) { k1 += k2; } else { if (k1) { nonAdjacent.push([i1, j1, k1]); } _ref4 = [i2, j2, k2], i1 = _ref4[0], j1 = _ref4[1], k1 = _ref4[2]; } } if (k1) { nonAdjacent.push([i1, j1, k1]); } nonAdjacent.push([la, lb, 0]); return this.matchingBlocks = nonAdjacent; }; SequenceMatcher2.prototype.getOpcodes = function() { var ai, answer, bj, i, j, size, tag, _i, _len, _ref, _ref1, _ref2; if (this.opcodes) { return this.opcodes; } i = j = 0; this.opcodes = answer = []; _ref = this.getMatchingBlocks(); for (_i = 0, _len = _ref.length; _i < _len; _i++) { _ref1 = _ref[_i], ai = _ref1[0], bj = _ref1[1], size = _ref1[2]; tag = ""; if (i < ai && j < bj) { tag = "replace"; } else if (i < ai) { tag = "delete"; } else if (j < bj) { tag = "insert"; } if (tag) { answer.push([tag, i, ai, j, bj]); } _ref2 = [ai + size, bj + size], i = _ref2[0], j = _ref2[1]; if (size) { answer.push(["equal", ai, i, bj, j]); } } return answer; }; SequenceMatcher2.prototype.getGroupedOpcodes = function(n) { var codes, group, groups, i1, i2, j1, j2, nn, tag, _i, _len, _ref, _ref1, _ref2, _ref3; if (n == null) { n = 3; } codes = this.getOpcodes(); if (!codes.length) { codes = [["equal", 0, 1, 0, 1]]; } if (codes[0][0] === "equal") { _ref = codes[0], tag = _ref[0], i1 = _ref[1], i2 = _ref[2], j1 = _ref[3], j2 = _ref[4]; codes[0] = [tag, max(i1, i2 - n), i2, max(j1, j2 - n), j2]; } if (codes[codes.length - 1][0] === "equal") { _ref1 = codes[codes.length - 1], tag = _ref1[0], i1 = _ref1[1], i2 = _ref1[2], j1 = _ref1[3], j2 = _ref1[4]; codes[codes.length - 1] = [tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)]; } nn = n + n; groups = []; group = []; for (_i = 0, _len = codes.length; _i < _len; _i++) { _ref2 = codes[_i], tag = _ref2[0], i1 = _ref2[1], i2 = _ref2[2], j1 = _ref2[3], j2 = _ref2[4]; if (tag === "equal" && i2 - i1 > nn) { group.push([tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)]); groups.push(group); group = []; _ref3 = [max(i1, i2 - n), max(j1, j2 - n)], 
i1 = _ref3[0], j1 = _ref3[1]; } group.push([tag, i1, i2, j1, j2]); } if (group.length && !(group.length === 1 && group[0][0] === "equal")) { groups.push(group); } return groups; }; SequenceMatcher2.prototype.ratio = function() { var match, matches, _i, _len, _ref; matches = 0; _ref = this.getMatchingBlocks(); for (_i = 0, _len = _ref.length; _i < _len; _i++) { match = _ref[_i]; matches += match[2]; } return _calculateRatio(matches, this.a.length + this.b.length); }; SequenceMatcher2.prototype.quickRatio = function() { var avail, elt, fullbcount, matches, numb, _i, _j, _len, _len1, _ref, _ref1; if (!this.fullbcount) { this.fullbcount = fullbcount = {}; _ref = this.b; for (_i = 0, _len = _ref.length; _i < _len; _i++) { elt = _ref[_i]; fullbcount[elt] = (fullbcount[elt] || 0) + 1; } } fullbcount = this.fullbcount; avail = {}; matches = 0; _ref1 = this.a; for (_j = 0, _len1 = _ref1.length; _j < _len1; _j++) { elt = _ref1[_j]; if (_has(avail, elt)) { numb = avail[elt]; } else { numb = fullbcount[elt] || 0; } avail[elt] = numb - 1; if (numb > 0) { matches++; } } return _calculateRatio(matches, this.a.length + this.b.length); }; SequenceMatcher2.prototype.realQuickRatio = function() { var la, lb, _ref; _ref = [this.a.length, this.b.length], la = _ref[0], lb = _ref[1]; return _calculateRatio(min(la, lb), la + lb); }; return SequenceMatcher2; }(); getCloseMatches = function(word, possibilities, n, cutoff) { var result2, s, score, x, _i, _j, _len, _len1, _ref, _results; if (n == null) { n = 3; } if (cutoff == null) { cutoff = 0.6; } if (!(n > 0)) { throw new Error("n must be > 0: (" + n + ")"); } if (!(0 <= cutoff && cutoff <= 1)) { throw new Error("cutoff must be in [0.0, 1.0]: (" + cutoff + ")"); } result2 = []; s = new SequenceMatcher(); s.setSeq2(word); for (_i = 0, _len = possibilities.length; _i < _len; _i++) { x = possibilities[_i]; s.setSeq1(x); if (s.realQuickRatio() >= cutoff && s.quickRatio() >= cutoff && s.ratio() >= cutoff) { result2.push([s.ratio(), x]); } } 
result2 = Heap.nlargest(result2, n, _arrayCmp); _results = []; for (_j = 0, _len1 = result2.length; _j < _len1; _j++) { _ref = result2[_j], score = _ref[0], x = _ref[1]; _results.push(x); } return _results; }; _countLeading = function(line, ch) { var i, n, _ref; _ref = [0, line.length], i = _ref[0], n = _ref[1]; while (i < n && line[i] === ch) { i++; } return i; }; Differ = function() { function Differ2(linejunk, charjunk) { this.linejunk = linejunk; this.charjunk = charjunk; } Differ2.prototype.compare = function(a, b) { var ahi, alo, bhi, blo, cruncher, g, line, lines, tag, _i, _j, _len, _len1, _ref, _ref1; cruncher = new SequenceMatcher(this.linejunk, a, b); lines = []; _ref = cruncher.getOpcodes(); for (_i = 0, _len = _ref.length; _i < _len; _i++) { _ref1 = _ref[_i], tag = _ref1[0], alo = _ref1[1], ahi = _ref1[2], blo = _ref1[3], bhi = _ref1[4]; switch (tag) { case "replace": g = this._fancyReplace(a, alo, ahi, b, blo, bhi); break; case "delete": g = this._dump("-", a, alo, ahi); break; case "insert": g = this._dump("+", b, blo, bhi); break; case "equal": g = this._dump(" ", a, alo, ahi); break; default: throw new Error("unknow tag (" + tag + ")"); } for (_j = 0, _len1 = g.length; _j < _len1; _j++) { line = g[_j]; lines.push(line); } } return lines; }; Differ2.prototype._dump = function(tag, x, lo, hi) { var i, _i, _results; _results = []; for (i = _i = lo; lo <= hi ? _i < hi : _i > hi; i = lo <= hi ? 
++_i : --_i) { _results.push("" + tag + " " + x[i]); } return _results; }; Differ2.prototype._plainReplace = function(a, alo, ahi, b, blo, bhi) { var first, g, line, lines, second, _i, _j, _len, _len1, _ref; if (bhi - blo < ahi - alo) { first = this._dump("+", b, blo, bhi); second = this._dump("-", a, alo, ahi); } else { first = this._dump("-", a, alo, ahi); second = this._dump("+", b, blo, bhi); } lines = []; _ref = [first, second]; for (_i = 0, _len = _ref.length; _i < _len; _i++) { g = _ref[_i]; for (_j = 0, _len1 = g.length; _j < _len1; _j++) { line = g[_j]; lines.push(line); } } return lines; }; Differ2.prototype._fancyReplace = function(a, alo, ahi, b, blo, bhi) { var aelt, ai, ai1, ai2, atags, belt, bestRatio, besti, bestj, bj, bj1, bj2, btags, cruncher, cutoff, eqi, eqj, i, j, la, lb, line, lines, tag, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _n, _o, _ref, _ref1, _ref10, _ref11, _ref12, _ref2, _ref3, _ref4, _ref5, _ref6, _ref7, _ref8, _ref9; _ref = [0.74, 0.75], bestRatio = _ref[0], cutoff = _ref[1]; cruncher = new SequenceMatcher(this.charjunk); _ref1 = [null, null], eqi = _ref1[0], eqj = _ref1[1]; lines = []; for (j = _i = blo; blo <= bhi ? _i < bhi : _i > bhi; j = blo <= bhi ? ++_i : --_i) { bj = b[j]; cruncher.setSeq2(bj); for (i = _j = alo; alo <= ahi ? _j < ahi : _j > ahi; i = alo <= ahi ? 
++_j : --_j) { ai = a[i]; if (ai === bj) { if (eqi === null) { _ref2 = [i, j], eqi = _ref2[0], eqj = _ref2[1]; } continue; } cruncher.setSeq1(ai); if (cruncher.realQuickRatio() > bestRatio && cruncher.quickRatio() > bestRatio && cruncher.ratio() > bestRatio) { _ref3 = [cruncher.ratio(), i, j], bestRatio = _ref3[0], besti = _ref3[1], bestj = _ref3[2]; } } } if (bestRatio < cutoff) { if (eqi === null) { _ref4 = this._plainReplace(a, alo, ahi, b, blo, bhi); for (_k = 0, _len = _ref4.length; _k < _len; _k++) { line = _ref4[_k]; lines.push(line); } return lines; } _ref5 = [eqi, eqj, 1], besti = _ref5[0], bestj = _ref5[1], bestRatio = _ref5[2]; } else { eqi = null; } _ref6 = this._fancyHelper(a, alo, besti, b, blo, bestj); for (_l = 0, _len1 = _ref6.length; _l < _len1; _l++) { line = _ref6[_l]; lines.push(line); } _ref7 = [a[besti], b[bestj]], aelt = _ref7[0], belt = _ref7[1]; if (eqi === null) { atags = btags = ""; cruncher.setSeqs(aelt, belt); _ref8 = cruncher.getOpcodes(); for (_m = 0, _len2 = _ref8.length; _m < _len2; _m++) { _ref9 = _ref8[_m], tag = _ref9[0], ai1 = _ref9[1], ai2 = _ref9[2], bj1 = _ref9[3], bj2 = _ref9[4]; _ref10 = [ai2 - ai1, bj2 - bj1], la = _ref10[0], lb = _ref10[1]; switch (tag) { case "replace": atags += Array(la + 1).join("^"); btags += Array(lb + 1).join("^"); break; case "delete": atags += Array(la + 1).join("-"); break; case "insert": btags += Array(lb + 1).join("+"); break; case "equal": atags += Array(la + 1).join(" "); btags += Array(lb + 1).join(" "); break; default: throw new Error("unknow tag (" + tag + ")"); } } _ref11 = this._qformat(aelt, belt, atags, btags); for (_n = 0, _len3 = _ref11.length; _n < _len3; _n++) { line = _ref11[_n]; lines.push(line); } } else { lines.push(" " + aelt); } _ref12 = this._fancyHelper(a, besti + 1, ahi, b, bestj + 1, bhi); for (_o = 0, _len4 = _ref12.length; _o < _len4; _o++) { line = _ref12[_o]; lines.push(line); } return lines; }; Differ2.prototype._fancyHelper = function(a, alo, ahi, b, blo, bhi) { 
var g; g = []; if (alo < ahi) { if (blo < bhi) { g = this._fancyReplace(a, alo, ahi, b, blo, bhi); } else { g = this._dump("-", a, alo, ahi); } } else if (blo < bhi) { g = this._dump("+", b, blo, bhi); } return g; }; Differ2.prototype._qformat = function(aline, bline, atags, btags) { var common, lines; lines = []; common = min(_countLeading(aline, " "), _countLeading(bline, " ")); common = min(common, _countLeading(atags.slice(0, common), " ")); common = min(common, _countLeading(btags.slice(0, common), " ")); atags = atags.slice(common).replace(/\s+$/, ""); btags = btags.slice(common).replace(/\s+$/, ""); lines.push("- " + aline); if (atags.length) { lines.push("? " + Array(common + 1).join(" ") + atags + "\n"); } lines.push("+ " + bline); if (btags.length) { lines.push("? " + Array(common + 1).join(" ") + btags + "\n"); } return lines; }; return Differ2; }(); IS_LINE_JUNK = function(line, pat) { if (pat == null) { pat = /^\s*#?\s*$/; } return pat.test(line); }; IS_CHARACTER_JUNK = function(ch, ws) { if (ws == null) { ws = " "; } return __indexOf.call(ws, ch) >= 0; }; _formatRangeUnified = function(start, stop) { var beginning, length; beginning = start + 1; length = stop - start; if (length === 1) { return "" + beginning; } if (!length) { beginning--; } return "" + beginning + "," + length; }; unifiedDiff = function(a, b, _arg) { var file1Range, file2Range, first, fromdate, fromfile, fromfiledate, group, i1, i2, j1, j2, last, line, lines, lineterm, n, started, tag, todate, tofile, tofiledate, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _ref, _ref1, _ref2, _ref3, _ref4, _ref5, _ref6; _ref = _arg != null ? 
_arg : {}, fromfile = _ref.fromfile, tofile = _ref.tofile, fromfiledate = _ref.fromfiledate, tofiledate = _ref.tofiledate, n = _ref.n, lineterm = _ref.lineterm; if (fromfile == null) { fromfile = ""; } if (tofile == null) { tofile = ""; } if (fromfiledate == null) { fromfiledate = ""; } if (tofiledate == null) { tofiledate = ""; } if (n == null) { n = 3; } if (lineterm == null) { lineterm = "\n"; } lines = []; started = false; _ref1 = new SequenceMatcher(null, a, b).getGroupedOpcodes(); for (_i = 0, _len = _ref1.length; _i < _len; _i++) { group = _ref1[_i]; if (!started) { started = true; fromdate = fromfiledate ? " " + fromfiledate : ""; todate = tofiledate ? " " + tofiledate : ""; lines.push("--- " + fromfile + fromdate + lineterm); lines.push("+++ " + tofile + todate + lineterm); } _ref2 = [group[0], group[group.length - 1]], first = _ref2[0], last = _ref2[1]; file1Range = _formatRangeUnified(first[1], last[2]); file2Range = _formatRangeUnified(first[3], last[4]); lines.push("@@ -" + file1Range + " +" + file2Range + " @@" + lineterm); for (_j = 0, _len1 = group.length; _j < _len1; _j++) { _ref3 = group[_j], tag = _ref3[0], i1 = _ref3[1], i2 = _ref3[2], j1 = _ref3[3], j2 = _ref3[4]; if (tag === "equal") { _ref4 = a.slice(i1, i2); for (_k = 0, _len2 = _ref4.length; _k < _len2; _k++) { line = _ref4[_k]; lines.push(" " + line); } continue; } if (tag === "replace" || tag === "delete") { _ref5 = a.slice(i1, i2); for (_l = 0, _len3 = _ref5.length; _l < _len3; _l++) { line = _ref5[_l]; lines.push("-" + line); } } if (tag === "replace" || tag === "insert") { _ref6 = b.slice(j1, j2); for (_m = 0, _len4 = _ref6.length; _m < _len4; _m++) { line = _ref6[_m]; lines.push("+" + line); } } } } return lines; }; _formatRangeContext = function(start, stop) { var beginning, length; beginning = start + 1; length = stop - start; if (!length) { beginning--; } if (length <= 1) { return "" + beginning; } return "" + beginning + "," + (beginning + length - 1); }; contextDiff = function(a, 
b, _arg) { var file1Range, file2Range, first, fromdate, fromfile, fromfiledate, group, i1, i2, j1, j2, last, line, lines, lineterm, n, prefix, started, tag, todate, tofile, tofiledate, _, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _ref, _ref1, _ref2, _ref3, _ref4, _ref5, _ref6; _ref = _arg != null ? _arg : {}, fromfile = _ref.fromfile, tofile = _ref.tofile, fromfiledate = _ref.fromfiledate, tofiledate = _ref.tofiledate, n = _ref.n, lineterm = _ref.lineterm; if (fromfile == null) { fromfile = ""; } if (tofile == null) { tofile = ""; } if (fromfiledate == null) { fromfiledate = ""; } if (tofiledate == null) { tofiledate = ""; } if (n == null) { n = 3; } if (lineterm == null) { lineterm = "\n"; } prefix = { insert: "+ ", "delete": "- ", replace: "! ", equal: " " }; started = false; lines = []; _ref1 = new SequenceMatcher(null, a, b).getGroupedOpcodes(); for (_i = 0, _len = _ref1.length; _i < _len; _i++) { group = _ref1[_i]; if (!started) { started = true; fromdate = fromfiledate ? " " + fromfiledate : ""; todate = tofiledate ? 
" " + tofiledate : ""; lines.push("*** " + fromfile + fromdate + lineterm); lines.push("--- " + tofile + todate + lineterm); _ref2 = [group[0], group[group.length - 1]], first = _ref2[0], last = _ref2[1]; lines.push("***************" + lineterm); file1Range = _formatRangeContext(first[1], last[2]); lines.push("*** " + file1Range + " ****" + lineterm); if (_any(function() { var _j2, _len12, _ref32, _results; _results = []; for (_j2 = 0, _len12 = group.length; _j2 < _len12; _j2++) { _ref32 = group[_j2], tag = _ref32[0], _ = _ref32[1], _ = _ref32[2], _ = _ref32[3], _ = _ref32[4]; _results.push(tag === "replace" || tag === "delete"); } return _results; }())) { for (_j = 0, _len1 = group.length; _j < _len1; _j++) { _ref3 = group[_j], tag = _ref3[0], i1 = _ref3[1], i2 = _ref3[2], _ = _ref3[3], _ = _ref3[4]; if (tag !== "insert") { _ref4 = a.slice(i1, i2); for (_k = 0, _len2 = _ref4.length; _k < _len2; _k++) { line = _ref4[_k]; lines.push(prefix[tag] + line); } } } } file2Range = _formatRangeContext(first[3], last[4]); lines.push("--- " + file2Range + " ----" + lineterm); if (_any(function() { var _l2, _len32, _ref52, _results; _results = []; for (_l2 = 0, _len32 = group.length; _l2 < _len32; _l2++) { _ref52 = group[_l2], tag = _ref52[0], _ = _ref52[1], _ = _ref52[2], _ = _ref52[3], _ = _ref52[4]; _results.push(tag === "replace" || tag === "insert"); } return _results; }())) { for (_l = 0, _len3 = group.length; _l < _len3; _l++) { _ref5 = group[_l], tag = _ref5[0], _ = _ref5[1], _ = _ref5[2], j1 = _ref5[3], j2 = _ref5[4]; if (tag !== "delete") { _ref6 = b.slice(j1, j2); for (_m = 0, _len4 = _ref6.length; _m < _len4; _m++) { line = _ref6[_m]; lines.push(prefix[tag] + line); } } } } } } return lines; }; ndiff = function(a, b, linejunk, charjunk) { if (charjunk == null) { charjunk = IS_CHARACTER_JUNK; } return new Differ(linejunk, charjunk).compare(a, b); }; restore = function(delta, which) { var line, lines, prefixes, tag, _i, _len, _ref; tag = { 1: "- ", 2: "+ " }[which]; 
if (!tag) { throw new Error("unknow delta choice (must be 1 or 2): " + which); } prefixes = [" ", tag]; lines = []; for (_i = 0, _len = delta.length; _i < _len; _i++) { line = delta[_i]; if (_ref = line.slice(0, 2), __indexOf.call(prefixes, _ref) >= 0) { lines.push(line.slice(2)); } } return lines; }; exports2._arrayCmp = _arrayCmp; exports2.SequenceMatcher = SequenceMatcher; exports2.getCloseMatches = getCloseMatches; exports2._countLeading = _countLeading; exports2.Differ = Differ; exports2.IS_LINE_JUNK = IS_LINE_JUNK; exports2.IS_CHARACTER_JUNK = IS_CHARACTER_JUNK; exports2._formatRangeUnified = _formatRangeUnified; exports2.unifiedDiff = unifiedDiff; exports2._formatRangeContext = _formatRangeContext; exports2.contextDiff = contextDiff; exports2.ndiff = ndiff; exports2.restore = restore; }).call(exports2); } }); // node_modules/.pnpm/difflib@git+https+++git@github.com+postlight+difflib.js.git#32e8e38c7fcd935241b9baab71bb432fd9b166ed/node_modules/difflib/index.js var require_difflib2 = __commonJS({ "node_modules/.pnpm/difflib@git+https+++git@github.com+postlight+difflib.js.git#32e8e38c7fcd935241b9baab71bb432fd9b166ed/node_modules/difflib/index.js"(exports2, module2) { init_cjs_shims(); module2.exports = require_difflib(); } }); // src/mercury.js var mercury_exports = {}; __export(mercury_exports, { default: () => mercury_default }); module.exports = __toCommonJS(mercury_exports); init_cjs_shims(); var import_url12 = __toESM(require("url")); var import_cheerio6 = __toESM(require("cheerio")); var import_turndown = __toESM(require("turndown")); // src/extractors/add-extractor.js init_cjs_shims(); // src/utils/merge-supported-domains.js init_cjs_shims(); var merge = (extractor, domains) => domains.reduce((acc, domain) => { acc[domain] = extractor; return acc; }, {}); function mergeSupportedDomains(extractor) { return extractor.supportedDomains ? 
merge(extractor, [extractor.domain, ...extractor.supportedDomains]) : merge(extractor, [extractor.domain]); } // src/extractors/add-extractor.js var apiExtractors = {}; function addExtractor(extractor) { if (!extractor || !extractor.domain) { return { error: true, message: "Unable to add custom extractor. Invalid parameters." }; } Object.assign(apiExtractors, mergeSupportedDomains(extractor)); return apiExtractors; } // src/extractors/get-extractor.js init_cjs_shims(); var import_url10 = __toESM(require("url")); // src/extractors/all.js init_cjs_shims(); // src/extractors/custom/index.js var custom_exports = {}; __export(custom_exports, { AbcnewsGoComExtractor: () => AbcnewsGoComExtractor, ApartmentTherapyExtractor: () => ApartmentTherapyExtractor, ArstechnicaComExtractor: () => ArstechnicaComExtractor, BiorxivOrgExtractor: () => BiorxivOrgExtractor, BlisterreviewComExtractor: () => BlisterreviewComExtractor, BloggerExtractor: () => BloggerExtractor, BookwalkerJpExtractor: () => BookwalkerJpExtractor, BroadwayWorldExtractor: () => BroadwayWorldExtractor, BuzzapJpExtractor: () => BuzzapJpExtractor, BuzzfeedExtractor: () => BuzzfeedExtractor, ClinicaltrialsGovExtractor: () => ClinicaltrialsGovExtractor, DeadlineComExtractor: () => DeadlineComExtractor, DeadspinExtractor: () => DeadspinExtractor, EpaperZeitDeExtractor: () => EpaperZeitDeExtractor, FortuneComExtractor: () => FortuneComExtractor, ForwardComExtractor: () => ForwardComExtractor, GeniusComExtractor: () => GeniusComExtractor, GetnewsJpExtractor: () => GetnewsJpExtractor, GithubComExtractor: () => GithubComExtractor, GothamistComExtractor: () => GothamistComExtractor, HellogigglesComExtractor: () => HellogigglesComExtractor, IciRadioCanadaCaExtractor: () => IciRadioCanadaCaExtractor, JapanCnetComExtractor: () => JapanCnetComExtractor, JapanZdnetComExtractor: () => JapanZdnetComExtractor, JvndbJvnJpExtractor: () => JvndbJvnJpExtractor, LittleThingsExtractor: () => LittleThingsExtractor, MSNExtractor: () => 
MSNExtractor, MaTtiasBeExtractor: () => MaTtiasBeExtractor, MashableComExtractor: () => MashableComExtractor, MediumExtractor: () => MediumExtractor, MoneyCnnComExtractor: () => MoneyCnnComExtractor, NYMagExtractor: () => NYMagExtractor, NYTimesExtractor: () => NYTimesExtractor, NewYorkerExtractor: () => NewYorkerExtractor, NewrepublicComExtractor: () => NewrepublicComExtractor, NewsMynaviJpExtractor: () => NewsMynaviJpExtractor, NewsNationalgeographicComExtractor: () => NewsNationalgeographicComExtractor, ObamawhitehouseArchivesGovExtractor: () => ObamawhitehouseArchivesGovExtractor, ObserverComExtractor: () => ObserverComExtractor, OtrsComExtractor: () => OtrsComExtractor, PagesixComExtractor: () => PagesixComExtractor, PastebinComExtractor: () => PastebinComExtractor, PeopleComExtractor: () => PeopleComExtractor, PhpspotOrgExtractor: () => PhpspotOrgExtractor, PitchforkComExtractor: () => PitchforkComExtractor, PoliticoExtractor: () => PoliticoExtractor, PostlightComExtractor: () => PostlightComExtractor, QzComExtractor: () => QzComExtractor, ScanNetsecurityNeJpExtractor: () => ScanNetsecurityNeJpExtractor, ScienceflyComExtractor: () => ScienceflyComExtractor, SectIijAdJpExtractor: () => SectIijAdJpExtractor, SpektrumExtractor: () => SpektrumExtractor, TakagihiromitsuJpExtractor: () => TakagihiromitsuJpExtractor, TechlogIijAdJpExtractor: () => TechlogIijAdJpExtractor, TheAtlanticExtractor: () => TheAtlanticExtractor, ThefederalistpapersOrgExtractor: () => ThefederalistpapersOrgExtractor, ThoughtcatalogComExtractor: () => ThoughtcatalogComExtractor, TimesofindiaIndiatimesComExtractor: () => TimesofindiaIndiatimesComExtractor, TwitterExtractor: () => TwitterExtractor, UproxxComExtractor: () => UproxxComExtractor, WeeklyAsciiJpExtractor: () => WeeklyAsciiJpExtractor, WikiaExtractor: () => WikiaExtractor, WikipediaExtractor: () => WikipediaExtractor, WiredExtractor: () => WiredExtractor, WiredJpExtractor: () => WiredJpExtractor, WwwAbendblattDeExtractor: () => 
WwwAbendblattDeExtractor, WwwAlComExtractor: () => WwwAlComExtractor, WwwAmericanowComExtractor: () => WwwAmericanowComExtractor, WwwAndroidcentralComExtractor: () => WwwAndroidcentralComExtractor, WwwAolComExtractor: () => WwwAolComExtractor, WwwAsahiComExtractor: () => WwwAsahiComExtractor, WwwBloombergComExtractor: () => WwwBloombergComExtractor, WwwBustleComExtractor: () => WwwBustleComExtractor, WwwCbcCaExtractor: () => WwwCbcCaExtractor, WwwCbssportsComExtractor: () => WwwCbssportsComExtractor, WwwChicagotribuneComExtractor: () => WwwChicagotribuneComExtractor, WwwCnbcComExtractor: () => WwwCnbcComExtractor, WwwCnetComExtractor: () => WwwCnetComExtractor, WwwCnnComExtractor: () => WwwCnnComExtractor, WwwDmagazineComExtractor: () => WwwDmagazineComExtractor, WwwElecomCoJpExtractor: () => WwwElecomCoJpExtractor, WwwEngadgetComExtractor: () => WwwEngadgetComExtractor, WwwEonlineComExtractor: () => WwwEonlineComExtractor, WwwFastcompanyComExtractor: () => WwwFastcompanyComExtractor, WwwFoolComExtractor: () => WwwFoolComExtractor, WwwFortinetComExtractor: () => WwwFortinetComExtractor, WwwGizmodoJpExtractor: () => WwwGizmodoJpExtractor, WwwGrueneDeExtractor: () => WwwGrueneDeExtractor, WwwHuffingtonpostComExtractor: () => WwwHuffingtonpostComExtractor, WwwInfoqComExtractor: () => WwwInfoqComExtractor, WwwInquisitrComExtractor: () => WwwInquisitrComExtractor, WwwInvestmentexecutiveComExtractor: () => WwwInvestmentexecutiveComExtractor, WwwIpaGoJpExtractor: () => WwwIpaGoJpExtractor, WwwItmediaCoJpExtractor: () => WwwItmediaCoJpExtractor, WwwJnsaOrgExtractor: () => WwwJnsaOrgExtractor, WwwLadbibleComExtractor: () => WwwLadbibleComExtractor, WwwLatimesComExtractor: () => WwwLatimesComExtractor, WwwLemondeFrExtractor: () => WwwLemondeFrExtractor, WwwLifehackerJpExtractor: () => WwwLifehackerJpExtractor, WwwLinkedinComExtractor: () => WwwLinkedinComExtractor, WwwMacrumorsComExtractor: () => WwwMacrumorsComExtractor, WwwMentalflossComExtractor: () => 
WwwMentalflossComExtractor, WwwMiamiheraldComExtractor: () => WwwMiamiheraldComExtractor, WwwMoongiftJpExtractor: () => WwwMoongiftJpExtractor, WwwMsnbcComExtractor: () => WwwMsnbcComExtractor, WwwNationalgeographicComExtractor: () => WwwNationalgeographicComExtractor, WwwNbcnewsComExtractor: () => WwwNbcnewsComExtractor, WwwNdtvComExtractor: () => WwwNdtvComExtractor, WwwNprOrgExtractor: () => WwwNprOrgExtractor, WwwNydailynewsComExtractor: () => WwwNydailynewsComExtractor, WwwOpposingviewsComExtractor: () => WwwOpposingviewsComExtractor, WwwOreillyCoJpExtractor: () => WwwOreillyCoJpExtractor, WwwOssnewsJpExtractor: () => WwwOssnewsJpExtractor, WwwPhoronixComExtractor: () => WwwPhoronixComExtractor, WwwPopsugarComExtractor: () => WwwPopsugarComExtractor, WwwProspectmagazineCoUkExtractor: () => WwwProspectmagazineCoUkExtractor, WwwPublickey1JpExtractor: () => WwwPublickey1JpExtractor, WwwQdailyComExtractor: () => WwwQdailyComExtractor, WwwRawstoryComExtractor: () => WwwRawstoryComExtractor, WwwRbbtodayComExtractor: () => WwwRbbtodayComExtractor, WwwRecodeNetExtractor: () => WwwRecodeNetExtractor, WwwRedditComExtractor: () => WwwRedditComExtractor, WwwRefinery29ComExtractor: () => WwwRefinery29ComExtractor, WwwReutersComExtractor: () => WwwReutersComExtractor, WwwRollingstoneComExtractor: () => WwwRollingstoneComExtractor, WwwSanwaCoJpExtractor: () => WwwSanwaCoJpExtractor, WwwSbnationComExtractor: () => WwwSbnationComExtractor, WwwSiComExtractor: () => WwwSiComExtractor, WwwSlateComExtractor: () => WwwSlateComExtractor, WwwTheguardianComExtractor: () => WwwTheguardianComExtractor, WwwThepennyhoarderComExtractor: () => WwwThepennyhoarderComExtractor, WwwThepoliticalinsiderComExtractor: () => WwwThepoliticalinsiderComExtractor, WwwThevergeComExtractor: () => WwwThevergeComExtractor, WwwTmzComExtractor: () => WwwTmzComExtractor, WwwTodayComExtractor: () => WwwTodayComExtractor, WwwUsmagazineComExtractor: () => WwwUsmagazineComExtractor, WwwVoxComExtractor: () => 
WwwVoxComExtractor,