UNPKG

urban-scraper

Version:

A package for scraping terms and their definitions from the [Urban Dictionary](https://www.urbandictionary.com/).

728 lines (717 loc) 25.8 kB
'use strict';

/*
 * urban-scraper — CommonJS build.
 *
 * Scrapes a term (or a random term) from the Urban Dictionary website and
 * returns its definition, example, author, creation date and vote counts.
 *
 * This file relies on native async functions, object spread and RegExp named
 * capture groups instead of bundled transpiler helpers, so it needs a runtime
 * that supports those features. It is no longer the generated output, which is
 * why the former `sourceMappingURL` trailer has been dropped.
 */

Object.defineProperty(exports, '__esModule', { value: true });

const fetch = require('node-fetch');
const cheerio = require('cheerio');

/* ------------------------------------------------------------------------ */
/* Page selectors                                                           */
/* ------------------------------------------------------------------------ */

const NOT_FOUND_SELECTOR = "#ud-root > div > main > div > div > section > div > div.font-bold.text-2xl.my-8";
const TERM_WRAPPER_SELECTOR = "#ud-root > div > main > div > div > section > div:nth-child(1)";
const TERM_SELECTOR = TERM_WRAPPER_SELECTOR + " > div > div > h1 > a";
const DESCRIPTION_SELECTOR = TERM_WRAPPER_SELECTOR + " > div > div.break-words.meaning.mb-4";
const EXAMPLE_SELECTOR = TERM_WRAPPER_SELECTOR + " > div > div.break-words.example.italic.mb-4";
const AUTHOR_SELECTOR = TERM_WRAPPER_SELECTOR + " > div > div.contributor.font-bold > a";
const DATE_SELECTOR = TERM_WRAPPER_SELECTOR + " > div > div.contributor.font-bold";
const TERM_ID_SELECTOR = TERM_WRAPPER_SELECTOR + " > a";

/* ------------------------------------------------------------------------ */
/* Markup clean-up patterns                                                 */
/* ------------------------------------------------------------------------ */

/* Opening tag of an auto-generated cross-reference link plus its visible text. */
const LINK_START_REGEX = /<a class="autolink" href="(?<url>\/define\.php\?term=[\w%\/.-]+)">(?<text>[\w. '-]+)/gm;
/* Replacement used for LINK_START_REGEX when markdown output is requested. */
const LINK_STRING_REGEX = "[$<text>](https://www.urbandictionary.com$<url>)";
/* Closing anchor tags and literal asterisks are both stripped. */
const LINK_END_REGEX = /<\/a>|\*/gm;
const NEW_LINE_REGEX = /<br>/gm;
/* Captures the numeric definition id that follows `defid=` in a link. */
const TERM_ID_REGEX = /[\w:\/.?]+defid=(?<termId>[\d]+)/;

/**
 * Cleans the scraped HTML of a term's description and example.
 *
 * `<br>` becomes a newline, autolink anchors become either their plain text or
 * a markdown link, and closing anchors / asterisks are removed. Each field is
 * cleaned on its own, so a definition with an empty or missing example still
 * gets a cleaned description.
 *
 * @param {object} termData - Result object produced by `get`
 * @param {boolean} [formatMarkdown=false] - Emit markdown links instead of plain text
 * @returns {object} `termData` itself when `found` is falsy, otherwise a
 *   shallow copy with cleaned `description` and `example`
 */
function format(termData, formatMarkdown = false) {
  if (!termData.found) {
    return termData;
  }

  const linkReplacement = formatMarkdown ? LINK_STRING_REGEX : "$<text>";

  // Non-string values (e.g. null when the selector matched nothing) pass through.
  const clean = (html) => {
    if (typeof html !== 'string') {
      return html;
    }
    return html
      .replace(NEW_LINE_REGEX, "\n")
      .replace(LINK_START_REGEX, linkReplacement)
      .replace(LINK_END_REGEX, "");
  };

  return {
    ...termData,
    description: clean(termData.description),
    example: clean(termData.example),
  };
}

/**
 * Validates the options object shared by `getTerm` and `getRandom`.
 *
 * @param {object} options - Caller-supplied options
 * @returns {boolean} The `formatMarkdown` flag; `false` when the key is absent
 * @throws {Error} When `formatMarkdown` is present but not a boolean
 */
function resolveFormatMarkdown(options) {
  const { formatMarkdown = false } = options;
  if (typeof formatMarkdown !== "boolean") {
    throw new Error("Format markdown option needs to be a boolean");
  }
  return formatMarkdown;
}

/**
 * The function to fetch data for the `getTerm` and `getRandom` function.
 *
 * The HTTP status of the page request is not inspected; a missing term is
 * detected from the page content instead.
 *
 * @param {string} str - The term to search for
 * @param {boolean} [random=false] - Whether to get a random term
 * @returns {Promise<object>} `{ found: false, term }` when the page reports
 *   the term as missing, otherwise the scraped term data
 */
async function get(str, random = false) {
  // Encode the term so characters such as `&`, `#`, `%` or `+` stay inside
  // the `term` parameter instead of being read as URL syntax.
  const path = random ? "random.php" : "define.php?term=" + encodeURIComponent(str);
  const termUrl = "https://www.urbandictionary.com/" + path;

  const pageResponse = await fetch(termUrl, { redirect: "follow" });
  const html = await pageResponse.text();
  const $ = cheerio.load(html);

  /* check if the term wasn't found */
  const notFound = $(NOT_FOUND_SELECTOR).html();
  if (notFound != null && notFound.startsWith("Sorry, we couldn't find:")) {
    return { found: false, term: str };
  }

  /* get the term name, description and example */
  const termElement = $(TERM_SELECTOR);
  const term = termElement.html();
  const description = $(DESCRIPTION_SELECTOR).html();
  const example = $(EXAMPLE_SELECTOR).html();

  /* get the author of the term */
  const author = $(AUTHOR_SELECTOR);
  const name = author.html();
  const authorUrl = "https://www.urbandictionary.com" + author[0].attribs.href;

  /* get when the term was created: the text node that follows the first
     child of the contributor element */
  const dateString = $(DATE_SELECTOR).children()[0].next.data.trim();
  // Parse once, then re-interpret the local-time components as UTC.
  const parsed = new Date(dateString);
  const createdAt = new Date(Date.UTC(
    parsed.getFullYear(),
    parsed.getMonth(),
    parsed.getDate(),
    parsed.getHours(),
    parsed.getMinutes(),
    parsed.getSeconds(),
    parsed.getMilliseconds()
  ));

  /* get the term id */
  const termIdLink = $(TERM_ID_SELECTOR);
  const id = Number(termIdLink[0].attribs.href.replace(TERM_ID_REGEX, "$<termId>"));

  /* get the term's thumbs up and down count */
  const thumbsResponse = await fetch("https://api.urbandictionary.com/v0/uncacheable?ids=" + id);
  const thumbsPayload = await thumbsResponse.json();
  const thumbs = {
    up: thumbsPayload.thumbs[0].up,
    down: thumbsPayload.thumbs[0].down,
  };

  /* get the term's url; built from the decoded text (not the HTML) of the
     term and percent-encoded so multi-word or punctuated terms stay valid */
  const url = "https://www.urbandictionary.com/define.php?term="
    + encodeURIComponent(termElement.text())
    + "&defid=" + id;

  return {
    found: true,
    term,
    id,
    url,
    description,
    example,
    createdAt,
    author: { name, url: authorUrl },
    thumbs,
  };
}

/**
 * Get a term with its description and the example from the urban dictionary.
 *
 * When `query` is an array, the terms are fetched one after another and the
 * results are returned in the same order.
 *
 * @param {string|string[]} query - The term name, or a list of term names
 * @param {object} [options]
 * @param {boolean} [options.formatMarkdown=false] - Whether to format the term
 *   description and the example for markdown
 * @returns {Promise<object|object[]>} One result, or one result per array entry
 * @throws {Error} When `query` is empty or `options.formatMarkdown` is not a boolean
 * @example await getTerm("urban");
 */
async function getTerm(query, options = { formatMarkdown: false }) {
  if (!query || query.length === 0) {
    throw new Error("Provide a term to search for");
  }
  const formatMarkdown = resolveFormatMarkdown(options);

  if (Array.isArray(query)) {
    const responses = [];
    for (const searchQuery of query) {
      const termData = await get(searchQuery);
      responses.push(format(termData, formatMarkdown));
    }
    return responses;
  }

  const termData = await get(query);
  return format(termData, formatMarkdown);
}

/**
 * Get a random term with its description and the example from the urban dictionary.
 *
 * @param {object} [options]
 * @param {boolean} [options.formatMarkdown=false] - Whether to format the term
 *   description and the example for markdown
 * @returns {Promise<object>} The scraped term data
 * @throws {Error} When `options.formatMarkdown` is not a boolean
 * @example await getRandom();
 */
async function getRandom(options = { formatMarkdown: false }) {
  const formatMarkdown = resolveFormatMarkdown(options);
  const termData = await get("", true);
  return format(termData, formatMarkdown);
}

exports.getRandom = getRandom;
exports.getTerm = getTerm;