urban-scraper
Version:
A package for scraping terms and their definitions from the [Urban Dictionary](https://www.urbandictionary.com/).
728 lines (717 loc) • 25.8 kB
JavaScript
;
Object.defineProperty(exports, '__esModule', { value: true });
var fetch = require('node-fetch');
var cheerio = require('cheerio');
/**
 * Build a namespace-style object for a CommonJS module.
 *
 * Every own enumerable key of `e` except `default` is re-exposed on a
 * prototype-less object: existing accessor properties are copied as-is,
 * plain values are exposed through an enumerable getter so later changes on
 * `e` stay visible. The module itself is then stored under `default`.
 *
 * @param {*} e - The required module (may be falsy, in which case only `default` is set).
 * @returns {Object} The namespace object.
 */
function _interopNamespaceDefault(e) {
  var namespace = Object.create(null);

  function reExport(key) {
    if (key === 'default') {
      return;
    }
    var descriptor = Object.getOwnPropertyDescriptor(e, key);
    if (!descriptor.get) {
      // Plain value: expose it through a getter so the binding stays live.
      descriptor = {
        enumerable: true,
        get: function () { return e[key]; }
      };
    }
    Object.defineProperty(namespace, key, descriptor);
  }

  if (e) {
    Object.keys(e).forEach(reExport);
  }
  namespace.default = e;
  return namespace;
}
// Namespace-shaped wrapper around the required cheerio module; `_get` below calls `cheerio__namespace.load(...)` on it.
var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
/**
 * Copy the first `a` indexed elements of an array-like into a new array.
 *
 * @param {ArrayLike<*>} r - Source with a `length` and indexed elements.
 * @param {number} [a] - Number of elements to copy; when null/undefined or
 *   larger than `r.length`, the whole source is copied.
 * @returns {Array<*>} The new array.
 */
function _arrayLikeToArray(r, a) {
  var count = a;
  if (null == count || count > r.length) {
    count = r.length;
  }
  var index = 0;
  var copy = Array(count);
  while (index < count) {
    copy[index] = r[index];
    index++;
  }
  return copy;
}
/**
 * Advance a generator by one step on behalf of `_asyncToGenerator`.
 *
 * @param {Object} n - The generator object being driven.
 * @param {Function} t - Called with the final value when the generator is done.
 * @param {Function} e - Called with the error if the generator method throws.
 * @param {Function} r - Fulfilment continuation for a yielded value.
 * @param {Function} o - Rejection continuation for a yielded value.
 * @param {string} a - Name of the generator method to call (e.g. "next" or "throw").
 * @param {*} c - Argument passed to that method.
 * @returns {undefined}
 */
function asyncGeneratorStep(n, t, e, r, o, a, c) {
  var step;
  var yielded;
  try {
    step = n[a](c);
    yielded = step.value;
  } catch (error) {
    e(error);
    return;
  }
  if (step.done) {
    t(yielded);
    return;
  }
  // Not finished: wait for the yielded value, then hand it to the continuations.
  Promise.resolve(yielded).then(r, o);
}
/**
 * Turn a generator function into a promise-returning function.
 *
 * The returned function forwards its `this` and arguments to `n`, then drives
 * the resulting generator with `asyncGeneratorStep` until it completes. The
 * promise settles with the generator's return value or with the first error
 * that escapes it.
 *
 * @param {Function} n - Function returning a generator object.
 * @returns {Function} Wrapper that returns a Promise.
 */
function _asyncToGenerator(n) {
  return function () {
    var self = this;
    var args = arguments;
    return new Promise(function (resolve, reject) {
      var generator = n.apply(self, args);
      function advance(method, payload) {
        asyncGeneratorStep(generator, resolve, reject, _next, _throw, method, payload);
      }
      function _next(value) {
        advance("next", value);
      }
      function _throw(error) {
        advance("throw", error);
      }
      _next(void 0);
    });
  };
}
/**
 * Produce a `next()` function for a transpiled `for...of` loop.
 *
 * If `r` exposes an iterator (via `Symbol.iterator` or the "@@iterator"
 * fallback key) its bound `next` method is returned. Otherwise arrays,
 * values convertible by `_unsupportedIterableToArray`, and (when `e` is
 * truthy) objects with a numeric `length` are walked by index.
 *
 * @param {*} r - The value to iterate.
 * @param {boolean} [e] - Allow plain array-likes (objects with a numeric `length`).
 * @returns {Function} A function returning `{ done, value }` records.
 * @throws {TypeError} If `r` cannot be iterated by any of the above means.
 */
function _createForOfIteratorHelperLoose(r, e) {
  var getIterator = "undefined" != typeof Symbol && r[Symbol.iterator] || r["@@iterator"];
  if (getIterator) {
    var iterator = getIterator.call(r);
    return iterator.next.bind(iterator);
  }

  var items = r;
  var indexable = Array.isArray(r);
  if (!indexable) {
    var converted = _unsupportedIterableToArray(r);
    if (converted) {
      items = converted;
      indexable = true;
    } else {
      indexable = Boolean(e && r && "number" == typeof r.length);
    }
  }
  if (!indexable) {
    throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
  }

  var position = 0;
  return function () {
    if (position >= items.length) {
      return {
        done: true
      };
    }
    var current = items[position++];
    return {
      done: false,
      value: current
    };
  };
}
/**
 * Shallow-merge source objects into the first argument and return it.
 *
 * On the first call the function rebinds itself to `Object.assign` when that
 * exists, or to a manual own-property copy loop otherwise, then delegates to
 * the chosen implementation.
 *
 * @param {Object} target - Object receiving the properties (first argument).
 * @param {...Object} sources - Objects whose own enumerable properties are copied.
 * @returns {Object} The target object.
 */
function _extends() {
  if (Object.assign) {
    _extends = Object.assign.bind();
  } else {
    _extends = function (target) {
      for (var position = 1; position < arguments.length; position++) {
        var source = arguments[position];
        for (var key in source) {
          if ({}.hasOwnProperty.call(source, key)) {
            target[key] = source[key];
          }
        }
      }
      return target;
    };
  }
  return _extends.apply(null, arguments);
}
/**
 * Wire up classical inheritance between two constructor functions.
 *
 * Replaces `t.prototype` with an object that inherits from `e.prototype`
 * (or from nothing when `e` is null) and carries a non-enumerable
 * `constructor` pointing back at `t`, makes the `prototype` property
 * non-writable, and links `t` to `e` so static members are inherited.
 *
 * @param {Function} t - The subclass constructor.
 * @param {Function|null} e - The superclass constructor, or null.
 * @throws {TypeError} If `e` is neither a function nor null.
 */
function _inherits(t, e) {
  var superIsFunction = "function" == typeof e;
  if (!superIsFunction && null !== e) {
    throw new TypeError("Super expression must either be null or a function");
  }
  var parentPrototype = e && e.prototype;
  t.prototype = Object.create(parentPrototype, {
    constructor: {
      value: t,
      writable: true,
      configurable: true
    }
  });
  Object.defineProperty(t, "prototype", {
    writable: false
  });
  if (e) {
    _setPrototypeOf(t, e);
  }
}
/**
 * Generator runtime used by the transpiled async functions in this file
 * (appears to be the regenerator runtime that Babel inlines — confirm against
 * the build configuration).
 *
 * The first call builds the runtime object `e` (exposing `wrap`, `mark`,
 * `awrap`, `async`, `AsyncIterator`, `isGeneratorFunction`, `keys`, `values`)
 * and replaces `_regeneratorRuntime` with a function that simply returns that
 * same object, so later calls reuse it.
 *
 * @returns {Object} The runtime object.
 */
function _regeneratorRuntime() {
// Self-replacement: subsequent calls return the already-built runtime `e`.
_regeneratorRuntime = function () {
return e;
};
// `t` is never assigned in this scope, so it stands in for `undefined` wherever
// the inner functions reference it without shadowing it.
var t,
e = {},
r = Object.prototype,
n = r.hasOwnProperty,
o = Object.defineProperty || function (t, e, r) {
t[e] = r.value;
},
i = "function" == typeof Symbol ? Symbol : {},
a = i.iterator || "@@iterator",
c = i.asyncIterator || "@@asyncIterator",
u = i.toStringTag || "@@toStringTag";
// Define an enumerable/configurable/writable data property and return its value.
function define(t, e, r) {
return Object.defineProperty(t, e, {
value: r,
enumerable: !0,
configurable: !0,
writable: !0
}), t[e];
}
// Probe `define`; if it throws, fall back to plain assignment.
try {
define({}, "");
} catch (t) {
define = function (t, e, r) {
return t[e] = r;
};
}
// Create a generator object for inner function `t`: its prototype comes from
// outer function `e` when that is a Generator subclass, and its `_invoke`
// drives `t` with receiver `r` and a fresh Context built from try-locations `n`.
function wrap(t, e, r, n) {
var i = e && e.prototype instanceof Generator ? e : Generator,
a = Object.create(i.prototype),
c = new Context(n || []);
return o(a, "_invoke", {
value: makeInvokeMethod(t, r, c)
}), a;
}
// Call `t` with `this = e` and argument `r`, capturing the outcome as a
// `{ type: "normal" | "throw", arg }` record instead of letting it throw.
function tryCatch(t, e, r) {
try {
return {
type: "normal",
arg: t.call(e, r)
};
} catch (t) {
return {
type: "throw",
arg: t
};
}
}
e.wrap = wrap;
// Generator states used by makeInvokeMethod; `y` is a unique sentinel object
// that context methods return to tell the invoke loop to keep going.
var h = "suspendedStart",
l = "suspendedYield",
f = "executing",
s = "completed",
y = {};
function Generator() {}
function GeneratorFunction() {}
function GeneratorFunctionPrototype() {}
// `p` is the base iterator prototype: a local object whose iterator method
// returns `this`, replaced by the prototype obtained from `values([])` below
// when that object is usable.
var p = {};
define(p, a, function () {
return this;
});
var d = Object.getPrototypeOf,
v = d && d(d(values([])));
v && v !== r && n.call(v, a) && (p = v);
// `g` is the shared prototype of all generator objects created by `wrap`.
var g = GeneratorFunctionPrototype.prototype = Generator.prototype = Object.create(p);
// Give `t` next/throw/return methods that forward to `this._invoke`.
function defineIteratorMethods(t) {
["next", "throw", "return"].forEach(function (e) {
define(t, e, function (t) {
return this._invoke(e, t);
});
});
}
// Wraps generator `t` so that each `_invoke` call returns a promise built
// with promise constructor `e`. Values wrapped via `awrap` (`__await`) are
// awaited and fed back into the generator; other yielded values are resolved
// and delivered as iterator results.
function AsyncIterator(t, e) {
function invoke(r, o, i, a) {
var c = tryCatch(t[r], t, o);
if ("throw" !== c.type) {
var u = c.arg,
h = u.value;
return h && "object" == typeof h && n.call(h, "__await") ? e.resolve(h.__await).then(function (t) {
invoke("next", t, i, a);
}, function (t) {
invoke("throw", t, i, a);
}) : e.resolve(h).then(function (t) {
u.value = t, i(u);
}, function (t) {
return invoke("throw", t, i, a);
});
}
a(c.arg);
}
// `r` holds the promise of the previous invocation so calls are chained in order.
var r;
o(this, "_invoke", {
value: function (t, n) {
function callInvokeWithMethodAndArg() {
return new e(function (e, r) {
invoke(t, n, e, r);
});
}
return r = r ? r.then(callInvokeWithMethodAndArg, callInvokeWithMethodAndArg) : callInvokeWithMethodAndArg();
}
});
}
// Build the `_invoke(method, arg)` function for a generator: a loop that
// tracks the generator state in `o`, applies the requested method to context
// `n`, runs inner function `e` with receiver `r`, and returns `{ value, done }`.
function makeInvokeMethod(e, r, n) {
var o = h;
return function (i, a) {
if (o === f) throw Error("Generator is already running");
if (o === s) {
if ("throw" === i) throw a;
return {
value: t,
done: !0
};
}
for (n.method = i, n.arg = a;;) {
var c = n.delegate;
if (c) {
var u = maybeInvokeDelegate(c, n);
if (u) {
if (u === y) continue;
return u;
}
}
if ("next" === n.method) n.sent = n._sent = n.arg;else if ("throw" === n.method) {
if (o === h) throw o = s, n.arg;
n.dispatchException(n.arg);
} else "return" === n.method && n.abrupt("return", n.arg);
o = f;
var p = tryCatch(e, r, n);
if ("normal" === p.type) {
if (o = n.done ? s : l, p.arg === y) continue;
return {
value: p.arg,
done: n.done
};
}
"throw" === p.type && (o = s, n.method = "throw", n.arg = p.arg);
}
};
}
// Forward the context's pending method to the delegate iterator set up by
// `delegateYield`. Returns the delegate's result while it is not done, or the
// sentinel `y` once delegation ends (normally or with an error).
function maybeInvokeDelegate(e, r) {
var n = r.method,
o = e.iterator[n];
if (o === t) return r.delegate = null, "throw" === n && e.iterator.return && (r.method = "return", r.arg = t, maybeInvokeDelegate(e, r), "throw" === r.method) || "return" !== n && (r.method = "throw", r.arg = new TypeError("The iterator does not provide a '" + n + "' method")), y;
var i = tryCatch(o, e.iterator, r.arg);
if ("throw" === i.type) return r.method = "throw", r.arg = i.arg, r.delegate = null, y;
var a = i.arg;
return a ? a.done ? (r[e.resultName] = a.value, r.next = e.nextLoc, "return" !== r.method && (r.method = "next", r.arg = t), r.delegate = null, y) : a : (r.method = "throw", r.arg = new TypeError("iterator result is not an object"), r.delegate = null, y);
}
// Convert a `[tryLoc, catchLoc, finallyLoc, afterLoc]` tuple into a try entry
// and append it to `this.tryEntries` (called with a Context as `this`).
function pushTryEntry(t) {
var e = {
tryLoc: t[0]
};
1 in t && (e.catchLoc = t[1]), 2 in t && (e.finallyLoc = t[2], e.afterLoc = t[3]), this.tryEntries.push(e);
}
// Reset a try entry's completion record to a "normal" completion with no arg.
function resetTryEntry(t) {
var e = t.completion || {};
e.type = "normal", delete e.arg, t.completion = e;
}
// Execution context handed to the inner state-machine function; starts with a
// "root" try entry, adds one entry per tuple in `t`, then resets itself.
function Context(t) {
this.tryEntries = [{
tryLoc: "root"
}], t.forEach(pushTryEntry, this), this.reset(!0);
}
// Return an iterator for `e`: its own iterator if it has one, `e` itself if it
// already has a `next` method, or an index-based iterator when it has a
// numeric `length`. Throws a TypeError otherwise.
function values(e) {
if (e || "" === e) {
var r = e[a];
if (r) return r.call(e);
if ("function" == typeof e.next) return e;
if (!isNaN(e.length)) {
var o = -1,
i = function next() {
for (; ++o < e.length;) if (n.call(e, o)) return next.value = e[o], next.done = !1, next;
return next.value = t, next.done = !0, next;
};
return i.next = i;
}
}
throw new TypeError(typeof e + " is not iterable");
}
// One long comma expression: wires up the constructor/prototype links,
// attaches the public helpers to `e`, installs Context.prototype, and finally
// evaluates to `e`, which is returned.
return GeneratorFunction.prototype = GeneratorFunctionPrototype, o(g, "constructor", {
value: GeneratorFunctionPrototype,
configurable: !0
}), o(GeneratorFunctionPrototype, "constructor", {
value: GeneratorFunction,
configurable: !0
}), GeneratorFunction.displayName = define(GeneratorFunctionPrototype, u, "GeneratorFunction"), e.isGeneratorFunction = function (t) {
var e = "function" == typeof t && t.constructor;
return !!e && (e === GeneratorFunction || "GeneratorFunction" === (e.displayName || e.name));
}, e.mark = function (t) {
return Object.setPrototypeOf ? Object.setPrototypeOf(t, GeneratorFunctionPrototype) : (t.__proto__ = GeneratorFunctionPrototype, define(t, u, "GeneratorFunction")), t.prototype = Object.create(g), t;
}, e.awrap = function (t) {
return {
__await: t
};
}, defineIteratorMethods(AsyncIterator.prototype), define(AsyncIterator.prototype, c, function () {
return this;
}), e.AsyncIterator = AsyncIterator, e.async = function (t, r, n, o, i) {
void 0 === i && (i = Promise);
var a = new AsyncIterator(wrap(t, r, n, o), i);
return e.isGeneratorFunction(r) ? a : a.next().then(function (t) {
return t.done ? t.value : a.next();
});
}, defineIteratorMethods(g), define(g, u, "Generator"), define(g, a, function () {
return this;
}), define(g, "toString", function () {
return "[object Generator]";
}), e.keys = function (t) {
var e = Object(t),
r = [];
for (var n in e) r.push(n);
return r.reverse(), function next() {
for (; r.length;) {
var t = r.pop();
if (t in e) return next.value = t, next.done = !1, next;
}
return next.done = !0, next;
};
}, e.values = values, Context.prototype = {
constructor: Context,
// Reset position, sent value and flags; unless `e` is truthy, also clear own
// properties whose names are "t" followed by a number.
reset: function (e) {
if (this.prev = 0, this.next = 0, this.sent = this._sent = t, this.done = !1, this.delegate = null, this.method = "next", this.arg = t, this.tryEntries.forEach(resetTryEntry), !e) for (var r in this) "t" === r.charAt(0) && n.call(this, r) && !isNaN(+r.slice(1)) && (this[r] = t);
},
// Mark the context done; rethrow if the root completion is a throw, else
// return the recorded return value.
stop: function () {
this.done = !0;
var t = this.tryEntries[0].completion;
if ("throw" === t.type) throw t.arg;
return this.rval;
},
// Route exception `e` to the innermost try entry covering `this.prev`:
// jump to its catch location (clearing the pending throw) or its finally
// location; at the root entry, jump to "end".
dispatchException: function (e) {
if (this.done) throw e;
var r = this;
function handle(n, o) {
return a.type = "throw", a.arg = e, r.next = n, o && (r.method = "next", r.arg = t), !!o;
}
for (var o = this.tryEntries.length - 1; o >= 0; --o) {
var i = this.tryEntries[o],
a = i.completion;
if ("root" === i.tryLoc) return handle("end");
if (i.tryLoc <= this.prev) {
var c = n.call(i, "catchLoc"),
u = n.call(i, "finallyLoc");
if (c && u) {
if (this.prev < i.catchLoc) return handle(i.catchLoc, !0);
if (this.prev < i.finallyLoc) return handle(i.finallyLoc);
} else if (c) {
if (this.prev < i.catchLoc) return handle(i.catchLoc, !0);
} else {
if (!u) throw Error("try statement without catch or finally");
if (this.prev < i.finallyLoc) return handle(i.finallyLoc);
}
}
}
},
// Record an abrupt completion of type `t` with argument `e`. If an enclosing
// try entry has a finally block not yet reached, jump there first; otherwise
// apply the completion immediately via `complete`.
abrupt: function (t, e) {
for (var r = this.tryEntries.length - 1; r >= 0; --r) {
var o = this.tryEntries[r];
if (o.tryLoc <= this.prev && n.call(o, "finallyLoc") && this.prev < o.finallyLoc) {
var i = o;
break;
}
}
i && ("break" === t || "continue" === t) && i.tryLoc <= e && e <= i.finallyLoc && (i = null);
var a = i ? i.completion : {};
return a.type = t, a.arg = e, i ? (this.method = "next", this.next = i.finallyLoc, y) : this.complete(a);
},
// Apply completion record `t`: rethrow throws, jump for break/continue,
// store the value and jump to "end" for return, or continue at `e` for a
// normal completion. Evaluates to the sentinel `y`.
complete: function (t, e) {
if ("throw" === t.type) throw t.arg;
return "break" === t.type || "continue" === t.type ? this.next = t.arg : "return" === t.type ? (this.rval = this.arg = t.arg, this.method = "return", this.next = "end") : "normal" === t.type && e && (this.next = e), y;
},
// Find the try entry whose finally location is `t`, apply its stored
// completion (continuing at its afterLoc) and reset the entry.
finish: function (t) {
for (var e = this.tryEntries.length - 1; e >= 0; --e) {
var r = this.tryEntries[e];
if (r.finallyLoc === t) return this.complete(r.completion, r.afterLoc), resetTryEntry(r), y;
}
},
// Return the thrown value recorded for the try entry starting at `t`
// (resetting that entry); throws if no such entry exists.
catch: function (t) {
for (var e = this.tryEntries.length - 1; e >= 0; --e) {
var r = this.tryEntries[e];
if (r.tryLoc === t) {
var n = r.completion;
if ("throw" === n.type) {
var o = n.arg;
resetTryEntry(r);
}
return o;
}
}
throw Error("illegal catch attempt");
},
// Start delegating to the iterator obtained from `e`; its final value will
// be stored under `r` and execution resumes at location `n`.
delegateYield: function (e, r, n) {
return this.delegate = {
iterator: values(e),
resultName: r,
nextLoc: n
}, "next" === this.method && (this.arg = t), y;
}
}, e;
}
/**
 * Set the prototype of `t` to `e` and return `t`.
 *
 * The first call rebinds the helper to `Object.setPrototypeOf` when that is
 * available, or to a `__proto__` assignment fallback otherwise, and then
 * delegates to the chosen implementation.
 *
 * @param {Object} t - Object whose prototype is changed.
 * @param {Object|null} e - The new prototype.
 * @returns {Object} `t`.
 */
function _setPrototypeOf(t, e) {
  if (Object.setPrototypeOf) {
    _setPrototypeOf = Object.setPrototypeOf.bind();
  } else {
    _setPrototypeOf = function (target, proto) {
      target.__proto__ = proto;
      return target;
    };
  }
  return _setPrototypeOf(t, e);
}
/**
 * Convert a handful of known collection types to an array.
 *
 * Strings, `arguments` objects and Int/Uint typed arrays go through
 * `_arrayLikeToArray` (honouring the length limit `a`); Maps and Sets go
 * through `Array.from`. The type is taken from the `Object.prototype.toString`
 * tag, or from `constructor.name` for plain "Object"-tagged values.
 *
 * @param {*} r - Value to convert.
 * @param {number} [a] - Optional maximum number of elements for array-likes.
 * @returns {Array<*>|undefined} The array, or undefined for falsy or unrecognised input.
 */
function _unsupportedIterableToArray(r, a) {
  if (!r) {
    return;
  }
  if ("string" == typeof r) {
    return _arrayLikeToArray(r, a);
  }
  var tag = {}.toString.call(r).slice(8, -1);
  if ("Object" === tag && r.constructor) {
    tag = r.constructor.name;
  }
  if ("Map" === tag || "Set" === tag) {
    return Array.from(r);
  }
  if ("Arguments" === tag || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(tag)) {
    return _arrayLikeToArray(r, a);
  }
  return void 0;
}
/**
 * Named-capture-group shim for regular expressions.
 *
 * The first call defines `BabelRegExp`, a RegExp subclass that remembers a
 * map of group names to capture indexes (kept in a WeakMap keyed by the
 * regex), and replaces `_wrapRegExp` with a factory
 * `(regex, groups) => new BabelRegExp(regex, undefined, groups)`. The
 * original call is then forwarded to that factory.
 *
 * @param {RegExp} e - The regular expression to wrap (factory argument).
 * @param {Object} r - Map of group name to capture index or array of indexes (factory argument).
 * @returns {RegExp} A BabelRegExp instance.
 */
function _wrapRegExp() {
_wrapRegExp = function (e, r) {
return new BabelRegExp(e, void 0, r);
};
// `e` is the native RegExp prototype (used to call the original methods);
// `r` maps each wrapped regex to its group-name table.
var e = RegExp.prototype,
r = new WeakMap();
// Build a native RegExp, record its group table (or inherit the table of the
// regex it was built from), then re-prototype it as a BabelRegExp.
function BabelRegExp(e, t, p) {
var o = RegExp(e, t);
return r.set(o, p || r.get(e)), _setPrototypeOf(o, BabelRegExp.prototype);
}
// Build a prototype-less `groups` object from match array `e` using the
// group table of regex `t`. For a name mapped to several indexes, the first
// index with a defined capture is used (falling back to the last one).
function buildGroups(e, t) {
var p = r.get(t);
return Object.keys(p).reduce(function (r, t) {
var o = p[t];
if ("number" == typeof o) r[t] = e[o];else {
for (var i = 0; void 0 === e[o[i]] && i + 1 < o.length;) i++;
r[t] = e[o[i]];
}
return r;
}, Object.create(null));
}
// exec: run the native exec, then attach `groups` (and `indices.groups` when
// match indices are present).
// Symbol.replace: for string replacements, rewrite `$<name>` references to
// numbered `$n` references; for function replacements, append a groups object
// unless the last argument is already an object.
return _inherits(BabelRegExp, RegExp), BabelRegExp.prototype.exec = function (r) {
var t = e.exec.call(this, r);
if (t) {
t.groups = buildGroups(t, this);
var p = t.indices;
p && (p.groups = buildGroups(p, this));
}
return t;
}, BabelRegExp.prototype[Symbol.replace] = function (t, p) {
if ("string" == typeof p) {
var o = r.get(this);
return e[Symbol.replace].call(this, t, p.replace(/\$<([^>]+)>/g, function (e, r) {
var t = o[r];
return "$" + (Array.isArray(t) ? t.join("$") : t);
}));
}
if ("function" == typeof p) {
var i = this;
return e[Symbol.replace].call(this, t, function () {
var e = arguments;
return "object" != typeof e[e.length - 1] && (e = [].slice.call(e)).push(buildGroups(e, i)), p.apply(this, e);
});
}
return e[Symbol.replace].call(this, t, p);
}, _wrapRegExp.apply(this, arguments);
}
// CSS selectors that `_get` runs against the fetched page. They are tied to
// the exact markup/class names of the site, so a layout change will break them.
// Element whose HTML starts with "Sorry, we couldn't find:" when a term is missing.
var NOT_FOUND_SELECTOR = "#ud-root > div > main > div > div > section > div > div.font-bold.text-2xl.my-8";
// First child <div> of the results section; every selector below is relative to it.
var TERM_WRAPPER_SELECTOR = "#ud-root > div > main > div > div > section > div:nth-child(1)";
var TERM_SELECTOR = TERM_WRAPPER_SELECTOR + " > div > div > h1 > a";
var DESCRIPTION_SELECTOR = TERM_WRAPPER_SELECTOR + " > div > div.break-words.meaning.mb-4";
var EXAMPLE_SELECTOR = TERM_WRAPPER_SELECTOR + " > div > div.break-words.example.italic.mb-4";
var AUTHOR_SELECTOR = TERM_WRAPPER_SELECTOR + " > div > div.contributor.font-bold > a";
// Same contributor element as AUTHOR_SELECTOR's parent; `_get` reads the text node after its first child as the date.
var DATE_SELECTOR = TERM_WRAPPER_SELECTOR + " > div > div.contributor.font-bold";
// Anchor whose href carries the `defid=` value parsed by TERM_ID_REGEX.
var TERM_ID_SELECTOR = TERM_WRAPPER_SELECTOR + " > a";
// Matches the opening tag plus link text of an "autolink" anchor; group `url`
// is the href and group `text` is the visible link text.
var LINK_START_REGEX = /*#__PURE__*/_wrapRegExp(/<a class="autolink" href="(\/define\.php\?term=[\w%\/.-]+)">([\w. '-]+)/gm, {
url: 1,
text: 2
});
// Replacement template (a string, despite the name) that turns a matched link into a Markdown link.
var LINK_STRING_REGEX = "[$<text>](https://www.urbandictionary.com$<url>)";
// Matches closing anchor tags and literal asterisks; `format` removes both.
var LINK_END_REGEX = /<\/a>|\*/gm;
// Matches `<br>` tags; `format` replaces them with newlines.
var NEW_LINE_REGEX = /<br>/gm;
// Captures the digits following `defid=` as group `termId`.
var TERM_ID_REGEX = /*#__PURE__*/_wrapRegExp(/[\w:\/.?]+defid=([\d]+)/, {
termId: 1
});
/**
 * Clean up the scraped HTML of a term's description and example.
 *
 * `<br>` tags become newlines, autolink anchors are reduced to their link text
 * (or to Markdown links when `formatMarkdown` is true), and closing anchor
 * tags and asterisks are stripped.
 *
 * Each field is formatted independently: an entry with a description but an
 * empty or missing example still gets its description cleaned. Previously
 * such entries were returned with the raw HTML description.
 *
 * @param {Object} termData - Term data as returned by `get`.
 * @param {boolean} [formatMarkdown=false] - Emit Markdown links instead of plain link text.
 * @returns {Object} `termData` itself when the term was not found or has no
 *   string fields to format; otherwise a shallow copy with the formatted fields.
 */
function format(termData, formatMarkdown) {
  if (formatMarkdown === void 0) {
    formatMarkdown = false;
  }
  if (!termData.found) {
    return termData;
  }
  var linkReplacement = formatMarkdown ? LINK_STRING_REGEX : "$<text>";
  function clean(html) {
    return html.replace(NEW_LINE_REGEX, "\n").replace(LINK_START_REGEX, linkReplacement).replace(LINK_END_REGEX, "");
  }
  var formatted = {};
  var changed = false;
  // Only string fields are touched; null/undefined (element not on the page) pass through unchanged.
  if (typeof termData.description === "string") {
    formatted.description = clean(termData.description);
    changed = true;
  }
  if (typeof termData.example === "string") {
    formatted.example = clean(termData.example);
    changed = true;
  }
  return changed ? _extends({}, termData, formatted) : termData;
}
/**
 * Fetch and scrape one definition page; shared by `getTerm` and `getRandom`.
 *
 * The search term is percent-encoded before it is placed in the query string,
 * so terms containing characters such as `&`, `#`, `+` or spaces are looked up
 * literally. Pass the raw term, not a pre-encoded one.
 *
 * @param {string} str - The term to search for (ignored when `random` is true).
 * @param {boolean} [random=false] - Fetch `random.php` instead of a specific term.
 * @returns {Promise<Object>} `{ found: false, term: str }` when the page shows
 *   the "Sorry, we couldn't find:" notice; otherwise
 *   `{ found: true, term, id, url, description, example, createdAt, author: { name, url }, thumbs: { up, down } }`.
 *   The promise rejects if a request fails or if the page lacks the
 *   author/date/id elements, which are dereferenced without checks.
 */
function get(_x, _x2) {
  return _get.apply(this, arguments);
}
// Lazily builds the async implementation on first use, then replaces itself with it.
function _get() {
  _get = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime().mark(function _callee3(str, random) {
    var termUrl, html, $, notFound, term, description, example, author, name, authorUrl, date, dateString, parsedDate, createdAt, termId, id, thumbs, url;
    return _regeneratorRuntime().wrap(function _callee3$(_context3) {
      while (1) switch (_context3.prev = _context3.next) {
        case 0:
          if (random === void 0) {
            random = false;
          }
          // Encode the term so reserved characters cannot truncate or alter the query string.
          termUrl = "https://www.urbandictionary.com/" + (random ? "random.php" : "define.php?term=" + encodeURIComponent(str));
          _context3.next = 4;
          return fetch(termUrl, {
            redirect: "follow"
          }).then(function (response) {
            return response.text();
          });
        case 4:
          html = _context3.sent;
          $ = cheerio__namespace.load(html);
          /* check if the term wasn't found */
          notFound = $(NOT_FOUND_SELECTOR).html();
          if (!(notFound != null && notFound.startsWith("Sorry, we couldn't find:"))) {
            _context3.next = 9;
            break;
          }
          return _context3.abrupt("return", {
            found: false,
            term: str
          });
        case 9:
          /* get the term name */
          term = $(TERM_SELECTOR).html();
          /* get the term description */
          description = $(DESCRIPTION_SELECTOR).html();
          /* get the example of the term */
          example = $(EXAMPLE_SELECTOR).html();
          /* get the author of the term */
          author = $(AUTHOR_SELECTOR);
          name = author.html();
          authorUrl = "https://www.urbandictionary.com" + author[0].attribs.href;
          /* get when the term was created: the text node following the author link */
          date = $(DATE_SELECTOR);
          dateString = date.children()[0].next.data.trim();
          /* parse once, then re-interpret the local date/time components as UTC */
          parsedDate = new Date(dateString);
          createdAt = new Date(Date.UTC(parsedDate.getFullYear(), parsedDate.getMonth(), parsedDate.getDate(), parsedDate.getHours(), parsedDate.getMinutes(), parsedDate.getSeconds(), parsedDate.getMilliseconds()));
          /* get the term id */
          termId = $(TERM_ID_SELECTOR);
          id = Number(termId[0].attribs.href.replace(TERM_ID_REGEX, "$<termId>"));
          /* get the term's thumbs up and down count */
          _context3.next = 22;
          return fetch("https://api.urbandictionary.com/v0/uncacheable?ids=" + id).then(function (response) {
            return response.json();
          }).then(function (x) {
            return {
              up: x.thumbs[0].up,
              down: x.thumbs[0].down
            };
          });
        case 22:
          thumbs = _context3.sent;
          /* get the term's url */
          url = "https://www.urbandictionary.com/define.php?term=" + term + "&defid=" + id;
          return _context3.abrupt("return", {
            found: true,
            term: term,
            id: id,
            url: url,
            description: description,
            example: example,
            createdAt: createdAt,
            author: {
              name: name,
              url: authorUrl
            },
            thumbs: thumbs
          });
        case 25:
        case "end":
          return _context3.stop();
      }
    }, _callee3);
  }));
  return _get.apply(this, arguments);
}
/**
 * Look up one term, or several terms in order, on the Urban Dictionary.
 *
 * @param {string|string[]} query - The term name, or an array of term names
 *   (fetched one after another).
 * @param {Object} [options] - Defaults to `{ formatMarkdown: false }`.
 * @param {boolean} options.formatMarkdown - Whether to format the term description and the example for markdown
 * @returns {Promise<Object|Object[]>} The formatted term data, or an array of
 *   them when `query` is an array.
 * @throws {Error} (as a rejection) when `query` is empty or
 *   `options.formatMarkdown` is not a boolean.
 * @example await getTerm("urban");
 */
function getTerm(_x, _x2) {
  return _getTerm.apply(this, arguments);
}
// Lazily builds the async implementation on first use, then replaces itself with it.
function _getTerm() {
  _getTerm = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime().mark(function lookUpTerms(query, options) {
    var results, nextQuery, cursor;
    return _regeneratorRuntime().wrap(function lookUpTerms$(state) {
      for (;;) {
        switch (state.prev = state.next) {
          case 0:
            if (options === void 0) {
              options = {
                formatMarkdown: false
              };
            }
            if (!query || query.length === 0) {
              throw new Error("Provide a term to search for");
            }
            if (typeof options.formatMarkdown !== "boolean") {
              throw new Error("Format markdown option needs to be a boolean");
            }
            if (Array.isArray(query)) {
              results = [];
              nextQuery = _createForOfIteratorHelperLoose(query);
              state.next = 1;
              break;
            }
            /* single term */
            state.next = 3;
            return get(query);
          case 1:
            /* array of terms: fetch the next one, or finish when exhausted */
            cursor = nextQuery();
            if (cursor.done) {
              return state.abrupt("return", results);
            }
            state.next = 2;
            return get(cursor.value);
          case 2:
            results.push(format(state.sent, options.formatMarkdown));
            state.next = 1;
            break;
          case 3:
            return state.abrupt("return", format(state.sent, options.formatMarkdown));
          case "end":
            return state.stop();
        }
      }
    }, lookUpTerms);
  }));
  return _getTerm.apply(this, arguments);
}
/**
 * Get a random term with its description and the example from the urban dictionary
 * @param {boolean} options.formatMarkdown - Whether to format the term description and the example for markdown
 * @example await getRandom();
 */
// Public entry point for random lookups; forwards to the lazily-built implementation below.
function getRandom(_x3) {
  return _getRandom.apply(this, arguments);
}
// Implementation of getRandom: validates `options.formatMarkdown` (options
// default to `{ formatMarkdown: false }`), requests a random term through
// `get("", true)` and returns it after `format`. Rejects with an Error when
// `options.formatMarkdown` is not a boolean. Built on first use, after which
// `_getRandom` is replaced by the async function itself.
function _getRandom() {
  _getRandom = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime().mark(function pickRandom(options) {
    return _regeneratorRuntime().wrap(function pickRandom$(state) {
      for (;;) {
        switch (state.prev = state.next) {
          case 0:
            if (options === void 0) {
              options = {
                formatMarkdown: false
              };
            }
            if (typeof options.formatMarkdown !== "boolean") {
              throw new Error("Format markdown option needs to be a boolean");
            }
            state.next = 1;
            return get("", true);
          case 1:
            return state.abrupt("return", format(state.sent, options.formatMarkdown));
          case "end":
            return state.stop();
        }
      }
    }, pickRandom);
  }));
  return _getRandom.apply(this, arguments);
}
// Public API of this CommonJS bundle: the two async lookup functions.
exports.getRandom = getRandom;
exports.getTerm = getTerm;
//# sourceMappingURL=urban-scraper.cjs.development.js.map