UNPKG

imdb-listimporter

Version:
254 lines (253 loc) 10.2 kB
// ---------------------------------------------------------------------------
// TypeScript down-level async helpers (emitted by the compiler, kept verbatim).
// __awaiter drives a generator-like object and adapts it to a Promise;
// __generator implements the generator as a label-based state machine.
// Opcodes returned by the state-machine bodies below:
//   [2, v] return v   [3, n] jump to label n   [4, p] await p
// ---------------------------------------------------------------------------
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
var __generator = (this && this.__generator) || function (thisArg, body) {
    var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
    return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
    function verb(n) { return function (v) { return step([n, v]); }; }
    function step(op) {
        if (f) throw new TypeError("Generator is already executing.");
        while (_) try {
            if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
            if (y = 0, t) op = [op[0] & 2, t.value];
            switch (op[0]) {
                case 0: case 1: t = op; break;
                case 4: _.label++; return { value: op[1], done: false };
                case 5: _.label++; y = op[1]; op = [0]; continue;
                case 7: op = _.ops.pop(); _.trys.pop(); continue;
                default:
                    if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
                    if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
                    if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
                    if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
                    if (t[2]) _.ops.pop();
                    _.trys.pop(); continue;
            }
            op = body.call(thisArg, _);
        } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
        if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
    }
};
import axios from "axios";
import { DOMParser } from '@xmldom/xmldom';

/**
 * Accepted input shapes, tried in order by validateUrl. Each entry carries:
 *  - listType:  "list" or "watchlist"
 *  - exp:       anchored regular expression the whole input must match
 *  - converter: turns the matched input into the URL that will be fetched
 *
 * Dots in the host name are escaped so that only "imdb.com" / "www.imdb.com"
 * match (an unescaped "." would also accept e.g. "wwwXimdb.com"). The
 * expressions carry no "g" flag: they are only used for a yes/no match and a
 * global regex keeps state in lastIndex.
 */
var patterns = [
    {
        listType: "list",
        exp: /^(https:\/\/)?(www\.)?imdb\.com\/list\/ls\d+\/?$/,
        converter: function (input) { return prependHttpsAndWww(input); }
    },
    {
        listType: "watchlist",
        // The optional suffix is "/", "/watchlist" or "/watchlist/"; the slash
        // before "watchlist" is mandatory so "ur123watchlist" is rejected.
        exp: /^(https:\/\/)?(www\.)?imdb\.com\/user\/ur\d+(\/(watchlist\/?)?)?$/,
        converter: function (input) { return prependHttpsAndWww(makeWatchlistFetchingUrl(input)); }
    },
    {
        listType: "watchlist",
        exp: /^ur\d+$/,
        converter: function (input) { return prependHttpsAndWww(makeWatchlistFetchingUrl(input)); }
    },
    {
        listType: "list",
        exp: /^ls\d+$/,
        converter: function (input) { return "https://www.imdb.com/list/" + input; }
    },
];

/**
 * Check if the input URL matches one of the accepted patterns.
 *
 * @param url the value to be tested; anything that is not a string never matches
 * @returns `{ matched: false }` when no pattern matches, otherwise
 *          `{ matched: true, listType, url }` where `url` is the output of the
 *          first matching pattern's converter
 */
var validateUrl = function (url) {
    if (typeof url !== "string") {
        return { matched: false };
    }
    for (var i = 0; i < patterns.length; i++) {
        var pattern = patterns[i];
        if (pattern.exp.test(url)) {
            return {
                matched: true,
                listType: pattern.listType,
                url: pattern.converter(url)
            };
        }
    }
    return { matched: false };
};

/**
 * If a URL in the form of user/<id>/watchlist is given, gets the actual
 * watchlist list ID by fetching the page and reading it out of the markup.
 *
 * @param url the watchlist page URL to fetch
 * @returns a Promise resolving to the extracted value when it contains an
 *          "ls<digits>" ID. Rejects with a string otherwise: the reason
 *          reported by extractListId, "Could not extract a valid listId",
 *          "404 error" for an axios error with HTTP status 404, or
 *          "Something went wrong: <message>" for any other caught error.
 */
var getListLinkFromWatchlist = function (url) { return __awaiter(void 0, void 0, void 0, function () {
    var response, extractionResults, error_1, err;
    var _a;
    return __generator(this, function (_b) {
        switch (_b.label) {
            case 0:
                // try region: labels 0-1, catch handler at label 2, end at label 3
                _b.trys.push([0, 2, , 3]);
                return [4 /*yield*/, axios.get(url)];
            case 1:
                response = _b.sent();
                extractionResults = extractListId(response.data);
                if (!extractionResults[0]) {
                    return [2 /*return*/, Promise.reject(extractionResults[1])];
                }
                if (extractionResults[1].match(/ls\d+\b/)) {
                    return [2 /*return*/, extractionResults[1]];
                }
                return [2 /*return*/, Promise.reject("Could not extract a valid listId")];
            case 2:
                error_1 = _b.sent();
                err = error_1;
                if (axios.isAxiosError(err) && ((_a = err.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
                    return [2 /*return*/, Promise.reject("404 error")];
                }
                return [2 /*return*/, Promise.reject("Something went wrong: " + err.message)];
            case 3: return [2 /*return*/];
        }
    });
}); };

/**
 * Pull the list ID out of a fetched watchlist page.
 *
 * Looks for the first <meta> element whose first attribute has the value
 * "pageId" and returns the text of its second attribute. <meta> elements with
 * fewer than two attributes are skipped instead of throwing.
 *
 * @param data the page markup as a string
 * @returns a two-element array: `[true, <id>]` on success,
 *          `[false, "watchlist is private"]` when the page contains an element
 *          with id "unavailable", or `[false, "could not extract listID"]`
 *          when no suitable <meta> element is found
 */
var extractListId = function (data) {
    var doc = new DOMParser().parseFromString(data, 'text/html');
    if (!checkIfPublic(doc)) {
        return [false, "watchlist is private"];
    }
    var metas = doc.getElementsByTagName('meta');
    for (var i = 0; i < metas.length; i++) {
        var attrs = metas[i].attributes;
        var first = attrs.item(0);
        var second = attrs.item(1);
        // item() yields null past the end of the attribute list
        if (first && second && first.nodeValue === "pageId") {
            return [true, second.textContent];
        }
    }
    return [false, "could not extract listID"];
};

/**
 * @param doc a parsed document
 * @returns false when the document contains an element with id "unavailable", true otherwise
 */
var checkIfPublic = function (doc) {
    return doc.getElementById("unavailable") ? false : true;
};

/**
 * Generate a URL to be used for fetching the list ID
 *
 * @param input either a user ID, a URL leading to the user profile, or to the <profile>/watchlist route
 * @returns a URL that leads to the <profile>/watchlist route. A bare user ID
 *          becomes a full https://www.imdb.com URL; a profile URL gets
 *          "watchlist" appended; a URL already ending in "watchlist" or
 *          "watchlist/" is returned unchanged.
 */
var makeWatchlistFetchingUrl = function (input) {
    if (input.match(/^ur\d+$/)) {
        return "https://www.imdb.com/user/" + input + "/watchlist";
    }
    if (input.endsWith("watchlist") || input.endsWith("watchlist/")) {
        return input;
    }
    var base = input.endsWith("/") ? input : input + "/";
    return prependHttpsAndWww(base + "watchlist");
};

/**
 * Normalises the start of an IMDb URL to 'https://www.'
 *
 * Handles inputs starting with "imdb.com", "www." or "https://imdb"; any other
 * input is returned unchanged.
 *
 * @param url a URL that may be missing https and/or www at the beginning
 * @returns the URL with the missing prefix parts added
 */
var prependHttpsAndWww = function (url) {
    if (url.startsWith("imdb.com")) {
        return "https://www." + url;
    }
    if (url.startsWith("www.")) {
        return "https://" + url;
    }
    if (url.startsWith("https://imdb")) {
        return url.replace("https://imdb", "https://www.imdb");
    }
    return url;
};

/**
 * Creates an export URL out of the input URL
 *
 * @param url the input URL
 * @returns the input with "export" appended as the last path segment
 */
var makeUrl = function (url) {
    return url.endsWith("/") ? url + "export" : url + "/export";
};

/**
 * Fetch the export of an IMDb list or watchlist.
 *
 * The input is validated against the accepted patterns; for watchlists the
 * list ID is first resolved via getListLinkFromWatchlist. The export URL is
 * then requested with axios.
 *
 * @param url a list/watchlist URL, or a bare "ls<digits>" / "ur<digits>" ID
 * @returns a Promise resolving to the response body of the export request.
 *          Rejects with "Invalid URL" when the input matches no pattern,
 *          "list is private" for an axios error with HTTP status 403,
 *          "got error code <status> while trying to fetch list" for any other
 *          axios error, and with the caught value itself otherwise (this
 *          includes the string rejections of getListLinkFromWatchlist).
 *          Rejection values are plain strings, not Error objects; callers may
 *          depend on that, so it is kept.
 */
export var makeRequest = function (url) { return __awaiter(void 0, void 0, void 0, function () {
    var validatedUrl, listId, madeUrl, resp, error_2, err, status;
    var _a;
    return __generator(this, function (_c) {
        switch (_c.label) {
            case 0:
                // try region: labels 0-3, catch handler at label 4, end at label 5
                _c.trys.push([0, 4, , 5]);
                validatedUrl = validateUrl(url);
                if (!validatedUrl.matched) {
                    return [2 /*return*/, Promise.reject("Invalid URL")];
                }
                if (!(validatedUrl.listType === "watchlist")) return [3 /*break*/, 2];
                return [4 /*yield*/, getListLinkFromWatchlist(validatedUrl.url)];
            case 1:
                listId = _c.sent();
                validatedUrl.url = "https://www.imdb.com/list/" + listId;
                _c.label = 2;
            case 2:
                madeUrl = makeUrl(validatedUrl.url);
                return [4 /*yield*/, axios.get(madeUrl)];
            case 3:
                resp = _c.sent();
                return [2 /*return*/, resp.data];
            case 4:
                error_2 = _c.sent();
                err = error_2;
                if (axios.isAxiosError(err)) {
                    // status stays undefined when the error carries no response
                    status = (_a = err.response) === null || _a === void 0 ? void 0 : _a.status;
                    if (status === 403) {
                        return [2 /*return*/, Promise.reject("list is private")];
                    }
                    return [2 /*return*/, Promise.reject("got error code " + status + " while trying to fetch list")];
                }
                return [2 /*return*/, Promise.reject(error_2)];
            case 5: return [2 /*return*/];
        }
    });
}); };
export default makeRequest;
/**
 * exports for testing purposes, not meant to be used externally. Ugly hack but alas...
 */
export var exportsForTests = {
    validateUrl: validateUrl,
    getListLinkFromWatchlist: getListLinkFromWatchlist,
    makeUrl: makeUrl,
    makeWatchlistFetchingUrl: makeWatchlistFetchingUrl,
    extractListId: extractListId
};