imdb-listimporter
Version:
Import lists from IMDb.
254 lines (253 loc) • 10.2 kB
JavaScript
// Downlevel async helper (this looks like the standard TypeScript `__awaiter` emit - confirm against the compiler output).
// An already-present truthy `this.__awaiter` is reused; otherwise the function below is installed.
// It invokes `generator` with `thisArg`/`_arguments` and returns a promise, built with `P` (defaulting to the
// global Promise), that resolves with the generator's final value or rejects with any error thrown while stepping it.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wraps a yielded value in a `P` unless it already is an instance of `P`.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with the settled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Re-throw a rejection inside the generator so its own try/catch regions can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finished: resolve with the final value. Otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (replacing the `generator` parameter with the iterator) and take the first step.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Downlevel generator helper (this looks like the standard TypeScript `__generator` emit - confirm against the compiler output).
// `body` is a switch-based state machine that receives the state object `_` and returns an instruction `[opcode, value]`.
// Opcodes as used by the callers in this file: 2 = return, 3 = break (jump to label), 4 = yield.
// Opcode 6 is what the catch clause below produces for a thrown error; 5 and 7 are handled below but not emitted in this file
// (presumably delegate-yield and end-of-finally - TODO confirm).
var __generator = (this && this.__generator) || function (thisArg, body) {
// `_` is the machine state: `label` is the next case to run, `sent()` returns the value passed in on resume
// (or throws it when the resume was a throw), `trys` is the stack of active try regions, `ops` holds deferred instructions.
// f = "currently executing" flag, y = iterator being delegated to, t = scratch value, g = the iterator object returned.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
// Builds the next/throw/return methods: each forwards its argument to step() tagged with opcode n.
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
// Guard against re-entrant calls while the body is already running.
if (f) throw new TypeError("Generator is already executing.");
// Loop until the body yields, returns or throws; `_` is set to 0 to terminate the machine.
while (_) try {
// When delegating to iterator `y`, forward the operation to it first and hand back its result while it is not done.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
// next / throw from the consumer: stash the op so `_.sent()` can read it, then run the body.
case 0: case 1: t = op; break;
// yield: advance the label and hand the value to the consumer.
case 4: _.label++; return { value: op[1], done: false };
// start delegating to the iterator in op[1].
case 5: _.label++; y = op[1]; op = [0]; continue;
// resume the instruction that was deferred while a finally region ran.
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// `t` becomes the innermost try region, read below as [t[0], t[1], t[2], t[3]]
// (presumably [start, catch label, finally label, end label] - TODO confirm).
// A throw (6) or return (2) with no active try region terminates the machine.
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
// break (3) to a label inside the current region, or with no region active: just jump.
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
// throw (6) before the catch label: jump to the catch handler and expose the error through `_.sent()`.
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
// otherwise, if a finally label is still ahead: run it first and defer the current instruction.
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
op = body.call(thisArg, _);
// Errors thrown by the body become a throw instruction; the executing flag and scratch value are always reset.
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
// Machine terminated: rethrow an uncaught error, otherwise report completion with the return value (if any).
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
import axios from "axios";
import { DOMParser } from '@xmldom/xmldom';
/**
 * Accepted input shapes, tried in order by the URL validator.
 *
 * Each entry carries:
 *  - listType:  "list" or "watchlist"
 *  - exp:       anchored expression the raw input must match in full
 *  - converter: turns a matching input into the URL to work with
 *
 * The dots in the host name are escaped so that only "imdb.com" / "www.imdb.com"
 * match; an unescaped "." would also let hosts such as "www.imdbxcom" through.
 * The expressions carry no `g` flag: nothing here needs global matching, and a
 * global regex keeps `lastIndex` state between `test`/`exec` calls.
 */
var patterns = [
    {
        // Full or scheme-less list URL, e.g. "imdb.com/list/ls012345678/".
        listType: "list",
        exp: /^(https:\/\/)?(www\.)?imdb\.com\/list\/ls\d+\/?$/,
        converter: function (input) {
            return prependHttpsAndWww(input);
        }
    },
    {
        // User profile URL, optionally followed by "/" or "/watchlist" (with or
        // without a trailing slash). The slash before "watchlist" is required,
        // so "ur123watchlist" is rejected.
        listType: "watchlist",
        exp: /^(https:\/\/)?(www\.)?imdb\.com\/user\/ur\d+(\/(watchlist\/?)?)?$/,
        converter: function (input) {
            return prependHttpsAndWww(makeWatchlistFetchingUrl(input));
        }
    },
    {
        // Bare user ID, e.g. "ur12345678".
        listType: "watchlist",
        exp: /^ur\d+$/,
        converter: function (input) {
            return prependHttpsAndWww(makeWatchlistFetchingUrl(input));
        }
    },
    {
        // Bare list ID, e.g. "ls012345678".
        listType: "list",
        exp: /^ls\d+$/,
        converter: function (input) {
            return "https://www.imdb.com/list/" + input;
        }
    },
];
/**
 * Test the input against the accepted patterns, in declaration order.
 *
 * @param url the raw user input to be tested
 * @returns `{ matched: false }` when no pattern matches; otherwise
 * `{ matched: true, listType, url }` where `listType` comes from the first
 * matching pattern and `url` is that pattern's converter applied to the input
 */
var validateUrl = function (url) {
    for (var index = 0; index < patterns.length; index++) {
        var candidate = patterns[index];
        if (url.match(candidate.exp)) {
            // First hit wins; later patterns are not consulted.
            return {
                matched: true,
                listType: candidate.listType,
                url: candidate.converter(url)
            };
        }
    }
    return { matched: false };
};
/**
 * Fetches the page at `url` (a user/<id>/watchlist URL) and extracts the list ID from it.
 *
 * @param url the watchlist URL to fetch
 * @returns a Promise resolving to the extracted value when it contains an `ls<digits>` ID.
 * It rejects with a string otherwise: the message produced by extractListId, "Could not extract a valid listId",
 * "404 error", or "Something went wrong: <message>" for any other failure.
 */
var getListLinkFromWatchlist = function (url) { return __awaiter(void 0, void 0, void 0, function () {
var response, extractionResults, error_1, err;
var _a;
return __generator(this, function (_b) {
switch (_b.label) {
case 0:
// Open a try region: body starts at label 0, catch handler is label 2, no finally, region ends at label 3.
_b.trys.push([0, 2, , 3]);
return [4 /*yield*/, axios.get(url)];
case 1:
// Resumed with the HTTP response.
response = _b.sent();
// extractionResults is a [success flag, list ID or failure message] pair.
extractionResults = extractListId(response.data);
if (!extractionResults[0]) {
return [2 /*return*/, Promise.reject(extractionResults[1])];
}
else {
// Only checks that an ls<digits> token occurs somewhere; the whole extracted string is returned.
if (extractionResults[1].match(/ls\d+\b/)) {
return [2 /*return*/, extractionResults[1]];
}
else {
return [2 /*return*/, Promise.reject("Could not extract a valid listId")];
}
}
// Unreachable: every branch above returns.
return [3 /*break*/, 3];
case 2:
// Catch handler: _b.sent() yields the thrown error (failed request, or an exception thrown while extracting).
error_1 = _b.sent();
err = error_1;
if (axios.isAxiosError(err)) {
// Optional-chaining emit: read err.response.status only when err.response is neither null nor undefined.
if (((_a = err.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
return [2 /*return*/, Promise.reject("404 error")];
}
return [2 /*return*/, Promise.reject("Something went wrong: " + err.message)];
}
else {
return [2 /*return*/, Promise.reject("Something went wrong: " + err.message)];
}
// Unreachable: both branches above return.
return [3 /*break*/, 3];
case 3: return [2 /*return*/];
}
});
}); };
/**
 * Pull the list ID out of a fetched watchlist page.
 *
 * The page is parsed as HTML and its <meta> elements are scanned for the first
 * one whose first attribute has the value "pageId"; the text of that element's
 * second attribute is returned as the ID. (Which attribute names the page uses
 * is not visible here - the lookup is purely positional.)
 *
 * <meta> elements with fewer than two attributes are skipped instead of being
 * dereferenced, so sparse or attribute-less tags cannot raise a TypeError.
 *
 * @param data the HTML source of the watchlist page
 * @returns a two-element array: `[true, id]` on success, otherwise
 * `[false, "watchlist is private"]` or `[false, "could not extract listID"]`
 */
var extractListId = function (data) {
    var doc = new DOMParser().parseFromString(data, 'text/html');
    if (!checkIfPublic(doc)) {
        return [false, "watchlist is private"];
    }
    var metas = doc.getElementsByTagName('meta');
    for (var i = 0; i < metas.length; i++) {
        var attrs = metas[i].attributes;
        var marker = attrs.item(0);
        var payload = attrs.item(1);
        // item() yields null past the end of the attribute list.
        if (!marker || !payload) {
            continue;
        }
        if (marker.nodeValue === "pageId") {
            return [true, payload.textContent];
        }
    }
    return [false, "could not extract listID"];
};
/**
 * Report whether the parsed page is publicly viewable.
 *
 * @param doc the parsed document
 * @returns false when the document contains an element with id "unavailable", true otherwise
 */
var checkIfPublic = function (doc) {
    var unavailableMarker = doc.getElementById("unavailable");
    return !unavailableMarker;
};
/**
 * Generate the URL to be used for fetching the list ID.
 *
 * Every URL-shaped input is passed through prependHttpsAndWww, including one
 * that already ends in "watchlist" or "watchlist/", so the result is
 * normalised the same way on every path. The parameter is not reassigned.
 *
 * @param input either a bare user ID ("ur" followed by digits), a URL leading
 * to the user profile, or a URL leading to the <profile>/watchlist route
 * @returns the URL of the <profile>/watchlist route
 */
var makeWatchlistFetchingUrl = function (input) {
    if (input.match(/^ur\d+$/)) {
        return "https://www.imdb.com/user/" + input + "/watchlist";
    }
    var target = input;
    if (!target.endsWith("watchlist") && !target.endsWith("watchlist/")) {
        // Profile URL: add the route, inserting the separating slash only when missing.
        target += target.endsWith("/") ? "watchlist" : "/watchlist";
    }
    return prependHttpsAndWww(target);
};
/**
 * Normalise the start of a URL to 'https://www.'.
 *
 * Three leading forms are rewritten: "imdb.com…", "www.…" and "https://imdb…".
 * Anything else is returned untouched.
 *
 * @param url a URL that may be missing https and/or www at the beginning
 * @returns the URL with the recognised leading form expanded
 */
var prependHttpsAndWww = function (url) {
    // [recognised prefix, what it is replaced with]; the first matching row is applied.
    var rewrites = [
        ["imdb.com", "https://www.imdb.com"],
        ["www.", "https://www."],
        ["https://imdb", "https://www.imdb"]
    ];
    for (var i = 0; i < rewrites.length; i++) {
        var prefix = rewrites[i][0];
        if (url.startsWith(prefix)) {
            return rewrites[i][1] + url.slice(prefix.length);
        }
    }
    return url;
};
/**
 * Build the export URL for a list URL by appending the "export" path segment.
 *
 * @param url the list URL, with or without a trailing slash
 * @returns the list URL followed by "/export" (exactly one slash in between)
 */
var makeUrl = function (url) {
    var separator = url.endsWith("/") ? "" : "/";
    return url + separator + "export";
};
/**
 * Validates the input, resolves a watchlist to its list ID when needed, and fetches the list's export URL.
 *
 * @param url a list/watchlist URL or bare ID, as accepted by validateUrl
 * @returns a Promise resolving to the `data` of the export response. It rejects with "Invalid URL" when the input
 * matches no pattern, "list is private" on an Axios error with status 403, "got error code <status> while trying
 * to fetch list" on any other Axios error, and otherwise with the caught value unchanged (which includes the
 * string rejections coming from getListLinkFromWatchlist).
 */
export var makeRequest = function (url) { return __awaiter(void 0, void 0, void 0, function () {
var validatedUrl, listId, madeUrl, resp, error_2, err;
var _a, _b;
return __generator(this, function (_c) {
switch (_c.label) {
case 0:
// Open a try region: body starts at label 0, catch handler is label 4, no finally, region ends at label 5.
_c.trys.push([0, 4, , 5]);
validatedUrl = validateUrl(url);
if (!validatedUrl.matched) {
return [2 /*return*/, Promise.reject("Invalid URL")];
}
// Plain lists skip straight to label 2; watchlists first need their list ID looked up.
if (!(validatedUrl.listType === "watchlist")) return [3 /*break*/, 2];
return [4 /*yield*/, getListLinkFromWatchlist(validatedUrl.url)];
case 1:
// Resumed with the list ID; from here on the watchlist is treated as an ordinary list URL.
listId = _c.sent();
validatedUrl.url = "https://www.imdb.com/list/" + listId;
_c.label = 2;
case 2:
madeUrl = makeUrl(validatedUrl.url);
return [4 /*yield*/, axios.get(madeUrl)];
case 3:
resp = _c.sent();
return [2 /*return*/, resp.data];
case 4:
// Catch handler for anything thrown or rejected while awaiting inside the try region.
error_2 = _c.sent();
err = error_2;
if (axios.isAxiosError(err)) {
// Optional-chaining emit: status is read only when err.response is neither null nor undefined.
if (((_a = err.response) === null || _a === void 0 ? void 0 : _a.status) === 403) {
return [2 /*return*/, Promise.reject("list is private")];
}
// With no response attached the status is undefined and is concatenated into the message as such.
return [2 /*return*/, Promise.reject("got error code " + ((_b = err.response) === null || _b === void 0 ? void 0 : _b.status) + " while trying to fetch list")];
}
// Anything axios.isAxiosError does not recognise is passed on as-is.
return [2 /*return*/, Promise.reject(error_2)];
case 5: return [2 /*return*/];
}
});
}); };
// module.exports.default = makeRequest
export default makeRequest;
/**
 * Internal helpers bundled into one object so the test suite can reach them.
 * Not part of the public API; do not rely on these from outside the package.
 */
export var exportsForTests = {
    validateUrl,
    getListLinkFromWatchlist,
    makeUrl,
    makeWatchlistFetchingUrl,
    extractListId
};