nyaasiscrapper
Version:
A lightweight scraper for nyaa.si
321 lines • 13.9 kB
JavaScript
;
// Compiler-emitted helper that drives a generator function to completion and
// exposes the outcome as a promise. An already-defined `this.__awaiter` is reused.
//   thisArg    - `this` value the generator function is applied with
//   _arguments - argument list for the generator function (defaults to [])
//   P          - promise constructor to use; falls back to the global Promise
//   generator  - generator function whose yielded values are awaited
// Returns a P that resolves with the generator's final value, or rejects with
// whatever the generator (or an awaited value) throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in P unless it already is an instance of P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with the settled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Re-throw a rejection inside the generator so its own try/catch can observe it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finished: resolve with the final value. Otherwise wait for the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator and run it up to its first yield.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Compiler-emitted helper that emulates a generator object on top of a
// label-driven state machine. An already-defined `this.__generator` is reused.
//   thisArg - `this` value `body` is called with
//   body    - function(state) that runs the code for `state.label` and returns
//             the next instruction as an [opcode, operand] pair
// Returns an object with next/throw/return methods (and Symbol.iterator when
// Symbol is available) whose calls feed instructions into `step`.
// Opcodes as used by the callers in this file: 2 = return, 3 = break (jump to
// label), 4 = yield. next/throw/return enter with opcodes 0/1/2 respectively.
// NOTE(review): the meaning of the remaining opcodes (5, 6, 7) and the layout
// of the `trys` entries are inferred from how they are indexed below — confirm
// against the TypeScript helper documentation before relying on them.
var __generator = (this && this.__generator) || function (thisArg, body) {
// `_` is the machine state handed to `body`: current label, sent() to read the
// value passed in by the consumer (throws it when the low opcode bit is set),
// the stack of active try regions, and pending operations.
// f = re-entrancy flag, y = iterator currently delegated to, t = scratch.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
// Build the method for opcode n: it forwards [n, value] to step.
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
// Calling the generator while it is already running is an error.
if (f) throw new TypeError("Generator is already executing.");
// Loop until an instruction produces a result; `_` is set to 0 to terminate.
while (_) try {
// While an inner iterator `y` is set, forward the call to it first and hand
// back its result as long as it is not done.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
case 0: case 1: t = op; break;
// Opcode 4: advance the label and hand op[1] to the consumer as a not-done result.
case 4: _.label++; return { value: op[1], done: false };
// Opcode 5: advance the label and start delegating to the iterator in op[1].
case 5: _.label++; y = op[1]; op = [0]; continue;
// Opcode 7: resume the pending operation and leave the innermost try region.
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// No active try region and the instruction is a throw (6) or return (2): stop the machine.
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
// Run the body for the current label to obtain the next instruction.
op = body.call(thisArg, _);
// An exception thrown while stepping is converted into instruction [6, e].
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
// Machine stopped: rethrow for opcodes with bit 1 or 4 set, otherwise report completion.
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
// Mark the CommonJS exports object as a transpiled ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the public names up front; they are assigned once the functions are defined.
exports.getAdvancedInfos = exports.searchNyaa = void 0;
// Lookup tables used by serializeOptions and cleanOptions
// (argValues, argStringValues, stringCatValues, numberCatValues).
var values_1 = require("./values");
// HTTP client used for the RSS feed and the individual torrent pages.
var fetch = require('node-fetch');
// XML parser used by parseData on the RSS feed.
var parser = require('fast-xml-parser');
// HTML parser used by advancedInfo on torrent pages.
var HTMLParser = require('node-html-parser');
// Base URL that the serialized search options are appended to.
var NYAA_URL = "https://nyaa.si";
/**
 * Sequentially enriches itemArray[i] .. itemArray[limit - 1]: each item's page
 * (item["nyaaUrl"]) is fetched and handed to advancedInfo, one request at a
 * time, then `resolve` is called with itemArray.
 *
 * A failure on one item (network error or parsing error) is logged with
 * console.error and that item is left as it was; processing then continues
 * with the next item so that `resolve` is always called eventually.
 *
 * @param i index of the item to process next
 * @param limit index at which to stop (exclusive)
 * @param itemArray items to enrich in place
 * @param resolve callback invoked once with itemArray when all items are done
 */
function fetchTry(i, limit, itemArray, resolve) {
    // Nothing (left) to process: settle right away instead of reading itemArray[i].
    if (i >= limit) {
        resolve(itemArray);
        return;
    }
    // The executor turns a synchronous throw (e.g. a missing item) into a
    // rejection so it goes through the same logging path as a network error.
    new Promise(function (settle) { settle(fetch(itemArray[i]["nyaaUrl"])); })
        .then(function (response) { return response.text(); })
        .then(function (pageData) { return advancedInfo(itemArray[i], pageData); })
        .catch(function (error) { console.error(error); })
        .then(function () {
            // Runs after success and after a logged failure alike.
            fetchTry(i + 1, limit, itemArray, resolve);
        });
}
/**
 * General search function: queries the nyaa.si RSS feed with the given options
 * and resolves with the parsed results.
 * Slower when `options.advanced` is set (every result page is fetched as well),
 * and slower still with 14 or more results, which are then fetched one by one.
 * @param options search options; defaults to an empty object
 * @returns a promise resolving with the array of results; it rejects when the
 *          options cannot be processed, the request fails or the feed cannot
 *          be parsed
 */
function searchNyaa(options) {
    if (options === void 0) { options = {}; }
    // The executor turns a synchronous throw from the option helpers into a
    // rejection, and resolving with the fetch chain lets network and parsing
    // errors reach the caller instead of leaving the promise pending.
    return new Promise(function (resolve) {
        var optionsCleaned = cleanOptions(options);
        var optionsSerialized = serializeOptions(optionsCleaned);
        resolve(fetch(NYAA_URL + optionsSerialized)
            .then(function (response) { return response.text(); })
            .then(function (data) { return parseData(data, optionsCleaned); }));
    });
}
// Expose searchNyaa on the CommonJS exports object (public API).
exports.searchNyaa = searchNyaa;
/**
 * Converts the RSS feed returned by a search into an array of result items.
 * @param data raw XML text of the feed
 * @param options cleaned search options; when `options.advanced` is truthy the
 *        items are passed through getAdvancedInfos before being returned
 * @returns a promise resolving with the items ([] when the feed has no item);
 *          it rejects with a TypeError when the feed has no rss/channel element
 */
function parseData(data, options) {
    // The executor turns any synchronous throw below into a rejection.
    return new Promise(function (resolve) {
        var feed = parser.parse(data);
        var rss = feed == null ? undefined : feed["rss"];
        var channel = rss == null ? undefined : rss["channel"];
        if (channel == null) {
            // Fail with an explicit message rather than an opaque property-access error.
            throw new TypeError("Unexpected response: the feed has no rss/channel element");
        }
        var rawItems = channel["item"];
        if (rawItems == undefined) {
            resolve([]);
            return;
        }
        if (!Array.isArray(rawItems)) {
            // only one result: the parser yields the item itself instead of a list
            rawItems = [rawItems];
        }
        var itemArray = rawItems.map(function (anime) {
            return {
                title: anime["title"],
                downloadUrl: anime["link"],
                nyaaUrl: anime["guid"],
                date: new Date(anime["pubDate"]),
                seeders: anime["nyaa:seeders"],
                leechers: anime["nyaa:leechers"],
                grabs: anime["nyaa:downloads"],
                infoHash: anime["nyaa:infoHash"],
                category: anime["nyaa:category"],
                categoryId: anime["nyaa:categoryId"],
                // Anything other than an explicit 'No' counts as true.
                remake: anime["nyaa:remake"] !== 'No',
                trusted: anime["nyaa:trusted"] !== 'No',
                size: anime["nyaa:size"],
            };
        });
        // advanced, need to get more info before returning
        resolve(options.advanced ? getAdvancedInfos(itemArray) : itemArray);
    });
}
/**
 * Call this function with a result item or an array of them to get more info:
 * each item's page is fetched and the item is enriched by advancedInfo.
 * Fewer than 14 items are fetched in parallel; 14 or more are fetched one at a
 * time through fetchTry.
 * A failure on one item is logged with console.error and the item is returned
 * without the additional info.
 * @param items a single item or an array of items
 * @returns a promise resolving with an array of the items (always an array,
 *          even for a single item)
 */
function getAdvancedInfos(items) {
    // Item count from which requests are serialised instead of sent all at once.
    var SEQUENTIAL_THRESHOLD = 14;
    var itemArray = Array.isArray(items) ? items : [items];
    // The executor turns a synchronous throw (e.g. a null item) into a rejection.
    return new Promise(function (resolve) {
        if (itemArray.length >= SEQUENTIAL_THRESHOLD) {
            fetchTry(0, itemArray.length, itemArray, resolve);
            return;
        }
        resolve(Promise.all(itemArray.map(function (item) {
            return fetch(item["nyaaUrl"])
                .then(function (response) { return response.text(); })
                .then(function (pageData) { return advancedInfo(item, pageData); })
                .catch(function (error) {
                    console.error(error);
                    // Keep the item in the results rather than leaving an undefined slot.
                    return item;
                });
        })));
    });
}
// Expose getAdvancedInfos on the CommonJS exports object (public API).
exports.getAdvancedInfos = getAdvancedInfos;
// Counts parseFiles invocations, recursive ones included; nothing here reads it.
var ITERATION = 0;
/**
 * Builds the flat list of files described by one entry of a torrent's file tree.
 *
 * An entry with exactly three child nodes is treated as a single file: the
 * second node holds its name and the first child of the third node its size.
 * Otherwise every `a` element whose class contains "folder" is treated as a
 * directory: the node two positions after it is expected to hold the `li`
 * entries of that directory, which are parsed recursively.
 * Elements without a class attribute and folder links without a following
 * listing are skipped.
 *
 * @param fileParent parsed HTML node of the entry
 * @param emplacement path of the directory containing the entry ("" for the root)
 * @returns array of { title, size, parentDir } objects
 */
function parseFiles(fileParent, emplacement) {
    if (emplacement === void 0) { emplacement = ""; }
    ITERATION++;
    var children = fileParent.childNodes;
    // single file case
    if (children.length == 3) {
        return [{
                title: children[1].rawText,
                size: children[2].childNodes[0].rawText,
                parentDir: (emplacement == '') ? '/' : emplacement
            }];
    }
    // more than one file, recursive calling of parseFiles
    var fileArray = [];
    for (var i = 0; i < children.length; i++) {
        var node = children[i];
        // Only folder links matter here; text nodes and other elements are ignored.
        if (node.nodeType != 1 || node.tagName != 'a') {
            continue;
        }
        var classNames = (node.attributes && node.attributes.class) || '';
        if (!classNames.includes('folder')) {
            continue;
        }
        // The directory's entries live two siblings after the folder link.
        var listing = children[i + 2];
        if (!listing || !listing.childNodes) {
            continue;
        }
        for (var y = 0; y < listing.childNodes.length; y++) {
            var entry = listing.childNodes[y];
            if (entry.nodeType != 3 && entry.tagName == 'li') {
                fileArray = fileArray.concat(parseFiles(entry, emplacement + "/" + node.rawText));
            }
        }
    }
    return fileArray;
}
/**
 * Advanced function: extracts the information that is only available on a
 * torrent's own page (uploader, magnet link, description, comments and files)
 * and stores it on the item. Called for every result when `advanced` is used
 * in searchNyaa().
 * The page layout is addressed by fixed child positions, so a page with a
 * different structure makes this function throw.
 * @param item result item to enrich in place
 * @param pageData HTML text of the item's page
 * @returns the same item, with user, magnet, description, comments and files set
 */
function advancedInfo(item, pageData) {
    var pageRoot = HTMLParser.parse(pageData);
    var bodies = pageRoot.querySelectorAll("body");
    // Everything of interest sits in the fifth child of <body>.
    var sections = bodies[0].childNodes[4].childNodes;
    var infoPanel = sections[1];
    // The uploader link is missing for anonymous uploads; any lookup failure
    // is treated as such.
    try {
        item["user"] = infoPanel.childNodes[3].childNodes[3].childNodes[3].childNodes[1].childNodes[0].rawText;
    }
    catch (_a) {
        item["user"] = "Anonymous";
    }
    // The magnet link is the first quoted attribute value of the footer link.
    item["magnet"] = infoPanel.childNodes[5].childNodes[3].rawAttrs.split('"')[1];
    item["description"] = sections[3].childNodes[1].childNodes[0].rawText;
    // Locate the file tree before parsing comments, but store comments first.
    var fileRoot = sections[5].childNodes[3].childNodes[1].childNodes[1];
    item["comments"] = parseComments(sections[7]);
    item["files"] = parseFiles(fileRoot);
    return item;
}
/**
 * Extracts the comments from the comments panel of a torrent page.
 * The comment entries are the non-text children of the panel's fourth child.
 * @param panel parsed HTML node of the comments panel
 * @returns array of { content, date, user, edited } objects, in page order
 */
function parseComments(panel) {
    return panel.childNodes[3].childNodes
        // text nodes (nodeType 3) between the entries carry no comment
        .filter(function (node) { return node.nodeType != 3; })
        .map(function (entry) {
            var wrapper = entry.childNodes[1];
            var authorNode = wrapper.childNodes[1];
            var bodyNode = wrapper.childNodes[3];
            var text = bodyNode.childNodes[3].structuredText;
            var rawDate = bodyNode.childNodes[1].structuredText;
            // An edited comment has " (edited)" appended to its timestamp.
            var wasEdited = rawDate.includes("(edited)");
            var postedAt = new Date(rawDate.replace(' (edited)', ''));
            return {
                "content": text,
                "date": postedAt,
                "user": authorNode.structuredText,
                "edited": wasEdited
            };
        });
}
/**
 * Turns cleaned search options into the path and query string of the RSS feed.
 * - `advanced` is not a query parameter and is left out.
 * - `sortDirection` is sent as "asc" for 'Ascending' and "desc" for anything else.
 * - Option names missing from values_1.argStringValues and options whose value
 *   is null or undefined are left out.
 * - Values are URL-encoded.
 * The options object is not modified.
 * @param options cleaned search options
 * @returns a string starting with '/?page=rss&', each parameter followed by '&'
 */
function serializeOptions(options) {
    var optionString = '/?page=rss&';
    Object.keys(options).forEach(function (propName) {
        if (propName == 'advanced') {
            return;
        }
        var argIndex = values_1.argStringValues.indexOf(propName);
        var value = options[propName];
        if (argIndex === -1 || value == null) {
            // Not a known query parameter, or nothing to send for it.
            return;
        }
        if (propName == "sortDirection") {
            // Map onto a local so the caller's object keeps its own value and
            // serializing the same options twice gives the same result.
            value = (value === 'Ascending') ? "asc" : "desc";
        }
        optionString += values_1.argValues[argIndex] + "=" + encodeURIComponent(value) + "&";
    });
    return optionString;
}
/**
 * Replaces the literal values accepted in the options with the values the
 * site expects:
 * - a category given by name (anything that is not 3 characters long, i.e.
 *   not in x_x format) is looked up in values_1.stringCatValues and replaced
 *   by the matching entry of values_1.numberCatValues; an unknown name is
 *   dropped so that no category filter is sent;
 * - sortType "Date" becomes "id".
 * The caller's object is left untouched; a cleaned copy is returned.
 * @param options search options as given by the caller
 * @returns a new options object with the replacements applied
 */
function cleanOptions(options) {
    var cleaned = Object.assign({}, options);
    var category = cleaned['category'];
    if (category != undefined && category.length != 3) {
        // not in x_x format
        var index = values_1.stringCatValues.indexOf(category);
        if (index === -1) {
            delete cleaned['category'];
        }
        else {
            cleaned['category'] = values_1.numberCatValues[index];
        }
    }
    if (cleaned['sortType'] == "Date") {
        cleaned['sortType'] = "id";
    }
    return cleaned;
}
//# sourceMappingURL=index.js.map