UNPKG

nurlresolver

Version:
173 lines 6.73 kB
import got, { HTTPError } from 'got';
import scrapeIt from "scrape-it";
import * as helper from './utils/helper.js';
import { CookieJar } from 'tough-cookie';
import _debug from 'debug';
import { URL } from 'url';
import { performance } from "perf_hooks";

const logger = _debug('nurl:BaseUrlResolver');

/**
 * Base class for URL resolvers.
 *
 * `resolve()` drives the workflow: it checks `canResolve()`, configures a
 * `got` instance via `setupEnvironment()`, delegates to `resolveInner()`
 * (not defined here; subclasses are expected to provide it), and then
 * post-processes the returned items.
 */
export class BaseUrlResolver {
    /** Patterns tested against the URL in `canResolve()` (each must expose `.test(url)`). */
    domains;
    /** HTTP client; replaced with a configured `got.extend(...)` instance on every `resolve()` call. */
    gotInstance = got;
    /** Value written to `speedRank` of every playable result. */
    _speedRank;
    /** When true, `setupEnvironment()` attaches a fresh `CookieJar` to the HTTP client. */
    useCookies;
    scrapeItAsync = scrapeIt;
    scrapeHtml = scrapeIt.scrapeHTML;
    _cookieJar;
    defaultUserAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36';
    /** Options passed to the most recent `resolve()` call. */
    _resolverOptions;
    /** Context passed to the most recent `resolve()` call. */
    _context;

    // Helper functions exposed as instance members so subclasses can reach them through `this`.
    getSecondLevelDomain = helper.getSecondLevelDomain;
    isValidHttpUrl = helper.isValidHttpUrl;
    getHiddenForm = helper.parseHiddenFormV2;
    scrapeLinkHref = helper.scrapeLinkHref;
    scrapePageTitle = helper.scrapePageTitle;
    scrapeInnerText = helper.scrapeInnerText;
    parseAllLinks = helper.parseAllLinks;
    parseElementAttributes = helper.parseElementAttributes;
    extractFileNameFromUrl = helper.extractFileNameFromUrl;
    wait = helper.wait;
    nodeatob = helper.nodeatob;
    nodebtoa = helper.nodebtoa;
    getServerPublicIp = helper.getServerPublicIp;
    parseScripts = helper.parseScripts;

    /**
     * Runs `helper.parseAllLinks` and reduces every entry to `{ link, title }`.
     *
     * @param {string} html
     * @param {*} context forwarded unchanged to `helper.parseAllLinks`
     * @param {string} [baseUrl='']
     * @returns {{link: *, title: *}[]}
     */
    scrapeAllLinks(html, context, baseUrl = '') {
        return helper.parseAllLinks(html, context, baseUrl)
            .map(({ link, title }) => ({ link, title }));
    }

    /**
     * @param {{domains: Array, useCookies?: boolean, speedRank?: number}} options
     */
    constructor(options) {
        this.domains = options.domains;
        this.useCookies = options.useCookies || false;
        // Falsy speedRank values (including 0) fall back to 30.
        this._speedRank = options.speedRank || 30;
    }

    /**
     * @param {string} urlToResolve
     * Override this method if you want to implement can resolve function
     * @returns {Promise<boolean>} true when any entry of `domains` matches the URL
     */
    async canResolve(urlToResolve) {
        return this.domains.some((innerUrl) => innerUrl.test(urlToResolve));
    }

    /**
     * Resolves a URL into a list of result items.
     *
     * Errors raised while resolving are logged and swallowed; in that case
     * (and when `canResolve()` is false or throws) an empty array is returned.
     *
     * @param {string} urlToResolve
     * @param {{timeout?: number, extractMetaInformation?: boolean}} [options]
     *        may be omitted; `timeout` is read in seconds by `setupEnvironment()`
     * @param {*} [context] stored on the instance as `_context`
     * @returns {Promise<Array>} resolved items, or `[]`
     */
    async resolve(urlToResolve, options, context) {
        let canResolve = false;
        this._resolverOptions = options;
        this._context = context;
        try {
            canResolve = await this.canResolve(urlToResolve);
        } catch (error) {
            logger(`Error occurred while calling canResolve BaseResolver: ${error}`);
        }
        if (!canResolve) {
            return [];
        }

        let status = 'ERROR';
        const startTime = performance.now();
        try {
            this.setupEnvironment();
            // `concat` accepts either a single item or an array from resolveInner.
            const resolveResults = [].concat(await this.resolveInner(urlToResolve));
            resolveResults.forEach(x => x.parent = x.parent || urlToResolve);
            const playable = resolveResults.filter(x => x.isPlayable);
            playable.forEach(x => x.speedRank = this._speedRank);
            // `options` is optional (see setupEnvironment); guard the access so a
            // missing options object does not discard already-resolved results.
            if (options?.extractMetaInformation) {
                await Promise.all(playable.map(this.fillMetaInfoInner, this));
            }
            const result = this.massageResolveResults(resolveResults);
            status = result.length > 0 ? 'OK' : 'NOT FOUND';
            return result;
        } catch (error) {
            if (error instanceof HTTPError) {
                logger('http error %s %s', urlToResolve, error.message);
            } else if (error instanceof Error) {
                logger('unknown error %s %s', urlToResolve, error.message);
            } else {
                // Non-Error throwables used to vanish without a trace.
                logger('unknown error %s %s', urlToResolve, String(error));
            }
        } finally {
            const timeTook = (performance.now() - startTime);
            logger('%s %s %sms', status, urlToResolve, timeTook.toFixed(0));
        }
        return [];
    }

    /**
     * Normalizes `link` of every item through `new URL(...).href`
     * (mutates the items in place). Throws if a link is not a valid absolute URL.
     *
     * @param {Array<{link: string}>} resolveResults
     * @returns {Array<{link: string}>}
     */
    massageResolveResults(resolveResults) {
        return resolveResults.map(x => {
            x.link = new URL(x.link).href; //normalizing the url like escaping spaces
            return x;
        });
    }

    /**
     * Wrapper used as the `map` callback in `resolve()`; delegates to `fillMetaInfo`.
     */
    async fillMetaInfoInner(resolveMediaItem) {
        await this.fillMetaInfo(resolveMediaItem);
    }

    /**
     * Issues a HEAD request for the item's link (with the item's own headers, if any)
     * and copies `content-length`, `last-modified` and `content-type` onto the item
     * as `size`, `lastModified` and `contentType`.
     */
    async fillMetaInfo(resolveMediaItem) {
        const headResponse = await this.gotInstance.head(resolveMediaItem.link, {
            headers: resolveMediaItem.headers || {}
        });
        const { headers } = headResponse;
        resolveMediaItem.size = headers['content-length'];
        resolveMediaItem.lastModified = headers['last-modified'];
        resolveMediaItem.contentType = headers['content-type'];
    }

    /**
     * Builds a fresh `got` instance for the current resolve call:
     * default User-Agent, per-request timeout, no retries, optional cookie jar.
     */
    setupEnvironment() {
        const rejectUnauthorized = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
        const gotOptions = {
            https: {
                // NOTE(review): verification is on only when the variable is exactly '1';
                // when it is unset this evaluates to false. Confirm that is intended.
                rejectUnauthorized: rejectUnauthorized === '1'
            },
            headers: {
                'User-Agent': this.defaultUserAgent
            },
            timeout: {
                request: (this._resolverOptions?.timeout || 20) * 1000 //by default let every individual request time out after 20 seconds
            },
            retry: {
                limit: 0
            }
        };
        if (this.useCookies) {
            this._cookieJar = new CookieJar();
            gotOptions.cookieJar = this._cookieJar;
        }
        this.gotInstance = got.extend(gotOptions);
    }

    /**
     * Extracts the hidden form at index `ix` from `page` and POSTs it to `urlToPost`
     * with `Referer` set to the same URL.
     *
     * @param {string} urlToPost
     * @param {string} page markup passed to `getHiddenForm`
     * @param {number} ix form index passed to `getHiddenForm`
     * @param {boolean} [resolveBody=true] return `response.body` instead of the response
     * @param {boolean} [followRedirect] falsy values are sent as `false`
     * @throws {Error} when `getHiddenForm` yields nothing
     */
    async postHiddenForm(urlToPost, page, ix, resolveBody = true, followRedirect) {
        const form = this.getHiddenForm(page, ix);
        if (!form) {
            throw new Error('No form found to post.');
        }
        const response2 = await this.gotInstance.post(urlToPost, {
            form: form,
            headers: {
                Referer: urlToPost
            },
            followRedirect: followRedirect || false //it can raise some unhandled error which can potentially cause whole application shutdown.
        });
        return resolveBody ? response2.body : response2;
    }
}

/**
 * Resolver for pages that require posting a hidden form: fetches the page,
 * posts the hidden form at `_formIx`, then scrapes a link href from the
 * response body using `_selector`.
 */
export class GenericFormBasedResolver extends BaseUrlResolver {
    _selector;
    _formIx;

    /**
     * @param {object} options forwarded to `BaseUrlResolver`
     * @param {*} selector passed to `scrapeLinkHref`
     * @param {number} [formIx] index of the form to post; falsy values become 0
     */
    constructor(options, selector, formIx) {
        super(options);
        this._selector = selector;
        this._formIx = formIx || 0;
    }

    /**
     * @param {string} _urlToResolve
     * @returns {Promise<Array<{link: *, title: *, isPlayable: boolean}>>} a single-item array
     */
    async resolveInner(_urlToResolve) {
        const response = await this.gotInstance(_urlToResolve);
        // Post against the final response URL, which may differ from the requested one.
        const response2Body = await this.postHiddenForm(response.url, response.body, this._formIx);
        const link = this.scrapeLinkHref(response2Body, this._selector);
        const title = this.extractFileNameFromUrl(link);
        const result = { link, title, isPlayable: true };
        return [result];
    }
}
//# sourceMappingURL=BaseResolver.js.map