@subtitles/providers
Version:
Providers are the core of applications, where the subtitles are collected. Each provider exports a unique strategy for gathering data. From legendastv's web scraping to opensubtitle API usage, you can collect subtitles from your favorite tv shows and mo
54 lines (53 loc) • 2.22 kB
JavaScript
;
// Flag the CommonJS `exports` object with `__esModule: true`; presumably emitted
// by a transpiler so that interop helpers treat `exports.default` as the default export.
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * Creates the scraper that converts one subtitle entry into a plain record.
 *
 * The returned function expects `handle` to expose `$(selector)` resolving to
 * an element wrapper (or a nullish value) whose `evaluate(fn)` runs `fn`
 * against the underlying DOM node — presumably a Puppeteer-style
 * ElementHandle; confirm against the caller.
 *
 * @returns {(handle: object) => Promise<{
 *   id: string,
 *   title: string,
 *   source: string,
 *   uploader: (string|undefined),
 *   downloads: (number|undefined),
 *   language: string,
 *   releasedAt: (Date|undefined),
 *   provider: string,
 * }>} Async scraper for a single subtitle entry.
 */
function createScraper() {
  // NOTE(review): `evaluate` presumably serializes its callback and runs it in
  // the page, so this function must stay self-contained (no outer references).
  const readStats = (node) => {
    const downloadMatch = node.firstChild?.textContent?.match(/^\d+/);
    const dateMatch = node.lastChild?.textContent?.match(
      /(\d{2})\/(\d{2})\/(\d{4}) - (\d{2}):(\d{2})/,
    );

    // Leave the counter undefined (not NaN) when the text has no leading digits.
    const downloads = downloadMatch ? Number(downloadMatch[0]) : undefined;

    let releasedAt;
    if (dateMatch) {
      const [, day, month, year, hour, minutes] = dateMatch;
      // The extracted time is in Brasilia Standard Time, hence the fixed -03:00.
      // NOTE(review): a fixed offset ignores any daylight-saving periods.
      const date = new Date(`${year}-${month}-${day}T${hour}:${minutes}-03:00`);
      // toISOString() throws a RangeError on an invalid date, so skip those.
      if (!Number.isNaN(date.getTime())) {
        // Returned as an ISO string; the caller rebuilds the Date from it.
        releasedAt = date.toISOString();
      }
    }

    return {
      releasedAt,
      downloads,
    };
  };

  /**
   * Scrapes a single subtitle entry.
   *
   * @param {object} handle - Element wrapper for the entry (see above).
   * @returns {Promise<object>} The subtitle record.
   * @throws {Error} When the download link is missing or does not contain
   *   `/download/<id>/`, or when the title is empty.
   */
  return async function scraper(handle) {
    // The three lookups do not depend on each other, so run them together.
    const [titleNode, uploaderNode, dataNode] = await Promise.all([
      handle.$('a[href^="/download"]'),
      handle.$('a[href^="/usuario"]'),
      handle.$('.data'),
    ]);

    // Missing nodes simply yield undefined entries here.
    const [title, source, uploader, data] = await Promise.all([
      titleNode?.evaluate((node) => node.innerHTML),
      titleNode?.evaluate((node) => node.href),
      uploaderNode?.evaluate((node) => node.innerHTML),
      dataNode?.evaluate(readStats),
    ]);

    // A missing or empty source produces no match, so one check covers both.
    const match = source?.match(/(.*)\/download\/(\w+)\//);
    if (!match) {
      throw new Error(`Failed to get subtitle source: ${handle}`);
    }
    const id = match[2];

    // `source` is known to be non-empty past this point; only the title can be missing.
    if (!title) {
      throw new Error(`Couldn't parse title or source from subtitle: ${handle}`);
    }

    const releasedAt = data?.releasedAt ? new Date(data.releasedAt) : undefined;

    return {
      id,
      title,
      source,
      uploader,
      downloads: data?.downloads,
      language: 'Português Brasileiro',
      releasedAt,
      provider: 'legendas.tv',
    };
  };
}
// Expose the scraper factory as this module's default export.
exports.default = createScraper;