node-csfd-api
Version:
ČSFD API in JavaScript. Amazing NPM library for scraping csfd.cz :)
254 lines (253 loc) • 9.62 kB
JavaScript
Object.defineProperty(exports, "__esModule", { value: true });
exports.getTags = exports.getPremieres = exports.getBoxMovies = exports.getBoxContent = exports.getVods = exports.getType = exports.getGroup = exports.parsePeople = exports.getDescriptions = exports.getTrivia = exports.getRandomPhoto = exports.getPoster = exports.getTitlesOther = exports.getDuration = exports.getYear = exports.getRatingCount = exports.getRating = exports.getColorRating = exports.getOrigins = exports.getGenres = exports.getTitle = exports.getId = void 0;
const global_helper_1 = require("./global.helper");
/**
 * Resolves the id of a page from its tab navigation.
 *
 * Takes the `href` of the first element matching `.tabs .tab-nav-list a`
 * and hands it to the `parseIdFromUrl` helper.
 *
 * @param {object} el - Root element exposing `querySelector`.
 * @returns {*} Whatever `parseIdFromUrl` yields for that href.
 * @throws {TypeError} When no matching link exists.
 */
const getId = (el) => {
    const { href } = el.querySelector('.tabs .tab-nav-list a').attributes;
    const { parseIdFromUrl } = global_helper_1;
    return parseIdFromUrl(href);
};
exports.getId = getId;
/**
 * Reads the title from the first `h1` of the page.
 *
 * Everything from the first `(` onwards is discarded and the remainder
 * is trimmed.
 *
 * @param {object} el - Root element exposing `querySelector`.
 * @returns {string} Trimmed heading text preceding the first `(`.
 * @throws {TypeError} When the page has no `h1`.
 */
const getTitle = (el) => {
    const heading = el.querySelector('h1');
    const [beforeParenthesis] = heading.innerText.split(`(`);
    return beforeParenthesis.trim();
};
exports.getTitle = getTitle;
/**
 * Lists the genres shown in the `.genres` element.
 *
 * @param {object} el - Root element exposing `querySelector`.
 * @returns {string[]} The element's text split on ` / `.
 * @throws {TypeError} When `.genres` is not present.
 */
const getGenres = (el) => {
    const { textContent } = el.querySelector('.genres');
    return textContent.split(' / ');
};
exports.getGenres = getGenres;
/**
 * Lists the countries of origin from the `.origin` element.
 *
 * Only the text before the first comma is considered; that part is
 * split on ` / `.
 *
 * @param {object} el - Root element exposing `querySelector`.
 * @returns {string[]} Entries of the first comma-separated segment.
 * @throws {TypeError} When `.origin` is not present.
 */
const getOrigins = (el) => {
    const originNode = el.querySelector('.origin');
    const [countries] = originNode.textContent.split(',');
    return countries.split(' / ');
};
exports.getOrigins = getOrigins;
/**
 * Maps the page's body classes to a rating colour.
 *
 * Only the entry at index 1 of `bodyClasses` is used; it is forwarded to
 * the `getColor` helper.
 *
 * @param {ArrayLike<string>} bodyClasses - Indexable list of class names.
 * @returns {*} Whatever `getColor` yields for the second class.
 */
const getColorRating = (bodyClasses) => {
    const { getColor } = global_helper_1;
    const ratingClass = bodyClasses[1];
    return getColor(ratingClass);
};
exports.getColorRating = getColorRating;
/**
 * Reads the average rating from `.film-rating-average`.
 *
 * Percent signs and surrounding whitespace are removed before the text is
 * parsed as a base-10 integer.
 *
 * @param {object} el - Root element exposing `querySelector`.
 * @returns {number|null} The rating, or `null` when the element is missing,
 *   has no text, or its text does not start with a number.
 */
const getRating = (el) => {
    // A page without a rating box must yield null rather than throw,
    // in line with how getRatingCount treats a missing counter.
    const ratingNode = el.querySelector('.film-rating-average');
    const ratingRaw = ratingNode ? ratingNode.textContent : null;
    if (ratingRaw === null || ratingRaw === undefined) {
        return null;
    }
    const rating = Number.parseInt(ratingRaw.replace(/%/g, '').trim(), 10);
    return Number.isInteger(rating) ? rating : null;
};
exports.getRating = getRating;
/**
 * Reads the number of ratings from `.box-rating-container .counter`.
 *
 * Parentheses and all whitespace are stripped from the counter text and
 * the remainder is converted to a number.
 *
 * @param {object} el - Root element exposing `querySelector`.
 * @returns {number|null} The count when the cleaned text converts to an
 *   integer; `null` when the element or its text is missing or the text is
 *   not an integer.
 */
const getRatingCount = (el) => {
    const counterNode = el.querySelector('.box-rating-container .counter');
    const hasNode = counterNode !== null && counterNode !== undefined;
    const counterText = hasNode ? counterNode.textContent : undefined;
    const hasText = counterText !== null && counterText !== undefined;
    const count = hasText ? Number(counterText.replace(/[(\s)]/g, '')) : NaN;
    return Number.isInteger(count) ? count : null;
};
exports.getRatingCount = getRatingCount;
/**
 * Extracts the year from a JSON-LD string.
 *
 * The string is parsed as JSON and its `dateCreated` property is converted
 * to a number.
 *
 * @param {string} el - Raw JSON text (despite the name, not a DOM element).
 * @returns {number|null} The year, or `null` when the JSON cannot be parsed
 *   or `dateCreated` is absent, empty, or not an integer.
 */
const getYear = (el) => {
    try {
        const { dateCreated } = JSON.parse(el);
        // Unary plus would turn null/'' into 0 and anything else unusable
        // into NaN; report all of those as "no year" instead.
        if (dateCreated === null || dateCreated === undefined || dateCreated === '') {
            return null;
        }
        const year = +dateCreated;
        return Number.isInteger(year) ? year : null;
    }
    catch (error) {
        console.error('node-csfd-api: Error parsing JSON-LD', error);
        return null;
    }
};
exports.getYear = getYear;
/**
 * Determines the running time of a title.
 *
 * The primary source is the `duration` property of the JSON-LD text, which
 * is handed to the `parseISO8601Duration` helper. If parsing the JSON or the
 * helper throws, the last comma-separated segment of the `.origin` element
 * is parsed instead; it is expected to look like `98 min` or `2 h 16 min`,
 * optionally followed by a parenthesised note.
 *
 * @param {string} jsonLdRaw - Raw JSON-LD text.
 * @param {object} el - Root element exposing `querySelector`, used for the fallback.
 * @returns {number|null} Result of `parseISO8601Duration`, or for the
 *   fallback the total in minutes; `null` when no usable duration is found.
 */
const getDuration = (jsonLdRaw, el) => {
    try {
        const jsonLd = JSON.parse(jsonLdRaw);
        return (0, global_helper_1.parseISO8601Duration)(jsonLd.duration);
    }
    catch (error) {
        // Fallback must not throw itself: a page without `.origin` simply has no duration.
        const originNode = el ? el.querySelector('.origin') : null;
        if (!originNode) {
            return null;
        }
        const segments = originNode.innerText.split(',');
        // Fewer than three segments means there is no trailing duration part.
        if (segments.length <= 2) {
            return null;
        }
        // Take the last segment and drop any parenthesised suffix.
        const timeRaw = segments.pop().trim().split('(')[0].trim();
        // Keep what precedes "min", then separate hours from minutes.
        const hoursMins = timeRaw.split('min')[0].split('h');
        const minutes = hoursMins.length > 1
            ? +hoursMins[0] * 60 + +hoursMins[1]
            : +hoursMins[0];
        // A last segment that is not a duration must not surface as NaN.
        return Number.isNaN(minutes) ? null : minutes;
    }
};
exports.getDuration = getDuration;
/**
 * Collects the alternative titles listed under `.film-names li`.
 *
 * For each list item the country is the `alt` attribute of its `img.flag`
 * and the title is the first line of the item's trimmed text.
 *
 * @param {object} el - Root element exposing `querySelectorAll`.
 * @returns {{country: string, title: string}[]} One entry per item that has
 *   both a country and a title; items lacking either are skipped.
 */
const getTitlesOther = (el) => {
    const titlesOther = [];
    for (const nameNode of el.querySelectorAll('.film-names li')) {
        // An item without a flag image has no country: skip it like any
        // other incomplete item instead of throwing.
        const flag = nameNode.querySelector('img.flag');
        const country = flag ? flag.attributes.alt : undefined;
        const title = nameNode.textContent.trim().split('\n')[0];
        if (country && title) {
            titlesOther.push({ country, title });
        }
    }
    return titlesOther;
};
exports.getTitlesOther = getTitlesOther;
/**
 * Builds the URL of the full-size poster from `.film-posters img`.
 *
 * The query string of the thumbnail `src` is dropped, the `/w140/` path
 * segment is swapped for `/w1080/`, and the result is passed through the
 * `addProtocol` helper.
 *
 * @param {object} el - Root element exposing `querySelector`.
 * @returns {*} Result of `addProtocol`, or `null` when there is no poster
 *   image or the image carries the `empty-image` class.
 */
const getPoster = (el) => {
    const poster = el.querySelector('.film-posters img');
    if (!poster) {
        return null;
    }
    // Placeholder artwork is reported as "no poster".
    const classes = poster.classNames;
    const isPlaceholder = classes !== null && classes !== undefined && classes.includes('empty-image');
    if (isPlaceholder) {
        return null;
    }
    // Upgrade the thumbnail URL to the full-sized variant.
    const [thumbUrl] = poster.attributes.src.split('?');
    const fullUrl = thumbUrl.replace(/\/w140\//, '/w1080/');
    const { addProtocol } = global_helper_1;
    return addProtocol(fullUrl);
};
exports.getPoster = getPoster;
/**
 * Returns the URL of the gallery photo found at `.gallery-item picture img`.
 *
 * The `/w663/` path segment of the image `src` is replaced by `/w1326/`.
 *
 * @param {object} el - Root element exposing `querySelector`.
 * @returns {string|null} The rewritten URL, or `null` when the image, its
 *   attributes, or its `src` is missing or empty.
 */
const getRandomPhoto = (el) => {
    const imageNode = el.querySelector('.gallery-item picture img');
    if (imageNode === null || imageNode === undefined) {
        return null;
    }
    const attrs = imageNode.attributes;
    if (attrs === null || attrs === undefined) {
        return null;
    }
    const source = attrs.src;
    return source ? source.replace(/\/w663\//, '/w1326/') : null;
};
exports.getRandomPhoto = getRandomPhoto;
/**
 * Collects the trivia entries found under `.article-trivia ul li`.
 *
 * Each entry's text is trimmed and stripped of line breaks and tabs.
 *
 * @param {object} el - Root element exposing `querySelectorAll`.
 * @returns {string[]|null} Cleaned texts, or `null` when there are no entries.
 */
const getTrivia = (el) => {
    const items = el.querySelectorAll('.article-trivia ul li');
    if (!items || !items.length) {
        return null;
    }
    return items.map((item) => {
        const text = item.textContent.trim();
        return text.replace(/(\r\n|\n|\r|\t)/gm, '');
    });
};
exports.getTrivia = getTrivia;
/**
 * Collects plot descriptions from the paragraphs matching
 * `.body--plots .plot-full p` and `.body--plots .plots .plots-item p`.
 *
 * @param {object} el - Root element exposing `querySelectorAll`.
 * @returns {(string|undefined)[]} One entry per paragraph: its text trimmed
 *   and stripped of line breaks and tabs, or `undefined` when the paragraph
 *   has no text content.
 */
const getDescriptions = (el) => {
    const stripBreaks = (text) => text.trim().replace(/(\r\n|\n|\r|\t)/gm, '');
    const paragraphs = el.querySelectorAll('.body--plots .plot-full p, .body--plots .plots .plots-item p');
    return paragraphs.map((paragraph) => {
        const text = paragraph.textContent;
        if (text === null || text === undefined) {
            return undefined;
        }
        return stripBreaks(text);
    });
};
exports.getDescriptions = getDescriptions;
/**
 * Converts the anchors inside an element into person records.
 *
 * Anchors that carry any class are ignored (these are the "more" links).
 *
 * @param {object} el - Element exposing `querySelectorAll`.
 * @returns {{id: *, name: string, url: string}[]} For each remaining anchor:
 *   `id` from `parseIdFromUrl(href)`, `name` from its trimmed text, and
 *   `url` as the href prefixed with `https://www.csfd.cz`.
 */
const parsePeople = (el) => {
    const toPerson = (link) => {
        const { parseIdFromUrl } = global_helper_1;
        return {
            id: parseIdFromUrl(link.attributes.href),
            name: link.innerText.trim(),
            url: `https://www.csfd.cz${link.attributes.href}`
        };
    };
    const links = el.querySelectorAll('a');
    const personLinks = links.filter((link) => link.classNames.length === 0);
    return personLinks.map((link) => toPerson(link));
};
exports.parsePeople = parsePeople;
/**
 * Lists the people of one creator group (looked up by its heading text).
 *
 * The first `.creators h4` whose trimmed text contains `group` is located
 * and its parent node is passed to `parsePeople`.
 *
 * @param {object} el - Root element exposing `querySelectorAll`.
 * @param {string} group - Text to look for inside the group heading.
 * @returns {Array} Result of `parsePeople`, or an empty array when no
 *   heading matches or the heading has no parent.
 */
const getGroup = (el, group) => {
    const headings = el.querySelectorAll('.creators h4');
    const [heading] = headings.filter((candidate) => candidate.textContent.trim().includes(group));
    const hasHeading = heading !== null && heading !== undefined;
    if (!hasHeading || !heading.parentNode) {
        return [];
    }
    const { parsePeople } = exports;
    return parsePeople(heading.parentNode);
};
exports.getGroup = getGroup;
/**
 * Reads the title type from `.film-header-name .type`.
 *
 * Curly braces and parentheses are removed from the element's text.
 *
 * @param {object} el - Root element exposing `querySelector`.
 * @returns {string} The cleaned type, or `'film'` when the element or its
 *   text is missing or the cleaned text is empty.
 */
const getType = (el) => {
    const typeNode = el.querySelector('.film-header-name .type');
    const hasNode = typeNode !== null && typeNode !== undefined;
    const label = hasNode ? typeNode.innerText : undefined;
    const hasLabel = label !== null && label !== undefined;
    const cleaned = hasLabel ? label.replace(/[{()}]/g, '') : undefined;
    return cleaned || 'film';
};
exports.getType = getType;
/**
 * Lists the video-on-demand links found among `.box-buttons .button`.
 *
 * Buttons carrying the `button-social` class are left out.
 *
 * @param {object|null|undefined} el - Element exposing `querySelectorAll`;
 *   may be absent.
 * @returns {{title: string, url: string}[]} Trimmed button text and `href`
 *   per remaining button; an empty array when `el` is falsy or nothing matches.
 */
const getVods = (el) => {
    if (!el) {
        return [];
    }
    return el
        .querySelectorAll('.box-buttons .button')
        .filter((button) => !button.classNames.includes('button-social'))
        .map((button) => ({
            title: button.textContent.trim(),
            url: button.attributes.href
        }));
};
exports.getVods = getVods;
/**
 * Finds a content box by the text of its header.
 *
 * Looks through `section.box .box-header` elements for the first one whose
 * `h3` text (trimmed) contains `box`.
 *
 * @param {object} el - Root element exposing `querySelectorAll`.
 * @param {string} box - Text to look for inside the header's `h3`.
 * @returns {object|undefined} The matching header's parent node, or
 *   `undefined` when no header matches.
 * @throws {TypeError} When an inspected header has no `h3`.
 */
const getBoxContent = (el, box) => {
    const headers = el.querySelectorAll('section.box .box-header');
    const match = headers.find((header) => {
        const heading = header.querySelector('h3').textContent.trim();
        return heading.includes(box);
    });
    if (match === null || match === undefined) {
        return undefined;
    }
    return match.parentNode;
};
exports.getBoxContent = getBoxContent;
/**
 * Lists the movies linked inside a named content box.
 *
 * The box is located with `getBoxContent`; every
 * `.article-header .film-title-name` element inside it becomes one record.
 *
 * @param {object} el - Root element handed to `getBoxContent`.
 * @param {string} boxName - Header text identifying the box.
 * @returns {{id: *, title: string, url: string}[]} For each link: `id` from
 *   `parseIdFromUrl(href)`, its trimmed text, and the href prefixed with
 *   `https://www.csfd.cz`. Empty when the box or its links are missing.
 */
const getBoxMovies = (el, boxName) => {
    const { getBoxContent } = exports;
    const box = getBoxContent(el, boxName);
    const hasBox = box !== null && box !== undefined;
    const titleLinks = hasBox ? box.querySelectorAll('.article-header .film-title-name') : undefined;
    if (!titleLinks || !titleLinks.length) {
        return [];
    }
    return titleLinks.map((link) => {
        const { parseIdFromUrl } = global_helper_1;
        return {
            id: parseIdFromUrl(link.attributes.href),
            title: link.textContent.trim(),
            url: `https://www.csfd.cz${link.attributes.href}`
        };
    });
};
exports.getBoxMovies = getBoxMovies;
/**
 * Collects the premieres listed under `.box-premieres li`.
 *
 * For every list item the `title` attribute of its `p + span` element is
 * split on spaces: the first token becomes `date`, the rest (re-joined with
 * spaces) becomes `company`.
 *
 * @param {object} el - Root element exposing `querySelectorAll`.
 * @returns {{country: string|null, format: string, date: string, company: string}[]}
 *   One record per item that has a non-empty title; `country` is the `title`
 *   attribute of the item's `.flag` (or `null`), `format` is the item's `p`
 *   text up to ` od`. Items without the span or without a title are skipped.
 */
const getPremieres = (el) => {
    const premieres = [];
    for (const premiereNode of el.querySelectorAll('.box-premieres li')) {
        // Items lacking the info span are skipped just like items whose
        // span has no title, instead of aborting the whole list.
        const infoNode = premiereNode.querySelector('p + span');
        const title = infoNode ? infoNode.attributes.title : undefined;
        if (!title) {
            continue;
        }
        const [date, ...company] = title.split(' ');
        const flag = premiereNode.querySelector('.flag');
        premieres.push({
            country: (flag ? flag.attributes.title : null) || null,
            format: premiereNode.querySelector('p').textContent.trim().split(' od')[0],
            date,
            company: company.join(' ')
        });
    }
    return premieres;
};
exports.getPremieres = getPremieres;
/**
 * Lists the tags linked from `.box-content`.
 *
 * A tag link is any anchor whose `href` contains
 * `/podrobne-vyhledavani/?tag=`.
 *
 * @param {object} el - Root element exposing `querySelectorAll`.
 * @returns {string[]} Text content of each tag link, unmodified.
 */
const getTags = (el) => {
    const tagLinks = el.querySelectorAll('.box-content a[href*="/podrobne-vyhledavani/?tag="]');
    return tagLinks.map(({ textContent }) => textContent);
};
exports.getTags = getTags;
;