node-csfd-api
Version:
ČSFD API in JavaScript. Amazing NPM library for scraping csfd.cz :)
318 lines (317 loc) • 10.9 kB
JavaScript
import { addProtocol, getColor, parseDate, parseFilmType, parseISO8601Duration, parseIdFromUrl, parseLastIdFromUrl } from "./global.helper.js";
//#region src/helpers/movie.helper.ts
// Matches every line break (CRLF, LF, CR) and tab character; the helpers below
// replace these matches with "" to flatten scraped text onto a single line.
const CLEAN_TEXT_REGEX = /(\r\n|\n|\r|\t)/gm;
/**
 * Localized creator-group headings, keyed first by language code ("en", "cs",
 * "sk") and then by creator group key. The label text is what gets looked up
 * (via the reverse map built from this table) when creator headings are parsed.
 */
const CREATOR_LABELS = {
en: {
directors: "Directed by",
writers: "Screenplay",
cinematography: "Cinematography",
music: "Composer",
actors: "Cast",
basedOn: "Based on",
producers: "Produced by",
filmEditing: "Editing",
costumeDesign: "Costumes",
productionDesign: "Production design",
casting: "Casting",
sound: "Sound",
makeup: "Make-up"
},
cs: {
directors: "Režie",
writers: "Scénář",
cinematography: "Kamera",
music: "Hudba",
actors: "Hrají",
basedOn: "Předloha",
producers: "Produkce",
filmEditing: "Střih",
costumeDesign: "Kostýmy",
productionDesign: "Scénografie",
casting: "Casting",
sound: "Zvuk",
makeup: "Masky"
},
sk: {
directors: "Réžia",
writers: "Scenár",
cinematography: "Kamera",
music: "Hudba",
actors: "Hrajú",
basedOn: "Predloha",
producers: "Produkcia",
filmEditing: "Strih",
costumeDesign: "Kostýmy",
productionDesign: "Scénografia",
casting: "Casting",
sound: "Zvuk",
makeup: "Masky"
}
};
/**
 * Creator group keys that are included when the reverse (label -> key) lookup
 * is built from CREATOR_LABELS.
 * Note: "casting" and "makeup" have labels in CREATOR_LABELS but are not listed
 * here, so headings carrying those labels are not mapped to any creator group.
 */
const CREATOR_KEYS = [
"directors",
"writers",
"cinematography",
"music",
"actors",
"basedOn",
"producers",
"filmEditing",
"costumeDesign",
"productionDesign",
"sound"
];
/**
 * Reverse lookup from a localized heading label to its creator group key,
 * built once at module load from every language table in CREATOR_LABELS and
 * restricted to the keys listed in CREATOR_KEYS.
 */
const REVERSE_CREATOR_LABELS = (() => {
  const labelToKey = /* @__PURE__ */ new Map();
  Object.values(CREATOR_LABELS).forEach((labels) => {
    CREATOR_KEYS.forEach((creatorKey) => {
      const label = labels[creatorKey];
      // Labels shared between languages simply map to the same key again.
      if (label) labelToKey.set(label, creatorKey);
    });
  });
  return labelToKey;
})();
/**
 * Splits the page's first <h1> text into a series name and a season name.
 * @param el - Root element exposing `querySelector`.
 * @returns `{ seriesName, seasonName }`; both are null when there is no <h1>,
 *   and `seasonName` is null when the heading contains no " - " separator.
 *   Only the first two " - "-separated parts are used.
 */
const getSeriesAndSeasonTitle = (el) => {
  const heading = el.querySelector("h1");
  if (!heading) return { seriesName: null, seasonName: null };

  const headingText = heading.innerText.trim();
  const parts = headingText.split(" - ");
  if (parts.length < 2) return { seriesName: headingText, seasonName: null };

  return {
    seriesName: parts[0].trim(),
    seasonName: parts[1].trim()
  };
};
/**
 * Reads the movie title from the first <h1>, dropping everything from the
 * first "(" onward.
 * @param el - Root element exposing `querySelector`.
 * @returns The trimmed title text. Throws if no <h1> is found.
 */
const getMovieTitle = (el) => {
  const heading = el.querySelector("h1");
  const [beforeParenthesis] = heading.innerText.split(`(`);
  return beforeParenthesis.trim();
};
/**
 * Collects genre names from the child nodes of the `.genres` element.
 * @param el - Root element exposing `querySelector`.
 * @returns Trimmed, non-empty text of each child node; [] when `.genres` is absent.
 */
const getMovieGenres = (el) => {
  const container = el.querySelector(".genres");
  if (!container) return [];
  return container.childNodes.flatMap((child) => {
    const label = child.textContent.trim();
    return label.length > 0 ? [label] : [];
  });
};
/**
 * Extracts the "/"-separated origin entries from the first child node of `.origin`.
 * @param el - Root element exposing `querySelector`.
 * @returns Trimmed, non-empty entries; [] when `.origin` or its first child text is missing.
 */
const getMovieOrigins = (el) => {
  const container = el.querySelector(".origin");
  if (!container) return [];
  const firstChild = container.childNodes[0];
  const rawOrigins = firstChild?.text || "";
  const trimmedOrigins = rawOrigins.split("/").map((origin) => origin.trim());
  return trimmedOrigins.filter(Boolean);
};
/**
 * Resolves the rating color from the page's body class list.
 * @param bodyClasses - Indexable list of body class names; the entry at index 1 is used.
 * @returns Whatever `getColor` returns for that class name.
 */
const getMovieColorRating = (bodyClasses) => {
  const ratingClass = bodyClasses[1];
  return getColor(ratingClass);
};
/**
 * Reads the average rating from `.film-rating-average`.
 * @param el - Root element exposing `querySelector`.
 * @returns The rating as an integer (percent sign stripped), or null when the
 *   element is missing or its text does not start with a decimal number.
 */
const getMovieRating = (el) => {
  const ratingText = el.querySelector(".film-rating-average")?.textContent?.replace(/%/g, "").trim();
  // Explicit radix 10, matching getMovieRatingCount, so text is never read as hex.
  const rating = Number.parseInt(ratingText, 10);
  return Number.isInteger(rating) ? rating : null;
};
/**
 * Reads the number of ratings from `.ratings-list .counter`.
 * @param el - Root element exposing `querySelector`.
 * @returns The count built from all digits in the counter text, or null when
 *   the element/text is missing or contains no digits.
 */
const getMovieRatingCount = (el) => {
  const counterText = el.querySelector(".ratings-list .counter")?.textContent;
  if (!counterText) return null;
  // Strip everything that is not a digit (brackets, spaces, separators).
  const digitsOnly = counterText.replace(/[^\d]/g, "");
  const parsedCount = Number.parseInt(digitsOnly, 10);
  return Number.isInteger(parsedCount) ? parsedCount : null;
};
/**
 * Reads the release year from the JSON-LD `dateCreated` field.
 * @param jsonLd - Parsed JSON-LD object, or null/undefined.
 * @returns `dateCreated` converted to a number, or null when it is missing or
 *   does not convert to a finite number (instead of leaking NaN to callers).
 */
const getMovieYear = (jsonLd) => {
  if (!jsonLd?.dateCreated) return null;
  const year = Number(jsonLd.dateCreated);
  return Number.isFinite(year) ? year : null;
};
/**
 * Determines the running time in minutes.
 * Prefers the JSON-LD `duration` (parsed by `parseISO8601Duration`); if that is
 * absent or parsing throws, falls back to an "<h> h <m> min" / "<m> min"
 * pattern in the text of `.origin`.
 * @param jsonLd - Parsed JSON-LD object, or null/undefined.
 * @param el - Root element exposing `querySelector`.
 * @returns Duration in minutes, or null when neither source yields a value.
 */
const getMovieDuration = (jsonLd, el) => {
  if (jsonLd?.duration) {
    try {
      return parseISO8601Duration(jsonLd.duration);
    } catch {
      // Unparseable structured duration: fall back to the visible page text below.
    }
  }
  try {
    const originText = el.querySelector(".origin")?.textContent;
    const parts = originText ? originText.match(/(?:(\d+)\s*h)?\s*(\d+)\s*min/) : null;
    if (!parts) return null;
    const hours = Number.parseInt(parts[1] || "0", 10);
    const minutes = Number.parseInt(parts[2] || "0", 10);
    return hours * 60 + minutes;
  } catch {
    return null;
  }
};
/**
 * Collects alternative titles from the `.film-names li` items.
 * @param el - Root element exposing `querySelectorAll`.
 * @returns `{ country, title }` for each item that has both a flag image `alt`
 *   and a non-empty first text line. Items without a flag image are skipped
 *   rather than aborting the whole list with a TypeError.
 */
const getMovieTitlesOther = (el) => {
  const nameItems = el.querySelectorAll(".film-names li");
  if (!nameItems.length) return [];
  return nameItems.flatMap((item) => {
    // The flag image may be absent on some items; treat that as "no country".
    const country = item.querySelector("img.flag")?.attributes?.alt;
    // Only the first line of the item text is the title itself.
    const title = item.textContent.trim().split("\n")[0];
    return country && title ? [{ country, title }] : [];
  });
};
/**
 * Builds the poster URL from the `.film-posters img` element.
 * @param el - Root element exposing `querySelector`.
 * @returns null when there is no poster image or it carries the "empty-image"
 *   class; otherwise the `src` without its query string, with "/w140/" swapped
 *   for "/w1080/", passed through `addProtocol`.
 */
const getMoviePoster = (el) => {
  const posterImage = el.querySelector(".film-posters img");
  if (!posterImage) return null;

  const isPlaceholder = posterImage.classNames?.includes("empty-image");
  if (isPlaceholder) return null;

  const [srcWithoutQuery] = posterImage.attributes.src.split("?");
  return addProtocol(srcWithoutQuery.replace(/\/w140\//, "/w1080/"));
};
/**
 * Returns the gallery photo URL with "/w663/" swapped for "/w1326/".
 * @param el - Root element exposing `querySelector`.
 * @returns The rewritten `src` of `.gallery-item picture img`, or null when absent.
 */
const getMovieRandomPhoto = (el) => {
  const photoSrc = el.querySelector(".gallery-item picture img")?.attributes?.src;
  return photoSrc ? photoSrc.replace(/\/w663\//, "/w1326/") : null;
};
/**
 * Collects trivia entries from `.article-trivia ul li`.
 * @param el - Root element exposing `querySelectorAll`.
 * @returns Trimmed entry texts with CLEAN_TEXT_REGEX matches removed, or null
 *   when there are no entries.
 */
const getMovieTrivia = (el) => {
  const triviaItems = el.querySelectorAll(".article-trivia ul li");
  if (!triviaItems?.length) return null;
  const toCleanText = (item) => item.textContent.trim().replace(CLEAN_TEXT_REGEX, "");
  return triviaItems.map((item) => toCleanText(item));
};
/**
 * Collects plot description paragraphs from the `.body--plots` section.
 * @param el - Root element exposing `querySelectorAll`.
 * @returns One entry per matched paragraph: its trimmed text with
 *   CLEAN_TEXT_REGEX matches removed (undefined if a paragraph has no textContent).
 */
const getMovieDescriptions = (el) => {
  const plotSelector = ".body--plots .plot-full p, .body--plots .plots .plots-item p";
  const paragraphs = el.querySelectorAll(plotSelector);
  return paragraphs.map((paragraph) => paragraph.textContent?.trim().replace(CLEAN_TEXT_REGEX, ""));
};
/**
 * Converts the class-less <a> links inside an element into person records.
 * @param el - Element exposing `querySelectorAll`.
 * @returns `{ id, name, url }` per link whose `classNames` is empty; `id` comes
 *   from `parseIdFromUrl(href)` and `url` is the href prefixed with the csfd.cz origin.
 */
const parseMoviePeople = (el) => {
  const people = [];
  for (const link of el.querySelectorAll("a")) {
    // Links that carry any class are skipped.
    if (link.classNames.length !== 0) continue;
    const { href } = link.attributes;
    people.push({
      id: parseIdFromUrl(href),
      name: link.innerText.trim(),
      url: `https://www.csfd.cz${href}`
    });
  }
  return people;
};
/**
 * Groups the people listed under each `.creators h4` heading by creator key.
 * The heading text (one trailing ":" removed) is looked up in
 * REVERSE_CREATOR_LABELS; headings with no match or no parent node are ignored.
 * @param el - Root element exposing `querySelectorAll`.
 * @param options - Currently unused by this function.
 * @returns An object with one array per creator key (empty when no heading matched).
 */
const getMovieCreators = (el, options) => {
  const creators = {
    directors: [],
    writers: [],
    cinematography: [],
    music: [],
    actors: [],
    basedOn: [],
    producers: [],
    filmEditing: [],
    costumeDesign: [],
    productionDesign: [],
    sound: []
  };
  el.querySelectorAll(".creators h4").forEach((heading) => {
    const rawLabel = heading.textContent.trim();
    const label = rawLabel.endsWith(":") ? rawLabel.slice(0, -1) : rawLabel;
    const creatorKey = REVERSE_CREATOR_LABELS.get(label);
    if (!creatorKey || !heading.parentNode) return;
    creators[creatorKey] = parseMoviePeople(heading.parentNode);
  });
  return creators;
};
/**
 * Lists the child entries shown in `.film-episodes-list`.
 * @param el - Root element exposing `querySelector`.
 * @returns null when the list element is absent, [] when it has no
 *   `.film-title-inline` entries, otherwise `{ id, title, url, info }` per entry.
 *   Relative hrefs (starting with "/") are prefixed with the csfd.cz origin and
 *   brackets/braces are stripped from the info text.
 */
const getSeasonsOrEpisodes = (el) => {
  const list = el.querySelector(".film-episodes-list");
  if (!list) return null;

  const entries = list.querySelectorAll(".film-title-inline");
  if (!entries?.length) return [];

  return entries.map((entry) => {
    const titleLink = entry.querySelector(".film-title-name");
    const infoNode = entry.querySelector(".info");
    const href = titleLink?.getAttribute("href");

    let url = null;
    if (href) url = href.startsWith("/") ? `https://www.csfd.cz${href}` : href;

    return {
      id: parseLastIdFromUrl(href || ""),
      title: titleLink?.textContent?.trim() || null,
      url,
      info: infoNode?.textContent?.replace(/[{()}]/g, "").trim() || null
    };
  });
};
/**
 * Extracts the text inside the first pair of parentheses in `.film-header-name h1`.
 * @param el - Root element exposing `querySelector`.
 * @returns The parenthesised text, or null when the heading is missing or has
 *   no non-empty parenthesised part.
 */
const getEpisodeCode = (el) => {
  const heading = el.querySelector(".film-header-name h1");
  if (!heading) return null;
  const headingText = heading.textContent?.trim() || "";
  const [, code = null] = headingText.match(/\(([^)]+)\)/) ?? [];
  return code;
};
/**
 * Decides whether `.film-episodes-list` lists seasons or episodes, based on the
 * `.updated-box-header h3` text of the nearest `.updated-box`, else the nearest
 * `section`, else `el` itself.
 * @param el - Root element exposing `querySelector`.
 * @returns "seasons" if the heading contains "Série", "episodes" if it contains
 *   "Epizody", otherwise null (also when the list is absent).
 */
const detectSeasonOrEpisodeListType = (el) => {
  const list = el.querySelector(".film-episodes-list");
  if (!list) return null;

  const scope = list.closest(".updated-box") || list.closest("section") || el;
  const heading = scope.querySelector(".updated-box-header h3")?.textContent?.trim() ?? "";

  // Checked in order, so "Série" wins if both keywords appear.
  const typeByKeyword = [
    ["Série", "seasons"],
    ["Epizody", "episodes"]
  ];
  const hit = typeByKeyword.find(([keyword]) => heading.includes(keyword));
  return hit ? hit[1] : null;
};
const getSeasonOrEpisodeParent = (el) => {
let parents = el.querySelectorAll(".film-series-content h2 a");
if (parents.length === 0) parents = el.querySelectorAll(".film-header-name h1 a");
if (parents.length === 0) return null;
const [parentSeries, parentSeason] = parents;
const seriesId = parseIdFromUrl(parentSeries?.getAttribute("href"));
const seasonId = parseLastIdFromUrl(parentSeason?.getAttribute("href") || "");
const seriesTitle = parentSeries?.textContent?.trim() || null;
const seasonTitle = parentSeason?.textContent?.trim() || null;
const series = seriesId && seriesTitle ? {
id: seriesId,
title: seriesTitle
} : null;
const season = seasonId && seasonTitle ? {
id: seasonId,
title: seasonTitle
} : null;
if (!series && !season) return null;
return {
series,
season
};
};
/**
 * Determines the film type from the `.film-header-name .type` label.
 * Brackets/braces are stripped and only the first line is used; "film" is the
 * fallback when the label is missing or empty.
 * @param el - Root element exposing `querySelector`.
 * @returns Whatever `parseFilmType` returns for the resulting label.
 */
const getMovieType = (el) => {
  const rawLabel = el.querySelector(".film-header-name .type")?.innerText;
  let typeLabel = "film";
  if (rawLabel != null) {
    const firstLine = rawLabel.replace(/[{()}]/g, "").split("\n")[0].trim();
    if (firstLine) typeLabel = firstLine;
  }
  return parseFilmType(typeLabel);
};
/**
 * Lists the VOD badge links found in `.box-film-vod`.
 * @param el - Root element exposing `querySelectorAll`; may be null/undefined.
 * @returns `{ title, url }` per `.vod-badge-link`, or [] when `el` is falsy or
 *   there are no links.
 */
const getMovieVods = (el) => {
  if (!el) return [];
  const badgeLinks = el.querySelectorAll(".box-film-vod .vod-badge-link");
  const vods = badgeLinks.map((link) => ({
    title: link.textContent.trim(),
    url: link.attributes.href
  }));
  return vods.length ? vods : [];
};
/**
 * Finds the box whose header title equals `box`.
 * @param el - Root element exposing `querySelectorAll`.
 * @param box - Exact (trimmed) text expected in the header's <h3> or <h2>.
 * @returns The parent node of the first matching `section .updated-box-header`,
 *   or undefined when none matches.
 */
const getBoxContent = (el, box) => {
  const headingMatches = (header, tag) => header.querySelector(tag)?.textContent.trim() === box;
  const matchingHeader = el
    .querySelectorAll("section .updated-box-header")
    .find((header) => headingMatches(header, "h3") || headingMatches(header, "h2"));
  return matchingHeader?.parentNode;
};
/**
 * Lists the movies linked inside the box titled `boxName`.
 * @param el - Root element passed on to `getBoxContent`.
 * @param boxName - Box header title to look for.
 * @returns `{ id, title, url }` per `.article-header .film-title-name` link in
 *   that box; [] when the box or its links are missing.
 */
const getMovieBoxMovies = (el, boxName) => {
  const titleLinks = getBoxContent(el, boxName)?.querySelectorAll(".article-header .film-title-name");
  if (!titleLinks?.length) return [];
  return Array.from(titleLinks, (link) => ({
    id: parseIdFromUrl(link.attributes.href),
    title: link.textContent.trim(),
    url: `https://www.csfd.cz${link.attributes.href}`
  }));
};
/**
 * Collects premiere entries from the `.box-premieres li` items.
 * Each item's `p + span` title attribute is split on spaces: the first token is
 * parsed with `parseDate`, the remainder is joined back as the company name.
 * @param el - Root element exposing `querySelectorAll`.
 * @returns `{ country, format, date, company }` per item with a parsable date.
 *   Items missing the `p + span` element, its title, or a parsable date are
 *   skipped instead of aborting the whole list with a TypeError.
 */
const getMoviePremieres = (el) => {
  const premieres = [];
  for (const premiereNode of el.querySelectorAll(".box-premieres li")) {
    const title = premiereNode.querySelector("p + span")?.attributes?.title;
    if (!title) continue;

    const [dateRaw, ...company] = title.split(" ");
    const date = parseDate(dateRaw);
    if (!date) continue;

    premieres.push({
      country: premiereNode.querySelector(".flag")?.attributes?.title || null,
      // Keep only the part of the label before " od".
      format: premiereNode.querySelector("p")?.textContent?.trim().split(" od")[0] ?? null,
      date,
      company: company.join(" ")
    });
  }
  return premieres;
};
/**
 * Lists the tag names linked inside `.updated-box-content-tags`.
 * @param el - Root element exposing `querySelectorAll`.
 * @returns The trimmed text of each tag link.
 */
const getMovieTags = (el) => {
  const tagLinks = el.querySelectorAll(".updated-box-content-tags a");
  return tagLinks.map(({ textContent }) => textContent.trim());
};
//#endregion
export { detectSeasonOrEpisodeListType, getEpisodeCode, getMovieBoxMovies, getMovieColorRating, getMovieCreators, getMovieDescriptions, getMovieDuration, getMovieGenres, getMovieOrigins, getMoviePoster, getMoviePremieres, getMovieRandomPhoto, getMovieRating, getMovieRatingCount, getMovieTags, getMovieTitle, getMovieTitlesOther, getMovieTrivia, getMovieType, getMovieVods, getMovieYear, getSeasonOrEpisodeParent, getSeasonsOrEpisodes, getSeriesAndSeasonTitle };
//# sourceMappingURL=movie.helper.js.map