UNPKG

fandomscraper

Version:

A package for scraping character pages from Fandom wikis. It scrapes only each character's info section and the list of all catalogued characters.

1,791 lines (1,726 loc) 83.6 kB
import { JSDOM } from 'jsdom'; // wikia/death-note/data-source.ts var DeathNoteFRDataSource = { gender: "Sexe", images: { identifier: ".mw-parser-output table img", get: function(page) { const elements = page.querySelectorAll(this.identifier); const filteredElements = Array.from(elements).filter((element) => { return element.getAttribute("alt") !== "Tete" && element.getAttribute("alt") !== "Pomme"; }); return filteredElements; } }, episode: "anime", age: "\xE2ge", birthday: "Naissance", affiliation: "affiliation", bloodType: "Groupe sanguin", occupations: "Activit\xE9(s)", height: "Taille", weight: "Poids", relatives: "Famille" }; var DeathNoteENDataSource = { kanji: "name", species: "species", gender: "gender", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, quote: { identifier: 'table[border="0"] tbody tr td div', get: function(page) { return page.querySelector('table[border="0"] tbody tr td div'); } }, episode: "anime", manga: "manga", age: "age", birthday: "birth", bloodType: "blood", height: "height", weight: "weight", affiliation: "organization", occupations: "occupation", relatives: "family", seiyu: "japanese", voiceActor: "english" }; // wikia/death-note/schemas.ts var DeathNoteFR = { url: "https://deathnote.fandom.com/fr/wiki/Cat%C3%A9gorie:Personnages", pageFormat: "classic", category: "Cat\xE9gorie:Personnages", dataSource: DeathNoteFRDataSource }; var DeathNoteEN = { url: "https://deathnote.fandom.com/wiki/Category:Manga_characters", pageFormat: "classic", category: "Category:Manga_characters", dataSource: DeathNoteENDataSource }; // wikia/death-note/index.ts var DeathNote = { fr: DeathNoteFR, en: DeathNoteEN }; // wikia/kimetsu-no-yaiba/data-source.ts var DemonSlayerFRDataSource = { kanji: "kanji", romaji: "r\xF4maji", status: "statut", species: "race", gender: "genre", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); 
} }, quote: { identifier: 'table[style*="rgba(210, 179, 148, 0.46)"] tbody tr:first-child td:nth-child(2)', get: function(page) { return page.querySelector('table[style*="rgba(210, 179, 148, 0.46)"] tbody tr:first-child td:nth-child(2)'); } }, episode: "anime", manga: "manga", age: "\xE2ge", affiliation: "affiliation", height: "taille", weight: "poids", birthday: "anniversaire", occupations: "occupation", relatives: "relation", seiyu: "japonais" }; var DemonSlayerENDataSource = { kanji: "kanji", romaji: "r\u014Dmaji", status: "status", species: "race", gender: "gender", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, quote: { identifier: 'table[style*="rgba(210, 179, 148, 0.46)"] tbody tr:first-child td:nth-child(2)', get: function(page) { return page.querySelector('table[style*="rgba(210, 179, 148, 0.46)"] tbody tr:first-child td:nth-child(2)'); } }, episode: "anime_debut", manga: "manga_debut", age: "age", affiliation: "affiliation", occupations: "occupation", relatives: "relative(s)", birthday: "birthday", height: "height", weight: "weight", seiyu: "japanese_va", voiceActor: "english_va" }; // wikia/kimetsu-no-yaiba/schemas.ts var DemonSlayerFR = { url: "https://kimetsu-no-yaiba.fandom.com/fr/wiki/Cat\xE9gorie:Personnages", pageFormat: "classic", category: "Cat\xE9gorie:Personnages", dataSource: DemonSlayerFRDataSource }; var DemonSlayerEN = { url: "https://kimetsu-no-yaiba.fandom.com/wiki/Characters#Manga", pageFormat: "table-2", dataSource: DemonSlayerENDataSource }; // wikia/kimetsu-no-yaiba/index.ts var DemonSlayer = { fr: DemonSlayerFR, en: DemonSlayerEN }; // wikia/dragon-ball/data-source.ts var DragonBallFRDataSource = { kanji: "Nom Original", status: "Statut", species: "Race", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, episode: "Premi\xE8re apparition Anime", manga: "Premi\xE8re apparition Manga", birthday: 
"Naissance", height: "Taille", weight: "Poids", seiyu: "Voix Japonaise", voiceActor: "Voix Fran\xE7aise", relatives: "Famille" }; var DragonBallENDataSource = { kanji: "JapName", romaji: "RomName", gender: "Gender", species: "Race", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, episode: "anime debut", affiliation: "Allegiance", manga: "manga debut", height: "Height", weight: "Weight", occupations: "Occupation", relatives: "FamConnect", birthday: "Date of birth" }; // wikia/dragon-ball/schemas.ts var DragonBallFR = { url: "https://dragonball.fandom.com/fr/wiki/Cat\xE9gorie:Personnages", pageFormat: "classic", category: "Cat\xE9gorie:Personnages", dataSource: DragonBallFRDataSource }; var DragonBallEN = { url: "https://dragonball.fandom.com/wiki/Characters", pageFormat: "classic", category: "Category:Characters", dataSource: DragonBallENDataSource }; // wikia/dragon-ball/index.ts var DragonBall = { fr: DragonBallFR, en: DragonBallEN }; // wikia/fumetsu-no-anata-e/data-source.ts var FumetsuENDataSource = { kanji: "Kanji", status: "Status", species: "Race", gender: "Sex", images: { identifier: ".mw-parser-output table img", get: function(page) { return page.querySelectorAll(this.identifier); }, ignore: ["https://static.wikia.nocookie.net/fumetsunoanatae/images/0/03/Alert_4.png"] }, quote: { identifier: "#Quotes", get: function(page) { const quotesHeading = page.querySelector("span#Quotes"); if (!quotesHeading) return null; const h2 = quotesHeading.closest("h2"); if (!h2) return null; let next = h2.nextElementSibling; while (next && next.tagName.toLowerCase() !== "ul") { next = next.nextElementSibling; } return next; } }, episode: "Anime", manga: "Manga", age: "Age", affiliation: "Affiliation", birthday: "Birthday", relatives: "Relatives", seiyu: "Japanese Voice", voiceActor: "English Voice" }; // wikia/fumetsu-no-anata-e/schemas.ts var FumetsuEN = { url: 
"https://fumetsunoanatae.fandom.com/wiki/Category:Characters", pageFormat: "classic", category: "Category:Characters", dataSource: FumetsuENDataSource }; // wikia/fumetsu-no-anata-e/index.ts var Fumetsu = { en: FumetsuEN }; // wikia/naruto/data-source.ts var NarutoFRDataSource = { name: "Nom", status: "Statut", gender: "Genre", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, quote: { identifier: "#Citations", get: function(page) { const heading = page.querySelector("span#Citations"); if (!heading) return null; const h2 = heading.closest("h2"); if (!h2) return null; let next = h2.nextElementSibling; while (next && next.tagName.toLowerCase() !== "ul") { next = next.nextElementSibling; } return next; } }, episode: "D\xE9but anime", manga: "D\xE9but manga", age: "\xC2ge", affiliation: "Affiliation", birthday: "Naissance", height: "Taille", weight: "Poids", relatives: "Famille", bloodType: "Groupe Sanguin", seiyu: "Seiy\xFB", voiceActor: "Doubleur Fran\xE7ais" }; var NarutoENDataSource = { status: "Status", gender: "Sex", images: { identifier: ".mw-parser-output .imagecell img", get: function(page) { return page.querySelectorAll(this.identifier); } }, quote: { identifier: "#Quotes", get: function(page) { const quotesHeading = page.querySelector("span#Quotes"); if (!quotesHeading) return null; const h2 = quotesHeading.closest("h2"); if (!h2) return null; let next = h2.nextElementSibling; while (next && next.tagName.toLowerCase() !== "ul") { next = next.nextElementSibling; } return next; } }, episode: "Anime", manga: "Manga", age: "Age", affiliation: "Affiliation", occupations: "Occupation", birthday: "Birthdate", height: "Height", weight: "Weight", relatives: "Famille", bloodType: "Blood type", seiyu: "Japanese", voiceActor: "English" }; // wikia/naruto/schemas.ts var NarutoFR = { url: "https://naruto.fandom.com/fr/wiki/Cat\xE9gorie:Personnages", pageFormat: "classic", category: 
"Cat\xE9gorie:Personnages", dataSource: NarutoFRDataSource }; var NarutoEN = { url: "https://naruto.fandom.com/wiki/Category:Characters", pageFormat: "classic", category: "Category:Characters", dataSource: NarutoENDataSource }; // wikia/naruto/index.ts var Naruto = { fr: NarutoFR, en: NarutoEN }; // wikia/one-piece/data-source.ts var OnePieceFRDataSource = { name: "nomf", kanji: "nomj", romaji: "nomr", status: "statut", age: "\xE2ge", images: { identifier: ".wds-tab__content img", get: function(page) { return page.querySelectorAll(this.identifier); } }, quote: { identifier: "table.noprint tbody tr:first-child td:nth-child(2)", get: function(page) { return page.querySelector("table.noprint tbody tr:first-child td:nth-child(2)"); } }, episode: "premi\xE8re", affiliation: "affiliation", occupations: "occupation", height: "taille", bloodType: "groupe sanguin", seiyu: "voj", voiceActor: "vof" }; var OnePieceENDataSource = { name: "ename", kanji: "jname", romaji: "rname", status: "status", age: "age", images: { identifier: ".wds-tab__content img", get: function(page) { return page.querySelectorAll(this.identifier); } }, episode: "first", affiliation: "affiliation", occupations: "occupation", bloodType: "blood type", height: "height", seiyu: "jva", voiceActor: "Odex eva" }; // wikia/one-piece/schemas.ts var OnePieceFR = { url: "https://onepiece.fandom.com/fr/wiki/Liste_des_Personnages_Canon", pageFormat: "table-1", dataSource: OnePieceFRDataSource }; var OnePieceEN = { url: "https://onepiece.fandom.com/wiki/List_of_Canon_Characters", pageFormat: "table-3", dataSource: OnePieceENDataSource }; // wikia/one-piece/index.ts var OnePiece = { fr: OnePieceFR, en: OnePieceEN }; // wikia/shiki/data-source.ts var ShikiENDataSource = { kanji: "Name Kanji", status: "Status", species: "Race", gender: "Gender", images: { identifier: ".mw-parser-output table img", get: function(page) { return page.querySelectorAll(this.identifier); } }, quote: { identifier: "div.quote", get: 
function(page) { return page.querySelector("div.quote"); } }, episode: "Anime Debut", age: "Age", occupations: "Occupation" }; // wikia/shiki/schemas.ts var ShikiEN = { url: "https://shiki.fandom.com/wiki/Category:Characters", pageFormat: "classic", category: "Category:Characters", dataSource: ShikiENDataSource }; // wikia/shiki/index.ts var Shiki = { en: ShikiEN }; // wikia/promised-neverland/data-source.ts var PromisedNeverlandFRDataSource = { kanji: "kanji", romaji: "r\u014Dmaji", gender: "genre", species: "esp\xE8ce", images: { identifier: ".mw-parser-output table img", get: function(page) { return page.querySelectorAll(this.identifier); } }, episode: "premi\xE8re_apparition", status: "statut", age: "\xE2ge", birthday: "anniversaire", eyeColor: "yeux", hairColor: "cheveux", height: "taille", affiliation: "Affiliations", relatives: "famille", seiyu: "doubleur" }; var PromisedNeverlandENDataSource = { kanji: "Kanji", romaji: "R\u014Dmaji", gender: "Gender", species: "Species", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, quote: { identifier: "blockquote.snippet", get: function(page) { return page.querySelector("blockquote.snippet"); } }, manga: "Manga", episode: "Episode", status: "Status", bloodType: "Blood Type", age: "Age", birthday: "Birthday", eyeColor: "Eye Color", hairColor: "Hair Color", height: "Height", affiliation: "Previous Affiliation", voiceActor: "English VA", seiyu: "Japanese VA" }; // wikia/promised-neverland/schemas.ts var PromisedNeverlandFR = { url: "https://the-promised-neverland.fandom.com/fr/wiki/Cat\xE9gorie:Personnages", pageFormat: "classic", category: "Cat\xE9gorie:Personnages", dataSource: PromisedNeverlandFRDataSource }; var PromisedNeverlandEN = { url: "https://yakusokunoneverland.fandom.com/wiki/Category:Manga_characters", pageFormat: "classic", category: "Category:Manga_characters", dataSource: PromisedNeverlandENDataSource }; // 
wikia/promised-neverland/index.ts var PromisedNeverland = { fr: PromisedNeverlandFR, en: PromisedNeverlandEN }; // wikia/berserk/data-source.ts var BerserkENDataSource = { gender: "Gender", species: "Kind", images: { identifier: ".mw-parser-output aside img", get: function(page) { return page.querySelectorAll(this.identifier); } }, episode: "First appearance", status: "Status", affiliation: "Affiliations", occupations: "Occupation(s)", relatives: "Relatives", hairColor: "Hair color", eyeColor: "Eye color" }; // wikia/berserk/schemas.ts var BerserkEN = { url: "https://berserk.fandom.com/wiki/Category:Fantasia_Arc_Characters", pageFormat: "classic", category: "Category:Fantasia_Arc_Characters", dataSource: BerserkENDataSource }; // wikia/berserk/index.ts var Berserk = { en: BerserkEN }; // wikia/jojo/data-source.ts var JojoFRDataSource = { kanji: "Kanji", romaji: "Romaji", species: "Esp\xE8ce", gender: "Genre", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); }, ignore: ["https://static.wikia.nocookie.net/jjba/images/d/d5/NoPicAvailable.png"] }, quote: { identifier: "table.cquote tbody tr:first-child td:nth-child(2)", get: function(page) { return page.querySelector("table.cquote tbody tr:first-child td:nth-child(2)"); } }, episode: "D\xE9but anime", manga: "D\xE9but manga", age: "\xC2ge", birthday: "Date de naissance", zodiac: "Signe", height: "Taille", weight: "Poids", hairColor: "Couleur de cheveux", eyeColor: "Couleur des yeux", occupations: "Profession", affiliation: "Profession" }; var JojoENDataSource = { kanji: "ja_kanji", romaji: "ja_romaji", status: "status", gender: "gender", images: { identifier: ".mw-parser-output aside img", get: function(page) { return page.querySelectorAll(this.identifier); }, ignore: [ "https://static.wikia.nocookie.net/jjba/images/d/d5/NoPicAvailable.png", "https://static.wikia.nocookie.net/jjba/images/b/b1/NoPicAv.png", 
"https://static.wikia.nocookie.net/jjba/images/9/9e/Flag_of_Japan.svg", "https://static.wikia.nocookie.net/jjba/images/a/a4/Flag_of_the_United_States.svg" ] }, quote: { identifier: "table.cquote tbody tr:first-child td:nth-child(2)", get: function(page) { return page.querySelector("table.cquote tbody tr:first-child td:nth-child(2)") ?? null; } }, episode: "animedebut", manga: "mangadebut", age: "age", birthday: "birthday", zodiac: "zodiac", height: "height", weight: "weight", occupations: "occupation", hairColor: "hair", eyeColor: "eyes", affiliation: "affiliation", seiyu: "seiyuu", voiceActor: "voiceactor" }; // wikia/jojo/schemas.ts var JojoFR = { url: "https://jjba.fandom.com/fr/wiki/Cat\xE9gorie:Personnages", pageFormat: "classic", category: "Cat\xE9gorie:Personnages", dataSource: JojoFRDataSource }; var JojoEN = { url: "https://jojo.fandom.com/wiki/Category:Characters", pageFormat: "classic", category: "Category:Characters", dataSource: JojoENDataSource }; // wikia/jojo/index.ts var Jojo = { fr: JojoFR, en: JojoEN }; // wikia/dororo/data-source.ts var DororoENDataSource = { kanji: "japanese name", age: "age", gender: "gender", species: "species", status: "status", height: "height", weight: "weight", eyeColor: "eyes", hairColor: "hair", images: { identifier: ".pi-image img", get: function(page) { return page.querySelectorAll(this.identifier); } }, quote: { identifier: "blockquote", get: function(page) { return page.querySelector("blockquote"); } }, episode: "debut", relatives: "relatives", voiceActor: "voice eng", seiyu: "voice" }; // wikia/dororo/schemas.ts var DororoEN = { url: "https://dororo.fandom.com/wiki/Category:Characters", pageFormat: "classic", category: "Category:Characters", dataSource: DororoENDataSource }; // wikia/dororo/index.ts var Dororo = { en: DororoEN }; // wikia/shingeki-no-kyojin/data-source.ts var ShingekiFRDataSource = { name: "Nom", gender: "Genre", age: "\xC2ge", kanji: "Kanji", birthday: "Anniversaire", height: "Taille", weight: 
"Poids", species: "Esp\xE8ces", images: { identifier: ".pi-item .pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, quote: { identifier: ".cquote tr i", get: function(page) { return page.querySelector(".cquote tr i"); } }, episode: "Premi\xE8re Anim\xE9", manga: "Premi\xE8re Manga", seiyu: "Voix Anim\xE9", voiceActor: "Voix Anim\xE9 fr", status: "Statut", affiliation: "Affiliation", relatives: "Affili\xE9s" }; var ShingekiENDataSource = { gender: "Gender", kanji: "Kanji", birthday: "Birthday", height: "Height", weight: "Weight", relatives: "Relatives", images: { identifier: ".pi-item .pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); }, ignore: [ "https://static.wikia.nocookie.net/shingekinokyojin/images/a/a7/Survey_Corps_Logo.png", "https://static.wikia.nocookie.net/shingekinokyojin/images/5/55/Garrison_Logo.png", "https://static.wikia.nocookie.net/shingekinokyojin/images/a/a9/104th_Trainees_Squad_Logo.png", "https://static.wikia.nocookie.net/shingekinokyojin/images/4/4c/Brigade_Logo.png" ] }, manga: "Debut chapter", seiyu: "Voice actor", status: "Status", affiliation: "Affiliation" }; // wikia/shingeki-no-kyojin/schemas.ts var ShingekiFR = { url: "https://attaque-des-titans.fandom.com/fr/wiki/Cat%C3%A9gorie:Humains", pageFormat: "classic", category: "Cat\xE9gorie:Humains", dataSource: ShingekiFRDataSource }; var ShingekiEN = { url: "https://attackontitan.fandom.com/wiki/List_of_characters/Anime", pageFormat: "table-4", dataSource: ShingekiENDataSource }; // wikia/shingeki-no-kyojin/index.ts var Shingeki = { fr: ShingekiFR, en: ShingekiEN }; // wikia/koe-no-katachi/data-source.ts var SilentVoiceFRDataSource = { name: "nom", age: "\xE2ge", birthday: "naissance", zodiac: "signe", gender: "genre", bloodType: "groupesanguin", relatives: "parent\xE9", occupations: "occupation", affiliation: "\xE9tudes", manga: "manga", images: { identifier: ".pi-image-thumbnail", get: function(page) { 
return page.querySelectorAll(this.identifier); } }, voiceActor: "voix" }; var SilentVoiceENDataSource = { age: "age", birthday: "birthday", zodiac: "sign", gender: "gender", height: "height", bloodType: "bloodtype", relatives: "relatives", occupations: "occupation", affiliation: "affiliation", manga: "manga", voiceActor: "voice_actor", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, quote: { identifier: "dl", get: function(page) { const dl = page.querySelector("dl"); if (!dl) return null; dl.querySelectorAll("b").forEach((b) => b.remove()); return dl; } } }; // wikia/koe-no-katachi/schemas.ts var SilentVoiceFR = { url: "https://koenokatachi.fandom.com/fr/wiki/Cat%C3%A9gorie:Personnages", pageFormat: "classic", category: "Cat\xE9gorie:Personnages", dataSource: SilentVoiceFRDataSource }; var SilentVoiceEN = { url: "https://koenokatachi.fandom.com/wiki/Category:Characters", pageFormat: "classic", category: "Category:Characters", dataSource: SilentVoiceENDataSource }; // wikia/koe-no-katachi/index.ts var SilentVoice = { fr: SilentVoiceFR, en: SilentVoiceEN }; // wikia/hellsing/data-source.ts var HellsingENDataSource = { age: "age", gender: "gender", height: "height", species: "species", status: "status", affiliation: "affiliation", relatives: "family", manga: "firstmanga", episode: "firstova", seiyu: "japanactor", voiceActor: "voiceactor", images: { identifier: ".pi-item .pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } } }; // wikia/hellsing/schemas.ts var HellsingEN = { url: "https://hellsing.fandom.com/wiki/Category:Characters", pageFormat: "classic", category: "Category:Characters", dataSource: HellsingENDataSource }; // wikia/hellsing/index.ts var Hellsing = { en: HellsingEN }; // wikia/smurf/data-source.ts var SmurfFRDataSource = { name: "nom", species: "esp\xE8ce", gender: "sexe", images: { identifier: ".pi-image .pi-image-thumbnail", get: 
function(page) { return page.querySelectorAll(this.identifier); } }, episode: "premier \xE9pisode", age: "\xE2ge", occupations: "profession", voiceActor: "acteur", relatives: "amis" }; var SmurfENDataSource = { species: "race", gender: "gender", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, episode: "firt", voiceActor: "voice actor", occupations: "occupation" }; // wikia/smurf/schemas.ts var SmurfEN = { url: "https://smurfs.fandom.com/wiki/Category:Smurfs_Characters", pageFormat: "classic", category: "Category:Smurfs_Characters", dataSource: SmurfENDataSource }; var SmurfFR = { url: "https://schtroumpfs.fandom.com/fr/wiki/Cat%C3%A9gorie:Personnages", pageFormat: "classic", category: "Cat\xE9gorie:Personnages", dataSource: SmurfFRDataSource }; // wikia/smurf/index.ts var Smurf = { fr: SmurfFR, en: SmurfEN }; // wikia/rick-and-morty/data-source.ts var RickAndMortyFRDataSource = { name: "nom", species: "esp\xE8ce", gender: "genre", status: "statut", images: { identifier: ".pi-image .pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, episode: "premi\xE8re_apparition", age: "\xE2ge", occupations: "occupation", voiceActor: "voix", relatives: "famille" }; var RickAndMortyENDataSource = { species: "species", gender: "gender", age: "age", status: "status", occupations: "job", affiliation: "affiliation", relatives: "family", images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, episode: "firt", voiceActor: "voice actor" }; // wikia/rick-and-morty/schemas.ts var RickAndMortyEN = { url: "https://rickandmorty.fandom.com/wiki/Category:Characters", pageFormat: "classic", category: "Category:Characters", dataSource: RickAndMortyENDataSource }; var RickAndMortyFR = { url: "https://rick-et-morty.fandom.com/fr/wiki/Cat%C3%A9gorie:Personnages", pageFormat: "classic", category: "Cat\xE9gorie:Personnages", 
dataSource: RickAndMortyFRDataSource }; // wikia/rick-and-morty/index.ts var RickAndMorty = { fr: RickAndMortyFR, en: RickAndMortyEN }; // wikia/death-parade/data-source.ts var DeathParadeENDataSource = { kanji: "kanji", romaji: "romaji", gender: "gender", age: "age", height: "height", hairColor: "hair", eyeColor: "eye", weight: "weight", status: "status", occupations: "occupation", affiliation: "affiliation", images: { identifier: ".pi-image .pi-image-thumbnail", get: function(page) { return Array.from(page.querySelectorAll(this.identifier)); } }, quote: { identifier: "h2:has(span#Quotes) + ul li", get: function(page) { const quoteSection = page.querySelector('h2:has(span#Quotes), h2:has(span.mw-headline[id="Quotes"])'); if (!quoteSection) return null; const quotesList = quoteSection.nextElementSibling; if (quotesList && quotesList.tagName === "UL") { return quotesList.querySelector("li"); } return null; } }, episode: "debut", bloodType: "blood", seiyu: "jvoice", voiceActor: "evoice" }; // wikia/death-parade/schemas.ts var DeathParadeEN = { url: "https://death-parade.fandom.com/wiki/Category:Characters", pageFormat: "classic", category: "Category:Characters", dataSource: DeathParadeENDataSource }; // wikia/death-parade/index.ts var DeathParade = { en: DeathParadeEN }; // wikia/fate/data-source.ts var FateENDataSource = { species: "type", gender: "gender", height: "height", weight: "weight", bloodType: "bloodt", likes: "likes", dislikes: "dislikes", talent: "talent", enemy: "enemy", imageColor: "imagecol", ancestors: "Ancestor", father: "Father", mother: "Mother", spirit: "spirit", type: "type", source: "source", region: "region", alignement: "alignement", attribute: "attribute", armament: "armament", // Images images: { identifier: ".pi-image-thumbnail", get: function(page) { return page.querySelectorAll(this.identifier); } }, // Quote block (optional) quote: { identifier: "#Quotes", get: function(page) { const quotesHeading = page.querySelector("span#Quotes"); if 
(!quotesHeading) return null; const h2 = quotesHeading.closest("h2"); if (!h2) return null; let next = h2.nextElementSibling; while (next && next.tagName.toLowerCase() !== "ul") { next = next.nextElementSibling; } return next; } }, kanji: "jname", name: "name", seiyu: "JPvoice", voiceActor: "Engvoice", franchise: "franchise", appearance: "appearances", aka: "aka", class: "class", master: "master", age: "age", affiliation: "affiliation", birthday: "bday", occupations: "occupation", relatives: "relative(s)" }; // wikia/fate/schemas.ts var FateEN = { url: "https://typemoon.fandom.com/wiki/List_of_Servants", pageFormat: "table-5", dataSource: FateENDataSource }; // wikia/fate/index.ts var Fate = { en: FateEN }; // wikia/index.ts var Schemas = { "berserk": Berserk, "death-note": DeathNote, "death-parade": DeathParade, "kimetsu-no-yaiba": DemonSlayer, "koe-no-katachi": SilentVoice, "dororo": Dororo, "dragon-ball": DragonBall, "fumetsu-no-anata-e": Fumetsu, "hellsing": Hellsing, "naruto": Naruto, "jojo": Jojo, "one-piece": OnePiece, "rick-and-morty": RickAndMorty, "promised-neverland": PromisedNeverland, "shiki": Shiki, "shingeki-no-kyojin": Shingeki, "smurf": Smurf, "fate": Fate }; // func/parsing.ts var removeBrackets = (str) => { return str.replace(/\[.*?\]/g, "").trim(); }; var formatName = (name) => { const split = name.split(" "); const formatted = split.map((word) => { return word.charAt(0).toUpperCase() + word.slice(1); }); return formatted.join(" "); }; var formatForUrl = (name) => { return name.replace(/ /g, "_"); }; // types/dynamic.types.ts var availableWikis = [ "berserk", "death-note", "death-parade", "dororo", "dragon-ball", "fumetsu-no-anata-e", "hellsing", "jojo", "kimetsu-no-yaiba", "koe-no-katachi", "naruto", "one-piece", "rick-and-morty", "shiki", "shingeki-no-kyojin", "smurf", "promised-neverland", "fate" ]; // utils/urlUtils.ts var WIKI_PATH_MARKER = "/wiki/"; var CATEGORY_CONTINUE_PARAM = "fandomscraper-cmcontinue"; function getWikiUrl(url) { const 
parsedUrl = new URL(url); const wikiMarkerIndex = parsedUrl.pathname.indexOf(WIKI_PATH_MARKER); if (wikiMarkerIndex === -1) { return `${parsedUrl.origin}${parsedUrl.pathname.replace(/\/?$/, "/")}`; } const wikiPath = parsedUrl.pathname.slice(0, wikiMarkerIndex + WIKI_PATH_MARKER.length); return new URL(wikiPath, parsedUrl.origin).href; } function getDataUrl(domain, href) { if (!href) { return ""; } return new URL(href, domain).href; } function isFandomPageUrl(url) { const parsedUrl = new URL(url); return parsedUrl.hostname.endsWith("fandom.com") && parsedUrl.pathname.includes(WIKI_PATH_MARKER); } function getWikiApiUrl(url) { const parsedUrl = new URL(url); const wikiMarkerIndex = parsedUrl.pathname.indexOf(WIKI_PATH_MARKER); if (wikiMarkerIndex === -1) { throw new Error(`Cannot build API URL from non-wiki URL: ${url}`); } const apiPath = `${parsedUrl.pathname.slice(0, wikiMarkerIndex)}/api.php`; return new URL(apiPath, parsedUrl.origin).href; } function getWikiTitleFromUrl(url) { const parsedUrl = new URL(url); const wikiMarkerIndex = parsedUrl.pathname.indexOf(WIKI_PATH_MARKER); if (wikiMarkerIndex === -1) { return null; } const encodedTitle = parsedUrl.pathname.slice(wikiMarkerIndex + WIKI_PATH_MARKER.length); if (!encodedTitle) { return null; } return decodeURIComponent(encodedTitle); } function buildWikiPageUrl(sourceUrl, title) { const normalizedTitle = title.split("/").map((segment) => encodeURIComponent(segment.replace(/ /g, "_")).replace(/%3A/gi, ":")).join("/"); return new URL(normalizedTitle, getWikiUrl(sourceUrl)).href; } function getCategoryContinuationToken(url) { return new URL(url).searchParams.get(CATEGORY_CONTINUE_PARAM); } function buildCategoryContinuationUrl(url, continuationToken) { const nextUrl = new URL(url); nextUrl.searchParams.set(CATEGORY_CONTINUE_PARAM, continuationToken); return nextUrl.href; } // services/PageFetcher.ts var PageFetcher = class { /** * Fetch a page from a URL and return its document * @param url - The URL to fetch * 
// services/PageFetcher.ts
var PageFetcher = class {
  /**
   * Fetch a page from a URL and return its document
   * @param url - The URL to fetch
   * @returns The document of the fetched page
   */
  async fetchPage(url) {
    if (!isFandomPageUrl(url)) {
      return this.fetchHtmlPage(url);
    }
    try {
      return await this.fetchFandomPage(url);
    } catch {
      // Deliberate best-effort: when the API route fails for any reason,
      // fall back to downloading the raw HTML page.
      return this.fetchHtmlPage(url);
    }
  }

  /**
   * Fetch a Fandom page through the wiki API. Category titles are served by
   * the categorymembers query, every other title by the parse API.
   * @param url - The wiki page URL
   * @param redirectDepth - Number of wiki redirects already followed
   * @returns The document built from the API response
   */
  async fetchFandomPage(url, redirectDepth = 0) {
    const title = getWikiTitleFromUrl(url);
    if (title && this.isCategoryTitle(title)) {
      return this.fetchCategoryPage(url, title);
    }
    return this.fetchParsedWikiPage(url, redirectDepth);
  }

  /**
   * Fetch a page through action=parse and follow wiki redirects.
   * @param url - The wiki page URL
   * @param redirectDepth - Number of wiki redirects already followed
   * @returns The document built from the parsed HTML
   * @throws Error when the API response lacks HTML or a title, or when
   *         too many redirects were followed
   */
  async fetchParsedWikiPage(url, redirectDepth = 0) {
    const maxRedirects = 5;
    const response = await this.fetchJson(this.buildParseApiUrl(url));
    const parsedHtml = response.parse?.text?.["*"];
    const title = response.parse?.title;
    if (!parsedHtml || !title) {
      const errorMessage = response.error?.info || `Unexpected parse API response for ${url}`;
      throw new Error(errorMessage);
    }
    const redirectUrl = this.extractRedirectUrl(parsedHtml, url);
    if (redirectUrl && redirectUrl !== url) {
      // Guard against redirect cycles (A -> B -> A), which would otherwise
      // recurse without end.
      if (redirectDepth >= maxRedirects) {
        throw new Error(`Too many redirects while fetching ${url}`);
      }
      return this.fetchFandomPage(redirectUrl, redirectDepth + 1);
    }
    return this.createDocument(parsedHtml, {
      requestUrl: url,
      canonicalUrl: buildWikiPageUrl(url, title),
      title,
      pageId: response.parse?.pageid ?? 0
    });
  }

  /**
   * Fetch one batch of category members (main namespace, up to 500 entries)
   * and expose it as a synthetic HTML document.
   * @param url - The category page URL; may carry a continuation token
   * @param title - The category title
   * @returns The synthetic document listing the members
   * @throws Error when the API answers with an error and no query result
   */
  async fetchCategoryPage(url, title) {
    const apiUrl = new URL(getWikiApiUrl(url));
    apiUrl.searchParams.set("action", "query");
    apiUrl.searchParams.set("list", "categorymembers");
    apiUrl.searchParams.set("cmtitle", title);
    apiUrl.searchParams.set("cmnamespace", "0");
    apiUrl.searchParams.set("cmlimit", "500");
    apiUrl.searchParams.set("format", "json");
    const continuationToken = getCategoryContinuationToken(url);
    if (continuationToken) {
      apiUrl.searchParams.set("cmcontinue", continuationToken);
    }
    const response = await this.fetchJson(apiUrl.href);
    if (!response.query && response.error) {
      throw new Error(response.error.info || `Unexpected categorymembers API response for ${url}`);
    }
    const members = response.query?.categorymembers ?? [];
    const nextToken = response.continue?.cmcontinue;
    const categoryHtml = this.buildCategoryMembersHtml(url, members, nextToken);
    return this.createDocument(categoryHtml, {
      requestUrl: url,
      canonicalUrl: buildWikiPageUrl(url, title),
      title
    });
  }

  /**
   * Download a page as raw HTML.
   * @param url - The URL to fetch
   * @returns The document of the page
   * @throws Error on network failure, non-2xx status or Cloudflare challenge
   */
  async fetchHtmlPage(url) {
    const response = await fetch(url).catch((err) => {
      throw new Error(`Error while fetching ${url}: ${err}`, { cause: err });
    });
    const text = await response.text();
    if (!response.ok) {
      throw new Error(`Error while fetching ${url}: HTTP ${response.status}`);
    }
    if (this.isCloudflareChallenge(text)) {
      throw new Error(`Cloudflare blocked the request for ${url}`);
    }
    return this.createDocument(text, { requestUrl: url, canonicalUrl: url });
  }

  /**
   * Fetch a URL and parse the response body as JSON.
   * @param url - The URL to fetch
   * @returns The parsed JSON value
   * @throws Error on network failure, non-2xx status or invalid JSON
   */
  async fetchJson(url) {
    const response = await fetch(url, {
      headers: { "User-Agent": "FandomScraper/1.0" }
    }).catch((err) => {
      throw new Error(`Error while fetching ${url}: ${err}`, { cause: err });
    });
    const body = await response.text();
    if (!response.ok) {
      throw new Error(`Error while fetching ${url}: HTTP ${response.status}`);
    }
    try {
      return JSON.parse(body);
    } catch (error) {
      throw new Error(`Error while parsing JSON from ${url}: ${error}`, { cause: error });
    }
  }

  /**
   * Build the action=parse API URL for a page, addressed by the "curid"
   * query parameter when present and by title otherwise.
   * @param url - The wiki page URL
   * @returns The API URL
   * @throws Error when neither a page id nor a title can be found
   */
  buildParseApiUrl(url) {
    const apiUrl = new URL(getWikiApiUrl(url));
    apiUrl.searchParams.set("action", "parse");
    apiUrl.searchParams.set("prop", "text");
    apiUrl.searchParams.set("format", "json");
    const pageId = new URL(url).searchParams.get("curid");
    if (pageId) {
      apiUrl.searchParams.set("pageid", pageId);
      return apiUrl.href;
    }
    const title = getWikiTitleFromUrl(url);
    if (!title) {
      throw new Error(`Cannot extract a wiki title from ${url}`);
    }
    apiUrl.searchParams.set("page", title);
    return apiUrl.href;
  }

  /**
   * Render category members as a minimal HTML page made of member links
   * plus an optional "Next" pagination link.
   * @param url - The category page URL
   * @param members - The members returned by the API (objects with a title)
   * @param nextToken - The continuation token, if another batch exists
   * @returns The HTML source of the synthetic category page
   */
  buildCategoryMembersHtml(url, members, nextToken) {
    // Titles and URLs come from the remote API: escape them so characters
    // such as "&", "<" or '"' cannot alter the generated markup.
    const items = members.map(({ title: pageTitle }) => {
      const href = this.escapeHtml(buildWikiPageUrl(url, pageTitle));
      return `<a class="category-page__member-link" href="${href}">${this.escapeHtml(pageTitle)}</a>`;
    }).join("");
    const nextLink = nextToken
      ? `<a class="category-page__pagination-next" href="${this.escapeHtml(buildCategoryContinuationUrl(url, nextToken))}">Next</a>`
      : "";
    return `<!doctype html><html><head></head><body><div id="mw-content-text">${items}${nextLink}</div></body></html>`;
  }

  /**
   * Escape the characters that are significant in HTML text and
   * double-quoted attribute values.
   * @param text - The raw text
   * @returns The escaped text
   */
  escapeHtml(text) {
    return String(text)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");
  }

  /**
   * Parse HTML into a document and add, when missing, a canonical link,
   * a JSON script carrying the page id and a title heading.
   * @param html - The HTML source
   * @param options - requestUrl, canonicalUrl and optional title / pageId
   * @returns The resulting document
   */
  createDocument(html, options) {
    const document = new JSDOM(html, {
      url: options.canonicalUrl,
      contentType: "text/html",
      referrer: options.requestUrl
    }).window.document;
    if (!document.querySelector('link[rel="canonical"]')) {
      const canonicalElement = document.createElement("link");
      canonicalElement.setAttribute("rel", "canonical");
      canonicalElement.setAttribute("href", options.canonicalUrl);
      document.head.appendChild(canonicalElement);
    }
    if (options.pageId && !document.querySelector('script[data-fandomscraper-pageid="true"]')) {
      const pageIdScript = document.createElement("script");
      pageIdScript.type = "application/json";
      pageIdScript.dataset.fandomscraperPageid = "true";
      pageIdScript.textContent = `{"pageId":${options.pageId}}`;
      document.head.appendChild(pageIdScript);
    }
    if (options.title && !document.querySelector(".mw-page-title-main")) {
      const titleElement = document.createElement("h1");
      titleElement.className = "mw-page-title-main";
      titleElement.textContent = options.title;
      document.body.prepend(titleElement);
    }
    return document;
  }

  /**
   * @param title - A page title
   * @returns true when the title starts with "Category:" or "Catégorie:"
   *          (case-insensitive)
   */
  isCategoryTitle(title) {
    return /^(Category|Catégorie):/i.test(title);
  }

  /**
   * @param html - The HTML source of a response
   * @returns true when the HTML contains one of the known Cloudflare
   *          challenge markers
   */
  isCloudflareChallenge(html) {
    return /Just a moment/i.test(html) || /challenge-platform/i.test(html) || /cf-browser-verification/i.test(html);
  }

  /**
   * Look for a ".redirectText" link in parsed HTML.
   * @param html - The parsed page HTML
   * @param fallbackUrl - The URL used to resolve a relative redirect href
   * @returns The absolute redirect target, or null when there is none
   */
  extractRedirectUrl(html, fallbackUrl) {
    const document = new JSDOM(`<!doctype html><html><body>${html}</body></html>`).window.document;
    const redirectHref = document.querySelector(".redirectText a[href]")?.getAttribute("href");
    if (!redirectHref) {
      return null;
    }
    return new URL(redirectHref, fallbackUrl).href;
  }
};
// utils/extractImageURL.ts
/**
 * Cut an image URL right after its file extension, dropping any trailing
 * path or query string that follows it.
 * @param url - The image URL
 * @returns The URL up to the extension, or the input when nothing matches
 */
function extractImageURL(url) {
  const regex = /^(https?:\/\/.*\.(?:png|jpe?g|gif|bmp|svg|webp|tiff?))(?=[\/?]|$)/i;
  const match = url.match(regex);
  return match ? match[1] : url;
}

// utils/imageUtils.ts
/**
 * Download an image and return its content encoded as base64.
 * @param imageUrl - The URL of the image
 * @returns The base64-encoded image bytes
 * @throws The underlying error (after logging it) when the download fails
 *         or the server answers with a non-2xx status
 */
async function convertImageToBase64(imageUrl) {
  try {
    const response = await fetch(imageUrl);
    // Without this check the body of an error page would be encoded and
    // returned as if it were the image.
    if (!response.ok) {
      throw new Error(`Error while fetching ${imageUrl}: HTTP ${response.status}`);
    }
    const arrayBuffer = await response.arrayBuffer();
    return Buffer.from(arrayBuffer).toString("base64");
  } catch (error) {
    console.error("Error fetching or converting image:", error);
    throw error;
  }
}

// utils/validationUtils.ts
/**
 * Decide whether a fetched document is a usable character page.
 * @param page - The page document (may be null/undefined)
 * @param schemaUrl - The URL of the schema's wiki
 * @param extractPageIdFn - Function returning the page id of a document
 * @returns false when the page is missing, has page id 0, or its canonical
 *          link points to another host than schemaUrl; true otherwise
 */
function isValidCharacterPage(page, schemaUrl, extractPageIdFn) {
  if (!page) return false;
  if (extractPageIdFn(page) === 0) return false;
  try {
    const schemaHost = new URL(schemaUrl).host;
    const canonicalHref = page.querySelector('link[rel="canonical"]')?.getAttribute("href") || "";
    if (canonicalHref && new URL(canonicalHref).host !== schemaHost) {
      return false;
    }
  } catch {
    // Deliberate best-effort: an unparsable URL skips the host comparison
    // instead of rejecting the page.
  }
  return true;
}

/**
 * Tell whether a page should be handled as the "old" infobox format.
 * @param page - The page document
 * @returns true when the page has fewer than two ".pi-data-value" elements
 */
function setPageVersion(page) {
  // querySelectorAll always returns a list, so a single length check is enough.
  return page.querySelectorAll(".pi-data-value").length < 2;
}
// services/DataExtractor.ts
var DataExtractor = class {
  /**
   * Extract page ID from a page document
   * @param page - The page document
   * @returns The page ID, or 0 if not found
   */
  extractPageId(page) {
    const allScripts = page.getElementsByTagName("script");
    // Use the first script whose text mentions "pageId".
    const script = Array.from(allScripts)
      .find((candidate) => candidate.textContent?.includes("pageId"))?.textContent;
    if (!script) {
      return 0;
    }
    const match = script.match(/"pageId":(\d+)/);
    return match ? parseInt(match[1], 10) : 0;
  }

  /**
   * Extract data from the infobox according to the page version (old or new)
   * @param page - The page document
   * @param key - The data source key
   * @param isOldVersion - Whether the page uses the old infobox format
   * @returns The element containing the data, or null if not found
   */
  getDataAccordingToVersion(page, key, isOldVersion) {
    if (isOldVersion) {
      // Old format: the value is the sibling that follows the first <td>
      // (or, failing that, the first <th>) whose text contains the key.
      const identifier = ".mw-parser-output";
      const tdElement = Array.from(page.querySelectorAll(identifier + " td")).find((td) => {
        return td?.textContent?.includes(String(key));
      });
      if (tdElement?.nextElementSibling) {
        return tdElement.nextElementSibling;
      }
      const thElement = Array.from(page.querySelectorAll(identifier + " th")).find((th) => {
        return th?.textContent?.includes(String(key));
      });
      if (thElement?.nextElementSibling) {
        return thElement.nextElementSibling;
      }
      return null;
    }

    // New format: first try the data-source attribute of the infobox.
    const byDataSource = page.querySelector(`[data-source="${key}"] .pi-data-value`);
    if (byDataSource) return byDataSource;
    if (typeof key !== "string") {
      return null;
    }
    // Fallback: compare the key with the visible labels, ignoring case and
    // every character outside a-z / 0-9.
    const normalize = (text) => text.toLowerCase().replace(/\s+/g, "").replace(/[:()]/g, "").replace(/&nbsp;/g, "").replace(/[^a-z0-9]/g, "");
    const target = normalize(String(key));
    const items = page.querySelectorAll(".portable-infobox .pi-item, .portable-infobox .pi-data");
    for (const item of Array.from(items)) {
      const label = item.querySelector(".pi-data-label")?.textContent || "";
      const value = item.querySelector(".pi-data-value");
      if (!label || !value) continue;
      if (normalize(label) === target) {
        return value;
      }
    }
    return null;
  }

  /**
   * Set the value from an element, either as a string or an array
   * @param element - The element to extract value from
   * @param inAttrToArray - Whether to return an array
   * @returns The extracted value
   */
  setValue(element, inAttrToArray) {
    if (!inAttrToArray) {
      return removeBrackets(element.textContent || "");
    }
    // Array mode: every <br> or <li> of the markup starts a new entry.
    return element.innerHTML
      .split(/<br\s*\/?>|<li[^>]*>/)
      .map((chunk) => removeBrackets(chunk))
      .map((chunk) => chunk
        .replace(/<[^>]*>?/gm, "")
        .replace(/&nbsp;/g, " ")
        .replace(/&lt;br\s*\/?&gt;/g, "")
        // innerHTML serialises "<", ">" and "&" as entities; decode them so
        // array values match what textContent yields in string mode.
        // "&amp;" goes last to avoid decoding a value twice.
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&quot;/g, '"')
        .replace(/&amp;/g, "&")
        .trim())
      .filter((chunk) => chunk !== "");
  }

  /**
   * Extract the quote text from a given DOM element
   * @param element - The DOM element from which to extract the quote
   * @returns The extracted quote as a string, or an array of quotes if the element is a list
   */
  extractQuoteFromElement(element) {
    if (element.tagName.toLowerCase() === "ul") {
      const quotes = [];
      element.querySelectorAll("li").forEach((li) => {
        const quote = this.extractQuoteFromElement(li);
        if (typeof quote === "string" && quote.length > 0) {
          quotes.push(quote);
        } else if (Array.isArray(quote)) {
          quotes.push(...quote);
        }
      });
      return quotes;
    }
    if (!element.querySelector("cite, sup")) {
      return element.textContent?.trim() || "";
    }
    // Work on a copy so the page itself is left untouched, and drop every
    // <cite>/<sup> annotation, not only the first one.
    const clone = element.cloneNode(true);
    for (const annotation of Array.from(clone.querySelectorAll("cite, sup"))) {
      annotation.remove();
    }
    return clone.textContent?.trim() || "";
  }
};
// services/CharacterParser.ts
var CharacterParser = class {
  constructor() {
    this.dataExtractor = new DataExtractor();
  }

  /**
   * Read every requested attribute of a character page, following the schema.
   * @param page - The character page document
   * @param schema - Maps attribute names to their data source
   * @param getBase64 - When true, images are returned as base64 strings
   * @param keysAttrToArray - Attribute names whose value must be an array
   * @param attributes - Optional subset of attribute names to extract;
   *                     names unknown to the schema are dropped, and an
   *                     empty result means "all schema attributes"
   * @returns An object holding the values that could be extracted
   */
  async parseCharacterPage(page, schema, getBase64, keysAttrToArray, attributes) {
    const result = {};
    if (attributes) {
      attributes = attributes.filter((name) => Object.keys(schema).includes(name));
    }
    if (!attributes || attributes.length === 0) {
      attributes = Object.keys(schema);
    }
    const isOldVersion = setPageVersion(page);

    for (const key in schema) {
      const wantsArray = keysAttrToArray.includes(key);
      if (!attributes.includes(key) && !wantsArray) {
        continue;
      }
      const sourceKey = schema[key];
      if (!sourceKey) {
        continue;
      }

      if (key === "images") {
        result[key] = await this.parseImages(page, schema.images, getBase64);
        continue;
      }

      if (key === "quote") {
        const quote = this.parseQuote(page, sourceKey);
        if (quote) {
          result["quote"] = quote;
        }
        continue;
      }

      const element = this.dataExtractor.getDataAccordingToVersion(page, sourceKey, isOldVersion);
      if (!element) {
        continue;
      }
      const value = this.dataExtractor.setValue(element, wantsArray);
      if (value && value.length !== 0) {
        result[key] = value;
      }
    }
    return result;
  }

  /**
   * Collect the image URLs of a character page, without duplicates.
   * @param page - The page document
   * @param imagesConfig - The schema's images entry (get function, optional ignore list)
   * @param getBase64 - When true, each image is converted to base64
   * @returns The image URLs, or their base64 content
   */
  async parseImages(page, imagesConfig, getBase64) {
    if (!imagesConfig) {
      return [];
    }
    const elements = imagesConfig.get(page);
    if (!elements) {
      return [];
    }

    let candidates = Array.from(elements);
    if (candidates.length === 0) {
      // The schema selector found nothing: try the usual infobox selectors.
      const fallbackSelector = [
        ".portable-infobox img.mw-file-element",
        ".portable-infobox .image img",
        ".portable-infobox .pi-image img",
        ".portable-infobox .pi-image-thumbnail img",
        ".infobox-image img",
        "figure .image img",
        "figure.pi-item.pi-image img"
      ].join(", ");
      candidates = Array.from(page.querySelectorAll(fallbackSelector));
    }

    // Last URL listed in a srcset attribute, or null.
    const pickFromSrcset = (srcset) => {
      if (!srcset) return null;
      const urls = srcset
        .split(",")
        .map((entry) => entry.trim().split(" ")[0])
        .filter(Boolean);
      return urls.pop() ?? null;
    };

    // First attribute value that looks like an absolute image URL, or null.
    const findImageLikeAttribute = (el) => {
      const hit = Array.from(el.attributes)
        .map((attr) => attr.value)
        .find((v) => v && /^https?:\/\//i.test(v) && /\.(png|jpe?g|gif|bmp|svg|webp|tiff?)(?:[/?].*)?$/i.test(v));
      return hit ?? null;
    };

    const resolveImageUrl = (element) => {
      const tag = element.tagName.toLowerCase();
      const img = tag === "img" ? element : element.querySelector("img");

      // Sources are probed lazily, in priority order; the first truthy one wins.
      const lookups = [
        () => img?.getAttribute("data-src"),
        () => img?.getAttribute("src"),
        () => pickFromSrcset(img?.getAttribute("data-srcset") || null),
        () => pickFromSrcset(img?.getAttribute("srcset") || null),
        () => element.getAttribute("data-src"),
        () => element.getAttribute("src")
      ];
      let candidate;
      for (const lookup of lookups) {
        candidate = lookup();
        if (candidate) break;
      }

      if (!candidate) {
        candidate = findImageLikeAttribute(img || element) || findImageLikeAttribute(element) || null;
      }
      if (!candidate && tag === "a") {
        const href = element.getAttribute("href");
        if (href && /^https?:\/\//i.test(href)) {
          candidate = href;
        }
      }
      if (!candidate) {
        return null;
      }
      return extractImageURL(candidate);
    };

    const collected = [];
    const seen = new Set();
    for (const element of candidates) {
      const src = resolveImageUrl(element);
      if (!src || seen.has(src)) {
        continue;
      }
      seen.add(src);
      if (imagesConfig.ignore?.includes(src)) {
        continue;
      }
      collected.push(getBase64 ? await convertImageToBase64(src) : src);
    }
    return collected;
  }

  /**
   * Locate the quote element of a page and extract its text.
   * @param page - The page document
   * @param sourceKey - Either an object exposing get(page) or a CSS selector
   * @returns The extracted quote, or null when no element was found
   */
  parseQuote(page, sourceKey) {
    let quoteElement = null;
    if (typeof sourceKey === "string") {
      quoteElement = page.querySelector(sourceKey);
    } else if (sourceKey && typeof sourceKey === "object" && "get" in sourceKey) {
      quoteElement = sourceKey.get(page);
    }
    return quoteElement ? this.dataExtractor.extractQuoteFromElement(quoteElement) : null;
  }
};
"Attack on Titan Character Encyclopedia FINAL/Civilians", "Attack on Titan Character Encyclopedia FINAL/Garrison", "Attack on Titan Character Encyclopedia FINAL/Marleyan military" ], attributes: [] }; this.keysAttrToArray = []; } /** * Reset options to default values */ reset() { this.options = { base64: false, recursive: false, withId: true, limit: 50, offset: 0, ignore: [], attributes: [] }; this.keysAttrToArray = []; } /** * Set the limit of characters to get * @param limit - The limit value */ setLimit(limit) { if (limit < 1) { throw new Error("Limit must be greater than 0"); } this.options.limit = limit; } /** * Set the offset of characters to get * @param offset - The offset value */ setOffset(offset) { if (offset < 0) { throw new Error("Offset must be greater than 0"); } this.options.offset = offset; } /** * Set the ignored substrings in the characters names * @param ignore - The substrings to ignore */ setIgnore(ignore) { if (!Array.isArray(ignore)) { throw new Error("Ignore parameter must be an array"); } this.options.ignore = ignore; } /** * Set the attributes to get in the characters * @param attributes - The attributes string (space-separated) */ setAttributes(attributes) { if (typeof attributes !== "string") { throw new Error("Attributes parameter must be a string"); } attributes = attributes.replace(/\s\s+/g, " ")?.trim(); this.options.attributes = attributes.split(" "); } /** * Set the keys of the attributes that should be converted to an array * @param attributes - The attributes string (space-separated) */ setAttrToArray(attributes) { if (typeof attributes !== "string") { throw new Error("Attributes to array parameter must be a string"); } attributes = attributes.