UNPKG

fandomscraper

Version:

A package to scrape the character pages of Fandom wikis. It only scrapes the character info section and the list of all listed characters.

129 lines (113 loc) 4.85 kB
/**
 * Shared type declarations for the scraper, exposed in the global scope.
 *
 * The trailing `export {}` turns this file into a module, which is what
 * allows the `declare global` augmentation below.
 */
declare global {
  /**
   * The predefined formats a characters list page can have.
   *
   * - `classic`: the classic page with the list of character names
   * - `table-1`: the table with the image on the left
   * - `table-2`: the sorted table with the different categories
   *
   * `table-3` to `table-6` are further table layouts that are not described here.
   */
  type TPageFormats =
    | 'classic'
    | 'table-1'
    | 'table-2'
    | 'table-3'
    | 'table-4'
    | 'table-5'
    | 'table-6';

  /** Custom page format configuration. */
  interface ICustomPageFormat {
    /** CSS selector used to find character elements. */
    selector: string;
    /** List of strings to ignore in character names. */
    ignore?: string[];
    /**
     * NOTE(review): presumably describes how to reach the next page of the
     * list (kind of locator + its value) — confirm against the scraper.
     */
    next?: {
      type: string;
      value: string;
    };
  }

  /** A page format is either a predefined format or a custom configuration. */
  type TPageFormat = TPageFormats | ICustomPageFormat;

  /** A scraped value: a single string or a list of strings. */
  type TDataset = string | string[];

  /** One character entry. */
  interface IData {
    /** Id of the character, taken from the wikia `pageId` value. */
    id?: number;
    /** Name of the character. */
    name: string;
    /** URL of the character page on the wikia. */
    url: string;
    /** Profile image URL from the MediaWiki pageimages API (when available). */
    profileImage?: string;
    /** Data of the character. */
    data?: IDataset;
  }

  /** Data-source entry describing how to obtain the character images. */
  interface IImage {
    identifier: string;
    // NOTE(review): `Function` accepts any callable and its calls are untyped.
    // Kept as-is so existing data sources keep compiling; consider a precise
    // signature (as IQuote.get has) once the call sites are confirmed.
    get: Function;
    ignore?: string[];
  }

  /** Data-source entry describing how to obtain the character quote. */
  interface IQuote {
    identifier: string;
    /** Receives the page document and returns an element, or `null`. */
    get: (page: Document) => Element | null;
  }

  /** The well-known fields that can be scraped for a character. */
  interface IKnownDatasetFields {
    /** Name of the character. */
    name?: TDataset;
    /** Kanji name of the character. */
    kanji?: string;
    /** Quote of the character. */
    quote?: string | string[];
    /** Romaji name of the character. */
    romaji?: string;
    /** Status of the character (dead, alive, etc.). */
    status?: string;
    /** Race. */
    species?: TDataset;
    /** Gender of the character. */
    gender?: string;
    /** Array of image urls. */
    images?: string[];
    /** Array of episode names where the character first appeared. */
    episode?: TDataset;
    /** Manga chapter where the character first appeared. */
    manga?: string;
    /** Age of the character. */
    age?: TDataset;
    /** Birthday of the character. */
    birthday?: string;
    /** Blood type of the character. */
    bloodType?: string;
    /** Zodiac sign of the character. */
    zodiac?: string;
    /** Hair color of the character. */
    hairColor?: string;
    /** Eye color of the character. */
    eyeColor?: string;
    /** Height of the character. */
    height?: TDataset;
    /** Weight of the character. */
    weight?: TDataset;
    /** Array of relatives of the character. */
    relatives?: TDataset;
    /** Affiliation of the character. */
    affiliation?: string;
    /** Array of occupations of the character. */
    occupations?: TDataset;
    /** Nationality of the character. */
    nationality?: string;
    /** Seiyu of the character. */
    seiyu?: TDataset;
    /** Voice actor of the character. */
    voiceActor?: TDataset;
  }

  /** IDataset extends the known fields and allows custom fields. */
  interface IDataset extends IKnownDatasetFields {
    // `TDataset` already is `string | string[]`, so this is the same type as
    // the previous `TDataset | string | string[] | undefined`.
    [key: string]: TDataset | undefined;
  }

  /** Known data-source fields with their selectors. */
  interface IKnownDataSourceFields {
    name?: string;
    kanji?: string;
    quote?: string | IQuote;
    romaji?: string;
    status?: string;
    species?: string;
    gender?: string;
    images?: IImage;
    episode?: string;
    manga?: string;
    age?: string;
    affiliation?: string;
    hairColor?: string;
    eyeColor?: string;
    occupations?: string;
    seiyu?: string;
    voiceActor?: string;
    relatives?: string;
    birthday?: string;
    zodiac?: string;
    height?: string;
    weight?: string;
    nationality?: string;
    bloodType?: string;
  }

  /**
   * Describes where to scrape the page to get the data of the characters
   * (data-source). Supports custom fields beyond the known ones.
   */
  interface IDataSource extends IKnownDataSourceFields {
    [key: string]: string | IImage | IQuote | undefined;
  }

  /** Description of a wiki to scrape. */
  interface ISchema {
    /**
     * The url of the wiki characters list to scrape
     * (ex: 'https://dragonball.fandom.com/wiki/Characters').
     */
    url: string;
    /**
     * The format of the characters list page
     * (ex: 'classic' or a custom configuration).
     */
    pageFormat: TPageFormat;
    /**
     * The data-source of the wiki (ex: DragonBallFRDataSource) which will be
     * used to scrape the wiki.
     */
    dataSource: IDataSource;
    /**
     * Full MediaWiki category title used to query characters via the
     * generator API. When set, enables the faster MediaWiki API path instead
     * of HTML scraping.
     * Examples: 'Category:Characters', 'Catégorie:Personnages'
     */
    category?: string;
  }
}

export { };