/*
 * manga-crawl-lib
 * Version:
 * A library for scraping manga from various websites.
 * 278 lines (252 loc) • 8.28 kB
 * text/typescript
 */
/* eslint-disable @typescript-eslint/no-unsafe-argument */
/* eslint-disable @typescript-eslint/no-non-null-assertion */
import axios from 'axios';
import * as cheerio from 'cheerio';
import {
AbstractMangaFactory,
chapter,
genre,
image_chapter,
responseChapter,
responseDetailManga,
responseListManga,
} from '../types/type';
import { useGetDataItemsManga } from '../hooks/getListLatest';
import { Browser } from 'puppeteer';
/**
 * Scraper for a Toonily site: pages are fetched with axios and parsed with
 * cheerio to produce manga listings, manga details, chapter images and search
 * results.
 *
 * `baseUrl` is used verbatim as the prefix of every listing/search URL, and its
 * length is what gets stripped from absolute links to build `path` values. It
 * is therefore assumed to have no trailing slash (e.g. `https://host.tld`).
 */
export class Toonily implements AbstractMangaFactory {
  baseUrl: string;
  all_genres: genre[];
  browser?: Promise<Browser> | undefined;

  /**
   * @param baseUrl - Site origin that request URLs are built from and that is
   *   stripped from scraped links to derive their `path`.
   */
  constructor(baseUrl: string) {
    this.baseUrl = baseUrl;
    this.all_genres = [];
  }

  /**
   * Listing by genre is not implemented for this site.
   *
   * @throws Error always. The method is not `async`, so the error is thrown
   *   synchronously at the call site instead of surfacing as a rejection.
   */
  getListByGenre(
    genre: genre,
    page?: number | undefined,
    status?: any,
    sort?: any
  ): Promise<responseListManga> {
    throw new Error('Method not implemented.');
  }

  /**
   * Fetches the front-page listing (`baseUrl`, or `baseUrl/page/N` for N > 1)
   * and returns its items together with pagination info.
   *
   * @param page - 1-based page number; omitted or `<= 1` requests the first page.
   * @returns Items extracted by `useGetDataItemsManga` plus paging flags.
   */
  async getListLatestUpdate(
    page?: number | undefined
  ): Promise<responseListManga> {
    const response = await axios.get(`${this.baseUrl}${this.pageSuffix(page)}`);
    const $ = cheerio.load(response.data);

    // Title text and link live on the same anchor, hence the shared selector.
    const titleLinkSelector =
      'div.item-summary > div.post-title.font-title > h3 > a';
    const data = await useGetDataItemsManga({
      cheerioApi: $,
      wrapSelector: '#loop-content > div > div > div',
      titleSelector: titleLinkSelector,
      thumbnailSelector: 'div.item-thumb.c-image-hover > a > img',
      thumbnailAttr: 'data-src',
      hrefSelector: titleLinkSelector,
    });

    const lastPageHref = $('div.wp-pagenavi').find('a.last').attr('href');
    return {
      data,
      totalData: data.length,
      ...this.buildPageInfo(lastPageHref, page),
    };
  }

  /**
   * Scrapes a manga detail page.
   *
   * @param url - Absolute URL of the manga page; echoed back as `url` and used
   *   as `parent_href` of every chapter.
   * @returns Metadata (title, author, status, genres, rating, follows, views)
   *   and the chapter list in document order. Genre and chapter anchors that
   *   have no `href` are skipped.
   */
  async getDetailManga(url: string): Promise<responseDetailManga> {
    const $ = cheerio.load((await axios.get(url)).data);
    const site_content = $('div.site-content');
    const summaryWrap =
      'div.profile-manga.summary-layout-1 > div > div > div > div.tab-summary > div.summary_content_wrap > div';

    const author = site_content
      .find('div.summary-content > div.author-content > a')
      .text();
    const title = site_content
      .find('div.post-content > div.post-title > h1')
      .text()
      .trim();
    const status = site_content
      .find('div.post-status > div.post-content_item > div.summary-content')
      .text()
      .trim();

    const genres: genre[] = [];
    $('div.genres-content > a').each((_i, e) => {
      const href = $(e).attr('href');
      if (href === undefined) return; // a genre without a link is unusable
      genres.push({ url: href, name: $(e).text(), path: this.toPath(href) });
    });

    // The cell text is cut at the word "views"; the last space-separated
    // token before it is taken as the view count.
    const viewsCell = site_content
      .find(
        `${summaryWrap} > div.post-content > div:nth-child(5) > div.summary-content`
      )
      .text();
    const views =
      (viewsCell.split('views')[0] ?? '').trim().split(' ').at(-1) ?? '';

    const rate = site_content.find('#averagerate').text().trim();
    const rate_number = site_content.find('#countrate').text();

    // Only the first space-separated token of the bookmark label is kept.
    const follows =
      site_content
        .find(
          `${summaryWrap} > div.post-status > div.manga-action > div.add-bookmark > div.action_detail > span`
        )
        .text()
        .split(' ')[0] ?? '';

    const chapters: chapter[] = [];
    site_content
      .find('ul.main.version-chap.no-volumn > li.wp-manga-chapter')
      .each((_i, e) => {
        const link = $(e).find('a');
        const href = link.attr('href');
        if (href === undefined) return; // skip rows that carry no chapter link
        chapters.push({
          url: href,
          path: this.toPath(href),
          parent_href: url,
          title: link.text().trim(),
        });
      });

    return {
      path: this.toPath(url),
      url,
      author,
      genres,
      rate,
      rate_number,
      follows,
      views,
      title,
      status,
      chapters,
    };
  }

  /**
   * Scrapes a chapter page: its images and its previous/next navigation links.
   *
   * @param url_chapter - Absolute URL of the chapter page to fetch.
   * @param url - Unused by this implementation.
   * @param path - Unused by this implementation.
   * @param prev_chapter - When provided, returned as `prev_chapter` instead of
   *   the link scraped from the page.
   * @param next_chapter - When provided, returned as `next_chapter` instead of
   *   the link scraped from the page.
   * @returns The chapter's images in document order, plus neighbours (`null`
   *   when neither supplied nor present on the page).
   */
  async getDataChapter(
    url_chapter: string,
    url?: string | undefined,
    path?: string | undefined,
    prev_chapter?: chapter | undefined,
    next_chapter?: chapter | undefined
  ): Promise<responseChapter> {
    const $ = cheerio.load((await axios.get(url_chapter)).data);
    const site_content = $('div.main-col-inner');

    const breadcrumbItem = 'ol.breadcrumb > li:nth-child(3)';
    const title = site_content.find(breadcrumbItem).text().trim();
    const parent_href =
      site_content.find(`${breadcrumbItem} > a`).attr('href') ?? '';

    const chapter_data: image_chapter[] = [];
    site_content
      .find('div.entry-content div.reading-content > div.page-break > img')
      .each((i, e) => {
        // Prefer the lazy-load attribute; fall back to a plain `src` so an
        // image without `data-src` no longer aborts the whole chapter.
        const src = $(e).attr('data-src') ?? $(e).attr('src');
        if (src === undefined) return;
        chapter_data.push({
          _id: i,
          src_origin: src.trim(),
          alt: $(e).attr('alt') ?? '',
        });
      });

    // Builds a chapter record from a navigation anchor, or null when the
    // anchor (or its href) is absent. The scraped neighbour reuses this
    // page's breadcrumb title because the nav link's own title is not read.
    const scrapeNeighbour = (selector: string): chapter | null => {
      const href = site_content.find(selector).attr('href');
      if (href === undefined) return null;
      return { url: href, path: this.toPath(href), parent_href, title };
    };

    return {
      url: url_chapter,
      path: this.toPath(url_chapter),
      title,
      chapter_data,
      // A caller-supplied neighbour wins; otherwise use what the page links to.
      prev_chapter:
        prev_chapter ?? scrapeNeighbour('div.nav-links > div.nav-previous > a'),
      next_chapter:
        next_chapter ?? scrapeNeighbour('div.nav-links > div.nav-next > a'),
    };
  }

  /**
   * Runs a site search at `baseUrl/search/<keyword>[/page/N]`.
   *
   * @param keyword - Search text; each whitespace character becomes `-` and
   *   the result is percent-encoded before being placed in the URL path.
   * @param page - 1-based page number; omitted or `<= 1` requests the first page.
   * @returns Matching items (missing thumbnail/link attributes become `''`)
   *   plus paging flags.
   */
  async search(
    keyword: string,
    page?: number | undefined
  ): Promise<responseListManga> {
    // Encoding keeps characters such as '/', '?' or '#' in user input from
    // changing which path is requested.
    const slug = encodeURIComponent(keyword.replace(/\s/g, '-'));
    const response = await axios.get(
      `${this.baseUrl}/search/${slug}${this.pageSuffix(page)}`
    );
    const $ = cheerio.load(response.data);

    const titleLinkSelector =
      'div.item-summary > div.post-title.font-title > h3 > a';
    const data: {
      _id: number;
      title: string;
      image_thumbnail: string;
      href: string;
    }[] = [];
    $('div.page-listing-item > div.row.row-eq-height > div > div').each(
      (i, e) => {
        const titleLink = $(e).find(titleLinkSelector);
        data.push({
          _id: i,
          title: titleLink.text(),
          image_thumbnail:
            $(e)
              .find('div.item-thumb.c-image-hover > a > img')
              .attr('data-src') ?? '',
          href: titleLink.attr('href') ?? '',
        });
      }
    );

    const lastPageHref = $('div.wp-pagenavi').find('a.last').attr('href');
    return {
      data,
      totalData: data.length,
      ...this.buildPageInfo(lastPageHref, page),
    };
  }

  /** Returns `/page/N` for pages after the first, otherwise an empty string. */
  private pageSuffix(page: number | undefined): string {
    return page !== undefined && page > 1 ? `/page/${page}` : '';
  }

  /** Strips the leading `baseUrl.length` characters from an absolute link. */
  private toPath(href: string): string {
    return href.substring(this.baseUrl.length);
  }

  /**
   * Derives paging info from the pagination "last" link.
   *
   * The page count is the number following `/page/` at the end of the link
   * (a trailing slash and a query/fragment are tolerated). When the link is
   * missing or carries no number, the current page is taken as the last one.
   */
  private buildPageInfo(
    lastPageHref: string | undefined,
    page: number | undefined
  ): {
    totalPage: number;
    currentPage: number;
    canNext: boolean;
    canPrev: boolean;
  } {
    const currentPage = page ?? 1;
    const match =
      lastPageHref === undefined
        ? null
        : /\/page\/(\d+)\/?(?:[?#].*)?$/.exec(lastPageHref);
    const parsed = Number(match?.[1]); // NaN when there is no match
    const totalPage = Number.isFinite(parsed) ? parsed : currentPage;
    return {
      totalPage,
      currentPage,
      canNext: currentPage < totalPage,
      canPrev: currentPage > 1,
    };
  }
}