mwoffliner
Version:
MediaWiki ZIM scraper
90 lines (89 loc) • 3.47 kB
TypeScript
/// <reference types="node" resolution-mode="require"/>
import type { BackoffStrategy } from 'backoff';
import { AxiosRequestConfig, AxiosResponse } from 'axios';
import S3 from './S3.js';
import { Dump } from './Dump.js';
/**
* Options object accepted by the `Downloader` constructor.
*
* NOTE(review): this is a declaration only; the per-field notes below are inferred
* from names and types and should be confirmed against the implementation.
*/
interface DownloaderOpts {
/** Presumably the User-Agent string sent with outgoing requests — TODO confirm. */
uaString: string;
/** Numeric speed setting; presumably scales request concurrency — TODO confirm. */
speed: number;
/** Request timeout; unit is not stated in this declaration (likely milliseconds — confirm). */
reqTimeout: number;
/** URL of the optimisation cache; presumably used together with `s3` — TODO confirm. */
optimisationCacheUrl: string;
/** Optional S3 client instance (type imported from './S3.js'). */
s3?: S3;
/** WebP flag; presumably enables conversion of images to WebP — TODO confirm. */
webp: boolean;
/** Optional retry/backoff configuration; see `BackoffOptions`. */
backoffOptions?: BackoffOptions;
/** Optional wiki path. NOTE(review): not destructured in the constructor signature declared in this file — confirm whether it is still used. */
mwWikiPath?: string;
/** Optional flag; presumably relaxes TLS certificate verification — TODO confirm. */
insecure?: boolean;
}
/**
* Retry/backoff configuration that can be supplied through `DownloaderOpts.backoffOptions`.
*
* NOTE(review): how these fields are consumed is not visible in this declaration;
* the notes below are inferred from names and types.
*/
interface BackoffOptions {
/** Backoff strategy object (type comes from the 'backoff' package). */
strategy: BackoffStrategy;
/** Presumably the number of attempts after which a call is abandoned — TODO confirm. */
failAfter: number;
/** Predicate receiving an optional error; presumably returns true when the call should be retried — TODO confirm. */
retryIf: (error?: any) => boolean;
/** Callback receiving a number, a delay and an optional error; presumably invoked on each backoff — TODO confirm units and meaning of `number`. */
backoffHandler: (number: number, delay: number, error?: any) => void;
}
/**
* Downloader is a class providing content retrieval functionalities for both MediaWiki and S3 remote instances.
*
* This is an ambient declaration: only signatures are shown. Private members are listed
* without types, and the types `KVS`, `RKVS`, `QueryMwRet`, `ArticleDetail` and
* `DonwloadKind` are not imported in this file, so they are presumably global
* declarations elsewhere in the project — TODO confirm.
*
* NOTE(review): the per-member notes below are inferred from names and signatures
* unless they merely restate the declared types.
*/
declare class Downloader {
// ---- Public state ----
/** Login cookie string; presumably sent with authenticated requests — TODO confirm. */
loginCookie: string;
/** Read-only numeric speed setting (same name as `DownloaderOpts.speed`). */
readonly speed: number;
/** Map of URL-like keys to booleans; presumably tracks CSS dependency URLs already seen — TODO confirm. */
cssDependenceUrls: KVS<boolean>;
/** Read-only WebP flag (same name as `DownloaderOpts.webp`). */
readonly webp: boolean;
/** Read-only request timeout; presumably derived from `DownloaderOpts.reqTimeout` — TODO confirm. */
readonly requestTimeout: number;
// Axios request configurations; presumably one preset per response kind
// (basic, array buffer, JSON, stream) — TODO confirm.
readonly basicRequestOptions: AxiosRequestConfig;
readonly arrayBufferRequestOptions: AxiosRequestConfig;
readonly jsonRequestOptions: AxiosRequestConfig;
readonly streamRequestOptions: AxiosRequestConfig;
// String lists; presumably JS and style module dependencies for the Wikimedia mobile renderer — TODO confirm.
wikimediaMobileJsDependenciesList: string[];
wikimediaMobileStyleDependenciesList: string[];
// ---- Private state (types are not shown in this declaration) ----
private readonly uaString;
private activeRequests;
private maxActiveRequests;
private readonly backoffOptions;
private readonly optimisationCacheUrl;
private s3;
private apiUrlDirector;
private articleUrlDirector;
private mainPageUrlDirector;
private readonly insecure;
/**
* Creates a Downloader from a `DownloaderOpts` object.
* Note that `mwWikiPath` is declared on `DownloaderOpts` but is not destructured here.
*/
constructor({ uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, backoffOptions, insecure }: DownloaderOpts);
private getUrlDirector;
/** Takes the main-page and articles renderers (untyped); presumably configures the private URL directors — TODO confirm. */
setUrlsDirectors(mainPageRenderer: any, articlesRenderer: any): void;
/** Returns a URL string for the given article id. */
getArticleUrl(articleId: string): string;
/** Returns a URL string for the given article id; presumably the main-page variant of `getArticleUrl` — TODO confirm. */
getMainPageUrl(articleId: string): string;
/** Takes an ETag string and returns a string; presumably strips a weak-validator prefix such as `W/` — TODO confirm. */
removeEtagWeakPrefix(etag: string): string;
/** Returns a key/value object; presumably base query parameters for API calls — TODO confirm. */
query(): KVS<any>;
/** Resolves to a `QueryMwRet` for the given article ids; `shouldGetThumbnail` is optional. */
getArticleDetailsIds(articleIds: string[], shouldGetThumbnail?: boolean): Promise<QueryMwRet>;
/**
* Resolves to article details for namespace `ns` together with a `gapContinue` string.
* `gapcontinue` is optional; presumably a pagination token from a previous call — TODO confirm.
*/
getArticleDetailsNS(ns: number, gapcontinue?: string): Promise<{
gapContinue: string;
articleDetails: QueryMwRet;
}>;
/**
* Asynchronously retrieves an article; the resolved value is untyped (`any`).
* NOTE(review): long positional parameter list with several `any` parameters —
* the roles of `_moduleDependencies`, `articleRenderer` and `articleUrl` cannot be
* determined from this declaration.
*/
getArticle(webp: boolean, _moduleDependencies: any, articleId: string, articleDetailXId: RKVS<ArticleDetail>, articleRenderer: any, articleUrl: any, dump: Dump, articleDetail?: ArticleDetail, isMainPage?: boolean): Promise<any>;
/** Resolves to a value typed as `T` for the given URL; presumably the parsed JSON response — TODO confirm. The type parameter is caller-asserted, not validated by the signature. */
getJSON<T>(_url: string): Promise<T>;
/** Generic request taking an Axios request config and resolving to an Axios response type; the signature mirrors Axios' own `request` generics. */
request<T = any, R extends AxiosResponse<T> = AxiosResponse<T>, D = any>(config: AxiosRequestConfig<D>): Promise<R>;
/**
* Resolves to the downloaded content (Buffer or string), its content type and a
* `setCookie` value that may be null. `retry` is optional.
* NOTE(review): `DonwloadKind` looks like a misspelling of "DownloadKind", but it
* names a type declared elsewhere, so it must match that declaration.
*/
downloadContent(_url: string, kind: DonwloadKind, retry?: boolean): Promise<{
content: Buffer | string;
contentType: string;
setCookie: string | null;
}>;
/** Resolves to a boolean for the given URL; presumably whether the URL can be fetched — TODO confirm. */
canGetUrl(url: string): Promise<boolean>;
// ---- Private helpers (signatures are not shown in this declaration) ----
private static handleMWWarningsAndErrors;
private getArticleQueryOpts;
private setArticleSubCategories;
private claimRequest;
private releaseRequest;
private getJSONCb;
private getImageMimeType;
private getCompressedBody;
private getContentCb;
private downloadImage;
private errHandler;
private getSubCategories;
private backoffCall;
/** Resolves to a `jsConfigVars` string plus lists of JS and style dependencies for the given title. */
getModuleDependencies(title: string): Promise<{
jsConfigVars: string;
jsDependenciesList: string[];
styleDependenciesList: string[];
}>;
private streamToBuffer;
}
export default Downloader;