/*
 * mwoffliner
 * Version:
 * MediaWiki ZIM scraper
 * 118 lines (117 loc) • 4.76 kB
 * TypeScript
 */
import type { BackoffStrategy } from 'backoff';
import { AxiosRequestConfig, AxiosResponse } from 'axios';
import S3 from './S3.js';
import { SiteInfoResponse } from './MediaWiki.js';
import { Dump } from './Dump.js';
import ApiURLDirector from './util/builders/url/api.director.js';
import { Renderer } from './renderers/abstract.renderer.js';
/**
 * Options object accepted by the `init` setter of `Downloader`.
 *
 * Only `s3`, `backoffOptions` and `insecure` are optional; every other field
 * must be supplied by the caller.
 */
interface DownloaderOpts {
/** Presumably the User-Agent string sent with outgoing requests - confirm in the implementation. */
uaString: string;
/** Numeric speed setting, exposed again through the `speed` getter; unit/scale is not visible here. */
speed: number;
/** Request timeout; NOTE(review): unit (ms vs. s) is not visible in this declaration - confirm. */
reqTimeout: number;
/** URL of the optimisation cache; presumably used together with `s3` - confirm. */
optimisationCacheUrl: string;
/** Optional S3 helper instance (see `./S3.js`). */
s3?: S3;
/** WebP flag, exposed again through the `webp` getter. */
webp: boolean;
/** Optional retry/backoff configuration; the default used when omitted is not visible here. */
backoffOptions?: BackoffOptions;
/** Optional flag; presumably relaxes TLS certificate verification - TODO confirm. */
insecure?: boolean;
}
/**
 * Retry/backoff configuration, passed to `Downloader` through
 * `DownloaderOpts.backoffOptions`.
 */
interface BackoffOptions {
/** Backoff strategy object, typed by the `backoff` package. */
strategy: BackoffStrategy;
/** Presumably the number of attempts after which a call is given up - confirm. */
failAfter: number;
/** Predicate receiving the (optional) error; presumably returns true when the call should be retried. */
retryIf: (error?: any) => boolean;
/**
 * Callback receiving a number, a delay and an optional error.
 * Presumably invoked on each backoff step with the attempt number and the wait time - confirm.
 */
backoffHandler: (number: number, delay: number, error?: any) => void;
}
/**
 * Error subclass carrying details about a failed download.
 *
 * All string/number detail fields are nullable, so consumers must handle the
 * case where a given detail is not available.
 */
export declare class DownloadError extends Error {
/** URL that was called, or null. */
urlCalled: string | null;
/** HTTP status code returned, or null. */
httpReturnCode: number | null;
/** Content type of the response, or null. */
responseContentType: string | null;
/** Response payload; untyped (`any`). */
responseData: any;
/**
 * @param message - Error message passed along with the details below.
 * @param urlCalled - URL that was called, or null.
 * @param httpReturnCode - HTTP status code returned, or null.
 * @param responseContentType - Content type of the response, or null.
 * @param responseData - Response payload (untyped).
 */
constructor(message: string, urlCalled: string | null, httpReturnCode: number | null, responseContentType: string | null, responseData: any);
}
/**
 * Plain-object description of a download failure.
 *
 * Mirrors the detail fields of `DownloadError`, except that `urlCalled` is
 * non-nullable here and an additional `errorCode` field is present.
 */
export interface DownloadErrorContext {
/** URL that was called (always present in this shape). */
urlCalled: string;
/** Error code string, or null; presumably a network/library level code - confirm. */
errorCode: string | null;
/** HTTP status code returned, or null. */
httpReturnCode: number | null;
/** Content type of the response, or null. */
responseContentType: string | null;
/** Response payload; untyped (`any`). */
responseData: any;
}
/**
 * Downloader provides content retrieval functionality for both MediaWiki and
 * S3 remote instances.
 *
 * Configuration is applied by assigning a `DownloaderOpts` object to the
 * `init` setter. A static `getInstance()` accessor is declared alongside a
 * private static `instance` field (presumably a singleton - confirm in the
 * implementation).
 *
 * NOTE(review): `KVS`, `RKVS`, `QueryMwRet`, `ArticleDetail`,
 * `RendererArticleOpts` and `DonwloadKind` are not imported in this file, so
 * they are presumably ambient/global project types.
 */
declare class Downloader {
private static instance;
/** Returns a `Downloader`; presumably the shared instance held in `instance` - confirm. */
static getInstance(): Downloader;
private _speed;
/** Key/value store of booleans; presumably keyed by CSS dependency URL - confirm. */
cssDependenceUrls: KVS<boolean>;
private _webp;
private _requestTimeout;
private _basicRequestOptions;
private _arrayBufferRequestOptions;
private _jsonRequestOptions;
private _streamRequestOptions;
/** List of JS dependency names; population logic is not visible in this declaration. */
wikimediaMobileJsDependenciesList: string[];
/** List of style dependency names; population logic is not visible in this declaration. */
wikimediaMobileStyleDependenciesList: string[];
private uaString;
private backoffOptions;
private optimisationCacheUrl;
private s3;
private _apiUrlDirector;
// NOTE(review): `cookierJar` looks like a typo of `cookieJar`; the name is fixed by the implementation.
private cookierJar;
private articleUrlDirector;
private mainPageUrlDirector;
private insecure;
// Read-only accessors; presumably backed by the underscore-prefixed private fields above.
get speed(): number;
get webp(): boolean;
get requestTimeout(): number;
get basicRequestOptions(): AxiosRequestConfig<any>;
get arrayBufferRequestOptions(): AxiosRequestConfig<any>;
get jsonRequestOptions(): AxiosRequestConfig<any>;
get streamRequestOptions(): AxiosRequestConfig<any>;
get apiUrlDirector(): ApiURLDirector;
/**
 * Write-only accessor taking the full `DownloaderOpts` object.
 * Usage is by assignment, e.g. `downloader.init = { ... }`.
 */
set init({ uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, backoffOptions, insecure }: DownloaderOpts);
private reset;
private getUrlDirector;
/** Registers the renderers for the main page and for articles; presumably used to build the URL directors - confirm. */
setUrlsDirectors(mainPageRenderer: Renderer, articlesRenderer: Renderer): void;
/** Returns the URL string for an article id, with optional renderer-specific options. */
getArticleUrl(articleId: string, articleUrlOpts?: RendererArticleOpts): string;
/** Returns the URL string for the main page identified by `articleId`. */
getMainPageUrl(articleId: string): string;
/** Takes an ETag string and returns a string; presumably strips the weak-validator prefix (`W/`) - confirm. */
removeEtagWeakPrefix(etag: string): string;
/** Resolves with the site info response as typed in `./MediaWiki.js`. */
querySiteInfo(): Promise<SiteInfoResponse>;
/** Resolves with article details for the given ids; `shouldGetThumbnail` is optional and its default is not visible here. */
getArticleDetailsIds(articleIds: string[], shouldGetThumbnail?: boolean): Promise<QueryMwRet>;
/**
 * Resolves with article details for a numeric namespace plus a `gapContinue`
 * string; presumably a continuation token to pass back as `gapcontinue` for
 * the next page - confirm.
 */
getArticleDetailsNS(ns: number, gapcontinue?: string): Promise<{
gapContinue: string;
articleDetails: QueryMwRet;
}>;
/** Resolves with an untyped result for the given log event type and article id. */
getLogEvents(letype: string, articleId: string): Promise<any>;
/** Resolves with an untyped result for one article; NOTE(review): `articleUrl` is typed `any`. */
getArticle(articleId: string, articleDetailXId: RKVS<ArticleDetail>, articleRenderer: Renderer, articleUrl: any, dump: Dump, articleDetail?: ArticleDetail): Promise<any>;
/** Resolves with a value typed as `T`; the type parameter is caller-asserted, no validation is visible here. */
getJSON<T>(_url: string): Promise<T>;
// The three methods below use axios-style generic signatures (`AxiosRequestConfig` / `AxiosResponse`).
request<T = any, R extends AxiosResponse<T> = AxiosResponse<T>, D = any>(config: AxiosRequestConfig<D>): Promise<R>;
get<T = any, R extends AxiosResponse<T> = AxiosResponse<T>, D = any>(url: string, config?: AxiosRequestConfig<D>): Promise<R>;
post<T = any, R extends AxiosResponse<T> = AxiosResponse<T>, D = any>(url: string, data?: D, config?: AxiosRequestConfig<D>): Promise<R>;
/**
 * Resolves with the downloaded content (Buffer or string), its content type
 * and a `setCookie` value that may be null.
 * NOTE(review): the parameter type is spelled `DonwloadKind`; it is declared
 * elsewhere, so the spelling here must match that declaration.
 */
downloadContent(_url: string, kind: DonwloadKind, retry?: boolean): Promise<{
content: Buffer | string;
contentType: string;
setCookie: string | null;
}>;
/** Resolves with a boolean; presumably whether the URL can be fetched - confirm. */
canGetUrl(url: string): Promise<boolean>;
// Private members: types are erased in declaration output, only the names are visible.
private static handleMWWarningsAndErrors;
private getArticleQueryOpts;
private setArticleSubCategories;
private getJSONCb;
private getImageMimeType;
private getCompressedBody;
private getContentCb;
private downloadImage;
private errHandler;
private getSubCategories;
private backoffCall;
/** Resolves with the JS config vars string plus JS and style dependency lists for the given title. */
getModuleDependencies(title: string): Promise<{
jsConfigVars: string;
jsDependenciesList: string[];
styleDependenciesList: any[];
}>;
private streamToBuffer;
/** Takes head HTML as a string and returns a string; presumably the extracted JS config vars - confirm. */
static extractJsConfigVars(headhtml: string): string;
}
// The class itself is exported under the name `DownloaderClass`.
export { Downloader as DownloaderClass };
// The default export is a value of type `Downloader` (an instance, not the class).
declare const _default: Downloader;
export default _default;