UNPKG

mwn

Version:

JavaScript & TypeScript MediaWiki bot framework for Node.js

676 lines (675 loc) 27.5 kB
/**
 *
 *  mwn: a MediaWiki bot framework for Node.js
 *
 *  Copyright (C) 2020 Siddharth VP
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published
 *  by the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
 */
import { AxiosResponse, AxiosInstance } from 'axios';
import * as tough from 'tough-cookie';
import * as OAuth from 'oauth-1.0a';
import { MwnDateStatic } from './date';
import { MwnTitle, MwnTitleStatic } from './title';
import { MwnPageStatic } from './page';
import { MwnWikitextStatic } from './wikitext';
import { MwnUserStatic } from './user';
import { MwnCategoryStatic } from './category';
import { MwnFileStatic } from './file';
import { RawRequestParams } from './core';
import { log, updateLoggingConfig } from './log';
import { MwnError, MwnMissingPageError } from './error';
import { link, Table, template } from './static_utils';
import { sleep } from './utils';
import type { ApiDeleteParams, ApiEditPageParams, ApiMoveParams, ApiParseParams, ApiPurgeParams, ApiQueryAllMessagesParams, ApiQueryAllPagesParams, ApiQueryCategoryMembersParams, ApiQuerySearchParams, ApiQueryUserInfoParams, ApiRollbackParams, ApiUndeleteParams, ApiUploadParams } from 'types-mediawiki-api';
import type { ApiDeleteResponse, ApiEditResponse, ApiMoveResponse, ApiPage, ApiQueryResponse, ApiResponse, ApiRollbackResponse, ApiSearchResult, ApiUndeleteResponse, ApiUploadResponse } from './api_response_types';

/**
 * Configuration accepted by the Mwn constructor, Mwn.init() and setOptions().
 * Every field is optional.
 */
export interface MwnOptions {
    /** Suppress messages, except for error messages and warnings */
    silent?: boolean;
    /** Site API url, example https://en.wikipedia.org/w/api.php */
    apiUrl?: string;
    /** User agent string. Required for WMF wikis, see https://foundation.wikimedia.org/wiki/Policy:User-Agent_policy */
    userAgent?: string;
    /** Bot login username, set up using Special:BotPasswords */
    username?: string;
    /** Bot login password, set up using Special:BotPasswords */
    password?: string;
    /** OAuth 1.0a credentials */
    OAuthCredentials?: {
        consumerToken: string;
        consumerSecret: string;
        accessToken: string;
        accessSecret: string;
    };
    /** OAuth 2 access token */
    OAuth2AccessToken?: string;
    /**
     * Max number of times to retry the same request on errors due to
     * maxlag, wiki being in readonly mode, and other transient errors
     */
    maxRetries?: number;
    /** Milliseconds to pause before retrying after a transient error */
    retryPause?: number;
    /** Bot emergency shutoff options */
    shutoff?: {
        /** Interval at which to check for shutoff, in milliseconds */
        intervalDuration?: number;
        /** Page to check for shutoff */
        page?: string;
        /** Condition satisfied by the page text for the bot to shut off */
        condition?: RegExp | ((text: string) => boolean);
        /**
         * Function to run for the bot to shut off, e.g. you can call
         * process.exit(1) here, or shut down more gracefully
         */
        onShutoff?: (text: string) => void;
    };
    /** Default parameters included in every API request */
    defaultParams?: ApiParams;
    /** Suppress logging of warnings received from the API */
    suppressAPIWarnings?: boolean;
    /** Options for the edit() function */
    editConfig?: EditConfig;
    /** Suppress warning about construction of invalid bot.Date objects */
    suppressInvalidDateWarning?: boolean;
}

/**
 * Callback type accepted by Mwn#edit(). It receives an object holding the
 * `content` and `timestamp` of a revision and returns either the updated text
 * or an object of edit API parameters, or a promise providing one of those.
 */
export type EditTransform = (rev: {
    content: string;
    timestamp: string;
}) => string | ApiEditPageParams | Promise<string | ApiEditPageParams>;

/** Options for the edit() function; see MwnOptions.editConfig. */
export type EditConfig = {
    /** Max number of retries on edit conflicts, default: 2 */
    conflictRetries?: number;
    /** Suppress warning on an edit resulting in no change to the page, default: false */
    suppressNochangeWarning?: boolean;
    /** Abort edit if exclusionRegex matches on the page content */
    exclusionRegex?: RegExp;
};

/**
 * Map of API parameter names to values, as passed to Mwn#request().
 * A value may be a string, boolean or number, an array of strings or numbers,
 * a Date, a File, or a `{ stream, name }` object wrapping a readable stream.
 */
export type ApiParams = {
    [param: string]: string | string[] | boolean | number | number[] | Date | File | {
        stream: NodeJS.ReadableStream;
        name: string;
    };
};

/**
 * The bot class. Recommended usage is one bot instance for every wiki and
 * user (see the constructor documentation).
 */
export declare class Mwn {
    /**
     * Bot instance Login State
     * Is received from the MW Login API and contains token, userid, etc.
     */
    state: any;
    /**
     * Bot instance is logged in or not
     */
    loggedIn: boolean;
    /**
     * Bot instance's edit token. Initially set as an invalid token string
     * so that the badtoken handling logic is invoked if the token is
     * not set before a query is sent.
     * @type {string}
     */
    csrfToken: string;
    /**
     * Default options.
     * Should be immutable
     */
    readonly defaultOptions: MwnOptions;
    /**
     * Actual, current options of the bot instance
     * Mix of the default options, the custom options and later changes
     * @type {Object}
     */
    options: MwnOptions;
    /**
     * Cookie jar for the bot instance - holds session and login cookies
     * @type {tough.CookieJar}
     */
    cookieJar: tough.CookieJar;
    /** Axios instance for the bot instance. */
    axiosInstance: AxiosInstance;
    /**
     * Class-level raw request parameters.
     * NOTE(review): presumably the defaults that each instance's
     * `requestOptions` starts from - confirm against the implementation.
     */
    static requestDefaults: RawRequestParams;
    /**
     * Request options for the axios library.
     * Change the defaults using setRequestOptions()
     * @type {Object}
     */
    requestOptions: RawRequestParams;
    /**
     * Emergency shutoff config
     * @type {{hook: NodeJS.Timeout, state: boolean}}
     */
    shutoff: {
        state: boolean;
        hook: NodeJS.Timeout;
    };
    /**
     * NOTE(review): presumably whether the account has the `apihighlimits`
     * right (500 instead of 50 items per multi-value field, see massQuery())
     * - confirm against the implementation.
     */
    hasApiHighLimit: boolean;
    /** OAuth helper object (type from the `oauth-1.0a` package); see initOAuth(). */
    oauth: OAuth;
    /** NOTE(review): presumably true when OAuth 1.0a credentials are in use - confirm. */
    usingOAuth: boolean;
    /** NOTE(review): presumably true when an OAuth 2 access token is in use - confirm. */
    usingOAuth2: boolean;
    /** Typed as the MwnError class from './error'. */
    static Error: typeof MwnError;
    /** Typed as the MwnMissingPageError class from './error'. */
    static MissingPageError: typeof MwnMissingPageError;
    /** Typed as `log` from './log'. */
    static log: typeof log;
    /** Typed as `updateLoggingConfig` from './log'. */
    static setLoggingConfig: typeof updateLoggingConfig;
    /** Typed as `link` from './static_utils'. */
    static link: typeof link;
    /** Typed as `template` from './static_utils'. */
    static template: typeof template;
    /** Typed as `Table` from './static_utils'. */
    static Table: typeof Table;
    /** Static string-escaping/encoding and IP-address checking utilities. */
    static util: {
        escapeRegExp: (str: string) => string;
        escapeHtml: (s: string) => string;
        rawurlencode: (str: string) => string;
        wikiUrlencode: (str: string) => string;
        isIPv4Address: (address: string, allowBlock?: boolean) => boolean;
        isIPv6Address: (address: string, allowBlock?: boolean) => boolean;
        isIPAddress: (address: string, allowBlock?: boolean) => boolean;
    };
    /**
     * Title class associated with the bot instance.
     * See {@link MwnTitle} interface for methods on title objects.
     */
    Title: MwnTitleStatic;
    /**
     * Page class associated with the bot instance.
     * See {@link MwnPage} interface for methods on page objects.
     */
    Page: MwnPageStatic;
    /**
     * Category class associated with the bot instance.
     * See {@link MwnCategory} interface for methods on category objects.
     */
    Category: MwnCategoryStatic;
    /**
     * File class associated with the bot instance.
     * See {@link MwnFile} interface for methods on file objects.
     */
    File: MwnFileStatic;
    /**
     * User class associated with the bot instance.
     * See {@link MwnUser} interface for methods on user objects.
     */
    User: MwnUserStatic;
    /**
     * Wikitext class associated with the bot instance.
     * See {@link MwnWikitext} interface for methods on wikitext objects.
     */
    Wikitext: MwnWikitextStatic;
    /**
     * Date class associated with the bot instance.
     * See {@link MwnDate} interface for methods on date objects.
     */
    Date: MwnDateStatic;
    /**
     * Constructs a new bot instance. Recommended usage is one bot instance for every wiki and user.
     * A bot instance has its own state (e.g. tokens) that is necessary for some operations.
     *
     * @param [customOptions] - Custom options. The signature also accepts a string.
     *  NOTE(review): presumably the API URL - confirm against the implementation.
     */
    constructor(customOptions?: MwnOptions | string);
    /**
     * Initialize a bot object. Login to the wiki and fetch editing tokens. If OAuth
     * credentials are provided, they will be used over BotPassword credentials.
     * Also fetches the site data needed for parsing and constructing title objects.
     * @param {Object} config - Bot configurations, including apiUrl, and either the
     * username and password or the OAuth credentials
     * @returns {Promise<Mwn>} bot object
     */
    static init(config: MwnOptions): Promise<Mwn>;
    /**
     * Set and overwrite mwn options
     * @param {Object} customOptions
     */
    setOptions(customOptions: MwnOptions): void;
    /**
     * Sets the API URL for MediaWiki requests
     * This can be used instead of a login, if no actions are used that require login.
     * @param {string} apiUrl - API url to MediaWiki, e.g. https://en.wikipedia.org/w/api.php
     */
    setApiUrl(apiUrl: string): void;
    /**
     * Sets and overwrites the raw request options, used by the axios library
     * See https://www.npmjs.com/package/axios
     */
    setRequestOptions(customRequestOptions: RawRequestParams): void;
    /**
     * Set the default parameters to be sent in API calls.
     * @param {Object} params - default parameters
     */
    setDefaultParams(params: ApiParams): void;
    /**
     * Set your API user agent. See https://meta.wikimedia.org/wiki/User-Agent_policy
     * Required for WMF wikis.
     * @param {string} userAgent
     */
    setUserAgent(userAgent: string): void;
    /**
     * @private
     * Determine if we're going to use OAuth for authentication
     */
    private _usingOAuth;
    /**
     * Initialize OAuth instance
     */
    initOAuth(): void;
    /************ CORE REQUESTS ***************/
    /**
     * Executes a raw request
     * Uses the axios library
     * @param {Object} requestOptions
     * @returns {Promise}
     */
    rawRequest(requestOptions: RawRequestParams): Promise<AxiosResponse>;
    /**
     * Executes a request with the ability to use custom parameters and custom
     * request options
     * @param {Object} params
     * @param {Object} [customRequestOptions={}]
     * @returns {Promise}
     */
    request(params: ApiParams, customRequestOptions?: RawRequestParams): Promise<ApiResponse>;
    /**
     * Takes the same arguments as request() and resolves with an ApiQueryResponse.
     * NOTE(review): presumably a convenience wrapper for `action=query` - confirm
     * against the implementation.
     * @param {Object} params
     * @param {Object} [customRequestOptions]
     * @returns {Promise}
     */
    query(params: ApiParams, customRequestOptions?: RawRequestParams): Promise<ApiQueryResponse>;
    /************** CORE FUNCTIONS *******************/
    /**
     * Private member; its type is not part of the public declaration.
     * NOTE(review): presumably tracks a login() call that is still in flight - confirm.
     */
    private loginInProgress;
    /**
     * Executes a Login
     * @see https://www.mediawiki.org/wiki/API:Login
     * @param [loginOptions] - optional username, password and apiUrl
     * @returns {Promise}
     */
    login(loginOptions?: {
        username?: string;
        password?: string;
        apiUrl?: string;
    }): Promise<ApiResponse>;
    /** Private member; its type is not part of the public declaration. */
    private loginInternal;
    /**
     * Log out of the account. Flushes the cookie jar and clears the saved tokens.
     * Should not be used if authenticating via OAuth.
     * @returns {Promise<void>}
     */
    logout(): Promise<void>;
    /**
     * Create an account. Only works on wikis without extensions like
     * ConfirmEdit enabled (hence doesn't work on WMF wikis).
     * @param username
     * @param password
     */
    createAccount(username: string, password: string): Promise<any>;
    /**
     * Get basic info about the logged-in user
     * @param [options]
     * @returns {Promise}
     */
    userinfo(options?: ApiQueryUserInfoParams): Promise<any>;
    /**
     * Gets namespace-related information for use in title nested class.
     * This need not be used if login() is being used. This is for cases
     * where mwn needs to be used without logging in.
     * @returns {Promise<void>}
     */
    getSiteInfo(): Promise<void>;
    /**
     * Get tokens and saves them in this.state
     * @returns {Promise<void>}
     */
    getTokens(): Promise<void>;
    /**
     * Gets an edit token (also used for most other actions
     * such as moving and deleting)
     * This is only compatible with MW >= 1.24
     * @returns {Promise<string>}
     */
    getCsrfToken(): Promise<string>;
    /**
     * Get tokens and siteinfo (using a single API request) and save them in the bot state.
     * @returns {Promise<void>}
     */
    getTokensAndSiteInfo(): Promise<void>;
    /**
     * Get type of token to be used with an API action
     * @param {string} action - API action parameter
     * @returns {Promise<string>}
     */
    getTokenType(action: string): Promise<string>;
    /**
     * Get the wiki's server time
     * @returns {Promise<string>}
     */
    getServerTime(): Promise<string>;
    /**
     * Fetch and parse a JSON wikipage
     * @param {string} title - page title
     * @returns {Promise<Object>} parsed JSON object
     */
    parseJsonPage(title: string): Promise<any>;
    /**
     * Fetch MediaWiki messages
     * @param messages - a single message name or an array of message names
     * @param [options]
     * @returns {Promise<Record<string, string>>} object with string keys and string values
     */
    getMessages(messages: string | string[], options?: ApiQueryAllMessagesParams): Promise<Record<string, string>>;
    /**
     * Enable bot emergency shutoff
     * @param [shutoffOptions] - takes the same fields as `MwnOptions.shutoff`
     */
    enableEmergencyShutoff(shutoffOptions?: {
        page?: string;
        intervalDuration?: number;
        condition?: RegExp | ((text: string) => boolean);
        onShutoff?: (text: string) => void;
    }): void;
    /**
     * Disable emergency shutoff detection.
     * Use this only if it was ever enabled.
     */
    disableEmergencyShutoff(): void;
    /***************** HELPER FUNCTIONS ******************/
    /**
     * Reads the content and metadata of one (or many) pages.
     * Content from the "main" slot is copied over to every revision object
     * for easier referencing (`pg.revisions[0].content` can be used instead of
     * `pg.revisions[0].slots.main.content`).
     *
     * @param {string|number|MwnTitle|string[]|number[]|MwnTitle[]} titles - for
     * multiple pages use an array
     * @param {Object} [options]
     * @returns {Promise<ApiPage|ApiPage[]>} a single ApiPage for a single input,
     * an array of ApiPage objects for an array input
     */
    read(titles: string | number | MwnTitle, options?: ApiParams): Promise<ApiPage>;
    read(titles: string[] | number[] | MwnTitle[], options?: ApiParams): Promise<ApiPage[]>;
    /**
     * Async generator counterpart of read() for an array of titles; yields
     * ApiPage objects.
     * NOTE(review): `batchSize` presumably sets how many titles go into each
     * API request - confirm against the implementation.
     * @param {string[]} titles
     * @param {Object} [options]
     * @param {number} [batchSize]
     */
    readGen(titles: string[], options?: ApiParams, batchSize?: number): AsyncGenerator<ApiPage>;
    /**
     * @param {string|number} title - Page title or page ID.
     *  NOTE(review): earlier docs also listed MwnTitle objects, which this
     *  signature does not accept - confirm the intended type.
     * @param {Function} transform - Callback that prepares the edit. It takes one
     * argument that is an { content: 'string: page content', timestamp: 'string:
     * time of last edit' } object. This function should return an object with
     * edit API parameters or just the updated text, or a promise providing one of
     * those.
     * @param {Object} [editConfig] - Overridden edit options. Available options:
     * conflictRetries, suppressNochangeWarning, exclusionRegex
     * @return {Promise<Object>} Edit API response
     */
    edit(title: string | number, transform: EditTransform, editConfig?: EditConfig): Promise<ApiEditResponse>;
    /**
     * Edit a page without loading it first. Straightforward version of `edit`.
     * No edit conflict detection.
     *
     * @param {string|number} title - title or pageid (as number)
     * @param {string} content
     * @param {string} [summary]
     * @param {object} [options]
     * @returns {Promise}
     */
    save(title: string | number, content: string, summary?: string, options?: ApiEditPageParams): Promise<ApiEditResponse>;
    /**
     * Creates a new page. Does not edit existing ones
     *
     * @param {string} title
     * @param {string} content
     * @param {string} [summary]
     * @param {object} [options]
     *
     * @returns {Promise}
     */
    create(title: string, content: string, summary?: string, options?: ApiEditPageParams): Promise<ApiEditResponse>;
    /**
     * Post a new section to the page.
     *
     * @param {string|number} title - title or pageid (as number)
     * @param {string} header
     * @param {string} message wikitext message
     * @param {Object} [additionalParams] Additional API parameters, e.g. `{ redirect: true }`
     */
    newSection(title: string | number, header: string, message: string, additionalParams?: ApiEditPageParams): Promise<ApiEditResponse>;
    /**
     * Deletes a page
     *
     * @param {string|number} title - title or pageid (as number)
     * @param {string} summary - required by this signature
     * @param {object} [options]
     * @returns {Promise}
     */
    delete(title: string | number, summary: string, options?: ApiDeleteParams): Promise<ApiDeleteResponse>;
    /**
     * Undeletes a page.
     * Note: all deleted revisions of the page will be restored.
     *
     * @param {string} title
     * @param {string} summary - required by this signature
     * @param {object} [options]
     * @returns {Promise}
     */
    undelete(title: string, summary: string, options?: ApiUndeleteParams): Promise<ApiUndeleteResponse>;
    /**
     * Moves a page
     *
     * @param {string} fromtitle
     * @param {string} totitle
     * @param {string} summary - required by this signature
     * @param {object} [options]
     */
    move(fromtitle: string, totitle: string, summary: string, options?: ApiMoveParams): Promise<ApiMoveResponse>;
    /**
     * Parse wikitext. Convenience method for 'action=parse'.
     *
     * @param {string} content Content to parse.
     * @param {Object} [additionalParams] Parameters object to set custom settings, e.g.
     * redirects, sectionpreview. prop should not be overridden.
     * @return {Promise<string>}
     */
    parseWikitext(content: string, additionalParams?: ApiParseParams): Promise<string>;
    /**
     * Parse a given page. Convenience method for 'action=parse'.
     *
     * @param {string} title Title of the page to parse
     * @param {Object} [additionalParams] Parameters object to set custom settings, e.g.
     * redirects, sectionpreview. prop should not be overridden.
     * @return {Promise<string>}
     */
    parseTitle(title: string, additionalParams?: ApiParseParams): Promise<string>;
    /**
     * Upload an image from the local disk to the wiki.
     * If a file with the same name exists, it will be over-written.
     * @param {string} filepath
     * @param {string} title
     * @param {string} text
     * @param {object} [options]
     * @returns {Promise<Object>}
     */
    upload(filepath: string, title: string, text: string, options?: ApiUploadParams): Promise<ApiUploadResponse>;
    /**
     * Upload an image from a web URL to the wiki
     * If a file with the same name exists, it will be over-written,
     * to disable this behaviour, use `ignorewarning: false` in options.
     * @param {string} url
     * @param {string} title
     * @param {string} text
     * @param {Object} [options]
     * @returns {Promise<Object>}
     */
    uploadFromUrl(url: string, title: string, text: string, options?: ApiUploadParams): Promise<ApiUploadResponse>;
    /**
     * Download an image from the wiki.
     * If you're downloading multiple images, then for better efficiency, you may want
     * to query the API for the urls of all images in one request, and follow that with
     * running downloadFromUrl for each one.
     * @param {string|number} file - title or page ID
     * @param {string} localname - local path (with file name) to download to.
     *  NOTE(review): earlier docs marked this optional, defaulting to the current
     *  directory with the same file name as on the wiki, but this signature
     *  requires it - confirm against the implementation.
     * @returns {Promise<void>}
     */
    download(file: string | number, localname: string): Promise<void>;
    /**
     * Download an image from a URL.
     * @param {string} url
     * @param {string} [localName] - local path (with file name) to download to,
     * defaults to current directory with same file name as that of the web image.
     * @returns {Promise<void>}
     */
    downloadFromUrl(url: string, localName?: string): Promise<void>;
    /**
     * Takes an option name and a value and resolves with the API response.
     * NOTE(review): presumably saves a single user preference of the
     * logged-in account - confirm against the implementation.
     * @param {string} option
     * @param {string} value
     * @returns {Promise}
     */
    saveOption(option: string, value: string): Promise<ApiResponse>;
    /**
     * Takes an object of option names and values and resolves with the API response.
     * NOTE(review): presumably the multi-option counterpart of saveOption() - confirm.
     * @param {Object} options - object with string keys and string values
     * @returns {Promise}
     */
    saveOptions(options: Record<string, string>): Promise<ApiResponse>;
    /**
     * Convenience method for `action=rollback`.
     *
     * @param {string|number} page - page title or page id as number.
     *  NOTE(review): earlier docs also listed MwnTitle objects, which this
     *  signature does not accept - confirm the intended type.
     * @param {string} user
     * @param {Object} [params] Additional parameters
     * @return {Promise}
     */
    rollback(page: string | number, user: string, params?: ApiRollbackParams): Promise<ApiRollbackResponse>;
    /**
     * Purge one or more pages (max 500 for bots, 50 for others)
     *
     * @param {String[]|String|number[]|number} titles - page titles or page ids
     * @param {Object} [options]
     * @returns {Promise}
     */
    purge(titles: string[] | string | number[] | number, options?: ApiPurgeParams): Promise<ApiResponse>;
    /**
     * Get pages with names beginning with a given prefix
     * @param {string} prefix
     * @param {Object} [otherParams]
     *
     * @returns {Promise<string[]>} - array of page titles (up to 5000 or 500)
     */
    getPagesByPrefix(prefix: string, otherParams?: ApiQueryAllPagesParams): Promise<string[]>;
    /**
     * Get pages in a category
     * @param {string} category - name of category, with or without namespace prefix
     * @param {Object} [otherParams]
     * @returns {Promise<string[]>}
     */
    getPagesInCategory(category: string, otherParams?: ApiQueryCategoryMembersParams): Promise<string[]>;
    /**
     * Search the wiki.
     * @param {string} searchTerm
     * @param {number|"max"} [limit]
     * @param {("size"|"timestamp"|"wordcount"|"snippet"|"redirecttitle"|"sectiontitle"|
     * "redirectsnippet"|"titlesnippet"|"sectionsnippet"|"categorysnippet")[]} [props]
     * @param {Object} [otherParams]
     * @returns {Promise<Object[]>} array of search results
     */
    search(searchTerm: string, limit?: number | 'max', props?: ApiQuerySearchParams['srprop'], otherParams?: ApiQuerySearchParams): Promise<ApiSearchResult[]>;
    /************* BULK PROCESSING FUNCTIONS ************/
    /**
     * Send an API query that automatically continues till the limit is reached.
     *
     * @param {Object} [query] - The API query
     * @param {number} [limit=10] - limit on the maximum number of API calls to go through
     * @returns {Promise<Object[]>} - resolved with an array of responses of individual calls.
     */
    continuedQuery(query?: ApiParams, limit?: number): Promise<ApiResponse[]>;
    /**
     * Generator to iterate through API response continuations.
     * @generator
     * @param {Object} [query]
     * @param {number} [limit=Infinity]
     * @yields {Object} a single page of the response
     */
    continuedQueryGen(query?: ApiParams, limit?: number): AsyncGenerator<ApiResponse>;
    /**
     * Function for using API action=query with more than 50/500 items in multi-
     * input fields.
     *
     * Multi-value fields in the query API take multiple inputs as an array
     * (internally converted to a pipe-delimited string) but with a limit of 500
     * (or 50 for users without apihighlimits).
     * Example: the fields titles, pageids and revids in any query, ususers in
     * list=users.
     *
     * This function allows you to send a query as if this limit didn't exist.
     * The array given to the multi-input field is split into batches and individual
     * queries are sent sequentially for each batch.
     * A promise is returned finally resolved with the array of responses of each
     * API call.
     *
     * The API calls are made via POST instead of GET to avoid potential 414 (URI
     * too long) errors.
     *
     * @param {Object} [query] - the query object, the multi-input field should
     * be an array
     * @param {string} [batchFieldName=titles] - the name of the multi-input field
     *
     * @returns {Promise<Object[]>} - promise resolved when all the API queries have
     * settled, with the array of responses.
     */
    massQuery(query?: ApiParams, batchFieldName?: string): Promise<ApiResponse[]>;
    /**
     * Generator version of massQuery(). Iterate through pages of API results.
     * @param {Object} query
     * @param {string} [batchFieldName=titles]
     * @param {number} [batchSize]
     */
    massQueryGen(query: ApiParams, batchFieldName?: string, batchSize?: number): AsyncGenerator<ApiResponse>;
    /**
     * Execute an asynchronous function on a large number of pages (or other arbitrary
     * items). Designed for working with promises.
     *
     * @param {Array} list - list of items to execute actions upon. The array would
     * usually be of page names (strings).
     * @param {Function} worker - function to execute upon each item in the list. It is
     * called with the item and its index. Must return a promise.
     * @param {number} [concurrency=5] - number of concurrent operations to take place.
     * Set this to 1 for sequential operations. Default 5. Set this according to how
     * expensive the API calls made by worker are.
     * @param {number} [retries=0] - max number of times failing actions should be retried.
     * @returns {Promise<Object>} - resolved when all API calls have finished, with object
     * { failures: { failed item: error, failed item2: error2, ... } }
     */
    batchOperation<T>(list: T[], worker: (item: T, index: number) => Promise<any>, concurrency?: number, retries?: number): Promise<{
        failures: {
            [item: string]: Error;
        };
    }>;
    /**
     * Execute an asynchronous function on a number of pages (or other arbitrary items)
     * sequentially, with a time delay between actions.
     * Using this with delay=0 is same as using batchOperation with concurrency=1
     * Use of seriesBatchOperation() is not recommended for MediaWiki API actions. Use the
     * normal mwn methods with async-await in a for loop. The request() method has the better
     * retry functionality (only network errors are retried, other errors are unlikely to go
     * away on retries).
     * @param {Array} list
     * @param {Function} worker - called with the item and its index; must return a promise
     * @param {number} [delay=5000] - number of milliseconds of delay
     * @param {number} [retries=0] - max number of times failing actions should be retried.
     * @returns {Promise<Object>} - resolved when all API calls have finished, with object
     * { failures: { failed item: error, failed item2: error2, ... } }
     */
    seriesBatchOperation<T>(list: T[], worker: (item: T, index: number) => Promise<any>, delay?: number, retries?: number): Promise<{
        failures: {
            [item: string]: Error;
        };
    }>;
    /********** SUPPLEMENTARY FUNCTIONS **************/
    /**
     * Execute an ASK Query
     * On a wiki that supports them, like semantic-mediawiki
     *
     * @param {string} query
     * @param {string} apiUrl
     *  NOTE(review): earlier docs marked this optional, but this signature
     *  requires it - confirm whether it may be omitted.
     * @param {object} [customRequestOptions]
     *
     * @returns {Promise}
     */
    askQuery(query: string, apiUrl: string, customRequestOptions?: RawRequestParams): Promise<any>;
    /**
     * Executes a SPARQL Query
     * On a wiki that supports them, like wikidata
     *
     * @param {string} query
     * @param {string} endpointUrl
     *  NOTE(review): earlier docs marked this optional, but this signature
     *  requires it - confirm whether it may be omitted.
     * @param {object} [customRequestOptions]
     *
     * @returns {Promise}
     */
    sparqlQuery(query: string, endpointUrl: string, customRequestOptions?: RawRequestParams): Promise<any>;
    /**
     * Promisified version of setTimeout
     * @param {number} duration - of sleep in milliseconds
     */
    sleep: typeof sleep;
}