// @types/mozilla-readability
// Version:
// TypeScript definitions for mozilla-readability
// 146 lines (134 loc) • 5.86 kB
// TypeScript
// Type definitions for non-npm package mozilla-readability 0.2
// Project: https://github.com/mozilla/readability
// Definitions by: Charles Vandevoorde <https://github.com/charlesvdv>, Alex Wendland <https://github.com/awendland>
// Definitions: https://github.com/DefinitelyTyped/DefinitelyTyped
// TypeScript Version: 2.2
// CommonJS-style export assignment: the `Readability` class declared below (merged with the
// namespace of the same name) is the module's entire export. Consumers load it with
// `import Readability = require(...)`, or with a default import when `esModuleInterop` is on.
export = Readability;
/**
 * Standalone build of the readability library used for Firefox Reader View.
 *
 * `isProbablyReaderable()` is intentionally absent: it was moved into a separate file in
 * https://github.com/mozilla/readability/commit/2620542dd1e8380220d82afa97a2c283ae636e40
 * and is therefore no longer part of the Readability class.
 */
declare class Readability {
    /**
     * Creates a Readability object for a DOM document; call `parse()` on it to
     * extract the article.
     *
     * ## Usage on the web
     *
     * Build a new Readability object from a DOM document object, then call
     * `parse()`:
     *
     * ```js
     * var article = new Readability(document).parse();
     * ```
     *
     * When running on the web you will likely be able to use a document
     * reference obtained elsewhere (e.g. fetched via XMLHttpRequest, or taken
     * from a same-origin <iframe> you have access to, etc.).
     *
     * ## Usage from node.js
     *
     * node.js generally gives you no DOM document object. One can be obtained
     * from external libraries such as [jsdom](https://github.com/tmpvar/jsdom).
     * The readability repository does ship a parser of its own (`JSDOMParser`),
     * but it is restricted to reading XML-compatible markup and is therefore
     * not recommended for general use.
     *
     * When creating the DOM object with `jsdom`, make sure the page does not
     * run (page) scripts (avoid fetching remote resources etc.), and pass the
     * page's URI as the `url` property of the `options` object given to the
     * `JSDOM` constructor.
     *
     * ```js
     * var JSDOM = require('jsdom').JSDOM;
     * var doc = new JSDOM("<body>Here's a bunch of text</body>", {
     *   url: "https://www.example.com/the-page-i-got-the-source-from",
     * });
     * let reader = new Readability(doc.window.document);
     * let article = reader.parse();
     * ```
     *
     * @param doc DOM document to extract the article from.
     * @param options Optional settings; see `Readability.Options`.
     */
    constructor(doc: Document, options?: Readability.Options);

    /**
     * Runs readability.
     *
     * ## Workflow:
     *
     * 1. Prep the document by removing script tags, css, etc.
     * 2. Build readability's DOM tree.
     * 3. Grab the article content from the current dom tree.
     * 4. Replace the current DOM tree with the new one.
     * 5. Read peacefully.
     *
     * ## Additional notes:
     *
     * `parse()` works by modifying the DOM, which removes some elements from
     * the web page. To avoid that, hand a clone of the document object to the
     * Readability constructor instead of the live document:
     *
     * ```js
     * var documentClone = document.cloneNode(true);
     * var article = new Readability(documentClone).parse();
     * ```
     *
     * @returns The extracted article, or `null` if the processing failed
     * (https://github.com/mozilla/readability/blob/52ab9b5c8916c306a47b2119270dcdabebf9d203/Readability.js#L2038).
     */
    parse(): Readability.ParseResult | null;
}
declare namespace Readability {
    /** Optional settings accepted as the second argument of the `Readability` constructor. */
    interface Options {
        /**
         * Whether log messages are sent to the console.
         */
        debug?: boolean | undefined;

        /**
         * Upper bound on the size of the documents that will be processed. The
         * size is checked before any parsing operations occur; if the document
         * contains more elements than this threshold, an Error is thrown.
         *
         * See implementation details at https://github.com/mozilla/readability/blob/52ab9b5c8916c306a47b2119270dcdabebf9d203/Readability.js#L2019
         */
        maxElemsToParse?: number | undefined;

        /**
         * NOTE(review): undocumented in the original typings. Per the upstream
         * README this is presumably the number of top candidates considered
         * when analysing how tight the competition is among candidates —
         * confirm against the Readability.js version being typed.
         */
        nbTopCandidates?: number | undefined;

        /**
         * Minimum number of characters the extracted textContent must contain
         * for the article to be considered correctly identified. When the
         * threshold is not met, the extraction process automatically runs
         * again with different flags.
         *
         * See implementation details at https://github.com/mozilla/readability/blob/52ab9b5c8916c306a47b2119270dcdabebf9d203/Readability.js#L1208
         *
         * Changed from wordThreshold in https://github.com/mozilla/readability/commit/3ff9a166fb27928f222c4c0722e730eda412658a
         */
        charThreshold?: number | undefined;

        /**
         * Class names to keep. `parse()` removes the class="" attribute from
         * every element in the given subtree, except for classes that match
         * CLASSES_TO_PRESERVE or appear in this array.
         */
        classesToPreserve?: string[] | undefined;

        /**
         * Set to `true` to retain all classes. By default Readability strips
         * every class from the HTML elements in the processed article.
         *
         * This is a blanket alternative to `classesToPreserve`.
         *
         * Added in https://github.com/mozilla/readability/commit/2982216913af2c66b0690e88606b03116553ad92
         */
        keepClasses?: boolean | undefined;
    }

    /**
     * Article data returned by a successful `parse()` call.
     *
     * NOTE(review): upstream Readability.js appears to leave some metadata
     * fields (e.g. `byline`, `dir`, `siteName`) as `null` when the page
     * provides none — confirm whether they should be typed `string | null`.
     */
    interface ParseResult {
        /** Title of the article */
        title: string;
        /** Author metadata */
        byline: string;
        /** Direction of the content */
        dir: string;
        /** Processed article content, as an HTML string */
        content: string;
        /** `content` in its non-HTML form */
        textContent: string;
        /** Article length, in characters */
        length: number;
        /** Description of the article, or a short excerpt from the content */
        excerpt: string;
        /** Site name of the article */
        siteName: string;
    }
}