htmlparser2
Version:
Fast & forgiving HTML/XML parser
87 lines (73 loc) • 2.51 kB
text/typescript
import { Parser, type ParserOptions } from "./Parser.js";
export type { Handler, ParserOptions } from "./Parser.js";
export { Parser } from "./Parser.js";
import {
type Document,
DomHandler,
type DomHandlerOptions,
type Element,
} from "domhandler";
export {
DomHandler,
// Old name for DomHandler
DomHandler as DefaultHandler,
type DomHandlerOptions,
} from "domhandler";
/**
 * Combined parser and handler options.
 *
 * This is the intersection of `ParserOptions` and `DomHandlerOptions`, so a
 * single options object can be handed to both the `Parser` and the
 * `DomHandler` constructors, which is what the helper functions in this
 * module do.
 */
export type Options = ParserOptions & DomHandlerOptions;
// Helper methods
/**
 * Parses a complete string in a single pass and returns the document that
 * the DOM handler built from it.
 *
 * @param data The data that should be parsed.
 * @param options Optional options; the same object is passed to both the
 * parser and the DOM handler.
 * @returns The `root` of the DOM handler after the parser has been ended.
 */
export function parseDocument(data: string, options?: Options): Document {
    // No completion callback is needed: the root is read directly below.
    const domBuilder = new DomHandler(undefined, options);
    const parser = new Parser(domBuilder, options);

    // `end` receives the whole input as its final chunk.
    parser.end(data);

    return domBuilder.root;
}
/**
 * Builds a `Parser` that is wired to a fresh `DomHandler`.
 *
 * The returned parser is not fed any data here; the caller is expected to
 * drive it.
 *
 * @param callback Passed (wrapped) to the DOM handler as its completion
 * callback; it receives the error (or `null`) and the handler's root document.
 * @param options Optional options; the same object is passed to both the
 * parser and the DOM handler.
 * @param elementCallback Optional callback forwarded to the DOM handler, to be
 * called every time a tag has been completed inside of the DOM.
 * @returns The newly created parser.
 */
export function createDocumentStream(
    callback: (error: Error | null, document: Document) => void,
    options?: Options,
    elementCallback?: (element: Element) => void,
): Parser {
    // The closure refers to `domBuilder`, which is declared just below; it is
    // only dereferenced when the handler eventually invokes the callback.
    const onComplete = (error: Error | null) =>
        callback(error, domBuilder.root);

    const domBuilder: DomHandler = new DomHandler(
        onComplete,
        options,
        elementCallback,
    );

    return new Parser(domBuilder, options);
}
/*
* All of the following exports exist for backwards-compatibility.
* They should probably be removed eventually.
*/
export * as ElementType from "domelementtype";
export {
type Callbacks as TokenizerCallbacks,
default as Tokenizer,
QuoteType,
} from "./Tokenizer.js";
import { type Feed, getFeed } from "domutils";
export { type Feed, getFeed } from "domutils";
/** Options used by `parseFeed` when the caller does not supply any. */
const parseFeedDefaultOptions = { xmlMode: true };

/**
 * Parses a feed from a string.
 *
 * The input is first turned into a document with `parseDocument`, and the
 * document's children are then handed to `getFeed`.
 *
 * @param feed The feed that should be parsed, as a string.
 * @param options Optional parsing options. Defaults to `{ xmlMode: true }`;
 * when passing your own options, you should set `xmlMode` to `true`.
 * @returns Whatever `getFeed` returns for the parsed nodes: a `Feed` or `null`.
 */
export function parseFeed(
    feed: string,
    options: Options = parseFeedDefaultOptions,
): Feed | null {
    const { children } = parseDocument(feed, options);
    return getFeed(children);
}
export * as DomUtils from "domutils";