magnitude-extract
Version:
TypeScript DOM cleaning and structuring library
25 lines (23 loc) • 678 B
text/typescript
/**
* CLI tool for extracting and converting web content to markdown
* Usage: unstructured-ts <url> [options]
*/
/**
 * Options bag for the CLI, as returned by `parseArguments`.
 *
 * Only `url` is required; every other field is optional. The defaults applied
 * when a field is omitted are not visible in this declaration file.
 * NOTE(review): the per-field descriptions below are inferred from the field
 * names and the usage line in the file header — confirm against the
 * implementation.
 */
interface CLIOptions {
/** Target URL; corresponds to the `<url>` positional in the usage line. */
url: string;
/** Presumably a destination path for the result — TODO confirm what happens when omitted. */
output?: string;
/** Presumably toggles inclusion of metadata in the output — confirm. */
includeMetadata?: boolean;
/** Presumably toggles inclusion of page numbers in the output — confirm. */
includePageNumbers?: boolean;
/** Presumably toggles inclusion of images in the output — confirm. */
includeImages?: boolean;
/** Presumably toggles inclusion of form elements in the output — confirm. */
includeForms?: boolean;
/** Presumably toggles inclusion of links in the output — confirm. */
includeLinks?: boolean;
/** Presumably skips navigation elements during extraction — confirm. */
skipNavigation?: boolean;
/** Presumably a minimum text length threshold; unit (characters?) unconfirmed. */
minTextLength?: number;
/** Negative flag: presumably disables table handling when true — confirm. */
noTables?: boolean;
/** Presumably enables verbose logging — confirm. */
verbose?: boolean;
/** Presumably requests the usage/help text — confirm. */
help?: boolean;
}
/**
 * Declared to take a URL string and return a promise of a string.
 *
 * NOTE(review): presumably resolves to the HTML retrieved from `url`; the
 * implementation, its rejection conditions, and any redirect/timeout handling
 * are not visible in this declaration file — confirm before relying on them.
 *
 * @param url - The URL to retrieve.
 * @returns A promise resolving to a string (presumably the page's HTML).
 */
declare function fetchHTML(url: string): Promise<string>;
/**
 * Declared to convert an array of argument strings into a `CLIOptions` object.
 *
 * NOTE(review): presumably `args` is the process argv with the runtime and
 * script entries already removed; behavior on a missing URL or unknown flags
 * is not visible here — confirm against the implementation.
 *
 * @param args - Command-line argument strings.
 * @returns The parsed options; `url` is always present per the `CLIOptions` type.
 */
declare function parseArguments(args: string[]): CLIOptions;
/**
 * Asynchronous entry point; takes no parameters and resolves with no value.
 *
 * NOTE(review): presumably parses the process arguments, fetches the page and
 * emits the converted output; whether it rejects or exits the process on
 * failure is not visible in this declaration file — confirm.
 */
declare function main(): Promise<void>;
// Public surface of this module: the three functions below.
// NOTE(review): `CLIOptions` is not in this export list even though
// `parseArguments` returns it, so consumers cannot import the type by name.
export { fetchHTML, main, parseArguments };