UNPKG

@hanivanrizky/nestjs-html-parser

Version:

A powerful NestJS HTML parsing service with XPath and CSS selector support, proxy configuration, random user agents, and rich response metadata including headers and status codes

222 lines (221 loc) 6.52 kB
import { DynamicModule, ModuleMetadata, Type } from '@nestjs/common';
import { HtmlParserConfig } from './html-parser.config';

/**
 * Type declarations for the NestJS HTML Parser module.
 *
 * The module brings HTML parsing with XPath and CSS selector support to
 * NestJS applications.
 *
 * **Features:**
 * - XPath and CSS selector extraction
 * - Proxy support with authentication
 * - Random user agent rotation
 * - Rich response metadata
 * - Verbose logging for debugging
 * - Retry logic with configurable delays
 *
 * @remarks
 * Register the module through `HtmlParserModule.forRoot()` or
 * `HtmlParserModule.forRootAsync()` so that it is configured explicitly and
 * stays compatible with future releases.
 *
 * @example
 * Basic usage with custom configuration
 * ```typescript
 * import { Module } from '@nestjs/common';
 * import { HtmlParserModule } from '@hanivanrizky/nestjs-html-parser';
 *
 * @Module({
 *   imports: [
 *     HtmlParserModule.forRoot({
 *       loggerLevel: 'debug'
 *     })
 *   ],
 * })
 * export class AppModule {}
 * ```
 *
 * @example
 * Async configuration with environment variables
 * ```typescript
 * import { Module } from '@nestjs/common';
 * import { ConfigModule, ConfigService } from '@nestjs/config';
 * import { HtmlParserModule } from '@hanivanrizky/nestjs-html-parser';
 *
 * @Module({
 *   imports: [
 *     ConfigModule.forRoot(),
 *     HtmlParserModule.forRootAsync({
 *       imports: [ConfigModule],
 *       useFactory: (configService: ConfigService) => ({
 *         loggerLevel: configService.get('HTML_PARSER_LOGGER_LEVEL', 'warn')
 *       }),
 *       inject: [ConfigService],
 *     }),
 *   ],
 * })
 * export class AppModule {}
 * ```
 *
 * @example
 * Using the service in your application
 * ```typescript
 * import { Injectable } from '@nestjs/common';
 * import { HtmlParserService } from '@hanivanrizky/nestjs-html-parser';
 *
 * @Injectable()
 * export class ScrapingService {
 *   constructor(private readonly htmlParser: HtmlParserService) {}
 *
 *   async scrapeWebsite(url: string) {
 *     const response = await this.htmlParser.fetchHtml(url);
 *     const title = this.htmlParser.extractSingle(response.data, '//title/text()');
 *
 *     return { title, status: response.status };
 *   }
 * }
 * ```
 *
 * @author Hanivan Rizky Sobari <hanivan20@gmail.com>
 * @license MIT
 */

/**
 * Contract for a provider that can produce an {@link HtmlParserConfig}.
 *
 * Classes passed as `useClass` or `useExisting` to
 * `HtmlParserModule.forRootAsync()` are typed against this interface.
 */
export interface HtmlParserConfigFactory {
  /**
   * Produce the configuration for the HTML Parser module.
   *
   * @returns The configuration object, either directly or wrapped in a promise
   */
  createHtmlParserConfig(): HtmlParserConfig | Promise<HtmlParserConfig>;
}

/**
 * Options accepted by `HtmlParserModule.forRootAsync()`.
 *
 * Besides the optional `imports` list taken from `ModuleMetadata`, the
 * configuration source is described by `useFactory` (optionally with
 * `inject`), `useClass`, or `useExisting`.
 */
export interface HtmlParserModuleAsyncOptions
  extends Pick<ModuleMetadata, 'imports'> {
  /**
   * Factory function returning the configuration, synchronously or as a promise
   */
  useFactory?: (
    ...args: any[]
  ) => HtmlParserConfig | Promise<HtmlParserConfig>;

  /**
   * Dependencies to inject into the factory function
   */
  inject?: any[];

  /**
   * Class implementing HtmlParserConfigFactory of which a new instance is created
   */
  useClass?: Type<HtmlParserConfigFactory>;

  /**
   * Existing provider implementing HtmlParserConfigFactory to reuse
   */
  useExisting?: Type<HtmlParserConfigFactory>;
}

/**
 * HTML Parser Module with default configuration.
 *
 * Importing the class directly provides HtmlParserService with the default
 * settings:
 * - loggerLevel: 'log'
 *
 * Call forRoot() or forRootAsync() to supply custom configuration.
 */
export declare class HtmlParserModule {
  /**
   * Register the HTML Parser Module with custom options.
   *
   * @param config - Configuration options for the HTML Parser (optional)
   * @returns DynamicModule with configured providers
   *
   * @example
   * Basic configuration
   * ```typescript
   * @Module({
   *   imports: [
   *     HtmlParserModule.forRoot({
   *       loggerLevel: 'debug'
   *     })
   *   ],
   * })
   * export class AppModule {}
   * ```
   *
   * @example
   * Using different logger levels
   * ```typescript
   * // For production - minimal logging
   * HtmlParserModule.forRoot({ loggerLevel: 'error' })
   *
   * // For development - detailed logging
   * HtmlParserModule.forRoot({ loggerLevel: 'debug' })
   *
   * // For testing - verbose logging
   * HtmlParserModule.forRoot({ loggerLevel: 'verbose' })
   * ```
   */
  static forRoot(config?: HtmlParserConfig): DynamicModule;

  /**
   * Register the HTML Parser Module with asynchronously resolved options.
   *
   * Intended for cases where the configuration depends on other modules or
   * services that have to be initialized first.
   *
   * @param options - Async configuration options
   * @returns DynamicModule with async configured providers
   *
   * @example
   * Using with ConfigService
   * ```typescript
   * @Module({
   *   imports: [
   *     ConfigModule.forRoot(),
   *     HtmlParserModule.forRootAsync({
   *       imports: [ConfigModule],
   *       useFactory: (configService: ConfigService) => ({
   *         loggerLevel: configService.get('HTML_PARSER_LOGGER_LEVEL', 'warn')
   *       }),
   *       inject: [ConfigService],
   *     }),
   *   ],
   * })
   * export class AppModule {}
   * ```
   *
   * @example
   * Using with custom configuration factory
   * ```typescript
   * @Injectable()
   * class HtmlParserConfigService implements HtmlParserConfigFactory {
   *   createHtmlParserConfig(): HtmlParserConfig {
   *     return {
   *       loggerLevel: process.env.NODE_ENV === 'production' ? 'error' : 'debug'
   *     };
   *   }
   * }
   *
   * @Module({
   *   imports: [
   *     HtmlParserModule.forRootAsync({
   *       useClass: HtmlParserConfigService,
   *     }),
   *   ],
   * })
   * export class AppModule {}
   * ```
   *
   * @example
   * Using with existing provider
   * ```typescript
   * @Module({
   *   imports: [
   *     HtmlParserModule.forRootAsync({
   *       useExisting: MyExistingConfigService,
   *     }),
   *   ],
   * })
   * export class AppModule {}
   * ```
   */
  static forRootAsync(options: HtmlParserModuleAsyncOptions): DynamicModule;

  /**
   * Internal helper that creates the async providers for the module
   * configuration. Its signature is not part of this declaration because
   * the member is private.
   *
   * @param options - Async configuration options
   * @returns Array of providers for async configuration
   * @private
   */
  private static createAsyncProviders;
}