UNPKG

webpage_quality_analyzer

Version:

High-performance webpage quality analyzer with 115 comprehensive metrics - Rust library with WASM, C++, and Python bindings

682 lines (681 loc) 20.9 kB
/* tslint:disable */
/* eslint-disable */
/**
 * Initialize the WASM module (call this once when loading)
 *
 * Sets up panic hooks and other initialization
 */
export function init(): void;
/**
 * Get the library version
 */
export function getVersion(): string;
/**
 * Get version changelog for current version
 */
export function getVersionChangelog(): string;
/**
 * WASM wrapper for the webpage quality analyzer
 *
 * This provides a JavaScript-friendly interface to the full Rust analyzer,
 * bridging async Rust to JavaScript Promises via wasm-bindgen-futures.
 */
export class WasmAnalyzer {
  /**
   * NOTE(review): presumably releases the WASM-side object backing this
   * instance (the usual wasm-bindgen `free`), after which the instance
   * should no longer be used - confirm against the generated glue code.
   */
  free(): void;
  /**
   * Create a new analyzer with default configuration
   */
  constructor();
  /**
   * Create analyzer with custom configuration
   *
   * # Arguments
   * * `profile` - Optional profile name ("default", "news", "blog", etc.)
   * * `enable_nlp` - Enable NLP features (default: true)
   * * `add_report` - Include detailed report sections (default: true)
   */
  static withConfig(profile?: string | null, enable_nlp?: boolean | null, add_report?: boolean | null): WasmAnalyzer;
  /**
   * Analyze HTML content
   *
   * # Arguments
   * * `html` - HTML content as string
   *
   * # Returns
   * Promise that resolves to a PageQualityReport JSON object
   * (typed as `any` in this declaration).
   *
   * # Example (JavaScript)
   * ```js
   * const analyzer = new WasmAnalyzer();
   * const report = await analyzer.analyze('<html>...</html>');
   * console.log(`Score: ${report.score}, Quality: ${report.verdict}`);
   * ```
   */
  analyze(html: string): Promise<any>;
  /**
   * Analyze HTML with a specific URL context
   *
   * # Arguments
   * * `url` - URL of the page (used for context, not fetched)
   * * `html` - HTML content as string
   *
   * # Returns
   * Promise that resolves to a PageQualityReport JSON object
   */
  analyzeWithUrl(url: string, html: string): Promise<any>;
  /**
   * Fetch and analyze a URL using browser's fetch API
   *
   * # Arguments
   * * `url` - URL to fetch and analyze
   *
   * # Returns
   * Promise that resolves to a PageQualityReport JSON object
   *
   * # Example (JavaScript)
   * ```js
   * const analyzer = new WasmAnalyzer();
   * const report = await analyzer.fetchAndAnalyze('https://example.com');
   * ```
   */
  fetchAndAnalyze(url: string): Promise<any>;
  /**
   * Get available profile names
   *
   * # Returns
   * Array of profile names as strings (from enhanced_profiles.json)
   */
  static getAvailableProfiles(): string[];
  /**
   * Get version information
   */
  static getVersion(): string;
  /**
   * Analyze HTML with custom field selection
   *
   * This method allows you to select only specific fields from the analysis
   * report, reducing bandwidth usage by 60-70% for production deployments.
   *
   * # Arguments
   * * `html` - HTML content to analyze
   * * `fields` - Array of field names to include (e.g., ['url', 'score', 'metadata'])
   *
   * # Returns
   * Promise that resolves to a filtered PageQualityReport with only requested fields
   *
   * # Example (JavaScript)
   * ```js
   * const analyzer = new WasmAnalyzer();
   * const report = await analyzer.analyzeWithFields(html, [
   *   'url',
   *   'score',
   *   'metadata',
   *   'processed_document'
   * ]);
   * // Returns only these 4 fields (60-70% smaller JSON)
   * ```
   */
  analyzeWithFields(html: string, fields: string[]): Promise<any>;
  /**
   * Analyze HTML with advanced field selector
   *
   * Provides full control over field selection including sections,
   * include/exclude rules, and nested field access.
   *
   * # Arguments
   * * `html` - HTML content to analyze
   * * `selector` - WasmFieldSelector with custom rules
   *
   * # Returns
   * Promise that resolves to a filtered PageQualityReport
   *
   * # Example (JavaScript)
   * ```js
   * const analyzer = new WasmAnalyzer();
   * const selector = new WasmFieldSelector()
   *   .includeFields(['url', 'score'])
   *   .includeSections(['metadata'])
   *   .excludeFields(['metadata.og_tags']);
   *
   * const report = await analyzer.analyzeWithSelector(html, selector);
   * ```
   */
  analyzeWithSelector(html: string, selector: WasmFieldSelector): Promise<any>;
  /**
   * Create analyzer from JSON configuration
   *
   * Since WASM cannot directly read files, pass the config as a JSON string.
   * In browser environments, fetch the config file and pass its content.
   *
   * # Arguments
   * * `config_json` - JSON string containing profile configuration
   *
   * # Returns
   * New WasmAnalyzer configured with the provided settings
   *
   * # Example (JavaScript - Browser)
   * ```js
   * const configJson = await fetch('/configs/my-config.json').then(r => r.text());
   * const analyzer = WasmAnalyzer.fromConfig(configJson);
   * ```
   *
   * # Example (JavaScript - Node.js)
   * ```js
   * const fs = require('fs');
   * const configJson = fs.readFileSync('config.json', 'utf-8');
   * const analyzer = WasmAnalyzer.fromConfig(configJson);
   * ```
   */
  static fromConfig(config_json: string): WasmAnalyzer;
  /**
   * Get the current profile configuration as JSON string
   *
   * Returns the active profile configuration, which can be:
   * - Saved to a file
   * - Modified and reloaded
   * - Shared across team members
   *
   * # Returns
   * JSON string of the current ProfileConfig
   *
   * # Example (JavaScript)
   * ```js
   * const analyzer = new WasmAnalyzer();
   * const configJson = analyzer.getConfig();
   * console.log('Current config:', JSON.parse(configJson));
   *
   * // Save to file or modify
   * localStorage.setItem('analyzer-config', configJson);
   * ```
   */
  getConfig(): string;
  /**
   * Create a builder for advanced analyzer configuration
   *
   * # Returns
   * WasmAnalyzerBuilder for fluent configuration
   *
   * # Example (JavaScript)
   * ```js
   * const analyzer = WasmAnalyzer.builder()
   *   .withProfile('news')
   *   .disableMetric('grammar_score')
   *   .build();
   * ```
   */
  static builder(): WasmAnalyzerBuilder;
}
/**
 * WASM Analyzer Builder for advanced configuration
 *
 * Provides a fluent API for customizing analyzer behavior including:
 * - Profile selection
 * - Metric toggling (enable/disable specific metrics)
 * - Custom thresholds per metric
 * - NLP and grammar features
 *
 * # Example (JavaScript)
 * ```js
 * const analyzer = WasmAnalyzer.builder()
 *   .withProfile('news')
 *   .disableMetric('grammar_score')  // Skip expensive grammar checking
 *   .setThreshold('word_count', 100, 500, 2000, 5000)
 *   .enableNlp(true)
 *   .build();
 * ```
 */
export class WasmAnalyzerBuilder {
  /**
   * NOTE(review): presumably releases the WASM-side object backing this
   * builder (the usual wasm-bindgen `free`) - confirm against the generated
   * glue code.
   */
  free(): void;
  /**
   * Create a new builder with default settings
   */
  constructor();
  /**
   * Set the profile to use (chainable)
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .withProfile('news');
   * ```
   */
  withProfile(profile: string): WasmAnalyzerBuilder;
  /**
   * Enable a specific metric (chainable)
   *
   * By default, all metrics in the profile are enabled. Use this to
   * explicitly enable a metric that might be disabled in the profile.
   *
   * # Arguments
   * * `metric_name` - Name of metric (e.g., "word_count", "image_count")
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .enableMetric('word_count')
   *   .enableMetric('readability_score');
   * ```
   */
  enableMetric(metric_name: string): WasmAnalyzerBuilder;
  /**
   * Disable a specific metric (chainable)
   *
   * Disabling metrics can improve performance by skipping expensive
   * calculations you don't need.
   *
   * # Arguments
   * * `metric_name` - Name of metric to disable
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .disableMetric('grammar_score')  // Skip grammar checking
   *   .disableMetric('link_check');    // Skip external link validation
   * ```
   */
  disableMetric(metric_name: string): WasmAnalyzerBuilder;
  /**
   * Set custom threshold for a metric (chainable)
   *
   * Customize the thresholds used for scoring a metric with full control
   * over the scoring curve.
   *
   * Thresholds define the scoring curve:
   * - `min`: Below this value, score is 0
   * - `optimal_min`: Start of optimal range (100% score)
   * - `optimal_max`: End of optimal range (100% score)
   * - `max`: Above this value, score doesn't improve
   *
   * # Arguments
   * * `metric_name` - Name of the metric
   * * `min` - Minimum acceptable value
   * * `optimal_min` - Start of optimal range
   * * `optimal_max` - End of optimal range
   * * `max` - Maximum useful value
   *
   * # Validation
   * Thresholds must satisfy: min < optimal_min <= optimal_max < max
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .setThreshold(
   *     'word_count',
   *     100,   // min - articles < 100 words score poorly
   *     500,   // optimal_min - ideal range starts
   *     2000,  // optimal_max - ideal range ends
   *     5000   // max - diminishing returns after this
   *   );
   * ```
   */
  setThreshold(metric_name: string, min: number, optimal_min: number, optimal_max: number, max: number): WasmAnalyzerBuilder;
  /**
   * Set simple threshold for a metric (chainable)
   *
   * Convenience method for metrics with a single optimal value rather than a range.
   * This creates optimal_min == optimal_max.
   *
   * # Arguments
   * * `metric_name` - Name of the metric
   * * `min` - Minimum acceptable value
   * * `optimal` - Single optimal value (100% score)
   * * `max` - Maximum useful value
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .setSimpleThreshold(
   *     'title_len',
   *     30,   // min - titles < 30 chars are poor
   *     60,   // optimal - 60 chars is perfect
   *     120   // max - longer titles don't help
   *   );
   * ```
   */
  setSimpleThreshold(metric_name: string, min: number, optimal: number, max: number): WasmAnalyzerBuilder;
  /**
   * Set custom weight for a metric (chainable)
   *
   * Weights control the relative importance of metrics in scoring.
   * Default weight is 1.0. Use >1.0 to increase importance, <1.0 to decrease.
   *
   * # Arguments
   * * `metric_name` - Name of the metric to adjust
   * * `weight` - Multiplier for metric's contribution (0.0-10.0, typically 0.1-3.0)
   *
   * # Weight Guidelines
   * * 0.0 - Disables the metric
   * * 0.1-0.5 - Significantly reduce importance
   * * 0.5-0.9 - Moderately reduce importance
   * * 1.0 - Default (unchanged)
   * * 1.1-2.0 - Moderately increase importance
   * * 2.0-3.0 - Significantly increase importance
   * * >3.0 - Extreme emphasis (use sparingly)
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .setMetricWeight('seo_title_quality', 2.0)  // Double SEO importance
   *   .setMetricWeight('grammar_score', 0.5);     // Reduce grammar weight
   * ```
   */
  setMetricWeight(metric_name: string, weight: number): WasmAnalyzerBuilder;
  /**
   * Add a penalty that reduces score when metric falls below threshold
   *
   * Penalties help enforce content quality standards by automatically reducing
   * scores when important metrics don't meet expectations.
   *
   * # Parameters
   * - `penalty_id`: Unique identifier for this penalty
   * - `metric`: Metric name to monitor (e.g., "word_count", "heading_depth")
   * - `threshold`: Trigger penalty when metric value is below this
   * - `penalty_points`: Number of points to subtract (0-100)
   * - `description`: Human-readable explanation
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .withProfile('content_article')
   *   .addPenaltyBelow('short_content', 'word_count', 300, 10, 'Content too short')
   *   .addPenaltyBelow('shallow_structure', 'heading_depth', 2, 5, 'Insufficient headings');
   * ```
   */
  addPenaltyBelow(penalty_id: string, metric: string, threshold: number, penalty_points: number, description: string): WasmAnalyzerBuilder;
  /**
   * Add a penalty that reduces score when metric exceeds threshold
   *
   * Useful for penalizing excessive values (e.g., too many links, overly long titles).
   *
   * The parameter list mirrors `addPenaltyBelow`; here the penalty applies
   * when the metric value exceeds `threshold`.
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .addPenaltyAbove('title_too_long', 'title_len', 70, 5, 'Title too long for SEO');
   * ```
   */
  addPenaltyAbove(penalty_id: string, metric: string, threshold: number, penalty_points: number, description: string): WasmAnalyzerBuilder;
  /**
   * Add a bonus that increases score when metric exceeds excellence threshold
   *
   * Bonuses reward exceptional quality, comprehensiveness, or excellence.
   *
   * # Parameters
   * - `bonus_id`: Unique identifier for this bonus
   * - `metric`: Metric name to monitor
   * - `threshold`: Grant bonus when metric value exceeds this
   * - `bonus_points`: Number of points to add (0-50 recommended)
   * - `description`: Human-readable explanation
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .withProfile('content_article')
   *   .addBonusAbove('comprehensive', 'word_count', 2000, 5, 'Comprehensive content')
   *   .addBonusAbove('deep_structure', 'heading_depth', 4, 3, 'Excellent organization');
   * ```
   */
  addBonusAbove(bonus_id: string, metric: string, threshold: number, bonus_points: number, description: string): WasmAnalyzerBuilder;
  /**
   * Add a bonus when multiple metrics are all above thresholds
   *
   * Creates a synergy bonus that rewards consistent quality across metrics.
   *
   * NOTE(review): the signature takes a single `threshold`; presumably it is
   * compared against every metric listed in `metrics` - confirm.
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .addBonusMultiple(
   *     'seo_excellence',
   *     ['title_len', 'meta_desc_len', 'heading_depth'],
   *     50,
   *     5,
   *     'Excellent SEO across the board'
   *   );
   * ```
   */
  addBonusMultiple(bonus_id: string, metrics: string[], threshold: number, bonus_points: number, description: string): WasmAnalyzerBuilder;
  /**
   * Enable or disable NLP features (chainable)
   *
   * NLP features include language detection and keyword extraction.
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .enableNlp(false);  // Disable for faster processing
   * ```
   */
  enableNlp(enable: boolean): WasmAnalyzerBuilder;
  /**
   * Enable or disable link checking (chainable)
   *
   * Note: Link checking in WASM uses browser fetch API and may be subject
   * to CORS restrictions. Only works when 'linkcheck' feature is enabled.
   *
   * # Example (JavaScript)
   * ```js
   * const builder = new WasmAnalyzerBuilder()
   *   .enableLinkCheck(true);  // Enable link validation
   * ```
   */
  enableLinkCheck(enable: boolean): WasmAnalyzerBuilder;
  /**
   * Enable or disable detailed report sections (chainable)
   */
  addReport(add: boolean): WasmAnalyzerBuilder;
  /**
   * Load configuration from JSON string (WASM alternative to from_config_file)
   *
   * Since WASM doesn't have file system access, use this method to load
   * configuration from a JSON string instead. You can fetch the config via
   * HTTP or embed it in your JavaScript code.
   *
   * Unlike the chainable instance methods, this static method returns a
   * `WasmAnalyzer`, not a builder, so no `build()` call follows it.
   *
   * # Parameters
   * - `config_json`: JSON string containing the configuration
   *
   * # Returns
   * New WasmAnalyzer instance configured from the JSON
   *
   * # Example (JavaScript)
   * ```js
   * // Fetch config from server
   * const configResponse = await fetch('/config/analyzer-config.json');
   * const configJson = await configResponse.text();
   * const analyzer = WasmAnalyzerBuilder.fromConfigJson(configJson);
   *
   * // Or use embedded config
   * const config = JSON.stringify({
   *   active_profile: "news",
   *   presets: {
   *     news: {
   *       category_weights: { content: 2.0, seo: 1.5 }
   *     }
   *   }
   * });
   * const analyzer = WasmAnalyzerBuilder.fromConfigJson(config);
   * ```
   */
  static fromConfigJson(config_json: string): WasmAnalyzer;
  /**
   * Load configuration from YAML string (WASM alternative to from_config_file)
   *
   * Since WASM doesn't have file system access, use this method to load
   * configuration from a YAML string instead.
   *
   * Unlike the chainable instance methods, this static method returns a
   * `WasmAnalyzer`, not a builder, so no `build()` call follows it.
   *
   * # Parameters
   * - `config_yaml`: YAML string containing the configuration
   *
   * # Returns
   * New WasmAnalyzer instance configured from the YAML
   *
   * # Example (JavaScript)
   * ```js
   * const configYaml = `
   * active_profile: content_article
   * presets:
   *   content_article:
   *     category_weights:
   *       content: 2.0
   *       seo: 1.5
   * `;
   * const analyzer = WasmAnalyzerBuilder.fromConfigYaml(configYaml);
   * ```
   */
  static fromConfigYaml(config_yaml: string): WasmAnalyzer;
  /**
   * Load configuration from TOML string (WASM alternative to from_config_file)
   *
   * Since WASM doesn't have file system access, use this method to load
   * configuration from a TOML string instead.
   *
   * Unlike the chainable instance methods, this static method returns a
   * `WasmAnalyzer`, not a builder, so no `build()` call follows it.
   *
   * # Parameters
   * - `config_toml`: TOML string containing the configuration
   *
   * # Returns
   * New WasmAnalyzer instance configured from the TOML
   *
   * # Example (JavaScript)
   * ```js
   * const configToml = `
   * active_profile = "blog"
   *
   * [presets.blog]
   * [presets.blog.category_weights]
   * content = 2.0
   * seo = 1.5
   * `;
   * const analyzer = WasmAnalyzerBuilder.fromConfigToml(configToml);
   * ```
   */
  static fromConfigToml(config_toml: string): WasmAnalyzer;
  /**
   * Build the analyzer with configured settings
   *
   * # Returns
   * WasmAnalyzer instance with custom configuration
   *
   * # Example (JavaScript)
   * ```js
   * const analyzer = new WasmAnalyzerBuilder()
   *   .withProfile('news')
   *   .disableMetric('grammar_score')
   *   .setThreshold('word_count', 100, 500, 2000, 5000)
   *   .build();
   *
   * const report = await analyzer.analyze(html);
   * ```
   */
  build(): WasmAnalyzer;
}
/**
 * Batch analyzer for processing multiple HTML documents
 */
export class WasmBatchAnalyzer {
  /**
   * NOTE(review): presumably releases the WASM-side object backing this
   * instance (the usual wasm-bindgen `free`) - confirm against the generated
   * glue code.
   */
  free(): void;
  /**
   * Create new batch analyzer
   */
  constructor();
  /**
   * Analyze multiple HTML documents in batch
   *
   * # Arguments
   * * `html_documents` - Array of HTML strings
   *
   * # Returns
   * Promise that resolves to WasmBatchResult
   *
   * # Example (JavaScript)
   * ```js
   * const batch = new WasmBatchAnalyzer();
   * const result = await batch.analyzeBatch([
   *   '<html>page 1</html>',
   *   '<html>page 2</html>',
   * ]);
   * console.log(`${result.getSuccessCount()} succeeded`);
   * const reports = result.getResults();
   * ```
   */
  analyzeBatch(html_documents: string[]): Promise<WasmBatchResult>;
  /**
   * Analyze batch with custom field selection
   *
   * Analyze multiple HTML documents and return only selected fields,
   * optimizing bandwidth for production batch processing.
   *
   * # Arguments
   * * `html_documents` - Array of HTML strings
   * * `fields` - Array of field names to include
   *
   * # Returns
   * Promise that resolves to array of filtered reports
   */
  analyzeBatchWithFields(html_documents: string[], fields: string[]): Promise<any>;
}
/**
 * Batch analysis result container
 *
 * The constructor is declared private; instances are obtained from
 * `WasmBatchAnalyzer.analyzeBatch()` rather than constructed directly.
 */
export class WasmBatchResult {
  private constructor();
  /**
   * NOTE(review): presumably releases the WASM-side object backing this
   * result (the usual wasm-bindgen `free`) - confirm against the generated
   * glue code.
   */
  free(): void;
  /**
   * Get number of results
   */
  getCount(): number;
  /**
   * Get number of successful analyses
   */
  getSuccessCount(): number;
  /**
   * Get number of failed analyses
   */
  getFailureCount(): number;
  /**
   * Get all results as JSON array
   *
   * Each element is either:
   * - `{ success: true, report: PageQualityReport }`
   * - `{ success: false, error: string }`
   */
  getResults(): any;
}
/**
 * WASM wrapper for FieldSelector - enables custom output field filtering
 *
 * This allows JavaScript/TypeScript to select only specific fields from the
 * analysis report, reducing JSON payload size by 60-70% for production use.
 *
 * # Example (JavaScript)
 * ```js
 * const analyzer = new WasmAnalyzer();
 * const report = await analyzer.analyzeWithFields(html, ['url', 'score', 'metadata']);
 * // Returns only the 3 requested fields
 * ```
 *
 * # Example (JavaScript - explicit selector)
 * ```js
 * const analyzer = new WasmAnalyzer();
 * const selector = new WasmFieldSelector()
 *   .includeFields(['url', 'score'])
 *   .includeSections(['metadata']);
 * const report = await analyzer.analyzeWithSelector(html, selector);
 * ```
 */
export class WasmFieldSelector {
  /**
   * NOTE(review): presumably releases the WASM-side object backing this
   * selector (the usual wasm-bindgen `free`) - confirm against the generated
   * glue code.
   */
  free(): void;
  /**
   * Create a new empty field selector
   */
  constructor();
  /**
   * Add fields to include (chainable)
   *
   * # Example (JavaScript)
   * ```js
   * const selector = new WasmFieldSelector()
   *   .includeFields(['url', 'score', 'metadata']);
   * ```
   */
  includeFields(fields: string[]): WasmFieldSelector;
  /**
   * Add fields to exclude (chainable)
   */
  excludeFields(fields: string[]): WasmFieldSelector;
  /**
   * Add sections to include (chainable)
   */
  includeSections(sections: string[]): WasmFieldSelector;
  /**
   * Add sections to exclude (chainable)
   */
  excludeSections(sections: string[]): WasmFieldSelector;
}