UNPKG

@stack.thefennec.dev/telegram-export-parser

Version:

TypeScript library for parsing Telegram Desktop's data export with full type safety

261 lines 10.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.ParserFactory = void 0; const errors_1 = require("./errors"); /** * @fileoverview Core parsing engine with priority-based parser selection. * * Implements the factory pattern for universal Telegram export data parsing. * Manages parser registry and provides both single-item and batch processing * with automatic type detection and error resilience. * * @example * ```typescript * import { ParserFactory } from './factory' * import { messageParser, eventParser } from './parsers' * * const factory = new ParserFactory() * factory.register(messageParser) // Priority: 80 * factory.register(eventParser) // Priority: 60 * * // Auto-selects highest priority parser that can handle the data * const result = factory.parse(rawTelegramData) * ``` */ /** * Core parsing engine that manages parser registry and handles data transformation. * * Uses priority-based selection to automatically choose the most appropriate parser * for each data item. Provides both individual and batch processing capabilities * with comprehensive error handling for robust processing of large Telegram exports. * * **Key Features:** * - **Priority-based selection** - Higher priority parsers are tried first * - **Automatic type detection** - No manual parser selection required * - **Error resilience** - Batch operations continue despite individual failures * - **Type safety** - Full TypeScript support with generic return types * - **Extensible** - Easy registration of new parsers without core changes * * **Parser Selection Algorithm:** * 1. Validate input data (must be object) * 2. Sort registered parsers by priority (highest first) * 3. Test each parser's `canHandle()` method * 4. Use first parser that returns `true` and produces non-null result * 5. Throw error if no parser can handle the data * * @example Basic Usage * ```typescript * const factory = new ParserFactory() * * // Register parsers (order doesn't matter - priority determines selection) * factory.register(messageParser) // Priority: 90 * factory.register(entityParser) // Priority: 70 * factory.register(eventParser) // Priority: 60 * * // Parse single items - automatic type detection * const message = factory.parse(rawMessageData) * const entity = factory.parse(rawEntityData) * ``` * * @example Batch Processing * ```typescript * const rawData = loadTelegramExport() // Mixed data types * const { parsed, failed } = factory.parseMany(rawData) * * console.log(`✅ Parsed: ${parsed.length}`) * console.log(`❌ Failed: ${failed.length}`) * * // Handle failures with context * failed.forEach(({ index, error, data }) => { * console.log(`Item ${index} failed:`, error.message) * }) * ``` */ class ParserFactory { /** Internal registry of parsers indexed by unique names */ parsers = new Map(); /** * Register a parser in the factory for automatic selection during parsing. * * Parsers are stored by unique name and sorted by priority during parsing. * Higher priority parsers are tried first, allowing fine-grained control * over parser selection without manual routing logic. * * @template TInput - Input data type the parser accepts * @template TOutput - Parsed result type the parser produces * @param parser - Parser instance to register * @throws {TelegramExportParseError} When parser name already exists * * @example Register parsers with different priorities * ```typescript * // High priority - specific message types * factory.register(photoMessageParser) // Priority: 90 * factory.register(voiceMessageParser) // Priority: 85 * * // Medium priority - general message types * factory.register(textMessageParser) // Priority: 70 * * // Low priority - fallback parsers * factory.register(unknownMessageParser) // Priority: 10 * ``` */ register(parser) { if (this.parsers.has(parser.name)) { throw new errors_1.TelegramExportParseError(`Parser '${parser.name}' already registered`); } this.parsers.set(parser.name, parser); } /** * Parse raw data using priority-based automatic parser selection. * * Core parsing method that implements the factory pattern. Automatically * detects the appropriate parser based on data structure and priority ranking. * Essential for processing mixed Telegram export data without manual routing. * * **Algorithm:** * 1. **Validate** - Ensures input is a valid object * 2. **Sort** - Orders parsers by priority (highest → lowest) * 3. **Test** - Calls `canHandle()` on each parser in order * 4. **Parse** - Uses first parser that accepts the data * 5. **Return** - Type-safe result or throws descriptive error * * @template TOutput - Expected result type (auto-inferred) * @param raw - Raw data object from Telegram export * @returns Parsed and typed result object * @throws {TelegramExportParseError} Invalid input or no suitable parser * * @example Mixed data processing * ```typescript * // All handled automatically by appropriate parsers * const textMsg = factory.parse(rawTextMessage) // → TextMessage * const photoMsg = factory.parse(rawPhotoMessage) // → PhotoMessage * const callEvent = factory.parse(rawCallEvent) // → CallEvent * const boldEntity = factory.parse(rawBoldEntity) // → BoldEntity * ``` * * @example Error handling * ```typescript * try { * const result = factory.parse(unknownData) * } catch (error) { * if (error instanceof TelegramExportParseError) { * console.log('Failed to parse:', error.message) * console.log('Raw data:', error.context) * } * } * ``` */ parse(raw) { if (!raw || typeof raw !== 'object') { throw new errors_1.TelegramExportParseError('Invalid data: expected object'); } // Get parsers sorted by priority (highest first) const sortedParsers = Array.from(this.parsers.values()) .sort((a, b) => b.priority - a.priority); // Try each parser for (const parser of sortedParsers) { if (parser.canHandle(raw)) { const result = parser.parse(raw); if (result !== null) { return result; } } } throw new errors_1.TelegramExportParseError('No suitable parser found'); } /** * Batch parse multiple items with comprehensive error resilience. * * Processes arrays of raw data items while gracefully handling individual * failures. Critical for processing large Telegram exports where some items * may be malformed, corrupted, or unsupported without stopping the entire operation. * * **Error Resilience Strategy:** * - **Continue processing** - Individual failures don't stop the batch * - **Collect failures** - Detailed error information with context preserved * - **Maintain order** - Failed item indices allow correlation with source data * - **Type safety** - Successful results maintain full type information * * @template TOutput - Expected result type for successful parses * @param rawItems - Array of raw data objects to process * @returns Batch processing results with success/failure breakdown * @returns {TOutput[]} returns.parsed - Successfully processed items * @returns {Array<{index: number, data: unknown, error: Error}>} returns.failed - Failed items with context * * @example Large export processing * ```typescript * const telegramExport = loadExportFile() // 50,000+ messages * const { parsed, failed } = factory.parseMany(telegramExport.messages) * * const successRate = (parsed.length / telegramExport.messages.length) * 100 * console.log(`Processing: ${successRate.toFixed(1)}% success rate`) * * // Log failure summary for debugging * const errorTypes = failed.reduce((acc, { error }) => { * acc[error.message] = (acc[error.message] || 0) + 1 * return acc * }, {}) * console.log('Failure breakdown:', errorTypes) * ``` * * @example Selective error handling * ```typescript * const result = factory.parseMany(mixedData) * * // Process successful results * result.parsed.forEach(item => processValidData(item)) * * // Handle specific failure types * result.failed * .filter(f => f.error.message.includes('Invalid date')) * .forEach(f => repairDateFormat(f.data, f.index)) * ``` */ parseMany(rawItems) { const parsed = []; const failed = []; rawItems.forEach((rawItem, index) => { try { parsed.push(this.parse(rawItem)); } catch (error) { failed.push({ index, data: rawItem, error: error }); } }); return { parsed, failed }; } /** * Get all registered parsers for introspection and debugging. * * Provides access to the internal parser registry for advanced use cases * such as performance analysis, parser coverage testing, or debugging * parser selection issues. * * @returns Array of all registered parsers sorted by priority (highest first) * * @example Registry introspection * ```typescript * const parsers = factory.getRegisteredParsers() * console.log(`Registry contains ${parsers.length} parsers:`) * * parsers.forEach(parser => { * console.log(`- ${parser.name} (priority: ${parser.priority})`) * }) * ``` * * @example Performance analysis * ```typescript * const highPriorityParsers = factory.getRegisteredParsers() * .filter(p => p.priority > 80) * * console.log(`${highPriorityParsers.length} high-priority parsers`) * console.log('May need optimization for large batches') * ``` */ getRegisteredParsers() { return Array.from(this.parsers.values()) .sort((a, b) => b.priority - a.priority); } } exports.ParserFactory = ParserFactory; //# sourceMappingURL=factory.js.map