UNPKG

@stack.thefennec.dev/telegram-export-parser

Version:

TypeScript library for parsing Telegram Desktop's data export with full type safety

339 lines 10.9 kB
/**
 * @fileoverview Actor parsing utilities for extracting user information from Telegram export data.
 *
 * This module provides sophisticated actor extraction using multiple strategies with confidence scoring.
 * It handles various data formats and edge cases in Telegram exports to reliably identify message senders,
 * service actors, bots, and channel authors.
 *
 * This file contains type declarations only; the implementations live in the
 * corresponding compiled module.
 *
 * ## Key Features
 *
 * - **Multi-strategy parsing** - Tries multiple approaches to extract actor data
 * - **Confidence scoring** - Rates parsing reliability (0-100 scale)
 * - **Fallback handling** - Graceful degradation when data is incomplete
 * - **Type detection** - Automatically determines if actor is user, bot, channel, etc.
 * - **Data normalization** - Cleans and standardizes actor information
 *
 * @example Basic Usage
 * ```typescript
 * import { parseActor, parseMessageSender } from './parsers/actors'
 *
 * // Parse from raw export data
 * const actor = parseActor({
 *   from: 'John Doe',
 *   fromId: 'user123456789'
 * })
 *
 * // Parse message sender specifically
 * const sender = parseMessageSender('Jane Smith', 'user987654321')
 * ```
 *
 * @example Advanced Usage
 * ```typescript
 * import { parseActorWithConfidence, parseMessageActors } from './parsers/actors'
 *
 * // Get confidence scoring for debugging
 * const result = parseActorWithConfidence(rawData)
 * console.log(`Confidence: ${result.confidence}%, Source: ${result.source}`)
 *
 * // Extract the primary actor from a message (a single actor, not a list)
 * const primaryActor = parseMessageActors(rawTelegramMessage)
 * ```
 */
import type { Actor } from '../types';
/**
 * Raw actor data as it appears in Telegram export files.
 *
 * This interface captures all possible fields where actor information
 * might be found across different message types and export formats.
 * The parser uses multiple strategies to extract meaningful data from
 * these optional and sometimes inconsistent fields.
 *
 * Every field is optional. Field names here are camelCase, whereas the raw
 * message object accepted by `parseMessageActors` uses the export's
 * snake_case keys (`from_id`, `actor_id`, `via_bot`).
 *
 * NOTE(review): this interface is declared without `export` and the module
 * ends with `export {}`, so consumers apparently cannot import the type by
 * name even though it appears in exported signatures - confirm this is intended.
 *
 * @example
 * ```typescript
 * // Regular message sender
 * const regularMessage: RawActorData = {
 *   from: 'John Doe',
 *   fromId: 'user123456789'
 * }
 *
 * // Service message actor
 * const serviceMessage: RawActorData = {
 *   actor: 'Admin User',
 *   actorId: 'user987654321'
 * }
 *
 * // Bot message
 * const botMessage: RawActorData = {
 *   from: 'MyBot',
 *   fromId: 'user555666777',
 *   viaBot: '@helper_bot'
 * }
 * ```
 */
interface RawActorData {
    /** Display name from 'from' field (regular messages) */
    from?: string;
    /** Telegram ID from 'from_id' field (regular messages) */
    fromId?: string;
    /** Display name from 'actor' field (service messages) */
    actor?: string;
    /** Telegram ID from 'actor_id' field (service messages) */
    actorId?: string;
    /** Telegram username (without @) */
    username?: string;
    /** Author name (channel posts) */
    author?: string;
    /** Bot username used to send message (via_bot) */
    viaBot?: string;
    /** Numeric user ID (alternative format) */
    userId?: number;
    /** Display name (alternative field) */
    displayName?: string;
}
/**
 * Result of actor parsing with confidence metadata.
 *
 * Contains the parsed actor (if successful) along with reliability information
 * for debugging and quality assessment. Higher confidence scores indicate
 * more reliable parsing with complete data.
 *
 * NOTE(review): like `RawActorData`, this interface is not exported, so callers
 * of `parseActorWithConfidence` apparently cannot reference the result type by
 * name - confirm this is intended.
 *
 * @example
 * ```typescript
 * const result = parseActorWithConfidence(rawData)
 *
 * if (result.confidence > 80) {
 *   console.log('High confidence actor:', result.actor?.displayName)
 * } else if (result.confidence > 50) {
 *   console.log('Medium confidence, verify:', result.source)
 * } else {
 *   console.log('Low confidence, needs review')
 * }
 * ```
 */
interface ActorParseResult {
    /** The parsed actor object (null if parsing failed) */
    actor: Actor | null;
    /** Confidence score (0-100, higher = more reliable) */
    confidence: number;
    /** Parsing strategy used (for debugging and analytics) */
    source: string;
}
/**
 * Main actor parsing function with multi-strategy extraction.
 *
 * Uses sophisticated parsing strategies to extract actor information from
 * raw Telegram export data and automatically selects the best available data.
 *
 * This function returns only the actor (or null); the confidence score and
 * strategy name are not part of its return value. Use
 * `parseActorWithConfidence` when that metadata is needed.
 *
 * @param data - Raw actor data from Telegram export
 * @returns Parsed actor object or null if extraction fails
 *
 * @example
 * ```typescript
 * // Parse regular message sender
 * const actor = parseActor({
 *   from: 'John Doe',
 *   fromId: 'user123456789'
 * })
 *
 * // Parse service message actor
 * const serviceActor = parseActor({
 *   actor: 'Admin User',
 *   actorId: 'user987654321'
 * })
 *
 * // Parse with mixed data
 * const complexActor = parseActor({
 *   from: 'Bot Name',
 *   fromId: 'user555666777',
 *   username: 'helper_bot',
 *   viaBot: '@another_bot'
 * })
 * ```
 */
export declare const parseActor: (data: RawActorData) => Actor | null;
/**
 * Parse regular message sender from 'from' and 'from_id' fields.
 *
 * Optimized for the most common case of regular user messages.
 * Provides clean API for message parsing code.
 *
 * Both arguments are optional in the signature.
 *
 * @param from - Display name of the sender
 * @param fromId - Telegram ID of the sender
 * @returns Parsed actor or null if insufficient data
 *
 * @example
 * ```typescript
 * const sender = parseMessageSender('Jane Doe', 'user987654321')
 * console.log(sender?.displayName) // 'Jane Doe'
 * console.log(sender?.id) // 987654321
 * ```
 */
export declare const parseMessageSender: (from?: string, fromId?: string) => Actor | null;
/**
 * Parse service message actor from 'actor' and 'actor_id' fields.
 *
 * Service messages represent system actions like user joins, calls, etc.
 * This function specifically handles the actor who performed the action.
 *
 * Both arguments are optional in the signature.
 *
 * @param actor - Display name of the service actor
 * @param actorId - Telegram ID of the service actor
 * @returns Parsed actor or null if insufficient data
 *
 * @example
 * ```typescript
 * const serviceActor = parseServiceActor('Admin User', 'user123456789')
 * // Use for: "Admin User invited 3 users to the group"
 * ```
 */
export declare const parseServiceActor: (actor?: string, actorId?: string) => Actor | null;
/**
 * Parse bot actor from 'via_bot' field.
 *
 * When messages are sent through bots, the via_bot field contains
 * the bot's username. This function extracts that bot information.
 *
 * @param viaBot - Bot username (may include @ prefix)
 * @returns Parsed bot actor or null if invalid
 *
 * @example
 * ```typescript
 * const bot = parseViaBot('@helper_bot')
 * console.log(bot?.type) // 'bot'
 * console.log(bot?.username) // 'helper_bot'
 * ```
 */
export declare const parseViaBot: (viaBot?: string) => Actor | null;
/**
 * Parse channel author from 'author' field.
 *
 * In channel posts, the author field indicates who wrote the content
 * on behalf of the channel. Useful for attributed channel content.
 *
 * @param author - Author name from channel post
 * @param fromId - Optional channel/author ID
 * @returns Parsed channel author or null if invalid
 *
 * @example
 * ```typescript
 * const author = parseChannelAuthor('Content Creator', 'channel123456789')
 * console.log(author?.type) // 'channel_author'
 * console.log(author?.authoredBy) // 'Content Creator'
 * ```
 */
export declare const parseChannelAuthor: (author?: string, fromId?: string) => Actor | null;
/**
 * Parse actor from username mention data.
 *
 * Useful when processing mentions within message content or
 * when only username information is available.
 *
 * @param username - Username (with or without @ prefix)
 * @param userId - Optional numeric user ID
 * @returns Parsed actor or null if invalid username
 *
 * @example
 * ```typescript
 * const mentioned = parseUsernameMention('@john_doe', 123456789)
 * const usernameOnly = parseUsernameMention('jane_smith') // Lower confidence
 * ```
 */
export declare const parseUsernameMention: (username?: string, userId?: number) => Actor | null;
/**
 * Parse multiple actors from an array of raw data.
 *
 * Processes multiple actor entries and filters out failed parses, so the
 * result contains no nulls and may be shorter than the input.
 * Useful for batch processing of export data.
 *
 * @param dataArray - Array of raw actor data objects
 * @returns Array of successfully parsed actors (failures are excluded)
 *
 * @example
 * ```typescript
 * const rawActors = [
 *   { from: 'User 1', fromId: 'user111' },
 *   { from: 'User 2', fromId: 'user222' },
 *   { from: '', fromId: '' }, // This will be filtered out
 * ]
 *
 * const actors = parseActors(rawActors)
 * console.log(actors.length) // 2 (invalid entry filtered out)
 * ```
 */
export declare const parseActors: (dataArray: RawActorData[]) => Actor[];
/**
 * Parse actor with detailed confidence and debugging information.
 *
 * Returns full parsing result including confidence score and strategy used.
 * Useful for debugging, quality assessment, and analytics.
 *
 * Unlike `parseActor`, this always returns a result object; a failed parse is
 * reported through a null `actor` field rather than a null return value.
 *
 * @param data - Raw actor data from Telegram export
 * @returns Complete parse result with confidence metadata
 *
 * @example
 * ```typescript
 * const result = parseActorWithConfidence({
 *   from: 'John Doe',
 *   fromId: 'user123456789'
 * })
 *
 * console.log(`Actor: ${result.actor?.displayName}`)
 * console.log(`Confidence: ${result.confidence}%`)
 * console.log(`Strategy: ${result.source}`)
 *
 * if (result.confidence < 70) {
 *   console.log('Low confidence - review data quality')
 * }
 * ```
 */
export declare const parseActorWithConfidence: (data: RawActorData) => ActorParseResult;
/**
 * Extract the primary actor from a complete raw Telegram message.
 *
 * Convenience function that tries multiple actor fields from a message
 * and returns the most relevant one based on priority logic. Despite the
 * plural name, it returns a single actor (or null), not a list.
 *
 * The parameter uses the export's snake_case keys (`from_id`, `actor_id`,
 * `via_bot`), not the camelCase names of `RawActorData`.
 *
 * **Priority Order:**
 * 1. Message sender (from/from_id) - highest priority
 * 2. Service actor (actor/actor_id)
 * 3. Channel author (author)
 * 4. Via bot (via_bot) - lowest priority
 *
 * @param rawMessage - Raw message object from Telegram export
 * @returns Most relevant actor or null if no valid actor found
 *
 * @example
 * ```typescript
 * // Regular user message
 * const userMsg = {
 *   from: 'John Doe',
 *   from_id: 'user123456789'
 * }
 * const actor1 = parseMessageActors(userMsg) // Returns John Doe
 *
 * // Service message with actor
 * const serviceMsg = {
 *   actor: 'Admin User',
 *   actor_id: 'user987654321'
 * }
 * const actor2 = parseMessageActors(serviceMsg) // Returns Admin User
 *
 * // Complex message with multiple actors
 * const complexMsg = {
 *   from: 'Bot Name',
 *   from_id: 'user555666777',
 *   via_bot: '@helper_bot'
 * }
 * const actor3 = parseMessageActors(complexMsg) // Returns Bot Name (higher priority)
 * ```
 */
export declare const parseMessageActors: (rawMessage: {
    /** Sender display name ('from' field) */
    from?: string;
    /** Sender Telegram ID ('from_id' field) */
    from_id?: string;
    /** Service actor display name ('actor' field) */
    actor?: string;
    /** Service actor Telegram ID ('actor_id' field) */
    actor_id?: string;
    /** Channel post author name ('author' field) */
    author?: string;
    /** Bot username the message was sent through ('via_bot' field) */
    via_bot?: string;
}) => Actor | null;
export {};
//# sourceMappingURL=actors.d.ts.map