@stack.thefennec.dev/telegram-export-parser
Version:
TypeScript library for parsing Telegram Desktop's data export with full type safety
339 lines • 10.9 kB
TypeScript
/**
* @fileoverview Actor parsing utilities for extracting user information from Telegram export data.
*
* This module provides sophisticated actor extraction using multiple strategies with confidence scoring.
* It handles various data formats and edge cases in Telegram exports to reliably identify message senders,
* service actors, bots, and channel authors.
*
* ## Key Features
*
* - **Multi-strategy parsing** - Tries multiple approaches to extract actor data
* - **Confidence scoring** - Rates parsing reliability (0-100 scale)
* - **Fallback handling** - Graceful degradation when data is incomplete
* - **Type detection** - Automatically determines if actor is user, bot, channel, etc.
* - **Data normalization** - Cleans and standardizes actor information
*
* @example Basic Usage
* ```typescript
* import { parseActor, parseMessageSender } from './parsers/actors'
*
* // Parse from raw export data
* const actor = parseActor({
* from: 'John Doe',
* fromId: 'user123456789'
* })
*
* // Parse message sender specifically
* const sender = parseMessageSender('Jane Smith', 'user987654321')
* ```
*
* @example Advanced Usage
* ```typescript
* import { parseActorWithConfidence, parseMessageActors } from './parsers/actors'
*
* // Get confidence scoring for debugging
* const result = parseActorWithConfidence(rawData)
* console.log(`Confidence: ${result.confidence}%, Source: ${result.source}`)
*
* // Extract the primary actor from a message (returns a single Actor or null)
* const primaryActor = parseMessageActors(rawTelegramMessage)
* ```
*/
import type { Actor } from '../types';
/**
* Raw actor data as it appears in Telegram export files.
*
* This interface captures all possible fields where actor information
* might be found across different message types and export formats.
* Every field is optional; the parser uses multiple strategies to extract
* meaningful data from these sometimes inconsistent fields.
*
* Note: this interface is declared without `export`, so it cannot be
* imported by name from this module. Callers pass object literals that
* structurally match this shape (as in the examples below).
*
* @example
* ```typescript
* // Regular message sender
* const regularMessage = {
*   from: 'John Doe',
*   fromId: 'user123456789'
* }
*
* // Service message actor
* const serviceMessage = {
*   actor: 'Admin User',
*   actorId: 'user987654321'
* }
*
* // Bot message
* const botMessage = {
*   from: 'MyBot',
*   fromId: 'user555666777',
*   viaBot: '@helper_bot'
* }
* ```
*/
interface RawActorData {
/** Display name from 'from' field (regular messages) */
from?: string;
/** Telegram ID from 'from_id' field (regular messages), e.g. 'user123456789' */
fromId?: string;
/** Display name from 'actor' field (service messages) */
actor?: string;
/** Telegram ID from 'actor_id' field (service messages), e.g. 'user987654321' */
actorId?: string;
/** Telegram username (without @) */
username?: string;
/** Author name (channel posts) */
author?: string;
/** Bot username used to send message (via_bot); examples in this file include the @ prefix */
viaBot?: string;
/** Numeric user ID (alternative format); note this is a number, unlike the string IDs above */
userId?: number;
/** Display name (alternative field) */
displayName?: string;
}
/**
* Result of actor parsing with confidence metadata.
*
* Contains the parsed actor (if successful) along with reliability information
* for debugging and quality assessment. Higher confidence scores indicate
* more reliable parsing with complete data.
*
* This is the return type of {@link parseActorWithConfidence}. The interface is
* declared without `export`, so it cannot be imported by name from this module;
* obtain it via inference from that function's return value.
*
* @example
* ```typescript
* const result = parseActorWithConfidence(rawData)
*
* if (result.confidence > 80) {
*   console.log('High confidence actor:', result.actor?.displayName)
* } else if (result.confidence > 50) {
*   console.log('Medium confidence, verify:', result.source)
* } else {
*   console.log('Low confidence, needs review')
* }
* ```
*/
interface ActorParseResult {
/** The parsed actor object (null if parsing failed) */
actor: Actor | null;
/** Confidence score (0-100, higher = more reliable) */
confidence: number;
/** Parsing strategy used (for debugging and analytics) */
source: string;
}
/**
* Main actor parsing function with multi-strategy extraction.
*
* Extracts actor information from raw Telegram export data, selecting the
* best available data among the supplied fields.
*
* Only the actor itself is returned. The confidence score and the name of
* the strategy used are not part of this function's return value; call
* {@link parseActorWithConfidence} when that metadata is needed.
*
* @param data - Raw actor data from Telegram export (all fields optional)
* @returns Parsed actor object or null if extraction fails
*
* @example
* ```typescript
* // Parse regular message sender
* const actor = parseActor({
*   from: 'John Doe',
*   fromId: 'user123456789'
* })
*
* // Parse service message actor
* const serviceActor = parseActor({
*   actor: 'Admin User',
*   actorId: 'user987654321'
* })
*
* // Parse with mixed data
* const complexActor = parseActor({
*   from: 'Bot Name',
*   fromId: 'user555666777',
*   username: 'helper_bot',
*   viaBot: '@another_bot'
* })
* ```
*/
export declare const parseActor: (data: RawActorData) => Actor | null;
/**
* Parse regular message sender from 'from' and 'from_id' fields.
*
* Intended for the most common case of regular user messages.
* Provides a clean API for message parsing code. Both parameters are
* optional in the signature, so either may be omitted or `undefined`.
*
* @param from - Display name of the sender (optional)
* @param fromId - Telegram ID of the sender, e.g. 'user987654321' (optional)
* @returns Parsed actor or null if insufficient data
*
* @example
* ```typescript
* const sender = parseMessageSender('Jane Doe', 'user987654321')
* console.log(sender?.displayName) // 'Jane Doe'
* console.log(sender?.id) // 987654321
* ```
*/
export declare const parseMessageSender: (from?: string, fromId?: string) => Actor | null;
/**
* Parse service message actor from 'actor' and 'actor_id' fields.
*
* Service messages represent system actions like user joins, calls, etc.
* This function specifically handles the actor who performed the action.
* Both parameters are optional in the signature.
*
* @param actor - Display name of the service actor (optional)
* @param actorId - Telegram ID of the service actor, e.g. 'user123456789' (optional)
* @returns Parsed actor or null if insufficient data
*
* @example
* ```typescript
* const serviceActor = parseServiceActor('Admin User', 'user123456789')
* // Use for: "Admin User invited 3 users to the group"
* ```
*/
export declare const parseServiceActor: (actor?: string, actorId?: string) => Actor | null;
/**
* Parse bot actor from 'via_bot' field.
*
* When messages are sent through bots, the via_bot field contains
* the bot's username. This function extracts that bot information.
*
* @param viaBot - Bot username (may include @ prefix); optional
* @returns Parsed bot actor or null if invalid
*
* @example
* ```typescript
* const bot = parseViaBot('@helper_bot')
* console.log(bot?.type) // 'bot'
* console.log(bot?.username) // 'helper_bot'
* ```
*/
export declare const parseViaBot: (viaBot?: string) => Actor | null;
/**
* Parse channel author from 'author' field.
*
* In channel posts, the author field indicates who wrote the content
* on behalf of the channel. Useful for attributed channel content.
* Both parameters are optional in the signature.
*
* @param author - Author name from channel post (optional)
* @param fromId - Channel/author ID, e.g. 'channel123456789' (optional)
* @returns Parsed channel author or null if invalid
*
* @example
* ```typescript
* const author = parseChannelAuthor('Content Creator', 'channel123456789')
* console.log(author?.type) // 'channel_author'
* console.log(author?.authoredBy) // 'Content Creator'
* ```
*/
export declare const parseChannelAuthor: (author?: string, fromId?: string) => Actor | null;
/**
* Parse actor from username mention data.
*
* Useful when processing mentions within message content or
* when only username information is available.
*
* This function returns only the actor; no confidence score is exposed
* through its return type.
*
* @param username - Username (with or without @ prefix); optional
* @param userId - Numeric user ID (optional; a number, not a prefixed string)
* @returns Parsed actor or null if invalid username
*
* @example
* ```typescript
* const mentioned = parseUsernameMention('@john_doe', 123456789)
* const usernameOnly = parseUsernameMention('jane_smith') // No ID supplied
* ```
*/
export declare const parseUsernameMention: (username?: string, userId?: number) => Actor | null;
/**
* Parse multiple actors from an array of raw data.
*
* Processes multiple actor entries and filters out failed parses, so the
* returned array contains no `null` entries and may be shorter than the input.
* Useful for batch processing of export data.
*
* @param dataArray - Array of raw actor data objects
* @returns Array of successfully parsed actors (failures are excluded)
*
* @example
* ```typescript
* const rawActors = [
*   { from: 'User 1', fromId: 'user111' },
*   { from: 'User 2', fromId: 'user222' },
*   { from: '', fromId: '' }, // This will be filtered out
* ]
*
* const actors = parseActors(rawActors)
* console.log(actors.length) // 2 (invalid entry filtered out)
* ```
*/
export declare const parseActors: (dataArray: RawActorData[]) => Actor[];
/**
* Parse actor with detailed confidence and debugging information.
*
* Returns full parsing result including confidence score and strategy used.
* Useful for debugging, quality assessment, and analytics.
*
* Unlike {@link parseActor}, this function always returns a result object;
* a failed parse is reported through `result.actor` being `null`.
*
* @param data - Raw actor data from Telegram export
* @returns Complete parse result with confidence metadata
*
* @example
* ```typescript
* const result = parseActorWithConfidence({
*   from: 'John Doe',
*   fromId: 'user123456789'
* })
*
* console.log(`Actor: ${result.actor?.displayName}`)
* console.log(`Confidence: ${result.confidence}%`)
* console.log(`Strategy: ${result.source}`)
*
* // The threshold is illustrative; this declaration file defines no
* // official confidence bands.
* if (result.confidence < 70) {
*   console.log('Low confidence - review data quality')
* }
* ```
*/
export declare const parseActorWithConfidence: (data: RawActorData) => ActorParseResult;
/**
* Extract the primary actor from a complete raw Telegram message.
*
* Convenience function that tries multiple actor fields from a message
* and returns the most relevant one based on priority logic. Despite the
* plural name, at most one actor is returned.
*
* The parameter uses the snake_case keys of the raw export (`from_id`,
* `actor_id`, `via_bot`), unlike `RawActorData`, which uses camelCase
* (`fromId`, `actorId`, `viaBot`).
*
* **Priority Order:**
* 1. Message sender (from/from_id) - highest priority
* 2. Service actor (actor/actor_id)
* 3. Channel author (author)
* 4. Via bot (via_bot) - lowest priority
*
* @param rawMessage - Raw message object from Telegram export (all fields optional)
* @returns Most relevant actor or null if no valid actor found
*
* @example
* ```typescript
* // Regular user message
* const userMsg = {
*   from: 'John Doe',
*   from_id: 'user123456789'
* }
* const actor1 = parseMessageActors(userMsg) // Returns John Doe
*
* // Service message with actor
* const serviceMsg = {
*   actor: 'Admin User',
*   actor_id: 'user987654321'
* }
* const actor2 = parseMessageActors(serviceMsg) // Returns Admin User
*
* // Complex message with multiple actors
* const complexMsg = {
*   from: 'Bot Name',
*   from_id: 'user555666777',
*   via_bot: '@helper_bot'
* }
* const actor3 = parseMessageActors(complexMsg) // Returns Bot Name (higher priority)
* ```
*/
export declare const parseMessageActors: (rawMessage: {
from?: string;
from_id?: string;
actor?: string;
actor_id?: string;
author?: string;
via_bot?: string;
}) => Actor | null;
export {};
//# sourceMappingURL=actors.d.ts.map