@stack.thefennec.dev/telegram-export-parser
Version:
TypeScript library for parsing Telegram Desktop's data export with full type safety
249 lines • 10.3 kB
TypeScript
import type { Actor, Conversation, RawTelegramChatExport, TelegramEvent, TelegramMessage } from '../types';
/**
* Advanced Telegram chat export processor with lazy loading and streaming support.
*
* Provides immediate access to chat metadata while allowing flexible message processing
* through generators. Supports both in-memory and streaming scenarios with automatic
* actor caching and relationship building.
*
* **Key Features:**
* - **Immediate metadata** - Chat info available instantly
* - **Lazy message processing** - Generator-based for memory efficiency
* - **Actor extraction** - Automatic participant discovery and caching
* - **Streaming ready** - Compatible with streaming parsers
* - **Memory efficient** - Process messages without loading all at once
*
* @example Basic usage
* ```typescript
* const exportProcessor = TelegramExportProcessor.fromFile('export.json')
* console.log(`Chat: ${exportProcessor.conversation.name}`)
*
* // Process messages lazily
* for (const message of exportProcessor.messages()) {
* console.log(`${message.sender.displayName}: ${message.text}`)
* }
* ```
*
* @example Actor extraction
* ```typescript
* const processor = TelegramExportProcessor.fromData(rawData)
* const actors = processor.extractAllActors()
* console.log(`Found ${actors.size} unique participants`)
* ```
*/
export declare class TelegramExportProcessor {
/** Chat metadata - available immediately after construction */
readonly conversation: Conversation;
/** Raw message data - processed lazily through generators */
private readonly rawMessages;
/** Cached actors extracted from all messages; unset until first computed */
private actorCache?;
/** Cached date range for the entire export; unset until first computed */
private dateRangeCache?;
/**
* Initializes a new instance of the `TelegramExportProcessor` class.
*
* Constructs the processor using raw Telegram chat export data, extracting meta-information
* about the conversation and retaining the raw message data for further processing.
*
* @param rawData An object representing the raw Telegram chat export data. It must include:
* - `id` (string | number): The unique identifier of the conversation.
* - `name` (string): The name of the conversation (e.g., group or chat name).
* - `type` (string): The type of conversation (e.g., "private", "group", "channel").
* - `messages` (array): The array containing all raw message data from the export.
*
* @throws {Error} If the `rawData` is missing required fields or is invalid.
*
* @example
* ```typescript
* const rawExport = {
* id: 12345,
* name: 'Group Chat',
* type: 'group',
* messages: [
* { id: 1, text: 'Hello!' },
* { id: 2, text: 'Goodbye!' }
* ]
* };
*
* const processor = new TelegramExportProcessor(rawExport);
* console.log(processor.conversation.name); // "Group Chat"
* ```
*/
constructor(rawData: RawTelegramChatExport);
/**
* Returns a generator for iterating over parsed Telegram messages or events.
*
* Processes a collection of raw Telegram messages, parses each one,
* and yields valid `TelegramMessage` or `TelegramEvent` objects. Malformed messages
* are skipped, and a warning is logged for each failed parsing attempt.
*
* Parsing errors for invalid messages are caught and logged, but not re-thrown,
* so iterating the generator does not throw because of a malformed message.
*
* @yields Parsed `TelegramMessage` or `TelegramEvent`
* @returns A generator that finishes without a return value (`void`) and does not
* accept values passed to `next()`.
*
* @example
* ```typescript
* const generator = processor.messages();
* for (const message of generator) {
* console.log(message); // Logs each parsed TelegramMessage or TelegramEvent
* }
* ```
*/
messages(): Generator<TelegramMessage | TelegramEvent, void, undefined>;
/**
* Extracts all unique actors from a collection of raw messages and organizes them into a map.
*
* This method processes the raw message data to identify distinct actors, such as senders,
* forwarded message authors, and mentioned users. It creates a `Map` of actor details keyed
* by their numeric IDs. Actors may include users with various details, such as display names
* or usernames.
*
* The `actorCache` is utilized to avoid redundant computation if the actors have already been
* extracted. If previously cached, the cached result is returned.
*
* @returns A map of unique actors keyed by their numeric ID. Each actor
* object contains the following fields:
* - `id`: The numeric ID of the actor.
* - `type`: The type of actor (currently always `'user'`).
* - `username`: The username of the actor (if available).
* - `displayName`: A human-readable display name for the actor (may default to "Unknown User").
* - `authoredBy`: Additional metadata if applicable (e.g., authorship details from the message).
*
* @throws {Error} Throws an error in case of unexpected or invalid raw message formats.
*
* @example
* ```typescript
* const rawMessages = [
* { from_id: 'user12345', from: 'John Doe', text_entities: [] },
* { from_id: 'user67890', from: 'Jane Smith', text_entities: [] },
* { text_entities: [{ type: 'mention_name', user_id: 54321, text: '@Mike' }] }
* ];
*
* // Assuming `processor` was created from an export whose `messages` are `rawMessages`
* const actors = processor.extractAllActors();
* console.log(actors);
* // Outputs Map: {
* // 12345 => { id: 12345, type: 'user', username: undefined, displayName: 'John Doe', authoredBy: undefined },
* // 67890 => { id: 67890, type: 'user', username: undefined, displayName: 'Jane Smith', authoredBy: undefined },
* // 54321 => { id: 54321, type: 'user', username: undefined, displayName: 'Mike', authoredBy: undefined }
* // }
* ```
*
* @see Actor - For the structure of the returned actor objects.
*/
extractAllActors(): Map<number, Actor>;
/**
* Retrieves the date range (earliest and latest dates) from a collection of messages.
*
* The method calculates the range based on the timestamps of all messages. If the
* method has been called previously, it retrieves the cached result to optimize
* performance. If no messages are present, it defaults both `earliest` and `latest`
* to the current date.
*
* @returns An object containing two `Date` objects:
* - `earliest`: The earliest date within the collection.
* - `latest`: The latest date within the collection.
* If there are no messages, both `earliest` and `latest` are set to the current date.
*
* @throws {TypeError} If any message object is missing a valid `sentAt` or `date` field.
*
* @example
* ```typescript
* // Assuming `processor` is a TelegramExportProcessor instance:
* const dateRange = processor.getDateRange();
* console.log(dateRange.earliest); // Outputs the earliest date from the messages
* console.log(dateRange.latest); // Outputs the latest date from the messages
* ```
*
* @see {@link TelegramExportProcessor.messages} - Method providing the collection of messages to compute the range.
*/
getDateRange(): {
earliest: Date;
latest: Date;
};
/**
* Retrieves the total number of messages within the current collection.
*
* This is a getter, so read it as a property (`processor.totalMessages`) rather than
* calling it.
* NOTE(review): whether the count covers all raw entries or only successfully parsed
* ones is not visible from this declaration - confirm against the implementation.
*
* @returns The total number of messages in the collection.
*/
get totalMessages(): number;
/**
* Create processor from JSON file on disk.
*
* The declared return type is the processor itself, not a `Promise`, so the result
* is available without `await`.
*
* @param filePath - Path to Telegram export JSON file
* @returns New processor instance with immediate metadata access
*/
static fromFile(filePath: string): TelegramExportProcessor;
/**
* Create processor from raw data object.
*
* @param data - Raw Telegram export data
* @returns New processor instance
*/
static fromData(data: RawTelegramChatExport): TelegramExportProcessor;
/**
* Create processor from JSON string.
*
* @param jsonString - JSON string containing export data
* @returns New processor instance
*/
static fromString(jsonString: string): TelegramExportProcessor;
}
/**
* Parse Telegram export from JSON file on disk.
*
* Convenience function that wraps TelegramExportProcessor.fromFile() for
* functional-style usage and as the main library entry point.
*
* @param filePath - Path to Telegram export JSON file
* @returns New processor instance with immediate metadata access
*
* @example
* ```typescript
* import { parseFromFile } from 'telegram-export-parser'
*
* const processor = parseFromFile('./export.json')
* console.log(`Chat: ${processor.conversation.name}`)
* ```
*
* @see TelegramExportProcessor.fromFile
*/
export declare const parseFromFile: (filePath: string) => TelegramExportProcessor;
/**
* Parse Telegram export from raw data object.
*
* Convenience function that wraps TelegramExportProcessor.fromData() for
* functional-style usage and consistent API surface.
*
* @param data - Raw Telegram export data object
* @returns New processor instance
*
* @example
* ```typescript
* import { parseFromData } from 'telegram-export-parser'
*
* // `rawExportData` is an already-parsed export object (a RawTelegramChatExport)
* declare const rawExportData: RawTelegramChatExport
*
* const processor = parseFromData(rawExportData)
* for (const message of processor.messages()) {
* console.log(message)
* }
* ```
*
* @see TelegramExportProcessor.fromData
*/
export declare const parseFromData: (data: RawTelegramChatExport) => TelegramExportProcessor;
/**
* Parse Telegram export from JSON string.
*
* Convenience function that wraps TelegramExportProcessor.fromString() for
* functional-style usage and consistent API surface.
*
* @param jsonString - JSON string containing export data
* @returns New processor instance
*
* @example
* ```typescript
* import fs from 'node:fs'
* import { parseFromString } from 'telegram-export-parser'
*
* const jsonData = fs.readFileSync('export.json', 'utf8')
* const processor = parseFromString(jsonData)
* const actors = processor.extractAllActors()
* ```
*
* @see TelegramExportProcessor.fromString
*/
export declare const parseFromString: (jsonString: string) => TelegramExportProcessor;
//# sourceMappingURL=main.d.ts.map