UNPKG

@stack.thefennec.dev/telegram-export-parser

Version:

TypeScript library for parsing Telegram Desktop's data export with full type safety

359 lines 14 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseFromString = exports.parseFromData = exports.parseFromFile = exports.TelegramExportProcessor = void 0;
const fs_1 = require("fs");
const parsers_1 = require("../parsers");

// =====================================================
// INTERNAL HELPERS
// =====================================================

/** Matches a user identifier: an optional `user` prefix followed by an integer. */
const USER_ID_PATTERN = /^(?:user)?(-?\d+)$/;

/** Display name used when a message or mention carries no usable name. */
const UNKNOWN_USER_NAME = 'Unknown User';

/** UTF-16 code unit of the byte-order mark that may prefix a file read as UTF-8. */
const BYTE_ORDER_MARK = 0xfeff;

/**
 * Converts a raw `from_id` value into a numeric user ID.
 *
 * Accepts a plain integer or a string such as `'user12345'` / `'12345'`.
 * Anything else (for example an identifier with a different prefix) is
 * reported as `undefined` instead of `NaN`, so callers can skip it rather
 * than storing an actor under a `NaN` key.
 *
 * @param {unknown} fromId - Raw `from_id` value taken from a message
 * @returns {number | undefined} The numeric ID, or `undefined` when it cannot be parsed
 */
function parseUserId(fromId) {
    if (typeof fromId === 'number') {
        return Number.isSafeInteger(fromId) ? fromId : undefined;
    }
    if (typeof fromId !== 'string') {
        return undefined;
    }
    const match = USER_ID_PATTERN.exec(fromId);
    if (match === null) {
        return undefined;
    }
    const id = Number.parseInt(match[1], 10);
    return Number.isSafeInteger(id) ? id : undefined;
}

// =====================================================
// TELEGRAM CHAT EXPORT PROCESSOR CLASS
// =====================================================

/**
 * Processor for a single Telegram chat export.
 *
 * The conversation metadata (`id`, `name`, `type`) is copied from the raw data
 * at construction time. The raw message array is kept as-is and each entry is
 * only parsed when it is reached while iterating {@link TelegramExportProcessor#messages}.
 * Results of {@link TelegramExportProcessor#extractAllActors} and
 * {@link TelegramExportProcessor#getDateRange} are cached on the instance after
 * the first call.
 *
 * @example Basic usage
 * ```typescript
 * const exportProcessor = TelegramExportProcessor.fromFile('export.json')
 * console.log(`Chat: ${exportProcessor.conversation.name}`)
 *
 * // Messages are parsed one at a time while iterating
 * for (const message of exportProcessor.messages()) {
 *     console.log(message)
 * }
 * ```
 *
 * @example Actor extraction
 * ```typescript
 * const processor = TelegramExportProcessor.fromData(rawData)
 * const actors = processor.extractAllActors()
 * console.log(`Found ${actors.size} unique participants`)
 * ```
 */
class TelegramExportProcessor {
    /** Chat metadata - available immediately after construction */
    conversation;
    /** Raw message data - parsed lazily by `messages()` */
    rawMessages;
    /** Cached result of `extractAllActors()` */
    actorCache;
    /** Cached result of `getDateRange()` */
    dateRangeCache;

    /**
     * Creates a processor from raw Telegram chat export data.
     *
     * Only `id`, `name`, `type` and `messages` are read. No validation is
     * performed here: passing `null`/`undefined` fails with a `TypeError` on
     * property access, and a missing `messages` array only surfaces later,
     * when the messages are iterated or counted.
     *
     * @param rawData - Raw export object with the fields:
     * - `id`: identifier of the conversation
     * - `name`: name of the conversation
     * - `type`: type of the conversation
     * - `messages`: array of raw message objects
     *
     * @example
     * ```typescript
     * const rawExport = {
     *     id: 12345,
     *     name: 'Group Chat',
     *     type: 'group',
     *     messages: [
     *         { id: 1, text: 'Hello!' },
     *         { id: 2, text: 'Goodbye!' }
     *     ]
     * };
     *
     * const processor = new TelegramExportProcessor(rawExport);
     * console.log(processor.conversation.name); // "Group Chat"
     * ```
     */
    constructor(rawData) {
        // Extract conversation metadata immediately
        this.conversation = {
            id: rawData.id,
            name: rawData.name,
            type: rawData.type
        };
        this.rawMessages = rawData.messages;
    }

    /**
     * Lazily parses the raw messages and yields each successfully parsed result.
     *
     * Every raw entry is passed to `parse` from `../parsers`. When parsing
     * throws, the entry is skipped and a warning is written with
     * `console.warn`; the error is not re-thrown, so iteration continues.
     *
     * @yields The value returned by `parse` for each raw message
     *
     * @example
     * ```typescript
     * for (const message of processor.messages()) {
     *     console.log(message);
     * }
     * ```
     */
    *messages() {
        for (const rawMessage of this.rawMessages) {
            try {
                yield (0, parsers_1.parse)(rawMessage);
            }
            catch (error) {
                // Skip malformed messages in streaming context. Optional chaining keeps
                // the handler itself from throwing when the raw entry is null/undefined.
                console.warn(`Failed to parse message ${rawMessage?.id}:`, error);
            }
        }
    }

    /**
     * Collects the unique actors referenced by the raw messages.
     *
     * Two sources are inspected for every raw message:
     * - the sender (`from_id` / `from` / `author`)
     * - `mention_name` entries in `text_entities` that carry a `user_id`
     *
     * The first occurrence of an ID wins; later messages do not overwrite it.
     * Senders whose `from_id` cannot be parsed as a user ID are skipped.
     * The result is cached, so subsequent calls return the same `Map` instance.
     *
     * @returns {Map<number, Actor>} Actors keyed by ID. Each actor has:
     * - `id`: the ID of the actor
     * - `type`: always `'user'`
     * - `username`: always `undefined`
     * - `displayName`: the sender/mention name, or "Unknown User" when empty
     * - `authoredBy`: the message's `author` field for senders, `undefined` for mentions
     *
     * @example
     * ```typescript
     * // Given raw messages:
     * //   { from_id: 'user12345', from: 'John Doe', text_entities: [] }
     * //   { text_entities: [{ type: 'mention_name', user_id: 54321, text: '@Mike' }] }
     * const actors = processor.extractAllActors();
     * actors.get(12345).displayName; // 'John Doe'
     * actors.get(54321).displayName; // 'Mike'
     * ```
     */
    extractAllActors() {
        if (this.actorCache) {
            return this.actorCache;
        }
        const actors = new Map();
        for (const rawMessage of this.rawMessages) {
            // Tolerate malformed entries the same way messages() does: skip them.
            if (rawMessage === null || typeof rawMessage !== 'object') {
                continue;
            }

            // Extract primary sender
            if (rawMessage.from_id) {
                const senderId = parseUserId(rawMessage.from_id);
                if (senderId !== undefined && !actors.has(senderId)) {
                    actors.set(senderId, {
                        id: senderId,
                        type: 'user',
                        username: undefined,
                        displayName: rawMessage.from || UNKNOWN_USER_NAME,
                        authoredBy: rawMessage.author
                    });
                }
            }

            // TODO: extract the actor from `rawMessage.forwarded_from`; this needs
            // format-specific parsing of the forwarded-from string.

            // Extract mentioned users from text entities
            if (Array.isArray(rawMessage.text_entities)) {
                for (const entity of rawMessage.text_entities) {
                    if (!entity || entity.type !== 'mention_name' || !entity.user_id) {
                        continue;
                    }
                    const mentionedId = entity.user_id;
                    if (actors.has(mentionedId)) {
                        continue;
                    }
                    // Only the first '@' is removed from the mention text.
                    const mentionName = typeof entity.text === 'string'
                        ? entity.text.replace('@', '')
                        : '';
                    actors.set(mentionedId, {
                        id: mentionedId,
                        type: 'user',
                        username: undefined,
                        displayName: mentionName || UNKNOWN_USER_NAME,
                        authoredBy: undefined
                    });
                }
            }
        }
        this.actorCache = actors;
        return actors;
    }

    /**
     * Computes the earliest and latest timestamps across all parsed messages.
     *
     * For each value yielded by `messages()`, the `sentAt` property is used
     * when present, otherwise `date`; its `getTime()` supplies the timestamp.
     * Invalid dates (timestamp `NaN`) are ignored. The result is cached after
     * the first call.
     *
     * @returns {{ earliest: Date, latest: Date }} The date range. When no
     * usable timestamp exists, both fields are set to the current time.
     *
     * @throws {TypeError} If a parsed message has neither a `sentAt` nor a
     * `date` value exposing `getTime()`.
     *
     * @example
     * ```typescript
     * const { earliest, latest } = processor.getDateRange();
     * console.log(earliest, latest);
     * ```
     */
    getDateRange() {
        if (this.dateRangeCache) {
            return this.dateRangeCache;
        }
        // Track the extremes while iterating instead of spreading every timestamp
        // into Math.min/Math.max: spreading passes one argument per message and can
        // exceed the engine's argument limit on large exports.
        let earliest = Infinity;
        let latest = -Infinity;
        for (const message of this.messages()) {
            const timestamp = ('sentAt' in message ? message.sentAt : message.date).getTime();
            if (Number.isNaN(timestamp)) {
                continue;
            }
            if (timestamp < earliest) {
                earliest = timestamp;
            }
            if (timestamp > latest) {
                latest = timestamp;
            }
        }
        if (earliest === Infinity) {
            // No usable timestamps: fall back to a single "now" so both ends agree.
            const now = Date.now();
            this.dateRangeCache = { earliest: new Date(now), latest: new Date(now) };
        }
        else {
            this.dateRangeCache = { earliest: new Date(earliest), latest: new Date(latest) };
        }
        return this.dateRangeCache;
    }

    /**
     * Number of raw messages in the export, including entries that may fail to parse.
     *
     * @returns {number} Length of the raw message array
     */
    get totalMessages() {
        return this.rawMessages.length;
    }

    // =====================================================
    // FACTORY METHODS
    // =====================================================

    /**
     * Create processor from JSON file on disk.
     *
     * The file is read synchronously as UTF-8. A leading byte-order mark is
     * removed before parsing because `JSON.parse` rejects it.
     *
     * @param filePath - Path to Telegram export JSON file
     * @returns New processor instance with immediate metadata access
     * @throws {Error} If the file cannot be read
     * @throws {SyntaxError} If the file content is not valid JSON
     */
    static fromFile(filePath) {
        const fileContent = (0, fs_1.readFileSync)(filePath, 'utf-8');
        const jsonText = fileContent.charCodeAt(0) === BYTE_ORDER_MARK
            ? fileContent.slice(1)
            : fileContent;
        const data = JSON.parse(jsonText);
        return new TelegramExportProcessor(data);
    }

    /**
     * Create processor from raw data object.
     *
     * @param data - Raw Telegram export data
     * @returns New processor instance
     */
    static fromData(data) {
        return new TelegramExportProcessor(data);
    }

    /**
     * Create processor from JSON string.
     *
     * @param jsonString - JSON string containing export data
     * @returns New processor instance
     * @throws {SyntaxError} If the string is not valid JSON
     */
    static fromString(jsonString) {
        const data = JSON.parse(jsonString);
        return new TelegramExportProcessor(data);
    }
}
exports.TelegramExportProcessor = TelegramExportProcessor;

// =====================================================
// STANDALONE PARSING FUNCTIONS
// =====================================================

/**
 * Parse Telegram export from JSON file on disk.
 *
 * Convenience function that wraps TelegramExportProcessor.fromFile() for
 * functional-style usage.
 *
 * @param filePath - Path to Telegram export JSON file
 * @returns New processor instance with immediate metadata access
 *
 * @example
 * ```typescript
 * import { parseFromFile } from 'telegram-export-parser'
 *
 * const processor = parseFromFile('./export.json')
 * console.log(`Chat: ${processor.conversation.name}`)
 * ```
 */
const parseFromFile = (filePath) => {
    return TelegramExportProcessor.fromFile(filePath);
};
exports.parseFromFile = parseFromFile;

/**
 * Parse Telegram export from raw data object.
 *
 * Convenience function that wraps TelegramExportProcessor.fromData() for
 * functional-style usage and consistent API surface.
 *
 * @param data - Raw Telegram export data object
 * @returns New processor instance
 *
 * @example
 * ```typescript
 * import { parseFromData } from 'telegram-export-parser'
 *
 * const processor = parseFromData(rawExportData)
 * for (const message of processor.messages()) {
 *     console.log(message)
 * }
 * ```
 */
const parseFromData = (data) => {
    return TelegramExportProcessor.fromData(data);
};
exports.parseFromData = parseFromData;

/**
 * Parse Telegram export from JSON string.
 *
 * Convenience function that wraps TelegramExportProcessor.fromString() for
 * functional-style usage and consistent API surface.
 *
 * @param jsonString - JSON string containing export data
 * @returns New processor instance
 *
 * @example
 * ```typescript
 * import { parseFromString } from 'telegram-export-parser'
 *
 * const jsonData = fs.readFileSync('export.json', 'utf8')
 * const processor = parseFromString(jsonData)
 * const actors = processor.extractAllActors()
 * ```
 */
const parseFromString = (jsonString) => {
    return TelegramExportProcessor.fromString(jsonString);
};
exports.parseFromString = parseFromString;
//# sourceMappingURL=main.js.map