@stack.thefennec.dev/telegram-export-parser
Version:
TypeScript library for parsing Telegram Desktop's data export with full type safety
359 lines • 14 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseFromString = exports.parseFromData = exports.parseFromFile = exports.TelegramExportProcessor = void 0;
const fs_1 = require("fs");
const parsers_1 = require("../parsers");
// =====================================================
// TELEGRAM CHAT EXPORT PROCESSOR CLASS
// =====================================================
/**
 * Processor for a single-chat Telegram Desktop JSON export.
 *
 * The constructor copies the chat metadata (`id`, `name`, `type`) into
 * `conversation` and keeps a reference to the raw `messages` collection.
 * Messages are parsed one at a time, on demand, through the `messages()`
 * generator; the raw collection itself stays in memory as it was passed in.
 *
 * **Key Features:**
 * - **Immediate metadata** - `conversation` is populated in the constructor
 * - **Lazy message parsing** - `messages()` parses each entry only when consumed
 * - **Actor extraction** - `extractAllActors()` collects senders and name-mentions, cached
 * - **Date range** - `getDateRange()` computes earliest/latest timestamps, cached
 *
 * @example Basic usage
 * ```typescript
 * const exportProcessor = TelegramExportProcessor.fromFile('export.json')
 * console.log(`Chat: ${exportProcessor.conversation.name}`)
 *
 * // Parse messages lazily
 * for (const message of exportProcessor.messages()) {
 *   console.log(message)
 * }
 * ```
 *
 * @example Actor extraction
 * ```typescript
 * const processor = TelegramExportProcessor.fromData(rawData)
 * const actors = processor.extractAllActors()
 * console.log(`Found ${actors.size} unique participants`)
 * ```
 */
class TelegramExportProcessor {
    /** Chat metadata - available immediately after construction */
    conversation;
    /** Raw message data - parsed lazily through the `messages()` generator */
    rawMessages;
    /** Cached actors extracted from all messages (set by `extractAllActors()`) */
    actorCache;
    /** Cached date range for the entire export (set by `getDateRange()`) */
    dateRangeCache;
    /**
     * Initializes a new `TelegramExportProcessor`.
     *
     * Copies `id`, `name` and `type` from the raw export into `conversation`
     * and retains the raw `messages` collection for later processing. No
     * per-field validation is performed; missing metadata fields simply end up
     * as `undefined` on `conversation`.
     *
     * @param rawData Raw Telegram chat export object. Expected fields:
     * - `id`: identifier of the conversation
     * - `name`: name of the conversation
     * - `type`: type of the conversation
     * - `messages`: collection of raw messages; when `null`/`undefined` it is
     *   treated as an empty list so metadata remains usable
     *
     * @throws {TypeError} If `rawData` is `null` or `undefined`.
     *
     * @example
     * ```typescript
     * const rawExport = {
     *   id: 12345,
     *   name: 'Group Chat',
     *   type: 'group',
     *   messages: [
     *     { id: 1, text: 'Hello!' },
     *     { id: 2, text: 'Goodbye!' }
     *   ]
     * };
     *
     * const processor = new TelegramExportProcessor(rawExport);
     * console.log(processor.conversation.name); // "Group Chat"
     * ```
     */
    constructor(rawData) {
        if (rawData == null) {
            throw new TypeError('TelegramExportProcessor requires a raw export data object');
        }
        // Extract conversation metadata immediately
        this.conversation = {
            id: rawData.id,
            name: rawData.name,
            type: rawData.type
        };
        // Default only when absent, so any iterable a caller supplies is kept as-is.
        this.rawMessages = rawData.messages ?? [];
    }
    /**
     * Generator over the parsed form of every raw message.
     *
     * Each raw entry is passed to `parse` from `../parsers`. When parsing
     * throws, a warning is logged with `console.warn` and the entry is skipped;
     * the error is not re-thrown.
     *
     * @yields The value returned by `parse` for each raw message that parsed successfully
     *
     * @example
     * ```typescript
     * for (const message of processor.messages()) {
     *   console.log(message);
     * }
     * ```
     */
    *messages() {
        for (const rawMessage of this.rawMessages) {
            let parsed;
            try {
                parsed = (0, parsers_1.parse)(rawMessage);
            }
            catch (error) {
                // Skip malformed messages. Optional chaining keeps the handler
                // itself from throwing when the raw entry is null/undefined.
                console.warn(`Failed to parse message ${rawMessage?.id}:`, error);
                continue;
            }
            // Yield outside the try so only parse failures are swallowed, not
            // errors injected by the consumer via generator.throw().
            yield parsed;
        }
    }
    /**
     * Collects the unique actors referenced by the raw messages into a `Map`
     * keyed by numeric ID.
     *
     * Two sources are inspected per message:
     * - the sender (`from_id` / `from` / `author`), where `from_id` may be a
     *   number or a string such as `'user12345'`
     * - `text_entities` entries of type `'mention_name'` carrying a `user_id`
     *
     * The first occurrence of an ID wins; later occurrences do not overwrite it.
     * Sender IDs that do not parse to a number after stripping the `user`
     * prefix (e.g. `'channel123'`) are skipped rather than stored under a
     * shared `NaN` key. `forwarded_from` is currently not parsed.
     *
     * The result is cached in `actorCache`; subsequent calls return the same
     * `Map` instance.
     *
     * @returns {Map<number, Actor>} Map of actors keyed by numeric ID. Each actor has:
     * - `id`: the numeric ID
     * - `type`: always `'user'`
     * - `username`: always `undefined` (not available from these fields)
     * - `displayName`: `from` for senders (or `'Unknown User'` when empty);
     *   the mention text with the first `'@'` removed for mentions
     *   (or `'Unknown User'` when the entity has no text)
     * - `authoredBy`: the message's `author` for senders, `undefined` for mentions
     *
     * @example
     * ```typescript
     * const processor = TelegramExportProcessor.fromData({
     *   id: 1, name: 'Chat', type: 'group',
     *   messages: [
     *     { from_id: 'user12345', from: 'John Doe', text_entities: [] },
     *     { text_entities: [{ type: 'mention_name', user_id: 54321, text: '@Mike' }] }
     *   ]
     * });
     * const actors = processor.extractAllActors();
     * // Map {
     * //   12345 => { id: 12345, type: 'user', username: undefined, displayName: 'John Doe', authoredBy: undefined },
     * //   54321 => { id: 54321, type: 'user', username: undefined, displayName: 'Mike', authoredBy: undefined }
     * // }
     * ```
     */
    extractAllActors() {
        if (this.actorCache) {
            return this.actorCache;
        }
        const actors = new Map();
        for (const rawMessage of this.rawMessages) {
            if (rawMessage == null) {
                continue;
            }
            // Extract primary sender
            if (rawMessage.from_id != null) {
                // String() tolerates numeric ids; explicit radix avoids surprises.
                const id = Number.parseInt(String(rawMessage.from_id).replace('user', ''), 10);
                // A NaN id would merge every unparseable sender into one Map entry.
                if (!Number.isNaN(id) && !actors.has(id)) {
                    actors.set(id, {
                        id,
                        type: 'user',
                        username: undefined,
                        displayName: rawMessage.from || 'Unknown User',
                        authoredBy: rawMessage.author
                    });
                }
            }
            // NOTE: `forwarded_from` is intentionally not handled yet; it is a
            // free-form string that would need format-specific parsing.
            // Extract mentioned users from text entities
            if (Array.isArray(rawMessage.text_entities)) {
                for (const entity of rawMessage.text_entities) {
                    if (entity?.type !== 'mention_name' || !entity.user_id) {
                        continue;
                    }
                    const id = entity.user_id;
                    if (actors.has(id)) {
                        continue;
                    }
                    actors.set(id, {
                        id,
                        type: 'user',
                        username: undefined,
                        displayName: typeof entity.text === 'string'
                            ? entity.text.replace('@', '')
                            : 'Unknown User',
                        authoredBy: undefined
                    });
                }
            }
        }
        this.actorCache = actors;
        return actors;
    }
    /**
     * Computes the earliest and latest timestamps across all parsed messages.
     *
     * Iterates `messages()`, reading `sentAt` when the parsed object has that
     * property and `date` otherwise. Minimum and maximum are tracked in a
     * single pass, so the size of the export is not limited by the engine's
     * function-argument limit. Timestamps that are `NaN` (invalid dates) are
     * ignored.
     *
     * The result is cached in `dateRangeCache`; subsequent calls return the
     * same object. When no valid timestamp is found, both bounds are set to
     * the same current instant.
     *
     * @returns {{ earliest: Date, latest: Date }} The date range of the export.
     *
     * @throws {TypeError} If a parsed message has neither a `sentAt` nor a
     * `date` value exposing `getTime()`.
     *
     * @example
     * ```typescript
     * const { earliest, latest } = processor.getDateRange();
     * console.log(earliest, latest);
     * ```
     */
    getDateRange() {
        if (this.dateRangeCache) {
            return this.dateRangeCache;
        }
        let earliest = Infinity;
        let latest = -Infinity;
        for (const message of this.messages()) {
            const timestamp = ('sentAt' in message ? message.sentAt : message.date).getTime();
            if (Number.isNaN(timestamp)) {
                continue;
            }
            if (timestamp < earliest) {
                earliest = timestamp;
            }
            if (timestamp > latest) {
                latest = timestamp;
            }
        }
        if (earliest === Infinity) {
            // No usable timestamps: fall back to a single "now" for both bounds.
            const now = Date.now();
            earliest = now;
            latest = now;
        }
        this.dateRangeCache = {
            earliest: new Date(earliest),
            latest: new Date(latest)
        };
        return this.dateRangeCache;
    }
    /**
     * Number of raw messages held by this processor (the `length` of the raw
     * collection), including entries that may later fail to parse.
     *
     * @returns {number} The raw message count.
     */
    get totalMessages() {
        return this.rawMessages.length;
    }
    // =====================================================
    // FACTORY METHODS
    // =====================================================
    /**
     * Create a processor from a JSON file on disk.
     *
     * Reads the file synchronously as UTF-8 and parses it with `JSON.parse`;
     * errors from either step propagate to the caller.
     *
     * @param filePath - Path to Telegram export JSON file
     * @returns New processor instance with immediate metadata access
     */
    static fromFile(filePath) {
        const fileContent = (0, fs_1.readFileSync)(filePath, 'utf-8');
        const data = JSON.parse(fileContent);
        return new TelegramExportProcessor(data);
    }
    /**
     * Create a processor from an already-parsed raw data object.
     *
     * @param data - Raw Telegram export data
     * @returns New processor instance
     */
    static fromData(data) {
        return new TelegramExportProcessor(data);
    }
    /**
     * Create a processor from a JSON string.
     *
     * @param jsonString - JSON string containing export data
     * @returns New processor instance
     * @throws {SyntaxError} If `jsonString` is not valid JSON (from `JSON.parse`).
     */
    static fromString(jsonString) {
        const data = JSON.parse(jsonString);
        return new TelegramExportProcessor(data);
    }
}
exports.TelegramExportProcessor = TelegramExportProcessor;
// =====================================================
// STANDALONE PARSING FUNCTIONS
// =====================================================
/**
 * Build a processor from a Telegram export JSON file on disk.
 *
 * Functional-style shortcut: forwards `filePath` to
 * `TelegramExportProcessor.fromFile()` and hands back whatever it returns.
 *
 * @param filePath - Path to the Telegram export JSON file
 * @returns The processor produced by `TelegramExportProcessor.fromFile()`
 *
 * @example
 * ```typescript
 * const processor = parseFromFile('./export.json')
 * console.log(`Chat: ${processor.conversation.name}`)
 * ```
 */
const parseFromFile = (filePath) => TelegramExportProcessor.fromFile(filePath);
exports.parseFromFile = parseFromFile;
/**
 * Build a processor from an already-parsed export object.
 *
 * Functional-style shortcut: forwards `data` to
 * `TelegramExportProcessor.fromData()` and hands back whatever it returns.
 *
 * @param data - Raw Telegram export data object
 * @returns The processor produced by `TelegramExportProcessor.fromData()`
 *
 * @example
 * ```typescript
 * const processor = parseFromData(rawExportData)
 * for (const message of processor.messages()) {
 *   console.log(message)
 * }
 * ```
 */
const parseFromData = (data) => TelegramExportProcessor.fromData(data);
exports.parseFromData = parseFromData;
/**
 * Build a processor from a JSON string holding export data.
 *
 * Functional-style shortcut: forwards `jsonString` to
 * `TelegramExportProcessor.fromString()` and hands back whatever it returns.
 *
 * @param jsonString - JSON string containing export data
 * @returns The processor produced by `TelegramExportProcessor.fromString()`
 *
 * @example
 * ```typescript
 * const jsonData = fs.readFileSync('export.json', 'utf8')
 * const processor = parseFromString(jsonData)
 * const actors = processor.extractAllActors()
 * ```
 */
const parseFromString = (jsonString) => TelegramExportProcessor.fromString(jsonString);
exports.parseFromString = parseFromString;
//# sourceMappingURL=main.js.map