UNPKG

@stack.thefennec.dev/telegram-export-parser

Version:

TypeScript library for parsing Telegram Desktop's data export with full type safety

587 lines 21.4 kB
"use strict";
/**
 * @fileoverview Base parsing utilities for Telegram export message processing.
 *
 * This module provides the foundational parsing functions that implement a **mixin-based architecture**
 * for building complex message types from simple, composable components. The design allows for
 * flexible message construction by combining base message properties with specialized mixins.
 *
 * ## Mixin Architecture Overview
 *
 * Different message aspects are parsed separately and then combined (via object spread)
 * to create complete message objects:
 *
 * ```
 * BaseMessage (core properties)
 * ├── + MediaWithDimensions (width, height)
 * ├── + MediaWithDuration (audio/video length)
 * ├── + MediaWithThumbnail (preview images)
 * ├── + SelfDestructibleMessage (disappearing messages)
 * └── = Complete Message Type (e.g., VideoMessage)
 * ```
 *
 * ## Message Composition Examples
 *
 * **Video Message Construction:**
 * ```typescript
 * // Start with base message properties
 * const base = parseBaseMessage(rawData)
 * const mediaBase = parseBaseMediaMessage(rawData, base)
 *
 * // Add video-specific mixins
 * const dimensions = parseMediaWithDimensions(rawData)
 * const duration = parseMediaWithDuration(rawData)
 * const thumbnail = parseMediaWithThumbnail(rawData)
 *
 * // Combine into complete VideoMessage
 * const videoMessage = { ...mediaBase, ...dimensions, ...duration, ...thumbnail }
 * ```
 *
 * **Photo Message Construction:**
 * ```typescript
 * const base = parseBaseMessage(rawData)
 * const dimensions = parseMediaWithDimensions(rawData)
 * const selfDestruct = parseSelfDestructibleMessage(rawData)
 *
 * // Photos use different structure than media messages
 * const photoMessage = { ...base, ...dimensions, ...selfDestruct, photoURL, photoFileSize }
 * ```
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseSelfDestructibleMessage = exports.parseMediaWithDuration = exports.parseMediaWithThumbnail = exports.parseMediaWithDimensions = exports.parseBaseMediaMessage = exports.parseBaseEvent = exports.parseBaseMessage = exports.parseInvoiceInfo = exports.parseReaction = exports.parseExportedFile = exports.parseMembers = exports.parseEditedDate = exports.parseDate = void 0;
const path_1 = require("path");
const url_1 = require("url");
const index_1 = require("./index");
const actors_1 = require("./actors");
const types_1 = require("../types");
// =====================================================
// CORE DATA TRANSFORMATION UTILITIES
// =====================================================
/**
 * Transforms Telegram date formats into standardized Date objects.
 *
 * The Unix timestamp is preferred when it is present and parses as an integer;
 * otherwise the human-readable string is handed to the `Date` constructor.
 * Note that a date-time string without a UTC offset is interpreted in the
 * local timezone of the machine running the parser, so the timestamp path is
 * the only timezone-independent one.
 *
 * @param date - Human-readable date string (fallback format)
 * @param dateUnixtime - Unix timestamp in seconds, as a string (preferred)
 * @returns Date built from the timestamp, or from `date` when no usable timestamp is given
 *          (may be an Invalid Date if `date` itself is unparseable)
 *
 * @example
 * ```typescript
 * // Prefer Unix timestamp when available
 * const precise = parseDate('2023-12-01 15:30:00', '1701432600')
 *
 * // Fallback to string parsing
 * const fallback = parseDate('2023-12-01 15:30:00')
 * ```
 */
const parseDate = (date, dateUnixtime) => {
    if (dateUnixtime) {
        const timestamp = Number.parseInt(dateUnixtime, 10);
        if (!Number.isNaN(timestamp)) {
            // Export timestamps are in seconds; Date expects milliseconds.
            return new Date(timestamp * 1000);
        }
    }
    return new Date(date);
};
exports.parseDate = parseDate;
/**
 * Parses message edit timestamps with proper absence handling.
 *
 * Maintains the distinction between "never edited" (undefined) and
 * "edited at a specific time" (Date object). Values that cannot be
 * converted to a finite number are treated as absent rather than being
 * turned into an Invalid Date, so callers can safely truthiness-check
 * the result before formatting it.
 *
 * @param editedUnixtime - Unix timestamp (seconds, number or numeric string) of the last edit
 * @returns Date if a usable timestamp was given, undefined otherwise
 *
 * @example
 * ```typescript
 * const editTime = parseEditedDate(1701432600) // Date object
 * const neverEdited = parseEditedDate(undefined) // undefined
 *
 * if (editTime) {
 *   console.log(`Message edited at ${editTime.toISOString()}`)
 * }
 * ```
 */
const parseEditedDate = (editedUnixtime) => {
    if (!editedUnixtime) {
        return undefined;
    }
    const seconds = Number(editedUnixtime);
    if (!Number.isFinite(seconds)) {
        return undefined;
    }
    return new Date(seconds * 1000);
};
exports.parseEditedDate = parseEditedDate;
/**
 * Transforms member name arrays into Actor objects.
 *
 * Used where only display names are available (e.g. member lists of service
 * events). Each name is passed to `parseMessageSender` without an ID, and
 * `null` results are dropped.
 *
 * @param members - Array of member display names (may be null/undefined)
 * @returns Array of non-null actors; empty array when `members` is falsy
 *
 * @example
 * ```typescript
 * const memberNames = ['John Doe', 'Jane Smith', 'Helper Bot']
 * const actors = parseMembers(memberNames)
 * ```
 */
const parseMembers = (members) => {
    if (!members) {
        return [];
    }
    return members
        .map(member => (0, actors_1.parseMessageSender)(member, undefined))
        .filter((actor) => actor !== null);
};
exports.parseMembers = parseMembers;
/**
 * Substrings of the placeholder texts that appear in a file field when the
 * file itself was not written to the export.
 *
 * NOTE(review): "File not included" was the only marker originally handled.
 * The other two are the size-limit and unavailable placeholders emitted by
 * Telegram Desktop's JSON exporter — confirm against current exports.
 */
const SKIPPED_FILE_MARKERS = [
    'File not included',
    'File exceeds maximum size',
    'File unavailable'
];
/**
 * Transforms file paths into standardized ExportedFile references.
 *
 * **File States:**
 * - **Downloaded**: path present → `file:` URL resolved against `basePath`
 * - **Not Included**: placeholder text instead of a path → null (explicit absence)
 * - **No File**: missing/blank value → undefined (no file field)
 *
 * @param filePath - Raw file path string from export
 * @param basePath - Base directory for resolving relative paths (defaults to `process.cwd()`)
 * @returns URL for a real path, null for a skipped-file placeholder, undefined for no file
 *
 * @example
 * ```typescript
 * // Downloaded file
 * parseExportedFile('photos/image.jpg') // file:///path/to/export/photos/image.jpg
 *
 * // File not included in export
 * parseExportedFile('File not included, change data exporting settings to download.') // null
 *
 * // No file attached
 * parseExportedFile(undefined) // undefined
 * ```
 */
const parseExportedFile = (filePath, basePath = process.cwd()) => {
    const trimmedPath = filePath?.trim();
    if (!trimmedPath) {
        return undefined;
    }
    // Placeholder text is not a path: never turn it into a file URL.
    if (SKIPPED_FILE_MARKERS.some(marker => trimmedPath.includes(marker))) {
        return null;
    }
    const fullPath = (0, path_1.join)(basePath, trimmedPath);
    return (0, url_1.pathToFileURL)(fullPath);
};
exports.parseExportedFile = parseExportedFile;
/**
 * Transforms raw reaction data into structured objects with actor resolution.
 *
 * @param raw - Raw reaction data from Telegram export
 * @returns Reaction with `emoji` (empty string when absent), `documentURL`
 *          (parsed from `document_id`), `count`, and `recent` entries whose
 *          sender is resolved through `parseMessageSender`
 *
 * @example
 * ```typescript
 * const reaction = parseReaction({
 *   emoji: '👍',
 *   count: 5,
 *   recent: [
 *     { from: 'John', from_id: '123', date: '2023-12-01' },
 *     { from: 'Jane', from_id: '456', date: '2023-12-01' }
 *   ]
 * })
 *
 * console.log(`${reaction.emoji}: ${reaction.count} reactions`)
 * reaction.recent.forEach(r => console.log(`${r.sender.displayName} reacted`))
 * ```
 */
const parseReaction = (raw) => ({
    emoji: raw.emoji ?? '',
    documentURL: (0, exports.parseExportedFile)(raw.document_id),
    count: raw.count,
    recent: (raw.recent ?? []).map(recentReaction => ({
        // from_id may be absent; pass undefined instead of throwing on `.toString()`
        // (parseMembers already calls parseMessageSender with an undefined ID).
        sender: (0, actors_1.parseMessageSender)(recentReaction.from, recentReaction.from_id?.toString()),
        date: (0, exports.parseDate)(recentReaction.date)
    }))
});
exports.parseReaction = parseReaction;
/**
 * Parses invoice data with format flexibility and error resilience.
 *
 * Invoice information can appear as JSON strings or pre-parsed objects.
 * Objects are returned as-is; strings are JSON-parsed. When the string is not
 * valid JSON, or parses to something that is not an object (e.g. `"123"`,
 * `"null"`), a placeholder invoice carrying the original value in
 * `description` is returned instead.
 *
 * @param invoiceStr - Invoice data as a JSON string or an already-parsed object
 * @returns Parsed InvoiceInfo, a fallback invoice for unparseable input,
 *          or undefined when no invoice data is given
 *
 * @example
 * ```typescript
 * // JSON string format
 * const invoice1 = parseInvoiceInfo('{"title":"Premium","amount":999,"currency":"USD"}')
 *
 * // Object format
 * const invoice2 = parseInvoiceInfo({ title: 'Service', amount: 1500, currency: 'EUR' })
 *
 * // Invalid data - graceful fallback
 * const invoice3 = parseInvoiceInfo('invalid json')
 * // Returns fallback invoice with description set to original string
 * ```
 */
const parseInvoiceInfo = (invoiceStr) => {
    if (!invoiceStr) {
        return undefined;
    }
    // If already an object, return as-is
    if (typeof invoiceStr === 'object') {
        return invoiceStr;
    }
    const fallback = {
        title: 'Unknown Invoice',
        description: invoiceStr,
        currency: 'USD',
        amount: 0
    };
    try {
        const parsed = JSON.parse(invoiceStr);
        // Valid JSON that is not an object (number, string, null, ...) is not an invoice.
        return parsed !== null && typeof parsed === 'object' ? parsed : fallback;
    }
    catch {
        // Deliberate best-effort: malformed JSON degrades to the fallback invoice.
        return fallback;
    }
};
exports.parseInvoiceInfo = parseInvoiceInfo;
// =====================================================
// BASE MESSAGE & EVENT CONSTRUCTORS
// =====================================================
/**
 * **Core Foundation Parser** - Constructs BaseMessage with all universal properties.
 *
 * This is the primary building block for all user messages in the mixin architecture.
 *
 * **Parsed Components:**
 * - **Identity**: Message ID (as BigInt), type, sent timestamp
 * - **Actors**: Sender, forwarded/saved sources, via bot
 * - **Content**: Text entities
 * - **Interactions**: Reactions, reply references, inline buttons
 * - **Metadata**: Edited flag and edit timestamp
 *
 * The sent timestamp prefers `raw.date_unixtime` over `raw.date`, matching
 * `parseBaseEvent`, so that it does not depend on the local timezone of the
 * machine running the parser.
 *
 * **Mixin Usage Pattern:**
 * ```typescript
 * // Start with base properties
 * const base = parseBaseMessage(rawData)
 *
 * // Add type-specific mixins
 * const withMedia = parseBaseMediaMessage(rawData, base)
 * const withDimensions = parseMediaWithDimensions(rawData)
 *
 * // Combine into complete message type
 * const videoMessage: VideoMessage = { ...withMedia, ...withDimensions, mediaType: 'video_file' }
 * ```
 *
 * @param raw - Complete raw message data from Telegram export
 * @returns BaseMessage with all fundamental properties parsed and actors resolved
 * @throws If `raw.id` cannot be converted by `BigInt()`
 *
 * @example
 * ```typescript
 * const base = parseBaseMessage({
 *   id: 12345,
 *   from: 'John Doe',
 *   from_id: 'user987654321',
 *   date: '2023-12-01 15:30:00',
 *   text_entities: [{ type: 'plain', text: 'Hello world!' }],
 *   reactions: [{ emoji: '👍', count: 5 }]
 * })
 *
 * console.log(`Message ${base.id} from ${base.sender.displayName}`)
 * console.log(`Reactions: ${base.reactions.length}`)
 * ```
 */
const parseBaseMessage = (raw) => {
    // Parse all possible actors from the raw message
    const sender = (0, actors_1.parseMessageSender)(raw.from, raw.from_id);
    // Optional actors: normalize a null parse result to undefined.
    const forwardedFrom = (0, actors_1.parseMessageSender)(raw.forwarded_from, undefined) ?? undefined;
    const savedFrom = (0, actors_1.parseMessageSender)(raw.saved_from, undefined) ?? undefined;
    const viaBot = (0, actors_1.parseViaBot)(raw.via_bot) ?? undefined;
    return {
        id: BigInt(raw.id),
        type: types_1.MESSAGE_TYPES.MESSAGE,
        sentAt: (0, exports.parseDate)(raw.date, raw.date_unixtime),
        sender,
        textEntities: (0, index_1.parseTextEntities)(raw.text_entities),
        forwardedFrom,
        savedFrom,
        replyToMessageId: raw.reply_to_message_id, // TODO: new type here, message reference or something
        replyToPeerId: raw.reply_to_peer_id ? parseInt(raw.reply_to_peer_id, 10) : undefined,
        edited: raw.edited ?? false,
        editedAt: (0, exports.parseEditedDate)(raw.edited_unixtime),
        reactions: raw.reactions?.map(exports.parseReaction) ?? [],
        inlineBotButtons: raw.inline_bot_buttons,
        viaBot
    };
};
exports.parseBaseMessage = parseBaseMessage;
/**
 * **Service Event Foundation Parser** - Constructs BaseEvent for system messages.
 *
 * Service events represent system-generated messages (calls, joins, pins, etc.)
 * rather than user content.
 *
 * **Key Differences from BaseMessage:**
 * - Uses service actor parsing (`actor` / `actor_id` fields)
 * - Simpler property set (no replies, reactions, etc.)
 * - `id` is passed through unchanged (not converted to BigInt)
 * - Timestamp is exposed as `date` rather than `sentAt`
 *
 * @param raw - Raw service message data from Telegram export
 * @returns BaseEvent with core service message properties
 *
 * @example
 * ```typescript
 * const event = parseBaseEvent({
 *   id: 12346,
 *   actor: 'Admin User',
 *   actor_id: 'user123456789',
 *   action: 'invite_members',
 *   date: '2023-12-01 15:45:00'
 * })
 *
 * console.log(`${event.actor.displayName} performed action at ${event.date}`)
 * ```
 */
const parseBaseEvent = (raw) => {
    return {
        id: raw.id,
        type: types_1.MESSAGE_TYPES.SERVICE,
        date: (0, exports.parseDate)(raw.date, raw.date_unixtime),
        actor: (0, actors_1.parseServiceActor)(raw.actor, raw.actor_id),
        textEntities: (0, index_1.parseTextEntities)(raw.text_entities)
    };
};
exports.parseBaseEvent = parseBaseEvent;
// =====================================================
// MEDIA MESSAGE MIXINS
// =====================================================
/**
 * **Media Foundation Mixin** - Extends BaseMessage with file attachment properties.
 *
 * Used as the foundation for message types that contain downloadable files
 * (videos, audio, documents, stickers, animations).
 *
 * **Added Properties:**
 * - **fileURL**: Parsed file reference (see `parseExportedFile`)
 * - **fileName**: Taken from `raw.file_name`
 * - **fileSize**: Taken from `raw.file_size`
 * - **mimeType**: Taken from `raw.mime_type`
 *
 * **Mixin Combination Pattern:**
 * ```typescript
 * const base = parseBaseMessage(raw)              // Core message properties
 * const media = parseBaseMediaMessage(raw, base)  // + File properties
 * const duration = parseMediaWithDuration(raw)    // + Duration for video/audio
 *
 * const videoMessage = { ...media, ...duration, mediaType: 'video_file' }
 * ```
 *
 * @param raw - Raw message data containing file information
 * @param base - BaseMessage to extend with media properties (copied, not mutated)
 * @returns BaseMediaMessage combining message and file properties
 *
 * @example
 * ```typescript
 * const baseMsg = parseBaseMessage(rawData)
 * const mediaMsg = parseBaseMediaMessage(rawData, baseMsg)
 *
 * if (mediaMsg.fileURL) {
 *   console.log(`File: ${mediaMsg.fileName} (${mediaMsg.fileSize} bytes)`)
 * }
 * ```
 */
const parseBaseMediaMessage = (raw, base) => ({
    ...base,
    fileURL: (0, exports.parseExportedFile)(raw.file),
    fileName: raw.file_name,
    fileSize: raw.file_size,
    mimeType: raw.mime_type
});
exports.parseBaseMediaMessage = parseBaseMediaMessage;
// =====================================================
// SPECIALIZED MEDIA PROPERTY MIXINS
// =====================================================
/**
 * **Visual Dimensions Mixin** - Adds width/height properties for visual media.
 *
 * **Compatible Message Types:**
 * - PhotoMessage (image dimensions)
 * - VideoMessage (video resolution)
 * - AnimationMessage (GIF/animation size)
 * - StickerMessage (sticker dimensions)
 * - DocumentMessage (when document is an image)
 *
 * **Mixin Application:**
 * ```typescript
 * // For photos (direct on BaseMessage)
 * const photo = { ...parseBaseMessage(raw), ...parseMediaWithDimensions(raw) }
 *
 * // For videos (combined with media base)
 * const video = {
 *   ...parseBaseMediaMessage(raw, base),
 *   ...parseMediaWithDimensions(raw),
 *   ...parseMediaWithDuration(raw)
 * }
 * ```
 *
 * @param raw - Raw message data with width/height fields
 * @returns MediaWithDimensions; `width`/`height` are passed through unchanged
 *          and are undefined when absent from `raw`
 *
 * @example
 * ```typescript
 * const dimensions = parseMediaWithDimensions(raw)
 *
 * if (dimensions.width && dimensions.height) {
 *   const aspectRatio = dimensions.width / dimensions.height
 *   console.log(`Media: ${dimensions.width}x${dimensions.height} (${aspectRatio.toFixed(2)}:1)`)
 * }
 * ```
 */
const parseMediaWithDimensions = (raw) => ({
    height: raw.height,
    width: raw.width
});
exports.parseMediaWithDimensions = parseMediaWithDimensions;
/**
 * **Thumbnail Preview Mixin** - Adds thumbnail properties for media with preview images.
 *
 * **Compatible Message Types:**
 * - VideoMessage (video preview frame)
 * - AnimationMessage (first frame preview)
 * - DocumentMessage (document preview)
 * - MusicMessage (album art thumbnail)
 * - VideoNoteMessage (circular video preview)
 *
 * @param raw - Raw message data with thumbnail fields
 * @returns MediaWithThumbnail with `thumbnailURL` (see `parseExportedFile` for the
 *          URL / null / undefined states) and `thumbnailFileSize`
 *
 * @example
 * ```typescript
 * const thumbnail = parseMediaWithThumbnail(raw)
 *
 * // Show thumbnail while loading full video
 * if (thumbnail.thumbnailURL) {
 *   console.log(`Thumbnail: ${thumbnail.thumbnailURL}`)
 *   console.log(`Thumbnail size: ${thumbnail.thumbnailFileSize} bytes`)
 * }
 * ```
 */
const parseMediaWithThumbnail = (raw) => ({
    thumbnailURL: (0, exports.parseExportedFile)(raw.thumbnail),
    thumbnailFileSize: raw.thumbnail_file_size
});
exports.parseMediaWithThumbnail = parseMediaWithThumbnail;
/**
 * **Duration Timing Mixin** - Adds playback duration for time-based media.
 *
 * **Compatible Message Types:**
 * - VideoMessage (video length)
 * - VideoNoteMessage (video note duration)
 * - MusicMessage (song/track length)
 * - VoiceNoteMessage (voice recording length)
 * - AnimationMessage (GIF/animation duration)
 * - StickerMessage (animated sticker length)
 *
 * @param raw - Raw message data with duration_seconds field
 * @returns MediaWithDuration; `durationSeconds` defaults to 0 when the field is
 *          null or undefined
 *
 * @example
 * ```typescript
 * const duration = parseMediaWithDuration(raw)
 *
 * const minutes = Math.floor(duration.durationSeconds / 60)
 * const seconds = duration.durationSeconds % 60
 * console.log(`Duration: ${minutes}:${seconds.toString().padStart(2, '0')}`)
 *
 * // Use for media controls
 * if (duration.durationSeconds > 0) {
 *   setupProgressBar(duration.durationSeconds)
 * }
 * ```
 */
const parseMediaWithDuration = (raw) => ({
    durationSeconds: raw.duration_seconds ?? 0
});
exports.parseMediaWithDuration = parseMediaWithDuration;
/**
 * **Self-Destruct Timer Mixin** - Adds disappearing message timer information.
 *
 * **Compatible Message Types:**
 * - PhotoMessage (disappearing photos)
 * - DocumentMessage (disappearing files)
 * - VideoMessage (disappearing videos - rare)
 *
 * **Timer Behavior:**
 * - undefined: Permanent message (never disappears)
 * - number: Seconds until auto-deletion after first view
 *
 * @param raw - Raw message data with self_destruct_period_seconds field
 * @returns SelfDestructibleMessage; the timer value is passed through unchanged
 *
 * @example
 * ```typescript
 * const selfDestruct = parseSelfDestructibleMessage(raw)
 *
 * if (selfDestruct.selfDestructPeriodSeconds) {
 *   console.log(`⏰ Disappears in ${selfDestruct.selfDestructPeriodSeconds} seconds`)
 * } else {
 *   console.log('📌 Permanent message')
 * }
 * ```
 */
const parseSelfDestructibleMessage = (raw) => ({
    selfDestructPeriodSeconds: raw.self_destruct_period_seconds
});
exports.parseSelfDestructibleMessage = parseSelfDestructibleMessage;
//# sourceMappingURL=base.js.map