// @stack.thefennec.dev/telegram-export-parser
// Version: (not captured)
// TypeScript library for parsing Telegram Desktop's data export with full type safety
// 587 lines • 21.4 kB
// JavaScript
;
/**
* @fileoverview Base parsing utilities for Telegram export message processing.
*
* This module provides the foundational parsing functions that implement a **mixin-based architecture**
* for building complex message types from simple, composable components. The design allows for
* flexible message construction by combining base message properties with specialized mixins.
*
* ## Mixin Architecture Overview
*
* The parser uses a sophisticated mixin system where different message aspects are parsed
* separately and then combined to create complete message objects:
*
* ```
* BaseMessage (core properties)
* ├── + MediaWithDimensions (width, height)
* ├── + MediaWithDuration (audio/video length)
* ├── + MediaWithThumbnail (preview images)
* ├── + SelfDestructibleMessage (disappearing messages)
* └── = Complete Message Type (e.g., VideoMessage)
* ```
*
* ## Message Composition Examples
*
* **Video Message Construction:**
* ```typescript
* // Start with base message properties
* const base = parseBaseMessage(rawData)
* const mediaBase = parseBaseMediaMessage(rawData, base)
*
* // Add video-specific mixins
* const dimensions = parseMediaWithDimensions(rawData)
* const duration = parseMediaWithDuration(rawData)
* const thumbnail = parseMediaWithThumbnail(rawData)
*
* // Combine into complete VideoMessage
* const videoMessage = { ...mediaBase, ...dimensions, ...duration, ...thumbnail }
* ```
*
* **Photo Message Construction:**
* ```typescript
* const base = parseBaseMessage(rawData)
* const dimensions = parseMediaWithDimensions(rawData)
* const selfDestruct = parseSelfDestructibleMessage(rawData)
*
* // Photos use different structure than media messages
* const photoMessage = { ...base, ...dimensions, ...selfDestruct, photoURL, photoFileSize }
* ```
*
* This approach provides maximum flexibility while maintaining type safety and code reuse.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseSelfDestructibleMessage = exports.parseMediaWithDuration = exports.parseMediaWithThumbnail = exports.parseMediaWithDimensions = exports.parseBaseMediaMessage = exports.parseBaseEvent = exports.parseBaseMessage = exports.parseInvoiceInfo = exports.parseReaction = exports.parseExportedFile = exports.parseMembers = exports.parseEditedDate = exports.parseDate = void 0;
const path_1 = require("path");
const url_1 = require("url");
const index_1 = require("./index");
const actors_1 = require("./actors");
const types_1 = require("../types");
// =====================================================
// CORE DATA TRANSFORMATION UTILITIES
// =====================================================
/**
 * Builds a Date from the two date representations carried by an export record.
 *
 * The Unix timestamp wins whenever it is supplied and `parseInt` can read a
 * number from it; in every other case the human-readable string is handed to
 * the `Date` constructor unchanged (which may produce an Invalid Date).
 *
 * @param date - Date string used when no usable timestamp is available
 * @param dateUnixtime - Optional Unix timestamp in seconds, as a string
 * @returns Date derived from the timestamp, otherwise from `date`
 *
 * @example
 * ```typescript
 * parseDate('2023-12-01 15:30:00', '1701432600') // from the timestamp
 * parseDate('2023-12-01 15:30:00')               // from the string
 * ```
 */
const parseDate = (date, dateUnixtime) => {
    const seconds = dateUnixtime ? Number.parseInt(dateUnixtime, 10) : Number.NaN;
    // NaN means "no timestamp" or "timestamp not numeric": use the string form.
    return Number.isNaN(seconds) ? new Date(date) : new Date(seconds * 1000);
};
exports.parseDate = parseDate;
/**
 * Converts a message's edit timestamp into a Date, or `undefined` when there is none.
 *
 * A falsy input (undefined, null, 0, '') means "never edited". A value that
 * cannot be turned into a valid date (for example a non-numeric string) is
 * also reported as `undefined`, so callers never receive an Invalid Date that
 * would pass a truthiness check and then fail on use.
 *
 * @param editedUnixtime - Unix timestamp in seconds (number or numeric string)
 * @returns Date of the last edit, or undefined if absent or unusable
 *
 * @example
 * ```typescript
 * const editTime = parseEditedDate(1701432600)  // Date object
 * const neverEdited = parseEditedDate(undefined) // undefined
 * const malformed = parseEditedDate('n/a')       // undefined
 * ```
 */
const parseEditedDate = (editedUnixtime) => {
    if (!editedUnixtime)
        return undefined;
    const editedAt = new Date(Number(editedUnixtime) * 1000);
    // getTime() is NaN for an Invalid Date (non-numeric or out-of-range input).
    return Number.isNaN(editedAt.getTime()) ? undefined : editedAt;
};
exports.parseEditedDate = parseEditedDate;
/**
 * Turns a list of member display names into actor objects.
 *
 * Each name is passed to `parseMessageSender` with no id (only the display
 * name is available for these entries); names for which it returns `null`
 * are left out of the result.
 *
 * @param members - Array of member display names; a falsy value yields `[]`
 * @returns Array of the non-null actors, in input order
 *
 * @example
 * ```typescript
 * const actors = parseMembers(['John Doe', 'Jane Smith', 'Helper Bot'])
 * ```
 */
const parseMembers = (members) => {
    if (!members)
        return [];
    return members.reduce((actors, memberName) => {
        const actor = (0, actors_1.parseMessageSender)(memberName, undefined);
        if (actor !== null) {
            actors.push(actor);
        }
        return actors;
    }, []);
};
exports.parseMembers = parseMembers;
// Placeholder texts that Telegram Desktop's JSON exporter writes in place of a
// path when it skipped the file (type excluded, size limit, download failure).
const SKIPPED_FILE_MARKERS = [
    'File not included',
    'File exceeds maximum size',
    'File unavailable',
];
/**
 * Transforms a file path from the export into a file reference.
 *
 * **Results:**
 * - **URL**: a path was given → `file:` URL of `basePath` joined with the trimmed path
 * - **null**: the field holds a "file was skipped" placeholder (see `SKIPPED_FILE_MARKERS`)
 * - **undefined**: nothing usable was given (missing, blank, or not a string)
 *
 * @param filePath - Raw file path string from the export
 * @param basePath - Directory the path is resolved against (defaults to `process.cwd()`)
 * @returns URL, null or undefined as described above
 *
 * @example
 * ```typescript
 * parseExportedFile('photos/image.jpg') // file:///<cwd>/photos/image.jpg
 * parseExportedFile('(File not included. Change data exporting settings to download.)') // null
 * parseExportedFile(undefined) // undefined
 * ```
 */
const parseExportedFile = (filePath, basePath = process.cwd()) => {
    // Anything that is not a string cannot be a path; calling .trim() on it would throw.
    if (typeof filePath !== 'string')
        return undefined;
    const trimmedPath = filePath.trim();
    if (!trimmedPath)
        return undefined;
    if (SKIPPED_FILE_MARKERS.some(marker => trimmedPath.includes(marker))) {
        return null;
    }
    const fullPath = (0, path_1.join)(basePath, trimmedPath);
    return (0, url_1.pathToFileURL)(fullPath);
};
exports.parseExportedFile = parseExportedFile;
/**
 * Transforms one raw reaction record into a structured reaction object.
 *
 * - `emoji` falls back to an empty string when the record has none.
 * - `documentURL` is `raw.document_id` run through `parseExportedFile`.
 * - `count` is copied as-is.
 * - `recent` maps every recent entry to `{ sender, date }`; a missing list
 *   becomes an empty array.
 *
 * A recent entry without `from_id` is still parsed (the sender is resolved
 * with an undefined id) instead of throwing and aborting the whole record.
 *
 * @param raw - Raw reaction data from the export
 * @returns Reaction object with resolved senders and parsed dates
 *
 * @example
 * ```typescript
 * const reaction = parseReaction({
 *   emoji: '👍',
 *   count: 5,
 *   recent: [{ from: 'John', from_id: '123', date: '2023-12-01' }]
 * })
 * console.log(`${reaction.emoji}: ${reaction.count} reactions`)
 * ```
 */
const parseReaction = (raw) => ({
    emoji: raw.emoji ?? '',
    documentURL: (0, exports.parseExportedFile)(raw.document_id),
    count: raw.count,
    recent: (raw.recent ?? []).map(recentReaction => ({
        // from_id may be absent on malformed entries; pass undefined rather than throw.
        sender: (0, actors_1.parseMessageSender)(recentReaction.from, recentReaction.from_id?.toString()),
        date: (0, exports.parseDate)(recentReaction.date)
    }))
});
exports.parseReaction = parseReaction;
/**
 * Parses invoice data that may arrive as a JSON string or as an object.
 *
 * - Falsy input → `undefined` (no invoice).
 * - Object input → returned unchanged.
 * - String holding a JSON object → the parsed object.
 * - Anything else (malformed JSON, or valid JSON that is not an object such
 *   as `'42'`, `'null'` or `'[]'`) → a placeholder invoice whose
 *   `description` carries the original input.
 *
 * @param invoiceStr - Invoice data as JSON text or an already-parsed object
 * @returns Invoice object, placeholder invoice, or undefined
 *
 * @example
 * ```typescript
 * parseInvoiceInfo('{"title":"Premium","amount":999,"currency":"USD"}')
 * parseInvoiceInfo({ title: 'Service', amount: 1500, currency: 'EUR' })
 * parseInvoiceInfo('invalid json') // placeholder with description 'invalid json'
 * ```
 */
const parseInvoiceInfo = (invoiceStr) => {
    if (!invoiceStr)
        return undefined;
    // If already an object, return as-is
    if (typeof invoiceStr === 'object')
        return invoiceStr;
    const fallback = {
        title: 'Unknown Invoice',
        description: invoiceStr,
        currency: 'USD',
        amount: 0
    };
    try {
        const parsed = JSON.parse(invoiceStr);
        // JSON.parse also accepts bare primitives and arrays; those are not invoices.
        const isRecord = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
        return isRecord ? parsed : fallback;
    }
    catch {
        // Deliberate best-effort: keep the unparseable text instead of failing the message.
        return fallback;
    }
};
exports.parseInvoiceInfo = parseInvoiceInfo;
// =====================================================
// BASE MESSAGE & EVENT CONSTRUCTORS
// =====================================================
/**
 * **Core Foundation Parser** - Builds the properties shared by every user message.
 *
 * This is the starting point of the mixin pattern: the returned object is
 * later spread together with media/dimension/duration mixins.
 *
 * **Parsed Components:**
 * - **Identity**: `id` (as BigInt), `type`, `sentAt`
 * - **Actors**: sender, forwarded/saved sources, via bot
 * - **Content**: text entities
 * - **Interactions**: reactions, reply references, inline bot buttons
 * - **Metadata**: `edited`, `editedAt`
 *
 * `sentAt` prefers `raw.date_unixtime` and only falls back to the `raw.date`
 * string when no usable timestamp is present, in line with `parseDate`'s
 * contract and with how `parseBaseEvent` parses its date.
 *
 * @param raw - Raw message record from the export
 * @returns Object with the base message properties
 *
 * @example
 * ```typescript
 * const base = parseBaseMessage({
 *   id: 12345,
 *   from: 'John Doe',
 *   from_id: 'user987654321',
 *   date: '2023-12-01 15:30:00',
 *   date_unixtime: '1701432600',
 *   text_entities: [{ type: 'plain', text: 'Hello world!' }],
 *   reactions: [{ emoji: '👍', count: 5 }]
 * })
 *
 * console.log(`Reactions: ${base.reactions.length}`)
 * ```
 */
const parseBaseMessage = (raw) => {
    // Resolve every actor the record may mention; "absent" is normalised to undefined.
    const sender = (0, actors_1.parseMessageSender)(raw.from, raw.from_id);
    const forwardedFrom = (0, actors_1.parseMessageSender)(raw.forwarded_from, undefined) ?? undefined;
    const savedFrom = (0, actors_1.parseMessageSender)(raw.saved_from, undefined) ?? undefined;
    const viaBot = (0, actors_1.parseViaBot)(raw.via_bot) ?? undefined;
    return {
        // NOTE(review): parseBaseEvent keeps the raw id while this converts to BigInt — confirm intended.
        id: BigInt(raw.id),
        type: types_1.MESSAGE_TYPES.MESSAGE,
        sentAt: (0, exports.parseDate)(raw.date, raw.date_unixtime),
        sender,
        textEntities: (0, index_1.parseTextEntities)(raw.text_entities),
        forwardedFrom,
        savedFrom,
        replyToMessageId: raw.reply_to_message_id, // TODO: new type here, message reference or something
        // NOTE(review): parseInt yields NaN if the peer id carries a non-numeric prefix — confirm export format.
        replyToPeerId: raw.reply_to_peer_id ? parseInt(raw.reply_to_peer_id, 10) : undefined,
        edited: raw.edited ?? false,
        editedAt: (0, exports.parseEditedDate)(raw.edited_unixtime),
        reactions: raw.reactions?.map(exports.parseReaction) ?? [],
        inlineBotButtons: raw.inline_bot_buttons,
        viaBot
    };
};
exports.parseBaseMessage = parseBaseMessage;
/**
 * **Service Event Foundation Parser** - Builds the properties shared by service events.
 *
 * Service records describe system-generated entries (calls, joins, pins, etc.)
 * rather than user content, so the result is smaller than a base message:
 * - `id` is copied from the record unchanged
 * - `type` is `MESSAGE_TYPES.SERVICE`
 * - `date` is parsed from `date` / `date_unixtime`
 * - `actor` is resolved from the `actor` / `actor_id` fields
 * - `textEntities` are parsed from `text_entities`
 *
 * @param raw - Raw service record from the export
 * @returns Object with the base event properties
 *
 * @example
 * ```typescript
 * const event = parseBaseEvent({
 *   id: 12346,
 *   actor: 'Admin User',
 *   actor_id: 'user123456789',
 *   action: 'invite_members',
 *   date: '2023-12-01 15:45:00'
 * })
 * ```
 */
const parseBaseEvent = (raw) => {
    const occurredAt = (0, exports.parseDate)(raw.date, raw.date_unixtime);
    const performer = (0, actors_1.parseServiceActor)(raw.actor, raw.actor_id);
    const entities = (0, index_1.parseTextEntities)(raw.text_entities);
    return {
        id: raw.id,
        type: types_1.MESSAGE_TYPES.SERVICE,
        date: occurredAt,
        actor: performer,
        textEntities: entities
    };
};
exports.parseBaseEvent = parseBaseEvent;
// =====================================================
// MEDIA MESSAGE MIXINS
// =====================================================
/**
 * **Media Foundation Mixin** - Extends a base message with file attachment properties.
 *
 * Returns a new object holding every property of `base` followed by:
 * - **fileURL**: `raw.file` run through `parseExportedFile`
 * - **fileName**: `raw.file_name`
 * - **fileSize**: `raw.file_size`
 * - **mimeType**: `raw.mime_type`
 *
 * File properties win if `base` already defines one of these keys; `base`
 * itself is not modified.
 *
 * @param raw - Raw message record containing the file fields
 * @param base - Base message to extend
 * @returns New object combining `base` with the file properties
 *
 * @example
 * ```typescript
 * const base = parseBaseMessage(raw)
 * const media = parseBaseMediaMessage(raw, base)
 * const videoMessage = { ...media, ...parseMediaWithDuration(raw), mediaType: 'video_file' }
 * ```
 */
const parseBaseMediaMessage = (raw, base) => {
    const fileProperties = {
        fileURL: (0, exports.parseExportedFile)(raw.file),
        fileName: raw.file_name,
        fileSize: raw.file_size,
        mimeType: raw.mime_type
    };
    return { ...base, ...fileProperties };
};
exports.parseBaseMediaMessage = parseBaseMediaMessage;
// =====================================================
// SPECIALIZED MEDIA PROPERTY MIXINS
// =====================================================
/**
 * **Visual Dimensions Mixin** - Picks the `height` and `width` of visual media.
 *
 * Both values are copied from the raw record unchanged (so either may be
 * `undefined` when the record lacks it). Intended to be spread into photo,
 * video, animation, sticker and image-document messages.
 *
 * @param raw - Raw message record with `width` / `height` fields
 * @returns Object of the shape `{ height, width }`
 *
 * @example
 * ```typescript
 * const dimensions = parseMediaWithDimensions(raw)
 * if (dimensions.width && dimensions.height) {
 *   const aspectRatio = dimensions.width / dimensions.height
 * }
 * ```
 */
const parseMediaWithDimensions = (raw) => {
    const { height, width } = raw;
    return { height, width };
};
exports.parseMediaWithDimensions = parseMediaWithDimensions;
/**
 * **Thumbnail Preview Mixin** - Picks the preview-image properties of a media record.
 *
 * - `thumbnailURL` is `raw.thumbnail` run through `parseExportedFile`
 * - `thumbnailFileSize` is `raw.thumbnail_file_size`, copied unchanged
 *
 * Intended to be spread into video, animation, document, music and
 * video-note messages.
 *
 * @param raw - Raw message record with thumbnail fields
 * @returns Object of the shape `{ thumbnailURL, thumbnailFileSize }`
 *
 * @example
 * ```typescript
 * const thumbnail = parseMediaWithThumbnail(raw)
 * if (thumbnail.thumbnailURL) {
 *   console.log(`Thumbnail size: ${thumbnail.thumbnailFileSize} bytes`)
 * }
 * ```
 */
const parseMediaWithThumbnail = (raw) => {
    const { thumbnail, thumbnail_file_size: thumbnailFileSize } = raw;
    const thumbnailURL = (0, exports.parseExportedFile)(thumbnail);
    return { thumbnailURL, thumbnailFileSize };
};
exports.parseMediaWithThumbnail = parseMediaWithThumbnail;
/**
 * **Duration Timing Mixin** - Picks the playback length of time-based media.
 *
 * `durationSeconds` is `raw.duration_seconds`, or `0` when that field is
 * `null` or `undefined`. Intended to be spread into video, video-note,
 * music, voice-note, animation and animated-sticker messages.
 *
 * @param raw - Raw message record with a `duration_seconds` field
 * @returns Object of the shape `{ durationSeconds }`
 *
 * @example
 * ```typescript
 * const duration = parseMediaWithDuration(raw)
 * const minutes = Math.floor(duration.durationSeconds / 60)
 * const seconds = duration.durationSeconds % 60
 * console.log(`Duration: ${minutes}:${seconds.toString().padStart(2, '0')}`)
 * ```
 */
const parseMediaWithDuration = (raw) => {
    const { duration_seconds: reported } = raw;
    // Only a missing value is defaulted; an explicit 0 (or any other value) is kept.
    const durationSeconds = reported === undefined || reported === null ? 0 : reported;
    return { durationSeconds };
};
exports.parseMediaWithDuration = parseMediaWithDuration;
/**
 * **Self-Destruct Timer Mixin** - Picks the disappearing-message timer of a record.
 *
 * `selfDestructPeriodSeconds` is `raw.self_destruct_period_seconds`, copied
 * unchanged:
 * - undefined: the record carries no timer
 * - number: timer length in seconds
 *
 * Intended to be spread into photo, document and video messages.
 *
 * @param raw - Raw message record with a `self_destruct_period_seconds` field
 * @returns Object of the shape `{ selfDestructPeriodSeconds }`
 *
 * @example
 * ```typescript
 * const selfDestruct = parseSelfDestructibleMessage(raw)
 * if (selfDestruct.selfDestructPeriodSeconds) {
 *   console.log(`Disappears in ${selfDestruct.selfDestructPeriodSeconds} seconds`)
 * }
 * ```
 */
const parseSelfDestructibleMessage = (raw) => {
    const { self_destruct_period_seconds: selfDestructPeriodSeconds } = raw;
    return { selfDestructPeriodSeconds };
};
exports.parseSelfDestructibleMessage = parseSelfDestructibleMessage;
//# sourceMappingURL=base.js.map