UNPKG

@stack.thefennec.dev/telegram-export-parser

Version:

TypeScript library for parsing Telegram Desktop's data export with full type safety

297 lines 11.9 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.processTextEntities = exports.cleanupMarkdown = exports.ENTITY_PARSERS = void 0; const types_1 = require("../types"); const parser_1 = require("../core/parser"); const base_1 = require("./base"); const actors_1 = require("./actors"); // ===================================================== // UTILITIES - IMMUTABLE PURE FUNCTIONS // ===================================================== const getUrlForType = (type, text) => { switch (type) { case types_1.TEXT_ENTITY_TYPES.HASHTAG: case types_1.TEXT_ENTITY_TYPES.CASHTAG: case types_1.TEXT_ENTITY_TYPES.BOT_COMMAND: return `#${text}`; case types_1.TEXT_ENTITY_TYPES.EMAIL: return `mailto:${text}`; case types_1.TEXT_ENTITY_TYPES.PHONE: return `tel:${text.replace(/\s+/g, '')}`; case types_1.TEXT_ENTITY_TYPES.MENTION: return `https://t.me/${text}`; case types_1.TEXT_ENTITY_TYPES.MENTION_NAME: return `tg://user?id=${text}`; case types_1.TEXT_ENTITY_TYPES.LINK: default: return text; } }; const escapeHtml = (text) => text .replace(/&/g, '&amp;') .replace(/</g, '&lt;') .replace(/>/g, '&gt;') .replace(/"/g, '&quot;') .replace(/'/g, '&#39;'); const createBaseEntity = (raw, converter) => ({ text: raw.text, toMarkdown: () => converter.toMarkdown(raw), toHTML: () => converter.toHTML(raw) }); // ===================================================== // TEXT ENTITY PARSERS // ===================================================== const plainTextEntityParser = (0, parser_1.createParser)({ name: 'plain-text-entity', priority: 10, // Lowest priority - fallback canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.PLAIN, parse: (raw) => createBaseEntity(raw, { toMarkdown: (entity) => entity.text, toHTML: (entity) => escapeHtml(entity.text) }) }); const boldTextEntityParser = (0, parser_1.createParser)({ name: 'bold-text-entity', priority: 80, canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.BOLD, parse: (raw) => ({ ...createBaseEntity(raw, { toMarkdown: (entity) => `**${entity.text}**`, toHTML: (entity) => `<strong>${escapeHtml(entity.text)}</strong>` }), type: types_1.TEXT_ENTITY_TYPES.BOLD }) }); const italicTextEntityParser = (0, parser_1.createParser)({ name: 'italic-text-entity', priority: 75, canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.ITALIC, parse: (raw) => ({ ...createBaseEntity(raw, { toMarkdown: (entity) => `*${entity.text}*`, toHTML: (entity) => `<em>${escapeHtml(entity.text)}</em>` }), type: types_1.TEXT_ENTITY_TYPES.ITALIC }) }); const underlineTextEntityParser = (0, parser_1.createParser)({ name: 'underline-text-entity', priority: 70, canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.UNDERLINE, parse: (raw) => ({ ...createBaseEntity(raw, { toMarkdown: (entity) => `__${entity.text}__`, toHTML: (entity) => `<u>${escapeHtml(entity.text)}</u>` }), type: types_1.TEXT_ENTITY_TYPES.UNDERLINE }) }); const strikethroughTextEntityParser = (0, parser_1.createParser)({ name: 'strikethrough-text-entity', priority: 65, canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.STRIKETHROUGH, parse: (raw) => ({ ...createBaseEntity(raw, { toMarkdown: (entity) => `~~${entity.text}~~`, toHTML: (entity) => `<del>${escapeHtml(entity.text)}</del>` }), type: types_1.TEXT_ENTITY_TYPES.STRIKETHROUGH }) }); const spoilerTextEntityParser = (0, parser_1.createParser)({ name: 'spoiler-text-entity', priority: 60, canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.SPOILER, parse: (raw) => createBaseEntity(raw, { toMarkdown: (entity) => `||${entity.text}||`, toHTML: (entity) => `<span class="spoiler">${escapeHtml(entity.text)}</span>` }) }); const codeTextEntityParser = (0, parser_1.createParser)({ name: 'code-text-entity', priority: 85, canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.CODE, parse: (raw) => ({ ...createBaseEntity(raw, { toMarkdown: (entity) => `\`${entity.text}\``, toHTML: (entity) => `<code>${escapeHtml(entity.text)}</code>` }), type: types_1.TEXT_ENTITY_TYPES.CODE, language: raw.language }) }); const preTextEntityParser = (0, parser_1.createParser)({ name: 'pre-text-entity', priority: 90, canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.PRE, parse: (raw) => ({ ...createBaseEntity(raw, { toMarkdown: (entity) => { const language = entity.language?.trim() ?? ''; return language ? `\`\`\`${language}\n${entity.text}\n\`\`\`` : `\`\`\`\n${entity.text}\n\`\`\``; }, toHTML: (entity) => { const language = entity.language?.trim() ?? ''; const className = language ? ` class="language-${language}"` : ''; return `<pre><code${className}>${escapeHtml(entity.text)}</code></pre>`; } }), type: types_1.TEXT_ENTITY_TYPES.PRE, language: raw.language }) }); const customEmojiEntityParser = (0, parser_1.createParser)({ name: 'custom-emoji-entity', priority: 55, canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.CUSTOM_EMOJI, parse: (raw) => ({ ...createBaseEntity(raw, { toMarkdown: (entity) => entity.document_id ? `${entity.text} <!-- custom emoji: ${entity.document_id} -->` : entity.text, toHTML: (entity) => entity.document_id ? `<span class="custom-emoji" data-document-id="${escapeHtml(entity.document_id)}">${escapeHtml(entity.text)}</span>` : escapeHtml(entity.text) }), type: types_1.TEXT_ENTITY_TYPES.CUSTOM_EMOJI, documentURL: (0, base_1.parseExportedFile)(raw.document_id) }) }); const blockquoteEntityParser = (0, parser_1.createParser)({ name: 'blockquote-entity', priority: 50, canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.BLOCKQUOTE, parse: (raw) => ({ ...createBaseEntity(raw, { toMarkdown: (entity) => { const quoteLines = entity.text.split('\n'); const quotePrefix = entity.collapsed ? '**> ' : '> '; return quoteLines .map((line, index) => index === 0 ? `${quotePrefix}${line}` : `> ${line}`) .join('\n'); }, toHTML: (entity) => { const className = entity.collapsed ? 'blockquote collapsed' : 'blockquote'; return `<blockquote class="${className}">${escapeHtml(entity.text)}</blockquote>`; } }), type: types_1.TEXT_ENTITY_TYPES.BLOCKQUOTE, collapsed: raw.collapsed ?? false }) }); const bankCardEntityParser = (0, parser_1.createParser)({ name: 'bank-card-entity', priority: 45, canHandle: (raw) => raw.type === types_1.TEXT_ENTITY_TYPES.BANK_CARD, parse: (raw) => ({ ...createBaseEntity(raw, { toMarkdown: (entity) => { const formattedCard = entity.text.replace(/(\d{4})(?=\d)/g, '$1 '); return `\`${formattedCard}\``; }, toHTML: (entity) => { const formattedCard = entity.text.replace(/(\d{4})(?=\d)/g, '$1 '); return `<code class="bank-card">${escapeHtml(formattedCard)}</code>`; } }), type: types_1.TEXT_ENTITY_TYPES.BANK_CARD }) }); // ===================================================== // LINK ENTITY PARSERS // ===================================================== const linkConverter = { toMarkdown: (entity) => { const url = entity.href ?? getUrlForType(entity.type, entity.text); return `[${entity.text}](${url})`; }, toHTML: (entity) => { const url = escapeHtml(entity.href ?? getUrlForType(entity.type, entity.text)); return `<a href="${url}">${escapeHtml(entity.text)}</a>`; } }; const createLinkEntityParser = (name, type, priority) => (0, parser_1.createParser)({ name, priority, canHandle: (raw) => raw.type === type, parse: (raw) => ({ ...createBaseEntity(raw, linkConverter), type: raw.type, url: raw.href ?? getUrlForType(raw.type, raw.text) }) }); const textLinkEntityParser = createLinkEntityParser('text-link-entity', types_1.TEXT_ENTITY_TYPES.TEXT_LINK, 95); const linkEntityParser = createLinkEntityParser('link-entity', types_1.TEXT_ENTITY_TYPES.LINK, 40); const hashtagEntityParser = createLinkEntityParser('hashtag-entity', types_1.TEXT_ENTITY_TYPES.HASHTAG, 35); const cashtagEntityParser = createLinkEntityParser('cashtag-entity', types_1.TEXT_ENTITY_TYPES.CASHTAG, 30); const botCommandEntityParser = createLinkEntityParser('bot-command-entity', types_1.TEXT_ENTITY_TYPES.BOT_COMMAND, 25); const emailEntityParser = createLinkEntityParser('email-entity', types_1.TEXT_ENTITY_TYPES.EMAIL, 20); const phoneEntityParser = createLinkEntityParser('phone-entity', types_1.TEXT_ENTITY_TYPES.PHONE, 15); // ===================================================== // MENTION ENTITY PARSERS // ===================================================== const createMentionEntityParser = (name, type, priority) => (0, parser_1.createParser)({ name, priority, canHandle: (raw) => raw.type === type, parse: (raw) => ({ ...createBaseEntity(raw, linkConverter), type: raw.type, mention: (0, actors_1.parseUsernameMention)(raw.text, raw.user_id) }) }); const mentionEntityParser = createMentionEntityParser('mention-entity', types_1.TEXT_ENTITY_TYPES.MENTION, 85); const mentionNameEntityParser = createMentionEntityParser('mention-name-entity', types_1.TEXT_ENTITY_TYPES.MENTION_NAME, 80); // ===================================================== // EXPORTS // ===================================================== exports.ENTITY_PARSERS = [ // High priority - specific formatting textLinkEntityParser, preTextEntityParser, mentionEntityParser, mentionNameEntityParser, codeTextEntityParser, boldTextEntityParser, italicTextEntityParser, underlineTextEntityParser, strikethroughTextEntityParser, spoilerTextEntityParser, customEmojiEntityParser, blockquoteEntityParser, bankCardEntityParser, // Medium priority - links and commands linkEntityParser, hashtagEntityParser, cashtagEntityParser, botCommandEntityParser, emailEntityParser, phoneEntityParser, // Lowest priority - fallback plainTextEntityParser ]; // ===================================================== // UTILITY FUNCTIONS FOR PROCESSING // ===================================================== const cleanupMarkdown = (text) => text .replace(/[ \t]+/g, ' ') .replace(/\n{4,}/g, '\n\n\n') .replace(/`\s+/g, '` ') .replace(/\s+`/g, ' `') .replace(/\*\*\s+/g, '**') .replace(/\s+\*\*/g, '**') .replace(/\*\s+/g, '*') .replace(/\s+\*/g, '*') .split('\n') .map(line => line.trim()) .join('\n') .trim(); exports.cleanupMarkdown = cleanupMarkdown; const processTextEntities = (entities) => ({ markdown: (0, exports.cleanupMarkdown)(entities.map(entity => entity.toMarkdown()).join('')), html: entities.map(entity => entity.toHTML()).join('') }); exports.processTextEntities = processTextEntities; //# sourceMappingURL=text-entities.js.map