UNPKG

convokit

Version:

A flexible TypeScript framework for ingesting, processing, and exporting chat/conversation data for LLM training and analysis.

176 lines 6.85 kB
import { ProviderRegistry, getConfig, loadConfig, ConvoKitLogging as ckl } from '../index.js';

// Ensure configuration is loaded before defining provider logic that might depend on it.
await loadConfig();

/** Tag passed as the first argument to the tagged log calls made by this provider. */
const LOG_TAG = 'Provider: Telegram';

/**
 * Validates a single entry of `chat_data.messages`.
 *
 * Logs the first failed expectation and stops there.
 *
 * @param {*} message - One element of the export's `messages` array.
 * @returns {boolean} `true` when every checked field has the expected type.
 */
function isCompatibleMessage(message) {
    if (!message || typeof message !== 'object') {
        ckl.warn(LOG_TAG, ' failed: a message is not an object');
        return false;
    }
    if (typeof message.id !== 'number') {
        ckl.debug(`checkIfCompatible failed: message.id is not a number (got: ${message.id})`);
        return false;
    }
    if (typeof message.type !== 'string') {
        ckl.debug(`checkIfCompatible failed: message.type is not a string (got: ${message.type})`);
        return false;
    }
    // `new Date(x)` does not throw on unparsable input; it yields an Invalid Date
    // whose time value is NaN, so validity has to be tested explicitly.
    if (Number.isNaN(new Date(message.date).getTime())) {
        ckl.debug(`checkIfCompatible failed: message.date is not a valid date (got: ${message.date})`);
        return false;
    }
    if (typeof message.date_unixtime !== 'string') {
        ckl.warn(LOG_TAG, ' failed: message.date_unixtime is not a string');
        return false;
    }

    if (message.type === 'message') {
        if (typeof message.from !== 'string' && message.from !== null) {
            ckl.warn(LOG_TAG, ' failed: message.from is neither a string nor null');
            return false;
        }
        if (typeof message.from_id !== 'string') {
            ckl.warn(LOG_TAG, ' failed: message.from_id is not a string');
            return false;
        }
    } else if (message.type === 'service') {
        // The actor fields are optional: they are only type-checked when truthy.
        if (message.actor && typeof message.actor !== 'string') {
            ckl.warn(LOG_TAG, ' failed: message.actor is not a string');
            return false;
        }
        if (message.actor_id && typeof message.actor_id !== 'string') {
            ckl.warn(LOG_TAG, ' failed: message.actor_id is not a string');
            return false;
        }
        if (message.action && typeof message.action !== 'string') {
            ckl.warn(LOG_TAG, ' failed: message.action is not a string');
            return false;
        }
    }

    if (typeof message.text !== 'string') {
        ckl.warn(LOG_TAG, ' failed: message.text is not a string');
        return false;
    }
    if (!Array.isArray(message.text_entities)) {
        ckl.warn(LOG_TAG, ' failed: message.text_entities is not an array');
        return false;
    }
    for (const entity of message.text_entities) {
        // Guard first so a null/primitive entry fails the check instead of throwing.
        if (!entity || typeof entity !== 'object') {
            ckl.warn(LOG_TAG, ' failed: an entity is not an object');
            return false;
        }
        if (typeof entity.type !== 'string') {
            ckl.warn(LOG_TAG, ' failed: an entity.type is not a string');
            return false;
        }
        if (typeof entity.text !== 'string') {
            ckl.warn(LOG_TAG, ' failed: an entity.text is not a string');
            return false;
        }
    }
    return true;
}

/**
 * Checks whether `chat_data` has the shape this provider can convert.
 *
 * Requires an object with `name` (string or null), `type` (string),
 * `id` (number) and a non-empty `messages` array in which every entry
 * passes the per-message checks.
 *
 * @param {*} chat_data - Parsed chat export to inspect.
 * @returns {boolean} `true` when the data is compatible, otherwise `false`
 *   (the first failed expectation is logged).
 */
function checkIfCompatible(chat_data) {
    if (!chat_data || typeof chat_data !== 'object') {
        ckl.warn(LOG_TAG, ' Telegram data not compatible! - chat_data is not an object');
        return false;
    }
    if (typeof chat_data.name !== 'string' && chat_data.name !== null) {
        ckl.warn(LOG_TAG, ' Telegram data not compatible! - chat_data.name is neither a string nor null');
        return false;
    }
    if (typeof chat_data.type !== 'string') {
        ckl.warn(LOG_TAG, ' Telegram data not compatible! - chat_data.type is not a string');
        return false;
    }
    if (typeof chat_data.id !== 'number') {
        ckl.warn(LOG_TAG, ' Telegram data not compatible! - chat_data.id is not a number');
        return false;
    }
    if (!Array.isArray(chat_data.messages)) {
        ckl.warn(LOG_TAG, ' Telegram data not compatible! - chat_data.messages is not an array');
        return false;
    }
    if (chat_data.messages.length === 0) {
        ckl.warn(LOG_TAG, ' Telegram data not compatible! - chat_data.messages is empty');
        return false;
    }
    return chat_data.messages.every(isCompatibleMessage);
}

/**
 * Derives sender and receiver identity from the messages of a chat.
 *
 * The "sender" is the participant whose `from` equals `chat_data.name`;
 * the "receiver" is any other non-null `from`. When several messages
 * match, the last one in the array wins.
 *
 * @param {object} chat_data - Chat export with `name` and `messages`.
 * @returns {{sender: {id: ?string, name: ?string}, receiver: {id: ?string, name: ?string}}}
 */
function getSenderAndReceiverInfo(chat_data) {
    const expectedSenderName = chat_data.name;
    let senderId = null;
    let receiverId = null;
    let receiverName = null;

    for (const message of chat_data.messages) {
        const { from, from_id: fromId } = message;
        // Messages without a `from` field (e.g. service messages, which are
        // validated through `actor` rather than `from`) identify nobody and
        // must not overwrite the receiver with undefined values.
        if (from === undefined) {
            continue;
        }
        if (from === expectedSenderName) {
            senderId = fromId;
        } else if (from !== null) {
            receiverId = fromId;
            receiverName = from;
        }
    }

    return {
        sender: {
            id: senderId,
            name: expectedSenderName,
        },
        receiver: {
            id: receiverId,
            name: receiverName,
        },
    };
}

/**
 * Converts a compatible chat export into the `{ metadata, messages }` structure.
 *
 * The conversation id is the stringified `chat_data.id`, or a random
 * 16-character hex string when the `anonymizeProviderConversationIds`
 * config option is truthy.
 *
 * @param {object} chat_data - Chat export that passed `checkIfCompatible`.
 * @returns {{metadata: object, messages: object[]}} Converted conversation.
 */
function convertToConvoKitFormat(chat_data) {
    let conversationId = chat_data.id.toString();
    if (getConfig().anonymizeProviderConversationIds) {
        conversationId = crypto.randomUUID().replace(/-/g, '').slice(0, 16);
    }

    const { sender, receiver } = getSenderAndReceiverInfo(chat_data);

    const messages = chat_data.messages.map((message) => ({
        timestamp: new Date(message.date),
        message: message.text,
        author: {
            id: message.from_id,
            name: message.from,
            nickname: message.from || null,
        },
    }));

    const metadata = {
        conversationId: conversationId,
        exportedAt: null,
        messageCount: chat_data.messages.length,
        messageSenderId: sender.id || null,
        messageSenderName: sender.name || null,
        messageReceiverId: receiver.id || null,
        messageReceiverName: receiver.name || null,
        providerId: 'telegram',
    };

    return {
        metadata,
        messages,
    };
}

/** Static description of this provider, passed to the registry on registration. */
export const ProviderInfo = {
    name: "Telegram",
    description: "Telegram chat data exported using Telegram Desktop (JSON). Will read from the Telegram folder.",
    version: "1.0.0",
    author: "ConvoKit",
    InputDataInfo: {
        fileExtension: ".json",
        directoryName: "Telegram",
    }
};

/**
 * Provider wrapping one parsed Telegram chat export.
 *
 * `Test()` reports whether the wrapped data is compatible; `Convert()`
 * returns the converted conversation.
 */
export class Provider {
    Data = null;

    /**
     * @param {*} chat_data - Parsed chat export to wrap; stored as-is.
     */
    constructor(chat_data) {
        this.Data = chat_data;
    }

    ProviderInfo = ProviderInfo;

    /** @returns {boolean} Result of the compatibility check on the wrapped data. */
    Test() {
        return checkIfCompatible(this.Data);
    }

    /** @returns {{metadata: object, messages: object[]}} The converted conversation. */
    Convert() {
        return convertToConvoKitFormat(this.Data);
    }
}

// Self-register the provider
ProviderRegistry.register('telegram', Provider, ProviderInfo);
//# sourceMappingURL=telegram.js.map