convokit
Version:
A flexible TypeScript framework for ingesting, processing, and exporting chat/conversation data for LLM training and analysis.
176 lines • 6.85 kB
JavaScript
import { ProviderRegistry, getConfig, loadConfig, ConvoKitLogging as ckl } from '../index.js';
// Ensure configuration is loaded before defining provider logic that might depend on it.
// This is a top-level `await`: evaluation of the rest of this module (including the
// provider self-registration at the bottom) does not continue until the value returned
// by `loadConfig()` has settled. `convertToConvoKitFormat` later reads the configuration
// through `getConfig()`.
await loadConfig();
/**
 * Structurally validates a parsed chat export before conversion.
 *
 * The check is shallow and purely type based: it verifies the top-level chat
 * fields (`name`, `type`, `id`, `messages`) and, for every message, the fields
 * this provider reads (`id`, `type`, `date`, `date_unixtime`, `text`,
 * `text_entities`, plus `from`/`from_id` for `"message"` entries and the
 * optional `actor`/`actor_id`/`action` for `"service"` entries).
 *
 * Every rejection is reported through `ckl` and results in `false`; the
 * function never throws for malformed input.
 *
 * @param {*} chat_data - Parsed JSON value to inspect.
 * @returns {boolean} `true` when every check passes, `false` otherwise.
 */
function checkIfCompatible(chat_data) {
    if (!chat_data || typeof chat_data !== 'object') {
        ckl.warn("Provider: Telegram", ' Telegram data not compatible! - chat_data is not an object');
        return false;
    }
    if (typeof chat_data.name !== 'string' && chat_data.name !== null) {
        ckl.warn("Provider: Telegram", ' Telegram data not compatible! - chat_data.name is neither a string nor null');
        return false;
    }
    if (typeof chat_data.type !== 'string') {
        ckl.warn("Provider: Telegram", ' Telegram data not compatible! - chat_data.type is not a string');
        return false;
    }
    if (typeof chat_data.id !== 'number') {
        ckl.warn("Provider: Telegram", ' Telegram data not compatible! - chat_data.id is not a number');
        return false;
    }
    if (!Array.isArray(chat_data.messages)) {
        ckl.warn("Provider: Telegram", ' Telegram data not compatible! - chat_data.messages is not an array');
        return false;
    }
    if (chat_data.messages.length === 0) {
        ckl.warn("Provider: Telegram", ' Telegram data not compatible! - chat_data.messages is empty');
        return false;
    }
    for (const message of chat_data.messages) {
        if (!message || typeof message !== 'object') {
            ckl.warn("Provider: Telegram", ' failed: a message is not an object');
            return false;
        }
        if (typeof message.id !== 'number') {
            ckl.debug(`checkIfCompatible failed: message.id is not a number (got: ${message.id})`);
            return false;
        }
        if (typeof message.type !== 'string') {
            ckl.debug(`checkIfCompatible failed: message.type is not a string (got: ${message.type})`);
            return false;
        }
        // `new Date(...)` does not throw for unparsable input; it produces an
        // "Invalid Date" whose time value is NaN. The try/catch only guards the
        // rare values that cannot be coerced at all, so validity has to be
        // checked explicitly afterwards.
        let parsedDate;
        try {
            parsedDate = new Date(message.date);
        }
        catch (e) {
            ckl.debug(`checkIfCompatible failed: message.date is not a valid date (got: ${message.date})`);
            ckl.error("Provider: Telegram", e);
            return false;
        }
        if (Number.isNaN(parsedDate.getTime())) {
            ckl.debug(`checkIfCompatible failed: message.date is not a valid date (got: ${message.date})`);
            return false;
        }
        if (typeof message.date_unixtime !== 'string') {
            ckl.warn("Provider: Telegram", ' failed: message.date_unixtime is not a string');
            return false;
        }
        if (message.type === 'message') {
            if (typeof message.from !== 'string' && message.from !== null) {
                ckl.warn("Provider: Telegram", ' failed: message.from is neither a string nor null');
                return false;
            }
            if (typeof message.from_id !== 'string') {
                ckl.warn("Provider: Telegram", ' failed: message.from_id is not a string');
                return false;
            }
        }
        else if (message.type === 'service') {
            // These fields are optional on service entries: they are only
            // type-checked when present (truthy).
            if (message.actor && typeof message.actor !== 'string') {
                ckl.warn("Provider: Telegram", ' failed: message.actor is not a string');
                return false;
            }
            if (message.actor_id && typeof message.actor_id !== 'string') {
                ckl.warn("Provider: Telegram", ' failed: message.actor_id is not a string');
                return false;
            }
            if (message.action && typeof message.action !== 'string') {
                ckl.warn("Provider: Telegram", ' failed: message.action is not a string');
                return false;
            }
        }
        if (typeof message.text !== 'string') {
            ckl.warn("Provider: Telegram", ' failed: message.text is not a string');
            return false;
        }
        if (!Array.isArray(message.text_entities)) {
            ckl.warn("Provider: Telegram", ' failed: message.text_entities is not an array');
            return false;
        }
        for (const entity of message.text_entities) {
            // Guard first so a null/primitive entry is rejected instead of
            // raising a TypeError on the property reads below.
            if (!entity || typeof entity !== 'object') {
                ckl.warn("Provider: Telegram", ' failed: an entity is not an object');
                return false;
            }
            if (typeof entity.type !== 'string') {
                ckl.warn("Provider: Telegram", ' failed: an entity.type is not a string');
                return false;
            }
            if (typeof entity.text !== 'string') {
                ckl.warn("Provider: Telegram", ' failed: an entity.text is not a string');
                return false;
            }
        }
    }
    return true;
}
/**
 * Derives the "sender" and "receiver" participants of a chat from its messages.
 *
 * The sender is the participant whose `from` value equals `chat_data.name`;
 * the receiver is any other named participant. When several messages match,
 * the values from the last matching message win.
 *
 * @param {{name: (string|null), messages: Array<Object>}} chat_data - Chat export to inspect.
 * @returns {{sender: {id: *, name: *}, receiver: {id: *, name: *}}}
 *   Participant ids/names; an id (and the receiver name) stays `null` when no
 *   message identified that participant.
 */
function getSenderAndReceiverInfo(chat_data) {
    const expectedSenderName = chat_data.name;
    let senderId = null;
    let receiverId = null;
    let receiverName = null;
    for (const message of chat_data.messages) {
        if (message.from === expectedSenderName) {
            senderId = message.from_id;
        }
        else if (message.from != null) {
            // `!= null` deliberately excludes both null and undefined: entries
            // without a `from` field (e.g. service entries, which carry
            // `actor`/`actor_id` instead) must not overwrite an already
            // discovered receiver with undefined values.
            receiverId = message.from_id;
            receiverName = message.from;
        }
    }
    return {
        sender: {
            id: senderId,
            name: expectedSenderName,
        },
        receiver: {
            id: receiverId,
            name: receiverName,
        },
    };
}
/**
 * Converts a chat export into the `{ metadata, messages }` structure this
 * provider returns from `Convert()`.
 *
 * The conversation id is the stringified `chat_data.id`, unless the
 * `anonymizeProviderConversationIds` config flag is set, in which case it is
 * replaced by 16 characters taken from a dash-stripped random UUID.
 *
 * @param {Object} chat_data - Chat export (expected to have passed `checkIfCompatible`).
 * @returns {{metadata: Object, messages: Array<Object>}} Converted conversation.
 */
function convertToConvoKitFormat(chat_data) {
    // Stringify the real id first so a missing id fails the same way
    // regardless of the anonymisation setting.
    const originalId = chat_data.id.toString();
    const conversationId = getConfig().anonymizeProviderConversationIds
        ? crypto.randomUUID().replace(/-/g, '').slice(0, 16)
        : originalId;

    const participants = getSenderAndReceiverInfo(chat_data);

    // Maps one exported entry onto the converted message shape.
    const toConvertedMessage = (entry) => {
        const author = {
            id: entry.from_id,
            name: entry.from,
            nickname: entry.from || null,
        };
        return {
            timestamp: new Date(entry.date),
            message: entry.text,
            author,
        };
    };
    const messages = chat_data.messages.map((entry) => toConvertedMessage(entry));

    return {
        metadata: {
            conversationId,
            exportedAt: null,
            messageCount: chat_data.messages.length,
            messageSenderId: participants.sender.id || null,
            messageSenderName: participants.sender.name || null,
            messageReceiverId: participants.receiver.id || null,
            messageReceiverName: participants.receiver.name || null,
            providerId: 'telegram',
        },
        messages,
    };
}
/**
 * Static descriptor for the Telegram provider.
 *
 * It is attached to every `Provider` instance and handed to
 * `ProviderRegistry.register` together with the class. `InputDataInfo` names
 * the file extension and directory associated with this provider's input.
 */
export const ProviderInfo = {
    name: 'Telegram',
    description: 'Telegram chat data exported using Telegram Desktop (JSON). Will read from the Telegram folder.',
    version: '1.0.0',
    author: 'ConvoKit',
    InputDataInfo: {
        fileExtension: '.json',
        directoryName: 'Telegram',
    },
};
/**
 * Telegram provider: wraps one parsed chat export and exposes the
 * compatibility check and the conversion as methods.
 */
export class Provider {
    /** Raw chat export passed to the constructor (null until then). */
    Data = null;
    /** Descriptor of this provider, copied from the module-level `ProviderInfo`. */
    ProviderInfo = ProviderInfo;

    /**
     * @param {*} chat_data - Parsed chat export to wrap; stored as-is.
     */
    constructor(chat_data) {
        this.Data = chat_data;
    }

    /**
     * Runs the structural compatibility check on the wrapped data.
     * @returns {boolean} Result of `checkIfCompatible` for `this.Data`.
     */
    Test() {
        const wrapped = this.Data;
        return checkIfCompatible(wrapped);
    }

    /**
     * Converts the wrapped data.
     * @returns {Object} Result of `convertToConvoKitFormat` for `this.Data`.
     */
    Convert() {
        const wrapped = this.Data;
        return convertToConvoKitFormat(wrapped);
    }
}
// Self-register the provider: as a side effect of evaluating this module, the
// `Provider` class and its `ProviderInfo` descriptor are handed to
// `ProviderRegistry` under the key 'telegram'.
ProviderRegistry.register('telegram', Provider, ProviderInfo);
//# sourceMappingURL=telegram.js.map