whatsapp-claude-gpt
Version:
WhatsApp-Claude-GPT is a WhatsApp chatbot that supports multiple AI providers for chat, optional image generation/editing, and voice (speech-to-text and text-to-speech). It’s built for natural, contextual conversations and can now also handle reminders an
288 lines (242 loc) • 11.1 kB
text/typescript
import { Chat, Client, Message, MessageMedia, MessageTypes } from "whatsapp-web.js";
import { AIContent, AiMessage, AIRole } from "../interfaces/ai-interfaces";
import { bufferToStream, getAuthorId, getFormattedDate, getUnsupportedMessage, getUserName, sanitizeForLog } from "../utils";
import logger from "../logger";
import NodeCache from "node-cache";
import OpenAISvc from "../services/openai-service";
import { CONFIG } from "../config";
import { Array } from "openai/internal/builtin-types";
import { ChatConfiguration, chatConfigurationManager } from "../config/chat-configurations";
/**
 * Adapter between whatsapp-web.js messages and the AI message format used by the bot.
 *
 * Responsibilities visible in this class:
 * - building the list of {@link AiMessage} entries to send to the AI for an incoming message,
 * - downloading, validating and caching image/PDF media,
 * - transcribing voice/audio messages (with caching),
 * - sending a response back through the WhatsApp client.
 */
class WspWeb {
  /** Validated media (images / PDFs) keyed by serialized message id. */
  private msgMediaCache: NodeCache = new NodeCache({
    stdTTL: CONFIG.BotConfig.mediaCacheTtl || CONFIG.BotConfig.nodeCacheTime,
    checkperiod: 600,
    useClones: false,
  });

  /** Voice/audio transcriptions keyed by serialized message id. */
  private transcribedMessagesCache: NodeCache = new NodeCache({
    stdTTL: CONFIG.BotConfig.transcriptionCacheTtl || CONFIG.BotConfig.nodeCacheTime,
    checkperiod: 600,
    useClones: false,
  });

  /** WhatsApp client; unset until {@link setWspClient} is called. */
  private wspClient?: Client;

  /** Serialized chat id -> serialized id of the last message passed to generateMessageArray. */
  private lastProcessed = new Map<string, string>();

  /** Upper bound on the number of chats tracked in {@link lastProcessed}. */
  private readonly MAX_LAST_PROCESSED = 500;

  /**
   * Builds the list of AI messages that should be sent for an incoming WhatsApp message.
   *
   * For cached direct (non-group) chats only the incoming message is converted. Otherwise the
   * chat history is fetched and walked backwards from the incoming message until one of the
   * stop conditions is hit (age limit, last processed message, or a bot message when cached).
   *
   * @param wspMessage  The incoming message that triggered processing.
   * @param chatData    The chat the message belongs to.
   * @param chatCfg     Chat configuration; `botName`, `maxMsgsLimit` and `maxHoursLimit` are read.
   * @param chatCached  Whether the caller already holds cached context for this chat.
   * @returns The converted messages, in the same relative order as returned by `fetchMessages`.
   */
  public async generateMessageArray(wspMessage: Message, chatData: Chat, chatCfg: ChatConfiguration, chatCached: boolean): Promise<AiMessage[]> {
    const chatId = chatData.id._serialized;
    const incomingId = wspMessage.id._serialized;

    // Early return for cached direct chats — avoid expensive fetchMessages call.
    // Safe for -reset: deleteChatCache clears the cache, so chatCached will be false.
    if (chatCached && !chatData.isGroup) {
      const aiMessage = await this.convertWspMsgToAiMsg(wspMessage, chatCfg.botName);
      this.setLastProcessed(chatId, incomingId);
      return [aiMessage];
    }

    const messageList: AiMessage[] = [];
    const lastChatMsgProcessed = this.lastProcessed.get(chatId);
    const fetchedMessages = await chatData.fetchMessages({limit: chatCfg.maxMsgsLimit});

    // Only messages after the most recent "-reset" command are considered.
    const resetIndex = fetchedMessages.map(msg => msg.body).lastIndexOf("-reset");
    const messagesToProcess = resetIndex >= 0 ? fetchedMessages.slice(resetIndex + 1) : fetchedMessages;

    // Keep everything up to (and including) the incoming message.
    // NOTE(review): when the incoming message is not found, the fallback index 0 keeps only the
    // first fetched message — confirm this is the intended behavior.
    const wspMessageIndex = messagesToProcess.findIndex(msg => msg.id._serialized === incomingId);
    const startIndex = wspMessageIndex !== -1 ? wspMessageIndex : 0;
    const messagesToProcessFiltered = messagesToProcess.slice(0, startIndex + 1);

    const nowMs = Date.now();
    const msPerHour = 1000 * 60 * 60;

    // Walk backwards, starting at the incoming message.
    for (const msg of messagesToProcessFiltered.reverse()) {
      try {
        const ageInHours = (nowMs - msg.timestamp * 1000) / msPerHour;
        if (ageInHours > chatCfg.maxHoursLimit) break;
        if (lastChatMsgProcessed === msg.id._serialized) break;
        if (chatCached && msg.fromMe) break;
        if (wspMessage.timestamp < msg.timestamp) continue;

        const aiMessage = await this.convertWspMsgToAiMsg(msg, chatCfg.botName);
        messageList.push(aiMessage);
      } catch (e: any) {
        // A single unreadable message is logged and skipped; the rest are still processed.
        logger.error(`Error reading message - msg.type:${msg.type}; msg.body:${sanitizeForLog(msg.body)}. Error:${sanitizeForLog(e.message)}`);
      }
    }

    this.setLastProcessed(chatId, incomingId);
    // Messages were collected newest-first; restore the fetched order.
    return messageList.reverse();
  }

  /**
   * Records the last processed message for a chat, keeping the map bounded.
   *
   * Updating a chat that is already tracked never evicts another chat. When a new chat is
   * added at capacity, the least recently updated chat is evicted (Map iteration follows
   * insertion order, and updated chats are re-inserted at the end).
   *
   * @param chatId     Serialized chat id.
   * @param messageId  Serialized id of the message just processed.
   */
  private setLastProcessed(chatId: string, messageId: string) {
    if (this.lastProcessed.has(chatId)) {
      // Re-insert so this chat becomes the most recently used entry.
      this.lastProcessed.delete(chatId);
    } else if (this.lastProcessed.size >= this.MAX_LAST_PROCESSED) {
      const oldestKey = this.lastProcessed.keys().next().value;
      if (oldestKey !== undefined) this.lastProcessed.delete(oldestKey);
    }
    this.lastProcessed.set(chatId, messageId);
  }

  /**
   * Retrieves the media attached to a message (from cache or by downloading it) and validates it.
   *
   * @param wspMsg The message whose media should be extracted.
   * @returns `mediaData` when the media is an image or PDF within the configured size limit;
   *          otherwise `mediaData` is null and `errorMedia` is:
   *          - `'download'` when the download threw or the payload is empty,
   *          - `'type'` when the mimetype is neither `image*` nor `application/pdf`,
   *          - `'size'` when the decoded payload exceeds the configured limit,
   *          - `null` when the download returned nothing.
   */
  async extractMedia(wspMsg: Message): Promise<{errorMedia: string | null, mediaData: MessageMedia | null}> {
    const msgId = wspMsg.id._serialized;
    let mediaData: MessageMedia | null | undefined = this.msgMediaCache.get<MessageMedia>(msgId);

    if (!mediaData) {
      try {
        mediaData = await wspMsg.downloadMedia();
      } catch (e: any) {
        logger.warn(`[extractMedia] Download failed for msg ${wspMsg.id._serialized}: ${e.message}`);
        return { errorMedia: 'download', mediaData: null };
      }
    }

    if (!mediaData) {
      return { errorMedia:null, mediaData: null };
    }

    if (!mediaData.data) {
      logger.warn(`[extractMedia] Media data is empty for msg ${wspMsg.id._serialized}`);
      return { errorMedia: 'download', mediaData: null };
    }

    if (!mediaData.mimetype.startsWith('image') && mediaData.mimetype !== 'application/pdf') {
      return { errorMedia: 'type', mediaData: null};
    }

    // Images and stickers use the image limit; everything else uses the document limit.
    const isImage = wspMsg.type === MessageTypes.IMAGE || wspMsg.type === MessageTypes.STICKER;
    const sizeInMB = Buffer.from(mediaData.data, 'base64').length / (1024 * 1024);
    const maxMB = isImage
      ? CONFIG.BotConfig.maxImageSizeMB
      : CONFIG.BotConfig.maxDocumentSizeMB;
    if (sizeInMB > maxMB) {
      logger.warn(`Rejected file: ${sizeInMB.toFixed(2)}MB (limit: ${maxMB}MB)`);
      return { errorMedia: 'size', mediaData: null};
    }

    // No explicit TTL: the cache's own stdTTL (mediaCacheTtl, falling back to nodeCacheTime)
    // applies. Passing nodeCacheTime here would make the mediaCacheTtl setting ineffective.
    this.msgMediaCache.set(msgId, mediaData);
    return { errorMedia:null, mediaData };
  }

  /**
   * Converts a WhatsApp message into the AI message format.
   *
   * The resulting content may contain, in this order: an image part (or a text placeholder),
   * an `ASR` part with the transcription of a voice/audio message, a file part (or a text
   * placeholder) for documents, a text part describing a media error / unsupported message,
   * and finally the message body as text (only for image, audio and plain chat messages).
   *
   * @param wspMsg        The message to convert.
   * @param inputBotName  Bot name to use for messages sent by the bot. When omitted, it is
   *                      looked up from the chat configuration of the message's chat.
   * @returns The converted message. Images are always given the USER role, even when sent
   *          by the bot; other messages sent by the bot get the ASSISTANT role.
   */
  public async convertWspMsgToAiMsg(wspMsg: Message, inputBotName?: string): Promise<AiMessage> {
    const msgId = wspMsg.id._serialized;
    let mediaData: MessageMedia | null | undefined = this.msgMediaCache.get<MessageMedia>(msgId);
    let errorMedia: string | null = null;

    // Resolve the bot name lazily: the chat is only needed when the caller did not supply one.
    let botName = inputBotName;
    if (botName == null) {
      const chat = await wspMsg.getChat();
      botName = (await chatConfigurationManager.getChatConfig(chat.id._serialized, chat.name)).botName;
    }

    const dateString = getFormattedDate(new Date(wspMsg.timestamp * 1000));
    const author_id = getAuthorId(wspMsg);

    const isSticker = wspMsg.type === MessageTypes.STICKER;
    const isImage = wspMsg.type === MessageTypes.IMAGE || isSticker;
    const isAudio = wspMsg.type === MessageTypes.VOICE || wspMsg.type === MessageTypes.AUDIO;
    const isDocument = wspMsg.type === MessageTypes.DOCUMENT;

    if (!mediaData && (isImage || isDocument)) {
      ({mediaData, errorMedia} = await this.extractMedia(wspMsg));
    }

    // "Other" covers every type that is not image/audio/plain chat (documents included),
    // plus media whose mimetype was rejected.
    const isOther = (!isImage && !isAudio && wspMsg.type !== 'chat') || errorMedia === 'type';
    const role = (!wspMsg.fromMe || isImage) ? AIRole.USER : AIRole.ASSISTANT;
    const name = wspMsg.fromMe ? botName : (await getUserName(wspMsg));

    /** Builds a text content part for this message. */
    const textPart = (value: string): AIContent => ({
      type: 'text',
      msg_id: msgId,
      value,
      author_id,
      dateString
    });

    const content: AIContent[] = [];

    if (isImage) {
      if (mediaData) {
        content.push({
          type: 'image',
          value: mediaData.data,
          mimetype: mediaData.mimetype,
          msg_id: msgId,
          filename: isSticker? 'sticker':'image',
          author_id,
          dateString
        });
      } else {
        content.push(textPart('<Unprocessed image>'));
      }
    }

    if (isAudio) {
      content.push({
        type: 'ASR',
        msg_id: msgId,
        value: await this.transcribeVoice(wspMsg),
        author_id,
        dateString
      });
    }

    if (isDocument) {
      if (mediaData) {
        content.push({
          type: 'file',
          msg_id: msgId,
          mimetype: mediaData.mimetype,
          filename: mediaData.filename,
          value: mediaData.data,
          author_id,
          dateString
        });
      } else {
        content.push(textPart('SYSTEM:⚠️ The document could not be processed.'));
      }
    }

    if (errorMedia || (isOther && !mediaData)){
      let errorMessage = getUnsupportedMessage(wspMsg.type, wspMsg.body);
      if (errorMedia === 'size') errorMessage = `SYSTEM:⚠️ The file could not be processed because it exceeds the maximum allowed size (${isImage?CONFIG.BotConfig.maxImageSizeMB:CONFIG.BotConfig.maxDocumentSizeMB}MB).`;
      if (errorMedia === 'download') errorMessage = 'SYSTEM:⚠️ The file could not be downloaded and was not processed.';
      content.push(textPart(errorMessage));
    }

    if (wspMsg.body && !isOther) {
      content.push(textPart(wspMsg.body));
    }

    return {role: role, name: name, content: content};
  }

  /**
   * Transcribes a voice message to text, using cache when possible.
   *
   * - Checks the transcription cache for an existing result.
   * - Downloads the media and converts the base64 payload to a stream.
   * - Sends the stream to `OpenAISvc.transcription`.
   * - Caches the resulting text for future reuse.
   *
   * This method does not throw: every failure is logged and reported as a placeholder string.
   *
   * @param wspMsg The voice/audio message to transcribe.
   * @returns The transcribed text, or an `<Error ...>` placeholder.
   */
  private async transcribeVoice(wspMsg: Message): Promise<string> {
    try {
      const msgId = wspMsg.id._serialized;
      const cachedText = this.transcribedMessagesCache.get<string>(msgId);
      if (cachedText) return cachedText;

      let media;
      try {
        media = await wspMsg.downloadMedia();
      } catch (e: any) {
        logger.warn(`[transcribeVoice] Download failed for msg ${wspMsg.id._serialized}: ${e.message}`);
        return '<Error: voice message download failed>';
      }
      if (!media || !media.data) {
        logger.warn(`[transcribeVoice] Empty media for msg ${wspMsg.id._serialized}`);
        return '<Error: voice message media is empty>';
      }

      const audioStream = bufferToStream(Buffer.from(media.data, 'base64'));
      logger.debug(`[OpenAI->transcribeVoice] Starting audio transcription`);
      const transcribedText = await OpenAISvc.transcription(audioStream);
      logger.debug(`[OpenAI->transcribeVoice] Transcribed text: ${sanitizeForLog(transcribedText)}`);

      // No explicit TTL: the cache's own stdTTL (transcriptionCacheTtl, falling back to
      // nodeCacheTime) applies.
      this.transcribedMessagesCache.set<string>(msgId, transcribedText);
      return transcribedText;
    } catch (error: any) {
      // Log the error message as a string, matching how sanitizeForLog is used elsewhere in
      // this class (the error object itself was previously passed and then JSON-stringified).
      logger.error(`Error transcribing voice message: ${sanitizeForLog(error?.message ?? String(error))}`);
      return '<Error transcribing voice message>';
    }
  }

  /**
   * Sends the bot's response for a message.
   *
   * @param message      The message being answered.
   * @param responseMsg  The text to send.
   * @param isGroup      When true the response is sent as a reply to `message`; otherwise it
   *                     is sent as a plain message to `message.from`.
   * @returns The promise returned by the underlying send call.
   * @throws Error when a direct response is requested before the client has been set.
   */
  public returnResponse(message: Message, responseMsg: string, isGroup: boolean): Promise<Message> {
    if (isGroup) {
      return message.reply(responseMsg, undefined, {linkPreview: false});
    }
    return this.getWspClient().sendMessage(message.from, responseMsg, {linkPreview: false});
  }

  /** Stores the WhatsApp client used for sending messages. */
  public setWspClient(client: Client) {
    this.wspClient = client;
  }

  /**
   * Returns the WhatsApp client.
   *
   * @throws Error when {@link setWspClient} has not been called yet.
   */
  public getWspClient(): Client {
    if (!this.wspClient) {
      throw new Error('WhatsApp client not available yet. Ensure setWspClient() is called before getWspClient().');
    }
    return this.wspClient;
  }
}
/** Shared WspWeb instance, exposed as this module's default export. */
const WhatsappHandler: WspWeb = new WspWeb();
export { WhatsappHandler as default };