@yuchida-tamu/podcast-gen
Version:
AI-Powered Monologue Podcast Generator
116 lines • 4.84 kB
JavaScript
import { PodcastGenerationError } from '../utils/errors.js';
import { createMonologuePrompt, NARRATOR_PROMPT } from './prompts.js';
/**
 * Builds a single-narrator podcast script as an ordered list of timed segments.
 *
 * Content is requested from the injected LLM client one segment at a time, in
 * three phases (introduction, exploration, conclusion). Every segment produced
 * so far is passed to the prompt builder for the next request.
 */
export class MonologueEngine {
  llmClient;
  narrator;
  previousContent;

  /**
   * @param llmClient Object exposing `generateContent(request)` that resolves
   *   to an object with a `content` string.
   */
  constructor(llmClient) {
    this.llmClient = llmClient;
    this.narrator = {
      name: 'Narrator',
      personality: 'Engaging podcast host with balanced perspective',
      voice: 'conversational',
      systemPrompt: NARRATOR_PROMPT,
    };
    this.previousContent = [];
  }

  /**
   * Generates the full monologue for a topic.
   *
   * @param topic Subject forwarded to the prompt builder.
   * @param duration Target episode length in minutes (defaults to 5).
   * @returns Promise resolving to the segments in playback order; each segment
   *   has `timestamp`, `text`, `emotion` and `duration` (seconds).
   * @throws {PodcastGenerationError} When any step fails. The original error
   *   is attached as `cause`.
   */
  async generateMonologue(topic, duration = 5) {
    try {
      this.previousContent = [];

      const targetSegments = this.calculateTargetSegments(duration);
      // Introduction and conclusion each take ~15% of the target but always
      // get at least one segment; exploration takes ~70% and may be empty.
      const edgeSegments = Math.max(1, Math.floor(targetSegments * 0.15));
      const phasePlan = [
        ['introduction', edgeSegments],
        ['exploration', Math.floor(targetSegments * 0.7)],
        ['conclusion', edgeSegments],
      ];

      const segments = [];
      let currentTime = 0;
      for (const [phase, count] of phasePlan) {
        for (let i = 0; i < count; i++) {
          // Requests are deliberately sequential: each prompt is built from
          // the segments generated before it.
          const content = await this.generateContent(topic, phase);
          const segment = this.createSegment(content, currentTime);
          segments.push(segment);
          this.previousContent.push(segment);
          currentTime += segment.duration;
        }
      }
      return segments;
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      const wrapped = new PodcastGenerationError(
        `Failed to generate monologue: ${errorMessage}`,
        'monologue',
      );
      // Keep the underlying failure (and its stack) reachable for debugging.
      // Defined non-enumerable to mirror how the built-in Error `cause`
      // option behaves.
      Object.defineProperty(wrapped, 'cause', {
        value: error,
        writable: true,
        enumerable: false,
        configurable: true,
      });
      throw wrapped;
    }
  }

  /**
   * Requests the raw text of one segment from the LLM client.
   *
   * @param topic Subject forwarded to the prompt builder.
   * @param phase Phase label: 'introduction', 'exploration' or 'conclusion'.
   * @returns Promise resolving to the `content` field of the client response.
   */
  async generateContent(topic, phase) {
    const { personality, formatInstructions } = this.narrator.systemPrompt;
    const request = {
      systemPrompt: `${personality}\n\n${formatInstructions}`,
      userPrompt: createMonologuePrompt(topic, phase, this.previousContent),
    };
    const response = await this.llmClient.generateContent(request);
    return response.content;
  }

  /**
   * Converts raw generated text into a timed segment.
   *
   * @param content Raw text, optionally containing `[emotion]` markers.
   * @param startTime Offset of the segment from the episode start, in seconds.
   * @returns Segment with `timestamp` (MM:SS), cleaned `text`, `emotion` and
   *   estimated `duration` in seconds.
   */
  createSegment(content, startTime) {
    const text = this.cleanText(content);
    return {
      timestamp: this.formatTime(startTime),
      text,
      emotion: this.extractEmotion(content),
      duration: this.estimateDuration(text),
    };
  }

  /**
   * Returns the contents of the first `[...]` marker in the text, or
   * 'neutral' when there is none.
   */
  extractEmotion(text) {
    const emotionMatch = text.match(/\[([^\]]+)\]/);
    return emotionMatch ? emotionMatch[1] : 'neutral';
  }

  /**
   * Removes every `[...]` marker from the text and trims the result.
   */
  cleanText(text) {
    return text.replace(/\[([^\]]+)\]/g, '').trim();
  }

  /**
   * Computes how many segments to aim for.
   *
   * @param duration Total duration of an audio episode, in minutes.
   * @returns Segment count, budgeting 30 seconds per segment (rounded down).
   */
  calculateTargetSegments(duration) {
    const secondsPerSegment = 30;
    return Math.floor((duration * 60) / secondsPerSegment);
  }

  /**
   * Estimates how long the text takes to speak.
   *
   * Assumes ~150 words per minute at ~5 characters per word, then applies a
   * random variation of up to ±20%, so repeated calls may differ.
   *
   * @param text Cleaned segment text.
   * @returns Whole number of seconds, never less than 5.
   */
  estimateDuration(text) {
    const wordsPerMinute = 150;
    const charactersPerWord = 5;
    const charactersPerSecond = (wordsPerMinute * charactersPerWord) / 60;
    const baseDuration = text.length / charactersPerSecond;
    const variation = (Math.random() - 0.5) * 0.4;
    return Math.max(5, Math.floor(baseDuration * (1 + variation)));
  }

  /**
   * Formats a second count as a zero-padded `MM:SS` timestamp.
   *
   * Fractional input is truncated to whole seconds first so the seconds field
   * never contains a decimal point. Minutes are not wrapped at 60.
   *
   * @param seconds Offset in seconds.
   * @returns Timestamp string such as "01:15".
   */
  formatTime(seconds) {
    const totalSeconds = Math.floor(seconds);
    const minutes = Math.floor(totalSeconds / 60);
    const remainingSeconds = totalSeconds % 60;
    return `${String(minutes).padStart(2, '0')}:${String(remainingSeconds).padStart(2, '0')}`;
  }
}
//# sourceMappingURL=engine.js.map