@yuchida-tamu/podcast-gen
Version:
AI-Powered Monologue Podcast Generator
75 lines • 3.3 kB
JavaScript
import fs from 'fs-extra';
import path from 'path';
import { PodcastGenerationError } from '../utils/errors.js';
/**
 * Converts a JSON podcast script into one MP3 file per script segment by
 * calling a text-to-speech client.
 *
 * The client must expose `audio.speech.create({ model, voice, input })`
 * resolving to an object that has an `arrayBuffer()` method; that is the only
 * part of the client this class touches.
 */
export class AudioSynthesizer {
    /** Largest input (in characters) accepted for a single speech request. */
    static MAX_INPUT_CHARS = 4096;
    client;
    voice;
    model;
    /**
     * @param {object} client - Speech client (see class description).
     * @param {{ voice?: string, model?: string }} [options] - Optional overrides;
     *   falls back to voice `'coral'` and model `'tts-1'`.
     */
    constructor(client, options) {
        this.client = client;
        this.voice = options?.voice || 'coral';
        this.model = options?.model || 'tts-1';
    }
    /**
     * Reports whether a segment carries non-blank string text.
     * Tolerates null segments and non-string `text` values.
     */
    static #hasText(segment) {
        return typeof segment?.text === 'string' && segment.text.trim() !== '';
    }
    /**
     * Reads the script at `inputPath` and writes one MP3 per usable segment
     * next to `outputPath`, named `<basename>_segment_NNN.mp3`.
     *
     * Segments without usable text are skipped. A segment whose synthesis
     * fails is logged with `console.warn` and skipped, so one bad segment does
     * not abort the run.
     *
     * @param {string} inputPath - Path to a JSON file with a `segments` array.
     * @param {string} outputPath - Path whose directory and base name are used
     *   to derive the per-segment file names.
     * @returns {Promise<string[]>} Paths of the segment files that were written.
     * @throws {PodcastGenerationError} If the script cannot be read or parsed,
     *   has no usable segments, or no segment could be synthesized. The
     *   underlying error is available as `cause`.
     */
    async synthesizeAudio(inputPath, outputPath) {
        try {
            const fileContent = await fs.readFile(inputPath, 'utf-8');
            const scriptData = JSON.parse(fileContent);
            // Fail with a clear message instead of a TypeError on `.filter`.
            if (!Array.isArray(scriptData?.segments)) {
                throw new Error('Script file does not contain a "segments" array');
            }
            const validSegments = scriptData.segments.filter((segment) => AudioSynthesizer.#hasText(segment));
            if (validSegments.length === 0) {
                throw new Error('No valid text content found in segments');
            }
            const baseDir = path.dirname(outputPath);
            const baseName = path.basename(outputPath, path.extname(outputPath));
            // Create the target directory up front: otherwise every segment
            // would make a speech request and only then fail while writing.
            await fs.mkdir(baseDir, { recursive: true });
            const segmentFiles = [];
            for (const [index, segment] of validSegments.entries()) {
                const segmentNumber = index + 1;
                const segmentPath = path.join(baseDir, `${baseName}_segment_${String(segmentNumber).padStart(3, '0')}.mp3`);
                try {
                    await this.synthesizeSegment(segment, segmentPath);
                    segmentFiles.push(segmentPath);
                }
                catch (error) {
                    // Deliberate best-effort: report and continue with the rest.
                    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
                    console.warn(`Failed to synthesize segment ${segmentNumber}: ${errorMessage}`);
                }
            }
            if (segmentFiles.length === 0) {
                throw new Error('Failed to generate audio for any segments');
            }
            return segmentFiles;
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : 'Unknown error';
            const wrapped = new PodcastGenerationError(`Failed to synthesize audio: ${errorMessage}`, 'audio');
            if (error instanceof Error) {
                // Keep the original error (and its stack) reachable without
                // changing how the wrapper is constructed or serialized.
                Object.defineProperty(wrapped, 'cause', {
                    value: error,
                    writable: true,
                    configurable: true,
                    enumerable: false,
                });
            }
            throw wrapped;
        }
    }
    /**
     * Synthesizes a single segment and writes the audio to `outputPath`.
     *
     * @param {{ text: string }} segment - Segment whose trimmed text is spoken.
     * @param {string} outputPath - Destination file for the audio bytes.
     * @returns {Promise<string>} `outputPath`, once the file has been written.
     * @throws {Error} If the segment has no text or the trimmed text exceeds
     *   4096 characters; also propagates client and file-system errors.
     */
    async synthesizeSegment(segment, outputPath) {
        if (!AudioSynthesizer.#hasText(segment)) {
            throw new Error('Segment has no text content');
        }
        const text = segment.text.trim();
        // Reject oversized input locally rather than spending a request on it
        // (OpenAI's limit is 4096 characters per the original implementation).
        if (text.length > AudioSynthesizer.MAX_INPUT_CHARS) {
            throw new Error(`Segment text too long (${text.length} characters, max ${AudioSynthesizer.MAX_INPUT_CHARS})`);
        }
        const mp3 = await this.client.audio.speech.create({
            model: this.model,
            voice: this.voice,
            input: text,
        });
        const buffer = Buffer.from(await mp3.arrayBuffer());
        await fs.writeFile(outputPath, buffer);
        return outputPath;
    }
    /**
     * Roughly estimates spoken duration from the total text length, assuming
     * 15 characters per second.
     *
     * @param {Array<{ text?: string }>} segments - Segments to measure.
     * @returns {number} Estimated duration in whole seconds, rounded up.
     */
    estimateAudioDuration(segments) {
        const avgCharactersPerSecond = 15;
        const totalCharacters = segments.reduce((sum, segment) => sum + (segment?.text?.length || 0), 0);
        return Math.ceil(totalCharacters / avgCharactersPerSecond);
    }
}
//# sourceMappingURL=synthesizer.js.map