UNPKG

@yuchida-tamu/podcast-gen

Version:

AI-Powered Monologue Podcast Generator

75 lines 3.3 kB
import fs from 'fs-extra';
import path from 'path';
import { PodcastGenerationError } from '../utils/errors.js';

/** Maximum number of characters sent in a single speech request. */
const MAX_TTS_INPUT_CHARS = 4096;

/** Rough speaking rate used by the duration estimate. */
const AVG_CHARACTERS_PER_SECOND = 15;

/**
 * Tells whether a script segment carries usable text.
 *
 * @param {unknown} segment - A candidate entry from the script's `segments` array.
 * @returns {boolean} True when `segment.text` is a string with non-whitespace content.
 */
function hasSpeakableText(segment) {
  return typeof segment?.text === 'string' && segment.text.trim() !== '';
}

/**
 * Turns a JSON script file into one MP3 file per text segment by calling
 * `client.audio.speech.create` for each segment.
 */
export class AudioSynthesizer {
  client;
  voice;
  model;

  /**
   * @param {object} client - Object exposing `audio.speech.create(...)`.
   * @param {{ voice?: string, model?: string }} [options] - Optional overrides;
   *   falsy values fall back to `'coral'` and `'tts-1'` respectively.
   */
  constructor(client, options) {
    this.client = client;
    this.voice = options?.voice || 'coral';
    this.model = options?.model || 'tts-1';
  }

  /**
   * Reads the script at `inputPath` and writes one MP3 per non-blank segment
   * next to `outputPath`, named `<basename>_segment_NNN.mp3`.
   *
   * A segment that fails to synthesize is logged and skipped, so the returned
   * list may be shorter than the number of segments in the script.
   *
   * @param {string} inputPath - Path to a JSON file containing a `segments` array.
   * @param {string} outputPath - Path whose directory and base name seed the segment file names.
   * @returns {Promise<string[]>} Paths of the segment files that were written.
   * @throws {PodcastGenerationError} When the script cannot be read or parsed,
   *   has no usable segments, or no segment could be synthesized.
   */
  async synthesizeAudio(inputPath, outputPath) {
    try {
      // Read and parse the script file
      const fileContent = await fs.readFile(inputPath, 'utf-8');
      const scriptData = JSON.parse(fileContent);

      if (!Array.isArray(scriptData?.segments)) {
        throw new Error('Script file does not contain a "segments" array');
      }

      // Malformed entries (null, non-string text) are skipped like blank ones
      // rather than aborting the whole run.
      const validSegments = scriptData.segments.filter(hasSpeakableText);
      if (validSegments.length === 0) {
        throw new Error('No valid text content found in segments');
      }

      const baseDir = path.dirname(outputPath);
      const baseName = path.basename(outputPath, path.extname(outputPath));

      // Create the target directory up front; otherwise every segment would
      // make its speech request and only then fail while writing the file.
      await fs.ensureDir(baseDir);

      const segmentFiles = [];

      // Generate audio for each segment
      for (const [index, segment] of validSegments.entries()) {
        const ordinal = index + 1;
        const segmentPath = path.join(
          baseDir,
          `${baseName}_segment_${String(ordinal).padStart(3, '0')}.mp3`,
        );
        try {
          await this.synthesizeSegment(segment, segmentPath);
          segmentFiles.push(segmentPath);
        } catch (error) {
          const errorMessage = error instanceof Error ? error.message : 'Unknown error';
          console.warn(`Failed to synthesize segment ${ordinal}: ${errorMessage}`);
          // Continue with other segments instead of failing completely
        }
      }

      if (segmentFiles.length === 0) {
        throw new Error('Failed to generate audio for any segments');
      }
      return segmentFiles;
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      throw new PodcastGenerationError(`Failed to synthesize audio: ${errorMessage}`, 'audio');
    }
  }

  /**
   * Synthesizes a single segment and writes the audio to `outputPath`.
   *
   * @param {{ text: string }} segment - Segment whose trimmed `text` is spoken.
   * @param {string} outputPath - Destination file for the audio bytes.
   * @returns {Promise<string>} `outputPath`, once the file has been written.
   * @throws {Error} When the segment has no text or the trimmed text exceeds
   *   4096 characters; errors from the client or the file write propagate too.
   */
  async synthesizeSegment(segment, outputPath) {
    if (!hasSpeakableText(segment)) {
      throw new Error('Segment has no text content');
    }
    const text = segment.text.trim();

    // Check if text exceeds OpenAI's character limit (4096)
    if (text.length > MAX_TTS_INPUT_CHARS) {
      throw new Error(
        `Segment text too long (${text.length} characters, max ${MAX_TTS_INPUT_CHARS})`,
      );
    }

    // Generate speech using OpenAI TTS
    const mp3 = await this.client.audio.speech.create({
      model: this.model,
      voice: this.voice,
      input: text,
    });

    // Convert to buffer and write to file
    const buffer = Buffer.from(await mp3.arrayBuffer());
    await fs.writeFile(outputPath, buffer);
    return outputPath;
  }

  /**
   * Estimates the spoken duration of `segments` from their total character
   * count, assuming 15 characters per second and rounding up.
   *
   * @param {Array<{ text?: string }>} segments - Segments to measure; entries
   *   without a `text` length contribute zero characters.
   * @returns {number} Estimated duration in whole seconds.
   */
  estimateAudioDuration(segments) {
    const totalCharacters = segments.reduce(
      (sum, segment) => sum + (segment?.text?.length ?? 0),
      0,
    );
    return Math.ceil(totalCharacters / AVG_CHARACTERS_PER_SECOND);
  }
}
//# sourceMappingURL=synthesizer.js.map