gemini-livejs

Version:

Integrate with Google Gemini 2.0 Multimodal API

github.com/VoxlDevv/

112 lines (99 loc) • 3.04 kB

text/typescript

import { spawn } from "child_process";
import GeminiLive, { ParameterType } from "./src";

/**
 * Reads a required environment variable.
 *
 * @param name - Name of the environment variable.
 * @returns The variable's non-empty value.
 * @throws Error when the variable is unset or empty.
 */
function require_env(name: string): string {
  const value = process.env[name];
  if (!value) {
    throw new Error(`Missing required environment variable ${name}`);
  }
  return value;
}

// Credentials must never be committed to source control; the key is supplied
// by the environment instead (e.g. `GEMINI_API_KEY=... ts-node example.ts`).
const API_KEY = require_env("GEMINI_API_KEY");

// Handle of the running `arecord` child process, or null when not recording.
let recorder: ReturnType<typeof spawn> | null = null;

// Session that receives the microphone audio and returns plain-text transcripts.
const transcriber = new GeminiLive(API_KEY, {
  systemInstruction:
    "You are a transcription assistant. Simply convert speech to text without any additional commentary. user always use english language. your task is to transcribe the speech and return the transcription as a string. and never return anything else.",
  generationConfig: {
    responseType: "TEXT",
  },
});

// Session that receives each transcript as a prompt and produces the reply.
const conversation = new GeminiLive(API_KEY, {
  systemInstruction:
    "You are TERF, an AI assistant based on the TERF character from Interstellar. Always remember that you are TERF and maintain this identity throughout conversations. Never claim to be the user or confuse your identity with theirs.",
  generationConfig: {
    responseType: "TEXT",
    maxOutputTokens: 500,
  },
  tools: {
    functionDeclarations: [
      {
        name: "open_app",
        description: "Opens an app",
        parameters: {
          type: ParameterType.OBJECT,
          properties: {
            app_name: {
              type: ParameterType.STRING,
              description: "The name of the app to open",
            },
            message: {
              type: ParameterType.STRING,
              description: "The message to display to the user",
            },
          },
          required: ["app_name", "message"],
        },
      },
    ],
  },
});

/**
 * Starts capturing microphone audio with `arecord` and pipes the raw samples
 * into the writable stream exposed by the given Gemini session.
 *
 * @param gemini - Session whose writable stream receives the audio.
 * @throws Error when the spawned process has no stdout pipe to read from.
 */
async function start_recording(gemini: GeminiLive): Promise<void> {
  const writable_stream = await gemini.get_writable_stream();

  console.log("Starting microphone...");

  // arecord options: 1 channel, 16000 Hz, signed 16-bit little-endian samples,
  // headerless raw output, read from the "default" ALSA device.
  const child = spawn("arecord", [
    "-c",
    "1",
    "-r",
    "16000",
    "-f",
    "S16_LE",
    "-t",
    "raw",
    "-D",
    "default",
  ]);
  recorder = child;

  // Without an 'error' listener a failed spawn (e.g. arecord not installed)
  // is raised as an unhandled 'error' event and terminates the process.
  child.on("error", (error) => {
    console.error("Failed to start recording:", error);
    if (recorder === child) {
      recorder = null;
    }
  });

  const audio = child.stdout;
  if (!audio) {
    child.kill();
    recorder = null;
    throw new Error("arecord was started without a stdout pipe");
  }
  audio.pipe(writable_stream);

  console.log("Recording started - speak into your microphone");
}

/**
 * Stops the microphone capture if it is running.
 *
 * Safe to call repeatedly: the handle is cleared after the first call, so the
 * close handlers and the SIGINT handler cannot kill (or report) it twice.
 */
function cleanup(): void {
  if (!recorder) {
    return;
  }
  recorder.kill();
  recorder = null;
  console.log("\nRecording stopped");
}

/**
 * Waits for both sessions to complete their handshake, forwards every
 * transcript to the conversation session, and then starts recording.
 */
async function main(): Promise<void> {
  await Promise.all([
    new Promise<void>((resolve) => transcriber.on_handshake(() => resolve())),
    new Promise<void>((resolve) => conversation.on_handshake(() => resolve())),
  ]);

  console.log("Both Gemini instances are ready");

  transcriber.realtime((response) => {
    const transcript = response.text;
    if (!transcript) {
      return;
    }
    console.log("You >", transcript);

    // The 30 ms delay before forwarding is kept from the original script.
    setTimeout(async () => {
      try {
        const reply = await conversation.send({ prompt: transcript });
        console.log("Response :", reply);
      } catch (error) {
        // A failed reply must not surface as an unhandled rejection.
        console.error("Failed to get a response:", error);
      }
    }, 30);
  });

  try {
    await start_recording(transcriber);
  } catch (error) {
    console.error("Failed to start recording:", error);
  }
}

transcriber.on_close((reason) => {
  console.log("Transcription stream closed:", reason);
  cleanup();
});

conversation.on_close((reason) => {
  console.log("Conversation stream closed:", reason);
  cleanup();
});

process.on("SIGINT", () => {
  cleanup();
  process.exit();
});

main().catch((error: unknown) => {
  console.error("Failed to initialise Gemini sessions:", error);
  cleanup();
});