whisper.rn


React Native binding of whisper.cpp

import { NativeEventEmitter, DeviceEventEmitter, Platform, Image } from 'react-native'
import RNWhisper from './NativeRNWhisper'
import AudioSessionIos from './AudioSessionIos'
import { version } from './version.json'

let EventEmitter
if (Platform.OS === 'ios') {
  // @ts-ignore
  EventEmitter = new NativeEventEmitter(RNWhisper)
}
if (Platform.OS === 'android') {
  EventEmitter = DeviceEventEmitter
}

const EVENT_ON_TRANSCRIBE_PROGRESS = '@RNWhisper_onTranscribeProgress'
const EVENT_ON_TRANSCRIBE_NEW_SEGMENTS = '@RNWhisper_onTranscribeNewSegments'
const EVENT_ON_REALTIME_TRANSCRIBE = '@RNWhisper_onRealtimeTranscribe'
const EVENT_ON_REALTIME_TRANSCRIBE_END = '@RNWhisper_onRealtimeTranscribeEnd'

const updateAudioSession = async (setting) => {
  await AudioSessionIos.setCategory(setting.category, setting.options || [])
  if (setting.mode) {
    await AudioSessionIos.setMode(setting.mode)
  }
  await AudioSessionIos.setActive(setting.active ?? true)
}

export class WhisperContext {
  gpu = false
  reasonNoGPU = ''

  constructor({ contextId, gpu, reasonNoGPU }) {
    this.id = contextId
    this.gpu = gpu
    this.reasonNoGPU = reasonNoGPU
  }

  transcribeWithNativeMethod(method, data, options = {}) {
    const jobId = Math.floor(Math.random() * 10000)
    const { onProgress, onNewSegments, ...rest } = options

    // Forward native progress events (0-100) for this job to the caller
    let progressListener
    let lastProgress = 0
    if (onProgress) {
      progressListener = EventEmitter.addListener(EVENT_ON_TRANSCRIBE_PROGRESS, (evt) => {
        const { contextId, progress } = evt
        if (contextId !== this.id || evt.jobId !== jobId) return
        lastProgress = progress > 100 ? 100 : progress
        onProgress(lastProgress)
      })
    }
    const removeProgressListener = () => {
      if (progressListener) {
        progressListener.remove()
        progressListener = null
      }
    }

    // Forward newly decoded segments for this job to the caller
    let newSegmentsListener
    if (onNewSegments) {
      newSegmentsListener = EventEmitter.addListener(EVENT_ON_TRANSCRIBE_NEW_SEGMENTS, (evt) => {
        const { contextId, result } = evt
        if (contextId !== this.id || evt.jobId !== jobId) return
        onNewSegments(result)
      })
    }
    const removeNewSegmentsListener = () => {
      if (newSegmentsListener) {
        newSegmentsListener.remove()
        newSegmentsListener = null
      }
    }

    return {
      stop: async () => {
        await RNWhisper.abortTranscribe(this.id, jobId)
        removeProgressListener()
        removeNewSegmentsListener()
      },
      promise: RNWhisper[method](this.id, jobId, data, {
        ...rest,
        onProgress: !!onProgress,
        onNewSegments: !!onNewSegments,
      })
        .then((result) => {
          removeProgressListener()
          removeNewSegmentsListener()
          if (!result.isAborted && lastProgress !== 100) {
            // Handle the case where the last progress event was not triggered
            onProgress?.(100)
          }
          return result
        })
        .catch((e) => {
          removeProgressListener()
          removeNewSegmentsListener()
          throw e
        }),
    }
  }

  /**
   * Transcribe an audio file (path or base64 encoded wav file)
   * base64: needs the `data:audio/wav;base64,` prefix
   */
  transcribe(filePathOrBase64, options = {}) {
    let path = ''
    if (typeof filePathOrBase64 === 'number') {
      try {
        const source = Image.resolveAssetSource(filePathOrBase64)
        if (source) path = source.uri
      } catch (e) {
        throw new Error(`Invalid asset: ${filePathOrBase64}`)
      }
    } else {
      if (filePathOrBase64.startsWith('http'))
        throw new Error('Transcribing a remote file is not supported, please download it first')
      path = filePathOrBase64
    }
    if (path.startsWith('file://')) path = path.slice(7)
    return this.transcribeWithNativeMethod('transcribeFile', path, options)
  }
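
  /*
    Usage sketch for transcribe(): a minimal example, assuming a context created
    via initWhisper() (defined below) and an audio file path that exists on the
    device. onProgress and onNewSegments map to the event listeners wired up in
    transcribeWithNativeMethod above; the resolved result shape is assumed to
    carry the full text and timestamped segments.

      const { stop, promise } = whisperContext.transcribe('/path/to/audio.wav', {
        onProgress: (progress) => console.log(`progress: ${progress}%`),
        onNewSegments: (result) => console.log('new segments', result),
      })
      const { result, segments } = await promise // assumed result shape
      // call stop() to abort the job early
  */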

  /**
   * Transcribe audio data (base64 encoded float32 PCM data)
   */
  transcribeData(data, options = {}) {
    return this.transcribeWithNativeMethod('transcribeData', data, options)
  }

  /** Transcribe the microphone audio stream; requires the microphone user permission */
  async transcribeRealtime(options = {}) {
    let lastTranscribePayload
    const slices = []
    let sliceIndex = 0
    let tOffset = 0

    const putSlice = (payload) => {
      if (!payload.isUseSlices || !payload.data) return
      if (sliceIndex !== payload.sliceIndex) {
        // Entering a new slice: carry over the end time of the previous one
        const { segments = [] } = slices[sliceIndex]?.data || {}
        tOffset = segments[segments.length - 1]?.t1 || 0
      }
      ;({ sliceIndex } = payload)
      slices[sliceIndex] = {
        ...payload,
        data: {
          ...payload.data,
          // Offset slice-local timestamps so they are absolute within the session
          segments: payload.data.segments.map((segment) => ({
            ...segment,
            t0: segment.t0 + tOffset,
            t1: segment.t1 + tOffset,
          })) || [],
        },
      }
    }

    const mergeSlicesIfNeeded = (payload) => {
      if (!payload.isUseSlices) return payload
      const mergedPayload = {}
      slices.forEach((slice) => {
        mergedPayload.data = {
          result: (mergedPayload.data?.result || '') + (slice.data?.result || ''),
          segments: [...(mergedPayload?.data?.segments || []), ...(slice.data?.segments || [])],
        }
        mergedPayload.processTime = slice.processTime
        mergedPayload.recordingTime = (mergedPayload?.recordingTime || 0) + slice.recordingTime
      })
      return { ...payload, ...mergedPayload, slices }
    }

    let prevAudioSession
    if (Platform.OS === 'ios' && options?.audioSessionOnStartIos) {
      // iOS: Remember current audio session state
      if (options?.audioSessionOnStopIos === 'restore') {
        const categoryResult = await AudioSessionIos.getCurrentCategory()
        const mode = await AudioSessionIos.getCurrentMode()
        prevAudioSession = {
          ...categoryResult,
          mode,
          active: false, // TODO: Need to check isOtherAudioPlaying to set active
        }
      }
      // iOS: Update audio session state
      await updateAudioSession(options?.audioSessionOnStartIos)
    }
    if (Platform.OS === 'ios' && typeof options?.audioSessionOnStopIos === 'object') {
      prevAudioSession = options?.audioSessionOnStopIos
    }

    const jobId = Math.floor(Math.random() * 10000)
    try {
      await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
    } catch (e) {
      if (prevAudioSession) await updateAudioSession(prevAudioSession)
      throw e
    }

    return {
      stop: async () => {
        await RNWhisper.abortTranscribe(this.id, jobId)
        if (prevAudioSession) await updateAudioSession(prevAudioSession)
      },
      subscribe: (callback) => {
        let transcribeListener = EventEmitter.addListener(EVENT_ON_REALTIME_TRANSCRIBE, (evt) => {
          const { contextId, payload } = evt
          if (contextId !== this.id || evt.jobId !== jobId) return
          lastTranscribePayload = payload
          putSlice(payload)
          callback({
            contextId,
            jobId: evt.jobId,
            ...mergeSlicesIfNeeded(payload),
          })
        })
        let endListener = EventEmitter.addListener(EVENT_ON_REALTIME_TRANSCRIBE_END, (evt) => {
          const { contextId, payload } = evt
          if (contextId !== this.id || evt.jobId !== jobId) return
          const lastPayload = { ...lastTranscribePayload, ...payload }
          putSlice(lastPayload)
          callback({
            contextId,
            jobId: evt.jobId,
            ...mergeSlicesIfNeeded(lastPayload),
            isCapturing: false,
          })
          if (transcribeListener) {
            transcribeListener.remove()
            transcribeListener = null
          }
          if (endListener) {
            endListener.remove()
            endListener = null
          }
        })
      },
    }
  }
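
  /*
    Usage sketch for transcribeRealtime(): a minimal example assuming the
    microphone permission is already granted. realtimeAudioSec and the
    AudioSessionIos enum members are assumed option/enum names; the callback
    fields come from the merged payload built by putSlice/mergeSlicesIfNeeded
    above, with isCapturing set to false on the end event.

      const { stop, subscribe } = await whisperContext.transcribeRealtime({
        realtimeAudioSec: 60, // assumed option name
        audioSessionOnStartIos: {
          category: AudioSessionIos.Category.PlayAndRecord, // assumed enum member
          options: [AudioSessionIos.CategoryOption.MixWithOthers], // assumed enum member
        },
        audioSessionOnStopIos: 'restore',
      })
      subscribe(({ isCapturing, data, processTime, recordingTime }) => {
        console.log(data?.result, processTime, recordingTime)
        if (!isCapturing) console.log('capture finished')
      })
      // later: await stop()
  */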

  async bench(maxThreads) {
    const result = await RNWhisper.bench(this.id, maxThreads)
    const [config, nThreads, encodeMs, decodeMs, batchMs, promptMs] = JSON.parse(result)
    return { config, nThreads, encodeMs, decodeMs, batchMs, promptMs }
  }

  async release() {
    return RNWhisper.releaseContext(this.id)
  }
}

const coreMLModelAssetPaths = [
  'analytics/coremldata.bin',
  'weights/weight.bin',
  'model.mil',
  'coremldata.bin',
]

export async function initWhisper({
  filePath,
  coreMLModelAsset,
  isBundleAsset,
  useGpu = true,
  useCoreMLIos = true,
  useFlashAttn = false,
}) {
  let path = ''
  let coreMLAssets
  if (coreMLModelAsset) {
    const { filename, assets } = coreMLModelAsset
    if (filename && assets) {
      coreMLAssets = assets
        ?.map((asset) => {
          if (typeof asset === 'number') {
            const { uri } = Image.resolveAssetSource(asset)
            const filepath = coreMLModelAssetPaths.find((p) => uri.includes(p))
            if (filepath) {
              return { uri, filepath: `${filename}/${filepath}` }
            }
          } else if (typeof asset === 'string') {
            return { uri: asset, filepath: `${filename}/${asset}` }
          }
          return undefined
        })
        .filter((asset) => asset !== undefined)
    }
  }
  if (typeof filePath === 'number') {
    try {
      const source = Image.resolveAssetSource(filePath)
      if (source) {
        path = source.uri
      }
    } catch (e) {
      throw new Error(`Invalid asset: ${filePath}`)
    }
  } else {
    if (!isBundleAsset && filePath.startsWith('http'))
      throw new Error('Transcribing a remote file is not supported, please download it first')
    path = filePath
  }
  if (path.startsWith('file://')) path = path.slice(7)
  const { contextId, gpu, reasonNoGPU } = await RNWhisper.initContext({
    filePath: path,
    isBundleAsset: !!isBundleAsset,
    useFlashAttn,
    useGpu,
    useCoreMLIos,
    // Only development mode needs to download Core ML model assets (from the packager server)
    downloadCoreMLAssets: __DEV__ && !!coreMLAssets,
    coreMLAssets,
  })
  return new WhisperContext({ contextId, gpu, reasonNoGPU })
}

export async function releaseAllWhisper() {
  return RNWhisper.releaseAllContexts()
}

/** Current version of whisper.cpp */
export const libVersion = version

const { useCoreML, coreMLAllowFallback } = RNWhisper.getConstants?.() || {}

/** Whether Core ML models are used on iOS */
export const isUseCoreML = !!useCoreML

/** Whether fallback to CPU is allowed if loading the Core ML model fails */
export const isCoreMLAllowFallback = !!coreMLAllowFallback

export { AudioSessionIos }
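
/*
  Usage sketch for initWhisper(): loading a model bundled through Metro; the
  asset path is a placeholder and assumes metro.config.js includes 'bin' in
  assetExts. gpu/reasonNoGPU report whether the native side enabled GPU
  inference.

    const whisperContext = await initWhisper({
      filePath: require('../assets/ggml-tiny.en.bin'), // placeholder asset
    })
    console.log('gpu:', whisperContext.gpu, whisperContext.reasonNoGPU)
    // ... whisperContext.transcribe(...) / transcribeRealtime(...) ...
    await whisperContext.release() // or releaseAllWhisper()
*/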

//
// VAD (Voice Activity Detection) Context
//

export class WhisperVadContext {
  gpu = false
  reasonNoGPU = ''

  constructor({ contextId, gpu, reasonNoGPU }) {
    this.id = contextId
    this.gpu = gpu
    this.reasonNoGPU = reasonNoGPU
  }

  /**
   * Detect speech segments in an audio file (path or base64 encoded wav file)
   * base64: needs the `data:audio/wav;base64,` prefix
   */
  async detectSpeech(filePathOrBase64, options = {}) {
    let path = ''
    if (typeof filePathOrBase64 === 'number') {
      try {
        const source = Image.resolveAssetSource(filePathOrBase64)
        if (source) path = source.uri
      } catch (e) {
        throw new Error(`Invalid asset: ${filePathOrBase64}`)
      }
    } else {
      if (filePathOrBase64.startsWith('http'))
        throw new Error('Running VAD on a remote file is not supported, please download it first')
      path = filePathOrBase64
    }
    if (path.startsWith('file://')) path = path.slice(7)
    // Check if this is base64 encoded audio data
    if (path.startsWith('data:audio/')) {
      // This is base64 encoded audio data, use the raw data method
      return RNWhisper.vadDetectSpeech(this.id, path, options)
    } else {
      // This is a file path, use the file method
      return RNWhisper.vadDetectSpeechFile(this.id, path, options)
    }
  }

  /**
   * Detect speech segments in raw audio data (base64 encoded float32 PCM data)
   */
  async detectSpeechData(audioData, options = {}) {
    return RNWhisper.vadDetectSpeech(this.id, audioData, options)
  }

  async release() {
    return RNWhisper.releaseVadContext(this.id)
  }
}
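
/*
  Usage sketch for detectSpeech(): the same entry point handles both inputs; a
  plain path is routed to vadDetectSpeechFile and a data URI to vadDetectSpeech,
  as in the dispatch above. vadContext is assumed to come from initWhisperVad()
  below; the option name is an assumption based on whisper.cpp's VAD parameters.

    const segments = await vadContext.detectSpeech('/path/to/audio.wav', {
      threshold: 0.5, // assumed option: speech probability threshold
    })
    const fromBase64 = await vadContext.detectSpeech(`data:audio/wav;base64,${wavBase64}`)
*/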

/**
 * Initialize a VAD context for voice activity detection
 * @param options VAD context options
 * @returns Promise resolving to WhisperVadContext instance
 */
export async function initWhisperVad({ filePath, isBundleAsset, useGpu = true, nThreads }) {
  let path = ''
  if (typeof filePath === 'number') {
    try {
      const source = Image.resolveAssetSource(filePath)
      if (source) {
        path = source.uri
      }
    } catch (e) {
      throw new Error(`Invalid asset: ${filePath}`)
    }
  } else {
    if (!isBundleAsset && filePath.startsWith('http'))
      throw new Error('Running VAD on a remote file is not supported, please download it first')
    path = filePath
  }
  if (path.startsWith('file://')) path = path.slice(7)
  const { contextId, gpu, reasonNoGPU } = await RNWhisper.initVadContext({
    filePath: path,
    isBundleAsset: !!isBundleAsset,
    useGpu,
    nThreads,
  })
  return new WhisperVadContext({ contextId, gpu, reasonNoGPU })
}

/**
 * Release all VAD contexts and free their memory
 * @returns Promise resolving when all contexts are released
 */
export async function releaseAllWhisperVad() {
  return RNWhisper.releaseAllVadContexts()
}
//# sourceMappingURL=index.js.map
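
/*
  Usage sketch for the VAD lifecycle: init, detect, release. The Silero model
  asset path is a placeholder, and the segment shape ({ t0, t1 } timestamps) is
  an assumption about what the native VAD methods resolve to.

    const vadContext = await initWhisperVad({
      filePath: require('../assets/ggml-silero-vad.bin'), // placeholder asset
      useGpu: true,
    })
    const segments = await vadContext.detectSpeech('/path/to/audio.wav')
    segments.forEach(({ t0, t1 }) => console.log(`speech from ${t0} to ${t1}`))
    await vadContext.release() // or releaseAllWhisperVad()
*/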