whisper.rn

React Native binding of whisper.cpp

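Before the compiled source, a minimal usage sketch in TypeScript. The model path and file name are hypothetical placeholders, and the `language` option is shown only as an example of a pass-through transcribe option; the calls themselves (initWhisper, transcribe, release, the onProgress callback, and the gpu/reasonNoGPU fields) correspond to the API defined in the module below.

import { initWhisper, libVersion } from 'whisper.rn'

// Hypothetical model location: bundle the model with the app or download it to the
// device first (remote http(s) URLs are rejected by initWhisper and transcribe).
const modelPath = 'file:///path/to/ggml-tiny.en.bin'

async function transcribeWavFile(wavPath: string) {
  console.log('whisper.cpp version:', libVersion)

  const whisperContext = await initWhisper({ filePath: modelPath })
  console.log('GPU:', whisperContext.gpu, whisperContext.reasonNoGPU)

  const { stop, promise } = whisperContext.transcribe(wavPath, {
    language: 'en',
    onProgress: (progress) => console.log(`progress: ${progress}%`),
  })
  // stop() can be called to abort the job before it finishes.

  const { result } = await promise
  console.log('text:', result)

  await whisperContext.release()
}

The compiled module source follows: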
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); Object.defineProperty(exports, "AudioSessionIos", { enumerable: true, get: function () { return _AudioSessionIos.default; } }); exports.WhisperContext = void 0; exports.initWhisper = initWhisper; exports.libVersion = exports.isUseCoreML = exports.isCoreMLAllowFallback = void 0; exports.releaseAllWhisper = releaseAllWhisper; var _reactNative = require("react-native"); var _NativeRNWhisper = _interopRequireDefault(require("./NativeRNWhisper")); var _AudioSessionIos = _interopRequireDefault(require("./AudioSessionIos")); var _version = require("./version.json"); var _RNWhisper$getConstan; function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } let EventEmitter; if (_reactNative.Platform.OS === 'ios') { // @ts-ignore EventEmitter = new _reactNative.NativeEventEmitter(_NativeRNWhisper.default); } if (_reactNative.Platform.OS === 'android') { EventEmitter = _reactNative.DeviceEventEmitter; } const EVENT_ON_TRANSCRIBE_PROGRESS = '@RNWhisper_onTranscribeProgress'; const EVENT_ON_TRANSCRIBE_NEW_SEGMENTS = '@RNWhisper_onTranscribeNewSegments'; const EVENT_ON_REALTIME_TRANSCRIBE = '@RNWhisper_onRealtimeTranscribe'; const EVENT_ON_REALTIME_TRANSCRIBE_END = '@RNWhisper_onRealtimeTranscribeEnd'; const updateAudioSession = async setting => { await _AudioSessionIos.default.setCategory(setting.category, setting.options || []); if (setting.mode) { await _AudioSessionIos.default.setMode(setting.mode); } await _AudioSessionIos.default.setActive(setting.active ?? true); }; class WhisperContext { gpu = false; reasonNoGPU = ''; constructor(_ref) { let { contextId, gpu, reasonNoGPU } = _ref; this.id = contextId; this.gpu = gpu; this.reasonNoGPU = reasonNoGPU; } transcribeWithNativeMethod(method, data) { let options = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {}; const jobId = Math.floor(Math.random() * 10000); const { onProgress, onNewSegments, ...rest } = options; let progressListener; let lastProgress = 0; if (onProgress) { progressListener = EventEmitter.addListener(EVENT_ON_TRANSCRIBE_PROGRESS, evt => { const { contextId, progress } = evt; if (contextId !== this.id || evt.jobId !== jobId) return; lastProgress = progress > 100 ? 100 : progress; onProgress(lastProgress); }); } const removeProgressListener = () => { if (progressListener) { progressListener.remove(); progressListener = null; } }; let newSegmentsListener; if (onNewSegments) { newSegmentsListener = EventEmitter.addListener(EVENT_ON_TRANSCRIBE_NEW_SEGMENTS, evt => { const { contextId, result } = evt; if (contextId !== this.id || evt.jobId !== jobId) return; onNewSegments(result); }); } const removeNewSegmenetsListener = () => { if (newSegmentsListener) { newSegmentsListener.remove(); newSegmentsListener = null; } }; return { stop: async () => { await _NativeRNWhisper.default.abortTranscribe(this.id, jobId); removeProgressListener(); removeNewSegmenetsListener(); }, promise: _NativeRNWhisper.default[method](this.id, jobId, data, { ...rest, onProgress: !!onProgress, onNewSegments: !!onNewSegments }).then(result => { removeProgressListener(); removeNewSegmenetsListener(); if (!result.isAborted && lastProgress !== 100) { // Handle the case that the last progress event is not triggered onProgress === null || onProgress === void 0 ? 
void 0 : onProgress(100); } return result; }).catch(e => { removeProgressListener(); removeNewSegmenetsListener(); throw e; }) }; } /** * Transcribe audio file (path or base64 encoded wav file) * base64: need add `data:audio/wav;base64,` prefix */ transcribe(filePathOrBase64) { let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; let path = ''; if (typeof filePathOrBase64 === 'number') { try { const source = _reactNative.Image.resolveAssetSource(filePathOrBase64); if (source) path = source.uri; } catch (e) { throw new Error(`Invalid asset: ${filePathOrBase64}`); } } else { if (filePathOrBase64.startsWith('http')) throw new Error('Transcribe remote file is not supported, please download it first'); path = filePathOrBase64; } if (path.startsWith('file://')) path = path.slice(7); return this.transcribeWithNativeMethod('transcribeFile', path, options); } /** * Transcribe audio data (base64 encoded float32 PCM data) */ transcribeData(data) { let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; return this.transcribeWithNativeMethod('transcribeData', data, options); } /** Transcribe the microphone audio stream, the microphone user permission is required */ async transcribeRealtime() { let options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; let lastTranscribePayload; const slices = []; let sliceIndex = 0; let tOffset = 0; const putSlice = payload => { if (!payload.isUseSlices || !payload.data) return; if (sliceIndex !== payload.sliceIndex) { var _slices$sliceIndex, _segments; const { segments = [] } = ((_slices$sliceIndex = slices[sliceIndex]) === null || _slices$sliceIndex === void 0 ? void 0 : _slices$sliceIndex.data) || {}; tOffset = ((_segments = segments[segments.length - 1]) === null || _segments === void 0 ? void 0 : _segments.t1) || 0; } ; ({ sliceIndex } = payload); slices[sliceIndex] = { ...payload, data: { ...payload.data, segments: payload.data.segments.map(segment => ({ ...segment, t0: segment.t0 + tOffset, t1: segment.t1 + tOffset })) || [] } }; }; const mergeSlicesIfNeeded = payload => { if (!payload.isUseSlices) return payload; const mergedPayload = {}; slices.forEach(slice => { var _mergedPayload$data, _slice$data, _mergedPayload$data2, _slice$data2; mergedPayload.data = { result: (((_mergedPayload$data = mergedPayload.data) === null || _mergedPayload$data === void 0 ? void 0 : _mergedPayload$data.result) || '') + (((_slice$data = slice.data) === null || _slice$data === void 0 ? void 0 : _slice$data.result) || ''), segments: [...((mergedPayload === null || mergedPayload === void 0 ? void 0 : (_mergedPayload$data2 = mergedPayload.data) === null || _mergedPayload$data2 === void 0 ? void 0 : _mergedPayload$data2.segments) || []), ...(((_slice$data2 = slice.data) === null || _slice$data2 === void 0 ? void 0 : _slice$data2.segments) || [])] }; mergedPayload.processTime = slice.processTime; mergedPayload.recordingTime = ((mergedPayload === null || mergedPayload === void 0 ? void 0 : mergedPayload.recordingTime) || 0) + slice.recordingTime; }); return { ...payload, ...mergedPayload, slices }; }; let prevAudioSession; if (_reactNative.Platform.OS === 'ios' && options !== null && options !== void 0 && options.audioSessionOnStartIos) { // iOS: Remember current audio session state if ((options === null || options === void 0 ? 
void 0 : options.audioSessionOnStopIos) === 'restore') { const categoryResult = await _AudioSessionIos.default.getCurrentCategory(); const mode = await _AudioSessionIos.default.getCurrentMode(); prevAudioSession = { ...categoryResult, mode, active: false // TODO: Need to check isOtherAudioPlaying to set active }; } // iOS: Update audio session state await updateAudioSession(options === null || options === void 0 ? void 0 : options.audioSessionOnStartIos); } if (_reactNative.Platform.OS === 'ios' && typeof (options === null || options === void 0 ? void 0 : options.audioSessionOnStopIos) === 'object') { prevAudioSession = options === null || options === void 0 ? void 0 : options.audioSessionOnStopIos; } const jobId = Math.floor(Math.random() * 10000); try { await _NativeRNWhisper.default.startRealtimeTranscribe(this.id, jobId, options); } catch (e) { if (prevAudioSession) await updateAudioSession(prevAudioSession); throw e; } return { stop: async () => { await _NativeRNWhisper.default.abortTranscribe(this.id, jobId); if (prevAudioSession) await updateAudioSession(prevAudioSession); }, subscribe: callback => { let transcribeListener = EventEmitter.addListener(EVENT_ON_REALTIME_TRANSCRIBE, evt => { const { contextId, payload } = evt; if (contextId !== this.id || evt.jobId !== jobId) return; lastTranscribePayload = payload; putSlice(payload); callback({ contextId, jobId: evt.jobId, ...mergeSlicesIfNeeded(payload) }); }); let endListener = EventEmitter.addListener(EVENT_ON_REALTIME_TRANSCRIBE_END, evt => { const { contextId, payload } = evt; if (contextId !== this.id || evt.jobId !== jobId) return; const lastPayload = { ...lastTranscribePayload, ...payload }; putSlice(lastPayload); callback({ contextId, jobId: evt.jobId, ...mergeSlicesIfNeeded(lastPayload), isCapturing: false }); if (transcribeListener) { transcribeListener.remove(); transcribeListener = null; } if (endListener) { endListener.remove(); endListener = null; } }); } }; } async bench(maxThreads) { const result = await _NativeRNWhisper.default.bench(this.id, maxThreads); const [config, nThreads, encodeMs, decodeMs, batchMs, promptMs] = JSON.parse(result); return { config, nThreads, encodeMs, decodeMs, batchMs, promptMs }; } async release() { return _NativeRNWhisper.default.releaseContext(this.id); } } exports.WhisperContext = WhisperContext; const coreMLModelAssetPaths = ['analytics/coremldata.bin', 'weights/weight.bin', 'model.mil', 'coremldata.bin']; async function initWhisper(_ref2) { let { filePath, coreMLModelAsset, isBundleAsset, useGpu = true, useCoreMLIos = true, useFlashAttn = false } = _ref2; let path = ''; let coreMLAssets; if (coreMLModelAsset) { const { filename, assets } = coreMLModelAsset; if (filename && assets) { coreMLAssets = assets === null || assets === void 0 ? 
void 0 : assets.map(asset => { if (typeof asset === 'number') { const { uri } = _reactNative.Image.resolveAssetSource(asset); const filepath = coreMLModelAssetPaths.find(p => uri.includes(p)); if (filepath) { return { uri, filepath: `${filename}/${filepath}` }; } } else if (typeof asset === 'string') { return { uri: asset, filepath: `${filename}/${asset}` }; } return undefined; }).filter(asset => asset !== undefined); } } if (typeof filePath === 'number') { try { const source = _reactNative.Image.resolveAssetSource(filePath); if (source) { path = source.uri; } } catch (e) { throw new Error(`Invalid asset: ${filePath}`); } } else { if (!isBundleAsset && filePath.startsWith('http')) throw new Error('Transcribe remote file is not supported, please download it first'); path = filePath; } if (path.startsWith('file://')) path = path.slice(7); const { contextId, gpu, reasonNoGPU } = await _NativeRNWhisper.default.initContext({ filePath: path, isBundleAsset: !!isBundleAsset, useFlashAttn, useGpu, useCoreMLIos, // Only development mode need download Core ML model assets (from packager server) downloadCoreMLAssets: __DEV__ && !!coreMLAssets, coreMLAssets }); return new WhisperContext({ contextId, gpu, reasonNoGPU }); } async function releaseAllWhisper() { return _NativeRNWhisper.default.releaseAllContexts(); } /** Current version of whisper.cpp */ const libVersion = _version.version; exports.libVersion = libVersion; const { useCoreML, coreMLAllowFallback } = ((_RNWhisper$getConstan = _NativeRNWhisper.default.getConstants) === null || _RNWhisper$getConstan === void 0 ? void 0 : _RNWhisper$getConstan.call(_NativeRNWhisper.default)) || {}; /** Is use CoreML models on iOS */ const isUseCoreML = !!useCoreML; /** Is allow fallback to CPU if load CoreML model failed */ exports.isUseCoreML = isUseCoreML; const isCoreMLAllowFallback = !!coreMLAllowFallback; exports.isCoreMLAllowFallback = isCoreMLAllowFallback; //# sourceMappingURL=index.js.map
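
For the realtime path, a sketch under the same assumptions: microphone permission has already been granted, the model path is a placeholder, and the AudioSessionIos enum members shown follow the package's iOS audio-session helper and should be checked against the installed version. The option object shape matches what updateAudioSession above reads (category, options, mode).

import { initWhisper, AudioSessionIos } from 'whisper.rn'

async function transcribeFromMicrophone() {
  // Hypothetical model path, as in the file-transcription sketch above.
  const whisperContext = await initWhisper({ filePath: 'file:///path/to/ggml-tiny.en.bin' })

  const { stop, subscribe } = await whisperContext.transcribeRealtime({
    language: 'en',
    // iOS only: switch to a record-capable audio session, then restore the previous one on stop.
    audioSessionOnStartIos: {
      category: AudioSessionIos.Category.PlayAndRecord,
      options: [AudioSessionIos.CategoryOption.MixWithOthers],
      mode: AudioSessionIos.Mode.Default,
    },
    audioSessionOnStopIos: 'restore',
  })

  subscribe((evt) => {
    const { isCapturing, data, processTime, recordingTime } = evt
    console.log(`capturing=${isCapturing} recorded=${recordingTime}ms processed=${processTime}ms`)
    console.log('text so far:', data?.result)
  })

  // Stop capturing after 30 seconds (the stream also ends on its own once the
  // configured realtime audio length is reached).
  setTimeout(() => { stop() }, 30000)
}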