// whisper.rn: React Native binding of whisper.cpp
import { NativeEventEmitter, DeviceEventEmitter, Platform, Image } from 'react-native';
import RNWhisper from './NativeRNWhisper';
import AudioSessionIos from './AudioSessionIos';
import { version } from './version.json';
let EventEmitter;
if (Platform.OS === 'ios') {
// @ts-ignore
EventEmitter = new NativeEventEmitter(RNWhisper);
}
if (Platform.OS === 'android') {
EventEmitter = DeviceEventEmitter;
}
const EVENT_ON_TRANSCRIBE_PROGRESS = '@RNWhisper_onTranscribeProgress';
const EVENT_ON_TRANSCRIBE_NEW_SEGMENTS = '@RNWhisper_onTranscribeNewSegments';
const EVENT_ON_REALTIME_TRANSCRIBE = '@RNWhisper_onRealtimeTranscribe';
const EVENT_ON_REALTIME_TRANSCRIBE_END = '@RNWhisper_onRealtimeTranscribeEnd';
const updateAudioSession = async setting => {
await AudioSessionIos.setCategory(setting.category, setting.options || []);
if (setting.mode) {
await AudioSessionIos.setMode(setting.mode);
}
await AudioSessionIos.setActive(setting.active ?? true);
};
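// Illustrative sketch (not part of the library): the shape of the settings
// object `updateAudioSession` consumes. The string values below are assumptions
// for the example; see the exported AudioSessionIos module for the categories,
// options, and modes supported by the native side.
//
//   await updateAudioSession({
//     category: 'PlayAndRecord',  // passed to AudioSessionIos.setCategory
//     options: ['MixWithOthers'], // category options, defaults to []
//     mode: 'Default',            // optional, passed to setMode when present
//     active: true,               // defaults to true
//   })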
export class WhisperContext {
gpu = false;
reasonNoGPU = '';
  constructor({ contextId, gpu, reasonNoGPU }) {
this.id = contextId;
this.gpu = gpu;
this.reasonNoGPU = reasonNoGPU;
}
  transcribeWithNativeMethod(method, data, options = {}) {
const jobId = Math.floor(Math.random() * 10000);
const {
onProgress,
onNewSegments,
...rest
} = options;
let progressListener;
let lastProgress = 0;
if (onProgress) {
progressListener = EventEmitter.addListener(EVENT_ON_TRANSCRIBE_PROGRESS, evt => {
const {
contextId,
progress
} = evt;
if (contextId !== this.id || evt.jobId !== jobId) return;
lastProgress = progress > 100 ? 100 : progress;
onProgress(lastProgress);
});
}
const removeProgressListener = () => {
if (progressListener) {
progressListener.remove();
progressListener = null;
}
};
let newSegmentsListener;
if (onNewSegments) {
newSegmentsListener = EventEmitter.addListener(EVENT_ON_TRANSCRIBE_NEW_SEGMENTS, evt => {
const {
contextId,
result
} = evt;
if (contextId !== this.id || evt.jobId !== jobId) return;
onNewSegments(result);
});
}
    const removeNewSegmentsListener = () => {
if (newSegmentsListener) {
newSegmentsListener.remove();
newSegmentsListener = null;
}
};
return {
stop: async () => {
await RNWhisper.abortTranscribe(this.id, jobId);
removeProgressListener();
        removeNewSegmentsListener();
},
promise: RNWhisper[method](this.id, jobId, data, {
...rest,
onProgress: !!onProgress,
onNewSegments: !!onNewSegments
}).then(result => {
removeProgressListener();
        removeNewSegmentsListener();
        if (!result.isAborted && lastProgress !== 100) {
          // Handle the case where the last progress event was not emitted
          onProgress?.(100);
        }
return result;
}).catch(e => {
removeProgressListener();
        removeNewSegmentsListener();
throw e;
})
};
}
  /**
   * Transcribe an audio file (path or base64-encoded WAV file)
   * base64 input must include the `data:audio/wav;base64,` prefix
   */
  transcribe(filePathOrBase64, options = {}) {
let path = '';
if (typeof filePathOrBase64 === 'number') {
try {
const source = Image.resolveAssetSource(filePathOrBase64);
if (source) path = source.uri;
} catch (e) {
throw new Error(`Invalid asset: ${filePathOrBase64}`);
}
} else {
      if (filePathOrBase64.startsWith('http')) throw new Error('Transcribing a remote file is not supported, please download it first');
path = filePathOrBase64;
}
if (path.startsWith('file://')) path = path.slice(7);
return this.transcribeWithNativeMethod('transcribeFile', path, options);
}
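  // Usage sketch (illustrative, not part of the library): transcribing a local
  // WAV file on an existing WhisperContext with progress and incremental
  // segment callbacks. The file path, the `language` option, and the exact
  // shape of the resolved result are assumptions for the example.
  //
  //   const { stop, promise } = whisperContext.transcribe('/path/to/sample.wav', {
  //     language: 'en',
  //     onProgress: (progress) => console.log(`progress: ${progress}%`),
  //     onNewSegments: (result) => console.log(result),
  //   })
  //   const { result, segments } = await promise
  //   // result: full transcript text; segments: timestamped pieces
  //   // call stop() to abort the job early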
/**
* Transcribe audio data (base64 encoded float32 PCM data)
*/
  transcribeData(data, options = {}) {
return this.transcribeWithNativeMethod('transcribeData', data, options);
}
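  // Usage sketch (illustrative, not part of the library): `transcribeData`
  // expects base64-encoded float32 PCM samples (16 kHz mono is assumed here).
  // The `base64FromFloat32` helper below is hypothetical and not provided by
  // whisper.rn; it relies on a global `btoa` being available.
  //
  //   const base64FromFloat32 = (samples) => {
  //     const bytes = new Uint8Array(new Float32Array(samples).buffer)
  //     let binary = ''
  //     for (let i = 0; i < bytes.length; i += 1) binary += String.fromCharCode(bytes[i])
  //     return globalThis.btoa(binary)
  //   }
  //   const { promise } = whisperContext.transcribeData(base64FromFloat32(pcmSamples))
  //   const { result } = await promise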
  /** Transcribe the microphone audio stream; microphone permission is required */
  async transcribeRealtime(options = {}) {
let lastTranscribePayload;
const slices = [];
let sliceIndex = 0;
let tOffset = 0;
    const putSlice = payload => {
      if (!payload.isUseSlices || !payload.data) return;
      if (sliceIndex !== payload.sliceIndex) {
        const { segments = [] } = slices[sliceIndex]?.data || {};
        tOffset = segments[segments.length - 1]?.t1 || 0;
      }
      ({ sliceIndex } = payload);
      slices[sliceIndex] = {
        ...payload,
        data: {
          ...payload.data,
          segments: payload.data.segments.map(segment => ({
            ...segment,
            t0: segment.t0 + tOffset,
            t1: segment.t1 + tOffset
          }))
        }
      };
    };
    const mergeSlicesIfNeeded = payload => {
      if (!payload.isUseSlices) return payload;
      const mergedPayload = {};
      slices.forEach(slice => {
        mergedPayload.data = {
          result: (mergedPayload.data?.result || '') + (slice.data?.result || ''),
          segments: [...(mergedPayload.data?.segments || []), ...(slice.data?.segments || [])]
        };
        mergedPayload.processTime = slice.processTime;
        mergedPayload.recordingTime = (mergedPayload.recordingTime || 0) + slice.recordingTime;
      });
      return {
        ...payload,
        ...mergedPayload,
        slices
      };
    };
let prevAudioSession;
    if (Platform.OS === 'ios' && options?.audioSessionOnStartIos) {
      // iOS: Remember the current audio session state
      if (options?.audioSessionOnStopIos === 'restore') {
        const categoryResult = await AudioSessionIos.getCurrentCategory();
        const mode = await AudioSessionIos.getCurrentMode();
        prevAudioSession = {
          ...categoryResult,
          mode,
          active: false // TODO: Need to check isOtherAudioPlaying to set active
        };
      }
      // iOS: Update the audio session state
      await updateAudioSession(options?.audioSessionOnStartIos);
    }
    if (Platform.OS === 'ios' && typeof options?.audioSessionOnStopIos === 'object') {
      prevAudioSession = options.audioSessionOnStopIos;
    }
const jobId = Math.floor(Math.random() * 10000);
try {
await RNWhisper.startRealtimeTranscribe(this.id, jobId, options);
} catch (e) {
if (prevAudioSession) await updateAudioSession(prevAudioSession);
throw e;
}
return {
stop: async () => {
await RNWhisper.abortTranscribe(this.id, jobId);
if (prevAudioSession) await updateAudioSession(prevAudioSession);
},
subscribe: callback => {
let transcribeListener = EventEmitter.addListener(EVENT_ON_REALTIME_TRANSCRIBE, evt => {
const {
contextId,
payload
} = evt;
if (contextId !== this.id || evt.jobId !== jobId) return;
lastTranscribePayload = payload;
putSlice(payload);
callback({
contextId,
jobId: evt.jobId,
...mergeSlicesIfNeeded(payload)
});
});
let endListener = EventEmitter.addListener(EVENT_ON_REALTIME_TRANSCRIBE_END, evt => {
const {
contextId,
payload
} = evt;
if (contextId !== this.id || evt.jobId !== jobId) return;
const lastPayload = {
...lastTranscribePayload,
...payload
};
putSlice(lastPayload);
callback({
contextId,
jobId: evt.jobId,
...mergeSlicesIfNeeded(lastPayload),
isCapturing: false
});
if (transcribeListener) {
transcribeListener.remove();
transcribeListener = null;
}
if (endListener) {
endListener.remove();
endListener = null;
}
});
}
};
}
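  // Usage sketch (illustrative, not part of the library): realtime
  // transcription from the microphone. Option names like `realtimeAudioSec`
  // and `realtimeAudioSliceSec` are assumptions here; microphone permission
  // must already be granted.
  //
  //   const { stop, subscribe } = await whisperContext.transcribeRealtime({
  //     realtimeAudioSec: 60,
  //     realtimeAudioSliceSec: 25,
  //     audioSessionOnStartIos: { category: 'PlayAndRecord', options: [] },
  //     audioSessionOnStopIos: 'restore',
  //   })
  //   subscribe((evt) => {
  //     const { isCapturing, data, processTime, recordingTime } = evt
  //     console.log(data?.result, { isCapturing, processTime, recordingTime })
  //     // isCapturing becomes false on the end event; stop() aborts capture early
  //   })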
async bench(maxThreads) {
const result = await RNWhisper.bench(this.id, maxThreads);
const [config, nThreads, encodeMs, decodeMs, batchMs, promptMs] = JSON.parse(result);
return {
config,
nThreads,
encodeMs,
decodeMs,
batchMs,
promptMs
};
}
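  // Usage sketch (illustrative): run the whisper.cpp benchmark on this context
  // with a chosen maximum thread count and read the timings it returns.
  //
  //   const { config, nThreads, encodeMs, decodeMs, batchMs, promptMs } =
  //     await whisperContext.bench(4)
  //   console.log(`encode ${encodeMs} ms / decode ${decodeMs} ms on ${nThreads} threads (${config})`)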
async release() {
return RNWhisper.releaseContext(this.id);
}
}
const coreMLModelAssetPaths = ['analytics/coremldata.bin', 'weights/weight.bin', 'model.mil', 'coremldata.bin'];
export async function initWhisper({
  filePath,
  coreMLModelAsset,
  isBundleAsset,
  useGpu = true,
  useCoreMLIos = true,
  useFlashAttn = false
}) {
let path = '';
let coreMLAssets;
if (coreMLModelAsset) {
const {
filename,
assets
} = coreMLModelAsset;
if (filename && assets) {
      coreMLAssets = assets?.map(asset => {
if (typeof asset === 'number') {
const {
uri
} = Image.resolveAssetSource(asset);
const filepath = coreMLModelAssetPaths.find(p => uri.includes(p));
if (filepath) {
return {
uri,
filepath: `${filename}/${filepath}`
};
}
} else if (typeof asset === 'string') {
return {
uri: asset,
filepath: `${filename}/${asset}`
};
}
return undefined;
}).filter(asset => asset !== undefined);
}
}
if (typeof filePath === 'number') {
try {
const source = Image.resolveAssetSource(filePath);
if (source) {
path = source.uri;
}
} catch (e) {
throw new Error(`Invalid asset: ${filePath}`);
}
} else {
    if (!isBundleAsset && filePath.startsWith('http')) throw new Error('Loading a remote model file is not supported, please download it first');
path = filePath;
}
if (path.startsWith('file://')) path = path.slice(7);
const {
contextId,
gpu,
reasonNoGPU
} = await RNWhisper.initContext({
filePath: path,
isBundleAsset: !!isBundleAsset,
useFlashAttn,
useGpu,
useCoreMLIos,
    // Only development mode needs to download Core ML model assets (from the packager server)
downloadCoreMLAssets: __DEV__ && !!coreMLAssets,
coreMLAssets
});
return new WhisperContext({
contextId,
gpu,
reasonNoGPU
});
}
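// Usage sketch (illustrative, not part of the library): initializing a context
// from a model file, optionally with a bundled Core ML model on iOS. The model
// filenames and require() paths are assumptions for the example; the Core ML
// asset paths mirror coreMLModelAssetPaths above.
//
//   const whisperContext = await initWhisper({
//     filePath: require('./assets/ggml-base.bin'), // or an absolute path / bundle asset
//     coreMLModelAsset: __DEV__
//       ? {
//           filename: 'ggml-base-encoder.mlmodelc',
//           assets: [
//             require('./assets/ggml-base-encoder.mlmodelc/weights/weight.bin'),
//             require('./assets/ggml-base-encoder.mlmodelc/model.mil'),
//             require('./assets/ggml-base-encoder.mlmodelc/coremldata.bin'),
//           ],
//         }
//       : undefined,
//   })
//   console.log('GPU enabled:', whisperContext.gpu, whisperContext.reasonNoGPU)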
export async function releaseAllWhisper() {
return RNWhisper.releaseAllContexts();
}
/** Current version of whisper.cpp */
export const libVersion = version;
const { useCoreML, coreMLAllowFallback } = RNWhisper.getConstants?.() || {};
/** Whether Core ML models are used on iOS */
export const isUseCoreML = !!useCoreML;
/** Whether fallback to the CPU is allowed if loading the Core ML model fails */
export const isCoreMLAllowFallback = !!coreMLAllowFallback;
export { AudioSessionIos };
//
// VAD (Voice Activity Detection) Context
//
export class WhisperVadContext {
gpu = false;
reasonNoGPU = '';
  constructor({ contextId, gpu, reasonNoGPU }) {
this.id = contextId;
this.gpu = gpu;
this.reasonNoGPU = reasonNoGPU;
}
  /**
   * Detect speech segments in an audio file (path or base64-encoded WAV file)
   * base64 input must include the `data:audio/wav;base64,` prefix
   */
  async detectSpeech(filePathOrBase64, options = {}) {
let path = '';
if (typeof filePathOrBase64 === 'number') {
try {
const source = Image.resolveAssetSource(filePathOrBase64);
if (source) path = source.uri;
} catch (e) {
throw new Error(`Invalid asset: ${filePathOrBase64}`);
}
} else {
      if (filePathOrBase64.startsWith('http')) throw new Error('Running VAD on a remote file is not supported, please download it first');
path = filePathOrBase64;
}
if (path.startsWith('file://')) path = path.slice(7);
// Check if this is base64 encoded audio data
if (path.startsWith('data:audio/')) {
// This is base64 encoded audio data, use the raw data method
return RNWhisper.vadDetectSpeech(this.id, path, options);
} else {
// This is a file path, use the file method
return RNWhisper.vadDetectSpeechFile(this.id, path, options);
}
}
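  // Usage sketch (illustrative, not part of the library): detecting speech
  // segments in a local WAV file. The `threshold` option and the t0/t1 shape
  // of the returned segments are assumptions for the example.
  //
  //   const segments = await vadContext.detectSpeech('/path/to/sample.wav', {
  //     threshold: 0.5,
  //   })
  //   segments.forEach(({ t0, t1 }) => console.log(`speech from ${t0} to ${t1}`))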
/**
* Detect speech segments in raw audio data (base64 encoded float32 PCM data)
*/
  async detectSpeechData(audioData, options = {}) {
return RNWhisper.vadDetectSpeech(this.id, audioData, options);
}
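  // Usage sketch (illustrative): `detectSpeechData` takes the same
  // base64-encoded float32 PCM format as `transcribeData`; the hypothetical
  // `base64FromFloat32` helper sketched earlier can be reused here.
  //
  //   const segments = await vadContext.detectSpeechData(base64FromFloat32(pcmSamples))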
async release() {
return RNWhisper.releaseVadContext(this.id);
}
}
/**
* Initialize a VAD context for voice activity detection
* @param options VAD context options
* @returns Promise resolving to WhisperVadContext instance
*/
export async function initWhisperVad({
  filePath,
  isBundleAsset,
  useGpu = true,
  nThreads
}) {
let path = '';
if (typeof filePath === 'number') {
try {
const source = Image.resolveAssetSource(filePath);
if (source) {
path = source.uri;
}
} catch (e) {
throw new Error(`Invalid asset: ${filePath}`);
}
} else {
    if (!isBundleAsset && filePath.startsWith('http')) throw new Error('Loading a remote model file is not supported, please download it first');
path = filePath;
}
if (path.startsWith('file://')) path = path.slice(7);
const {
contextId,
gpu,
reasonNoGPU
} = await RNWhisper.initVadContext({
filePath: path,
isBundleAsset: !!isBundleAsset,
useGpu,
nThreads
});
return new WhisperVadContext({
contextId,
gpu,
reasonNoGPU
});
}
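// Usage sketch (illustrative, not part of the library): initializing a VAD
// context from a Silero VAD model file. The model filename, thread count, and
// require() path are assumptions for the example.
//
//   const vadContext = await initWhisperVad({
//     filePath: require('./assets/ggml-silero-vad.bin'),
//     useGpu: true,
//     nThreads: 4,
//   })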
/**
* Release all VAD contexts and free their memory
* @returns Promise resolving when all contexts are released
*/
export async function releaseAllWhisperVad() {
return RNWhisper.releaseAllVadContexts();
}