UNPKG

xfyun-sdk

Version:

科大讯飞语音识别 SDK,支持浏览器中实时语音听写功能

777 lines (765 loc) 29.6 kB
import CryptoJS from 'crypto-js';
import React, { useState, useRef, useEffect } from 'react';

// Host used when the caller does not supply one.
const DEFAULT_HOST = 'iat-api.xfyun.cn';
// Request path; it is part of the signed request line as well as the URL.
const IAT_PATH = '/v2/iat';
// Timeslice handed to MediaRecorder.start(): one chunk every 500 ms.
const RECORDER_TIMESLICE_MS = 500;
// How often the analyser is sampled for the volume callback.
const VOLUME_POLL_INTERVAL_MS = 100;
// Grace period between sending the end frame and closing the socket.
const SOCKET_CLOSE_DELAY_MS = 1000;

/**
 * Build the signed WebSocket URL for the iFLYTEK dictation endpoint.
 *
 * The signature is an HMAC-SHA256 over "host / date / request-line", keyed
 * with `apiSecret` and base64-encoded; the authorization string that carries
 * it is base64-encoded again and passed as a query parameter.
 *
 * @param {string} apiKey API key placed in the authorization string.
 * @param {string} apiSecret Secret used as the HMAC key.
 * @param {string} [host='iat-api.xfyun.cn'] Server host name.
 * @returns {string} Complete `wss://` URL including the signed query string.
 */
function generateAuthUrl(apiKey, apiSecret, host = DEFAULT_HOST) {
  const date = new Date().toUTCString();
  const algorithm = 'hmac-sha256';

  // String to sign: the three "headers" named in the authorization string.
  const signatureOrigin = `host: ${host}\ndate: ${date}\nGET ${IAT_PATH} HTTP/1.1`;
  const signature = CryptoJS.enc.Base64.stringify(
    CryptoJS.HmacSHA256(signatureOrigin, apiSecret)
  );

  const authorizationOrigin =
    `api_key="${apiKey}", algorithm="${algorithm}", ` +
    `headers="host date request-line", signature="${signature}"`;
  const authorization = btoa(authorizationOrigin);

  // encodeURIComponent, not encodeURI: base64 may contain '+', '/' and '=',
  // which encodeURI leaves untouched and which are not safe inside a query
  // value ('+' is commonly decoded as a space).
  const query = [
    `authorization=${encodeURIComponent(authorization)}`,
    `date=${encodeURIComponent(date)}`,
    `host=${encodeURIComponent(host)}`
  ].join('&');

  return `wss://${host}${IAT_PATH}?${query}`;
}

/**
 * Compute a volume figure for a block of samples: RMS scaled by 100.
 *
 * @param {ArrayLike<number>} array Sample values.
 * @returns {number} `sqrt(mean(x^2)) * 100`, or 0 for an empty/missing block
 *   (the unguarded formula would yield NaN from 0 / 0).
 */
function calculateVolume(array) {
  if (!array || array.length === 0) {
    return 0;
  }
  let sumOfSquares = 0;
  for (let i = 0; i < array.length; i++) {
    sumOfSquares += array[i] * array[i];
  }
  return Math.sqrt(sumOfSquares / array.length) * 100;
}

/**
 * Encode the bytes of an ArrayBuffer as a base64 string.
 *
 * @param {ArrayBuffer} buffer Binary data.
 * @returns {string} Base64 text.
 */
function arrayBufferToBase64(buffer) {
  const bytes = new Uint8Array(buffer);
  let binary = '';
  for (let i = 0; i < bytes.byteLength; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  // Global btoa (same function as window.btoa in a page) so this helper
  // matches generateAuthUrl and does not require a `window` object.
  return btoa(binary);
}

/**
 * Flatten a recognition result into plain text by concatenating the `w`
 * field of every `cw` entry of every `ws` entry, in order.
 *
 * @param {{ws?: Array<{cw?: Array<{w?: string}>}>}} result Result object.
 * @returns {string} Concatenated text; '' when there is no `ws` array.
 */
function parseXfyunResult(result) {
  if (!result || !Array.isArray(result.ws)) {
    return '';
  }
  return result.ws
    .map((segment) => {
      // Tolerate segments without candidates instead of throwing.
      const candidates = segment && Array.isArray(segment.cw) ? segment.cw : [];
      return candidates.map((candidate) => candidate.w).join('');
    })
    .join('');
}

// Defaults merged underneath the caller's options.
const DEFAULT_OPTIONS = {
  language: 'zh_cn',
  domain: 'iat',
  accent: 'mandarin',
  vadEos: 3000,
  maxAudioSize: 1024 * 1024,
  autoStart: false,
  audioFormat: 'audio/L16;rate=16000'
};

/**
 * iFLYTEK streaming speech recognizer.
 *
 * Captures microphone audio with MediaRecorder, streams it as base64 frames
 * over a WebSocket, and reports results through the supplied handlers.
 *
 * States (reported through `onStateChange`): 'idle', 'connecting',
 * 'connected', 'recording', 'stopped', 'error'.
 *
 * Handlers (all optional): `onStart()`, `onStop()`,
 * `onRecognitionResult(text, isEnd)`, `onProcess(volume)`, `onError(error)`,
 * `onStateChange(state)`.
 */
class XfyunASR {
  /**
   * @param {object} options Must contain `appId`, `apiKey` and `apiSecret`;
   *   everything else falls back to DEFAULT_OPTIONS. `punctuation` (boolean)
   *   and `hotWords` (string[]) are optional.
   * @param {object} [handlers={}] Event callbacks, see the class description.
   * @throws {Error} When any of the three credentials is missing.
   */
  constructor(options, handlers = {}) {
    this.websocket = null;
    this.recorder = null;
    this.audioContext = null;
    this.analyser = null;
    this.audioChunks = [];
    this.state = 'idle';
    this.audioDataQueue = [];
    this.recognitionResult = '';
    this.volumeTimer = null;
    this.microphoneStream = null;

    if (!options || !options.appId || !options.apiKey || !options.apiSecret) {
      throw new Error('缺少必要参数: appId, apiKey, apiSecret 不能为空');
    }
    this.options = Object.assign({}, DEFAULT_OPTIONS, options);
    this.handlers = handlers;

    // start() reports its own failures through handleError, so the returned
    // promise is intentionally not awaited here.
    if (this.options.autoStart) {
      this.start();
    }
  }

  /**
   * Begin a recognition session: acquire the microphone, then open the
   * WebSocket. Failures are reported through `onError`, not by rejection.
   *
   * @returns {Promise<void>} Settles once start-up has succeeded or failed.
   */
  start() {
    try {
      if (!navigator.mediaDevices || !window.WebSocket) {
        this.handleError({ code: 10001, message: '浏览器不支持语音识别功能,请使用现代浏览器' });
        return Promise.resolve();
      }
      if (this.state !== 'idle' && this.state !== 'stopped' && this.state !== 'error') {
        // Report only: the running session is healthy, so its state and
        // resources must be left alone.
        this.notifyError({ code: 10002, message: '语音识别已在进行中' });
        return Promise.resolve();
      }

      this.setState('connecting');
      this.recognitionResult = '';
      this.audioChunks = [];
      this.audioDataQueue = [];

      return this.initMicrophone()
        .then(() => {
          if (this.state !== 'connecting') {
            // stop() (or an error) arrived while the permission prompt was
            // pending; drop the capture that was just set up.
            this.releaseCaptureResources();
            return;
          }
          this.initWebSocket();
          if (this.handlers.onStart) {
            this.handlers.onStart();
          }
        })
        .catch((error) => {
          this.handleError({ code: 10003, message: '启动语音识别失败', data: error });
        });
    } catch (error) {
      this.handleError({ code: 10003, message: '启动语音识别失败', data: error });
      return Promise.resolve();
    }
  }

  /**
   * End the session: stop capturing, send the end frame, close the socket
   * after a short delay and fire `onStop`.
   */
  stop() {
    try {
      this.setState('stopped');
      this.releaseCaptureResources();
      this.sendEndFrame();

      // Close the socket that belongs to THIS session. Reading
      // this.websocket inside the timer would close a newer session's
      // socket if start() is called again within the delay.
      const socket = this.websocket;
      if (socket) {
        setTimeout(() => {
          if (this.websocket === socket) {
            this.websocket = null;
          }
          socket.close();
        }, SOCKET_CLOSE_DELAY_MS);
      }

      if (this.handlers.onStop) {
        this.handlers.onStop();
      }
    } catch (error) {
      this.handleError({ code: 10004, message: '停止语音识别失败', data: error });
    }
  }

  /** @returns {string} Text accumulated since the last start()/clearResult(). */
  getResult() {
    return this.recognitionResult;
  }

  /** @returns {string} Current state name. */
  getState() {
    return this.state;
  }

  /** Discard the accumulated text. */
  clearResult() {
    this.recognitionResult = '';
  }

  /**
   * Acquire the microphone and wire up the analyser and the recorder.
   *
   * @returns {Promise<void>} Rejects with an Error whose message starts with
   *   the "failed to get microphone permission" text when any step fails.
   */
  initMicrophone() {
    const constraints = {
      audio: { echoCancellation: true, noiseSuppression: true, autoGainControl: true },
      video: false
    };

    // The executor form turns a synchronous throw from getUserMedia (for
    // example when the method is missing) into a rejection, so every
    // failure reaches the same catch below.
    return new Promise((resolve) => {
      resolve(navigator.mediaDevices.getUserMedia(constraints));
    })
      .then((stream) => {
        this.microphoneStream = stream;

        const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
        this.audioContext = new AudioContextCtor();

        // The analyser is only used for the volume callback.
        this.analyser = this.audioContext.createAnalyser();
        this.analyser.fftSize = 2048;
        this.audioContext.createMediaStreamSource(stream).connect(this.analyser);

        // NOTE(review): chunks are recorded as 'audio/webm' but the frames
        // are labelled with options.audioFormat / encoding 'raw' — confirm
        // that the service accepts this payload.
        const recorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
        this.recorder = recorder;

        recorder.ondataavailable = (event) => {
          // A recorder that has been released may still deliver its final
          // chunk; it must not leak into a newer session.
          if (this.recorder !== recorder || event.data.size <= 0) {
            return;
          }
          this.audioChunks.push(event.data);

          const reader = new FileReader();
          reader.onload = () => {
            if (this.state === 'recording' && reader.result instanceof ArrayBuffer) {
              this.audioDataQueue.push(arrayBufferToBase64(reader.result));
              this.sendAudioData();
            }
          };
          reader.readAsArrayBuffer(event.data);
        };

        recorder.start(RECORDER_TIMESLICE_MS);
        this.startVolumeDetection();
      })
      .catch((error) => {
        throw new Error(`\u83B7\u53D6\u9EA6\u514B\u98CE\u6743\u9650\u5931\u8D25: ${error}`);
      });
  }

  /**
   * Open the WebSocket and attach its handlers.
   *
   * Every handler ignores events from a socket that is no longer
   * `this.websocket`, so a socket from a previous session cannot disturb
   * the current one.
   *
   * @throws {Error} When the URL cannot be built or the socket cannot be created.
   */
  initWebSocket() {
    try {
      const url = generateAuthUrl(this.options.apiKey, this.options.apiSecret);
      const socket = new WebSocket(url);
      this.websocket = socket;

      socket.onopen = () => {
        // Do not resurrect a session that was stopped or failed while the
        // connection was still being established.
        if (this.websocket !== socket || this.state !== 'connecting') {
          return;
        }
        this.setState('connected');
        this.sendStartFrame();
      };

      socket.onmessage = (event) => {
        if (this.websocket !== socket) {
          return;
        }
        try {
          const message = JSON.parse(event.data);

          if (message.code !== 0) {
            this.handleError({ code: message.code, message: message.message || '识别错误' });
            return;
          }

          if (message.data && message.data.result) {
            const text = parseXfyunResult(message.data.result);
            const isEnd = message.data.result.ls;
            // NOTE(review): frames request dwa 'wpgs'; if the service sends
            // replacement results in that mode, plain appending would
            // duplicate text — confirm against the service documentation.
            this.recognitionResult += text;
            if (this.handlers.onRecognitionResult) {
              this.handlers.onRecognitionResult(text, isEnd);
            }
          }
        } catch (error) {
          this.handleError({ code: 10005, message: '解析消息失败', data: error });
        }
      };

      socket.onerror = (error) => {
        if (this.websocket !== socket) {
          return;
        }
        this.handleError({ code: 10006, message: 'WebSocket连接错误', data: error });
      };

      socket.onclose = () => {
        if (this.websocket !== socket) {
          return;
        }
        this.websocket = null;
        if (this.state !== 'stopped' && this.state !== 'error') {
          // The connection ended without stop(): nothing can be sent any
          // more, so stop capturing before reporting 'idle'.
          this.releaseCaptureResources();
          this.setState('idle');
        }
      };
    } catch (error) {
      throw new Error(`\u521D\u59CB\u5316WebSocket\u5931\u8D25: ${error}`);
    }
  }

  /**
   * Build one protocol frame.
   *
   * @param {number} status Value of `data.status`: 0 for the start frame,
   *   1 for an audio frame, 2 for the end frame.
   * @param {string} [audio] Base64 audio; the `audio` key is omitted when
   *   this argument is undefined.
   * @returns {object} Frame ready for JSON.stringify.
   */
  buildFrame(status, audio) {
    const options = this.options;
    const data = {
      status: status,
      format: options.audioFormat || 'audio/L16;rate=16000',
      encoding: 'raw'
    };
    if (audio !== undefined) {
      data.audio = audio;
    }
    return {
      common: { app_id: options.appId },
      business: {
        language: options.language,
        domain: options.domain,
        accent: options.accent,
        vad_eos: options.vadEos,
        dwa: 'wpgs',
        pd: 'speech',
        // Driven by options.punctuation; stays 0 when the option is omitted.
        ptt: options.punctuation ? 1 : 0,
        rlang: 'zh-cn',
        vinfo: 1,
        nunum: 1,
        speex_size: 70,
        nbest: 1,
        wbest: 5
      },
      data: data
    };
  }

  /** Send the start frame (with hot words, if any) and enter 'recording'. */
  sendStartFrame() {
    if (!this.websocket || this.websocket.readyState !== WebSocket.OPEN) {
      return;
    }
    const frame = this.buildFrame(0);
    if (this.options.hotWords && this.options.hotWords.length > 0) {
      frame.business.hotwords = this.options.hotWords.join(',');
    }
    this.websocket.send(JSON.stringify(frame));
    this.setState('recording');
  }

  /** Drain the queue of base64 chunks, sending one audio frame per chunk. */
  sendAudioData() {
    if (
      !this.websocket ||
      this.websocket.readyState !== WebSocket.OPEN ||
      this.state !== 'recording'
    ) {
      return;
    }
    while (this.audioDataQueue.length > 0) {
      const audioData = this.audioDataQueue.shift();
      if (!audioData) {
        continue;
      }
      this.websocket.send(JSON.stringify(this.buildFrame(1, audioData)));
    }
  }

  /** Send the end frame (empty audio) if the socket is open. */
  sendEndFrame() {
    if (!this.websocket || this.websocket.readyState !== WebSocket.OPEN) {
      return;
    }
    this.websocket.send(JSON.stringify(this.buildFrame(2, '')));
  }

  /** Poll the analyser and report the volume while recording. */
  startVolumeDetection() {
    if (!this.analyser) {
      return;
    }
    const samples = new Float32Array(this.analyser.frequencyBinCount);
    this.volumeTimer = window.setInterval(() => {
      if (this.analyser && this.state === 'recording') {
        this.analyser.getFloatTimeDomainData(samples);
        const volume = calculateVolume(samples);
        if (this.handlers.onProcess) {
          this.handlers.onProcess(volume);
        }
      }
    }, VOLUME_POLL_INTERVAL_MS);
  }

  /**
   * Stop the recorder, the volume timer, the microphone tracks and the
   * audio context. Safe to call repeatedly: the fields are cleared before
   * anything is stopped, so a second call finds nothing to do.
   */
  releaseCaptureResources() {
    const recorder = this.recorder;
    const timer = this.volumeTimer;
    const stream = this.microphoneStream;
    const context = this.audioContext;

    this.recorder = null;
    this.volumeTimer = null;
    this.microphoneStream = null;
    this.audioContext = null;
    this.analyser = null;

    if (recorder && recorder.state !== 'inactive') {
      recorder.stop();
    }
    if (timer) {
      window.clearInterval(timer);
    }
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    if (context) {
      const closing = context.close();
      if (closing && typeof closing.catch === 'function') {
        // Best-effort shutdown: a context that is already closed rejects,
        // and there is nothing left to clean up in that case.
        closing.catch(() => {});
      }
    }
  }

  /** Close the current socket immediately and forget it. */
  closeSocket() {
    const socket = this.websocket;
    // Cleared first so the socket's own handlers treat it as superseded.
    this.websocket = null;
    if (socket) {
      socket.close();
    }
  }

  /**
   * Record the new state and notify `onStateChange`.
   * @param {string} state New state name.
   */
  setState(state) {
    this.state = state;
    if (this.handlers.onStateChange) {
      this.handlers.onStateChange(state);
    }
  }

  /**
   * Pass an error to `onError` and log it, without touching the state.
   * @param {{code: number, message: string, data?: *}} error Error record.
   */
  notifyError(error) {
    if (this.handlers.onError) {
      this.handlers.onError(error);
    }
    console.error('讯飞语音识别错误:', error);
  }

  /**
   * Fail the session: enter 'error', release the microphone and the socket
   * (so a later start() begins from a clean slate), then report the error.
   * @param {{code: number, message: string, data?: *}} error Error record.
   */
  handleError(error) {
    this.setState('error');
    this.releaseCaptureResources();
    this.closeSocket();
    this.notifyError(error);
  }
}

// Inline styles for the component.
const styles = {
  container: {
    display: 'flex',
    flexDirection: 'column',
    alignItems: 'center',
    padding: '20px',
    fontFamily: 'Arial, sans-serif',
  },
  button: {
    padding: '12px 24px',
    fontSize: '16px',
    border: 'none',
    borderRadius: '50px',
    backgroundColor: '#2196F3',
    color: 'white',
    cursor: 'pointer',
    outline: 'none',
    transition: 'background-color 0.3s',
  },
  buttonHover: {
    backgroundColor: '#1976D2',
  },
  buttonRecording: {
    backgroundColor: '#F44336',
  },
  buttonRecordingHover: {
    backgroundColor: '#D32F2F',
  },
  buttonDisabled: {
    backgroundColor: '#BDBDBD',
    cursor: 'not-allowed',
  },
  status: {
    marginTop: '10px',
    fontSize: '14px',
    color: '#757575',
  },
  volumeContainer: {
    width: '100%',
    margin: '15px 0',
  },
  volumeBarContainer: {
    width: '100%',
    height: '10px',
    backgroundColor: '#EEEEEE',
    borderRadius: '5px',
    overflow: 'hidden',
  },
  volumeBar: (width) => ({
    height: '100%',
    backgroundColor: '#4CAF50',
    transition: 'width 0.1s',
    width: width,
  }),
  result: {
    marginTop: '20px',
    padding: '15px',
    width: '100%',
    minHeight: '100px',
    maxHeight: '200px',
    overflowY: 'auto',
    border: '1px solid #E0E0E0',
    borderRadius: '4px',
    backgroundColor: '#F5F5F5',
    fontSize: '16px',
    lineHeight: '1.5',
    whiteSpace: 'pre-wrap',
    wordBreak: 'break-word',
  },
};

// Status labels shown next to the button, keyed by recognizer state.
const STATUS_LABELS = {
  idle: '空闲',
  connecting: '连接中...',
  connected: '已连接',
  recording: '录音中...',
  stopped: '已停止',
  error: '错误',
};

/**
 * Ready-made recognizer UI: a start/stop button, an optional status line,
 * an optional volume bar and the recognized text.
 *
 * The recognizer instance is rebuilt only when `appId`, `apiKey` or
 * `apiSecret` change; the remaining recognizer options are read when the
 * instance is created.
 *
 * @param {object} props Credentials (`appId`, `apiKey`, `apiSecret`),
 *   recognizer options (`language`, `domain`, `accent`, `hotWords`,
 *   `punctuation`, `autoStart`), callbacks (`onStart`, `onStop`,
 *   `onResult(text, isEnd)`, `onError(error)`) and presentation options
 *   (`className`, `buttonClassName`, `buttonStartText`, `buttonStopText`,
 *   `showVolume`, `showStatus`).
 * @returns {React.ReactElement} The rendered component.
 */
const SpeechRecognizer = ({
  appId,
  apiKey,
  apiSecret,
  language = 'zh_cn',
  domain = 'iat',
  accent = 'mandarin',
  hotWords,
  punctuation = true,
  autoStart = false,
  onStart,
  onStop,
  onResult,
  onError,
  className = '',
  buttonClassName = '',
  buttonStartText = '开始录音',
  buttonStopText = '停止录音',
  showVolume = true,
  showStatus = true,
}) => {
  const [recognitionText, setRecognitionText] = useState('');
  const [state, setState] = useState('idle');
  const [volume, setVolume] = useState(0);
  const recognizerRef = useRef(null);

  // The recognizer outlives individual renders, so it reads the consumer's
  // callbacks through a ref that is refreshed on every render; capturing
  // them in the effect would keep calling the first render's functions.
  const callbacksRef = useRef({});
  callbacksRef.current = { onStart, onStop, onResult, onError };

  useEffect(() => {
    if (!appId || !apiKey || !apiSecret) {
      console.error('缺少必要参数: appId, apiKey, apiSecret');
      return undefined;
    }

    const options = {
      appId,
      apiKey,
      apiSecret,
      language,
      domain,
      accent,
      hotWords,
      punctuation,
      autoStart,
    };

    const recognizer = new XfyunASR(options, {
      onStart() {
        const callback = callbacksRef.current.onStart;
        if (callback) {
          callback();
        }
      },
      onStop() {
        const callback = callbacksRef.current.onStop;
        if (callback) {
          callback();
        }
      },
      onRecognitionResult(text, isEnd) {
        setRecognitionText((previous) => previous + text);
        const callback = callbacksRef.current.onResult;
        if (callback) {
          callback(text, isEnd);
        }
      },
      onProcess(volumeValue) {
        setVolume(volumeValue);
      },
      onError(error) {
        const callback = callbacksRef.current.onError;
        if (callback) {
          callback(error);
        }
      },
      onStateChange(newState) {
        setState(newState);
      },
    });
    recognizerRef.current = recognizer;

    return () => {
      // Ask the instance for its state: the `state` variable visible here is
      // the value from the render that created this effect, not the
      // current one.
      const current = recognizer.getState();
      if (current === 'connecting' || current === 'connected' || current === 'recording') {
        recognizer.stop();
      }
      if (recognizerRef.current === recognizer) {
        recognizerRef.current = null;
      }
    };
    // Rebuild the instance only when the credentials change.
  }, [appId, apiKey, apiSecret]);

  const startRecognition = () => {
    if (recognizerRef.current) {
      setRecognitionText('');
      recognizerRef.current.start();
    }
  };

  const stopRecognition = () => {
    if (recognizerRef.current) {
      recognizerRef.current.stop();
    }
  };

  const handleButtonClick = () => {
    if (state === 'recording') {
      stopRecognition();
    } else {
      startRecognition();
    }
  };

  // Only 'connecting' blocks the button. XfyunASR.start() accepts the
  // 'error' state, so keeping the button enabled after an error is what
  // lets the user retry.
  const isButtonDisabled = state === 'connecting';

  const getVolumeBarWidth = () => `${Math.min(100, volume)}%`;

  const getStatusText = () =>
    Object.prototype.hasOwnProperty.call(STATUS_LABELS, state)
      ? STATUS_LABELS[state]
      : '未知状态';

  const getButtonStyle = () => {
    if (isButtonDisabled) {
      return Object.assign({}, styles.button, styles.buttonDisabled);
    }
    if (state === 'recording') {
      return Object.assign({}, styles.button, styles.buttonRecording);
    }
    return styles.button;
  };

  return React.createElement(
    'div',
    { style: styles.container, className: className },
    React.createElement(
      'button',
      {
        style: getButtonStyle(),
        className: buttonClassName,
        onClick: handleButtonClick,
        disabled: isButtonDisabled,
      },
      state === 'recording' ? buttonStopText : buttonStartText
    ),
    showStatus &&
      React.createElement('div', { style: styles.status }, '\u72B6\u6001: ', getStatusText()),
    showVolume &&
      state === 'recording' &&
      React.createElement(
        'div',
        { style: styles.volumeContainer },
        React.createElement(
          'div',
          { style: styles.volumeBarContainer },
          React.createElement('div', { style: styles.volumeBar(getVolumeBarWidth()) })
        )
      ),
    React.createElement('div', { style: styles.result }, recognitionText)
  );
};

export {
  SpeechRecognizer,
  XfyunASR,
  arrayBufferToBase64,
  calculateVolume,
  generateAuthUrl,
  parseXfyunResult
};