Real-Time Speech Recognition with Alibaba Tongyi Tingwu

Official documentation

This article uses Python on the backend and React on the frontend to integrate with Alibaba's Tongyi Tingwu real-time transcription service; let's go straight to the code.

Backend API

The backend here is implemented in Python and is only a minimal example; for other languages or the full details, please refer to the official documentation.

#!/usr/bin/env python
# coding=utf-8
import json
import datetime

from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from flask import Flask, abort
from flask_cors import CORS

app = Flask(__name__)
CORS(app)

APP_KEY = 'APP_KEY'
ACCESS_ID = 'ACCESS_ID'
ACCESS_SECRET = 'ACCESS_SECRET'


def create_common_request(domain, version, protocolType, method, uri):
    curr_request = CommonRequest()
    curr_request.set_accept_format('json')
    curr_request.set_domain(domain)
    curr_request.set_version(version)
    curr_request.set_protocol_type(protocolType)
    curr_request.set_method(method)
    curr_request.set_uri_pattern(uri)
    curr_request.add_header('Content-Type', 'application/json')
    return curr_request

def init_parameters():
    body = dict()
    body['AppKey'] = APP_KEY

    # Basic request parameters
    input = dict()
    # The input audio stream must match the format and sample rate set here
    input['Format'] = 'pcm'
    input['SampleRate'] = 16000
    input['SourceLanguage'] = 'cn'
    input['TaskKey'] = 'task' + datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    input['ProgressiveCallbacksEnabled'] = False
    body['Input'] = input

    # AI-related parameters; enable them as needed
    parameters = dict()

    # Speech transcription settings
    transcription = dict()
    # Speaker diarization: optional
    transcription['DiarizationEnabled'] = True
    diarization = dict()
    diarization['SpeakerCount'] = 2
    transcription['Diarization'] = diarization
    parameters['Transcription'] = transcription

    # Text translation: optional
    parameters['TranslationEnabled'] = True
    translation = dict()
    translation['TargetLanguages'] = ['en']  # translate into English, for example
    parameters['Translation'] = translation

    # Auto chapters: optional; includes chapter titles and agenda summaries
    parameters['AutoChaptersEnabled'] = True

    # Meeting assistance: optional; includes action items and key information
    # (keywords, highlights, scene detection)
    parameters['MeetingAssistanceEnabled'] = True
    meetingAssistance = dict()
    meetingAssistance['Types'] = ['Actions', 'KeyInformation']
    parameters['MeetingAssistance'] = meetingAssistance

    # Summarization: optional; includes full-text summary, per-speaker summary
    # and Q&A summary (Q&A review)
    parameters['SummarizationEnabled'] = True
    summarization = dict()
    summarization['Types'] = ['Paragraph', 'Conversational', 'QuestionsAnswering', 'MindMap']
    parameters['Summarization'] = summarization

    # PPT extraction and PPT summarization: optional
    parameters['PptExtractionEnabled'] = True

    # Spoken-to-written text polishing: optional
    parameters['TextPolishEnabled'] = True

    body['Parameters'] = parameters
    return body

@app.route('/createVoiceWsInfo', methods=['POST'])
def create_voice_ws_info():
    body = init_parameters()
    print(body)
    credentials = AccessKeyCredential(ACCESS_ID, ACCESS_SECRET)
    client = AcsClient(region_id='cn-beijing', credential=credentials)
    curr_request = create_common_request('tingwu.cn-beijing.aliyuncs.com', '2023-09-30', 'https', 'PUT',
                                         '/openapi/tingwu/v2/tasks')
    curr_request.add_query_param('type', 'realtime')
    curr_request.set_content(json.dumps(body).encode('utf-8'))
    response = client.do_action_with_exception(curr_request)
    res_json = json.dumps(json.loads(response), indent=4, ensure_ascii=False)
    return res_json

@app.route('/getVoiceTask/<task_id>', methods=['GET'])
def get_voice_task_info(task_id):
    # Query the task and its results by task_id
    if not task_id:
        abort(400)  # return 400 if task_id is empty
    credentials = AccessKeyCredential(ACCESS_ID, ACCESS_SECRET)
    client = AcsClient(region_id='cn-beijing', credential=credentials)
    uri = '/openapi/tingwu/v2/tasks' + '/' + task_id
    request = create_common_request('tingwu.cn-beijing.aliyuncs.com', '2023-09-30', 'https', 'GET', uri)
    response = client.do_action_with_exception(request)
    res_json = json.dumps(json.loads(response), indent=4, ensure_ascii=False)
    return res_json

@app.route('/closeVoiceTask/<task_id>', methods=['PUT'])
def close_voice_task_info(task_id):
    # Stop the realtime task identified by task_id
    if not task_id:
        abort(400)  # return 400 if task_id is empty
    credentials = AccessKeyCredential(ACCESS_ID, ACCESS_SECRET)
    client = AcsClient(region_id='cn-beijing', credential=credentials)
    request = create_common_request('tingwu.cn-beijing.aliyuncs.com', '2023-09-30', 'https', 'PUT',
                                    '/openapi/tingwu/v2/tasks')
    request.add_query_param('type', 'realtime')
    request.add_query_param('operation', 'stop')
    body = dict()
    body['AppKey'] = APP_KEY
    input = dict()
    input['TaskId'] = task_id
    body['Input'] = input
    request.set_content(json.dumps(body).encode('utf-8'))
    response = client.do_action_with_exception(request)
    res_json = json.dumps(json.loads(response), indent=4, ensure_ascii=False)
    return res_json

if __name__ == "__main__":
    app.run(debug=True, host='0.0.0.0')
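The frontend below only relies on a handful of fields from the JSON that /createVoiceWsInfo returns. The shape sketched here is inferred from how VoiceWebSocket.init() consumes the response and is not the complete Tingwu response body:

// Sketch of the create-task response fields the frontend uses (inferred, not the full response)
interface CreateTaskResponse {
  Code: string;       // "0" on success
  Message?: string;   // error message when Code is not "0"
  Data?: {
    TaskId: string;         // passed later to /getVoiceTask/<task_id> and /closeVoiceTask/<task_id>
    MeetingJoinUrl: string; // WebSocket URL the browser pushes PCM audio to
  };
}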

Frontend code
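The VoiceWebSocket class below imports createVoiceWsInfo and closeTask from @/app/client/voiceApi, which is not included in the original listing. Here is a minimal sketch, assuming the Flask service above is reachable at http://localhost:5000 (the base URL and the getVoiceTask helper are assumptions):

// voiceApi.ts (hypothetical sketch; point BASE_URL at wherever the Flask app runs)
const BASE_URL = "http://localhost:5000";

// POST /createVoiceWsInfo: create a realtime task; VoiceWebSocket.init() checks
// response.ok and calls response.json() itself, so the raw Response is returned
export function createVoiceWsInfo(): Promise<Response> {
  return fetch(`${BASE_URL}/createVoiceWsInfo`, { method: "POST" });
}

// GET /getVoiceTask/<task_id>: query the task and its results
export function getVoiceTask(taskId: string): Promise<Response> {
  return fetch(`${BASE_URL}/getVoiceTask/${taskId}`);
}

// PUT /closeVoiceTask/<task_id>: stop the realtime task
export function closeTask(taskId?: string): Promise<Response> {
  return fetch(`${BASE_URL}/closeVoiceTask/${taskId}`, { method: "PUT" });
}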

VoiceWebSocket.ts

import { createVoiceWsInfo, closeTask } from "@/app/client/voiceApi";
import { v4 as uuidv4 } from "uuid";
import VoiceRecorder from "@/app/utils/VoiceRecorder";

class VoiceWebSocket {
  public wsUrl: string | undefined;
  public taskId: string | undefined;
  public socket: WebSocket | undefined;
  public socketStatus: string;
  private voice: VoiceRecorder | undefined;
  private setFlag: ((flag: boolean) => void) | undefined;
  private callBack: ((result: string) => void) | undefined;
  private currentResult: string[] = [];

  constructor() {
    this.socketStatus = "init";
  }

  // Call the backend to create a realtime task and obtain the WebSocket URL and task id
  async init(): Promise<void> {
    return new Promise((resolve, reject) => {
      createVoiceWsInfo()
        .then((response) => {
          if (response.ok) {
            return response.json();
          }
          throw new Error("Failed to fetch WebSocket info");
        })
        .then((data) => {
          if ("0" === data.Code) {
            const { Data = {} } = data;
            const { MeetingJoinUrl = "", TaskId = "" } = Data;
            this.wsUrl = MeetingJoinUrl;
            this.taskId = TaskId;
            this.socketStatus = "ing";
            resolve();
          } else {
            reject(data.Message);
          }
        });
    });
  }

  // Open the WebSocket and send the StartTranscription handshake
  async initSocket(): Promise<void> {
    return new Promise((resolve, reject) => {
      if (this.wsUrl && this.taskId) {
        this.socket = new WebSocket(this.wsUrl);
        this.socket.binaryType = "blob";
        this.socket.onopen = (event) => {
          console.log("WebSocket connection open ", event);
          const message = {
            header: {
              message_id: uuidv4(),
              name: "StartTranscription",
              namespace: "SpeechTranscriber",
              task_id: this.taskId,
            },
            payload: {},
            context: {},
          };
          // Serialize the message to JSON and send it
          this.socket?.send(JSON.stringify(message));
        };
        this.socket.onmessage = (event) => {
          this.onReceiveSocketData(event);
        };
        this.socket.onclose = (event) => {
          this.voice && this.voice.stopRecording();
          if (this.socketStatus === "ing") {
            closeTask(this.taskId);
          }
          this.closeSocket(event);
          this.setFlag && this.setFlag(false);
        };
        resolve();
      } else {
        reject("WebSocket initialization failed: missing wsUrl or taskId");
      }
    });
  }

  public setVoice(voice: VoiceRecorder) {
    this.voice = voice;
  }

  public setStopFlagFun(setFlag: (flag: boolean) => void) {
    this.setFlag = setFlag;
  }

  sendSocketData(data: string | ArrayBuffer | Blob | ArrayBufferView) {
    this.socket?.send(data);
  }

  onReceiveSocketData(event: MessageEvent) {
    console.log("receive socket data: ", event);
    if ("string" == typeof event.data) {
      let parsed = null;
      try {
        parsed = JSON.parse(event.data);
      } catch (e) {
        parsed = {};
      }
      this.handleMessage(parsed);
    }
  }

  handleMessage(data: any = {}) {
    const { header = {}, payload = {} } = data;
    if ("SentenceEnd" == header.name) {
      // A sentence is final: append it to the accumulated result
      const result = payload.result;
      this.currentResult.push(result);
      this.callBack && this.callBack(this.currentResult.join(""));
    } else if ("TranscriptionResultChanged" == header.name) {
      // Intermediate result of the current sentence: replace the buffered text
      const result = payload.result;
      this.currentResult = [];
      this.currentResult.push(result);
      this.callBack && this.callBack(this.currentResult.join(""));
    }
  }

  closeSocket(event: CloseEvent) {
    console.log("WebSocket connection closed: ", event);
  }

  stopSocket() {
    this.socket?.close();
  }

  setCallback(callback: ((result: string) => void) | undefined): void {
    this.callBack = callback;
  }
}

export default VoiceWebSocket;

VoiceRecorder.ts

class AudioRecorder {
  socket: WebSocket;
  sampleRate: number;
  stream!: MediaStream;
  source: MediaStreamAudioSourceNode | null = null;
  processor: ScriptProcessorNode | null | undefined;
  isRecording: boolean = false;
  audioChunks: Blob[] = [];

  constructor(socket: WebSocket, sampleRate: number = 16000) {
    this.socket = socket;
    this.sampleRate = sampleRate; // sample rate; must match the backend Input.SampleRate
  }

  // Request microphone access and stream 16-bit PCM frames to the WebSocket
  async startMicrophone(): Promise<void> {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      this.stream = stream;
      const audioContext = new window.AudioContext({
        sampleRate: this.sampleRate,
      });
      // Create the source node from the microphone stream
      const source = audioContext.createMediaStreamSource(stream);
      this.source = source;
      // Create a ScriptProcessorNode to process the audio data (mono in, mono out)
      const processor = audioContext.createScriptProcessor(4096, 1, 1);
      this.processor = processor;
      source.connect(processor);
      processor.connect(audioContext.destination); // connect to the destination (optional)
      processor.onaudioprocess = (event) => {
        // Convert the Float32 samples of channel 0 to 16-bit PCM and send them
        const arrayBuffer = this.floatTo16BitPCM(
          event.inputBuffer.getChannelData(0),
        );
        this.socket.send(arrayBuffer);
      };
    } catch (err) {
      console.error("Error accessing media devices:", err);
    }
  }

  // Convert Float32 samples in [-1, 1] to little-endian 16-bit PCM
  floatTo16BitPCM = function (e: Float32Array) {
    const t = new DataView(new ArrayBuffer(2 * e.length));
    for (let n = 0; n < e.length; n++) {
      const r = e[n] < 0 ? 32768 : 32767;
      t.setInt16(2 * n, (e[n] * r) | 0, true);
    }
    return t.buffer;
  };

  startRecording(): void {
    this.isRecording = true;
    this.audioChunks = []; // reset chunks for a new recording
  }

  stopRecording(): void {
    if (this.isRecording) {
      this.isRecording = false;
    }
    if (this.source) {
      this.source.disconnect();
    }
    if (this.processor) {
      this.processor.disconnect();
    }
    if (this.stream) {
      this.stream.getTracks().forEach((item) => {
        item.stop();
      });
    }
  }
}

export default AudioRecorder;
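As a quick check of the Float32-to-PCM mapping used by floatTo16BitPCM above (positive samples are scaled by 32767, negative ones by 32768, and written as little-endian 16-bit integers), here is the same conversion applied to a few sample values; this snippet is illustrative only and does not depend on the class:

// Standalone illustration of the conversion performed in floatTo16BitPCM
const samples = new Float32Array([0, 0.5, 1, -1]);
const view = new DataView(new ArrayBuffer(samples.length * 2));
samples.forEach((s, i) => {
  const scale = s < 0 ? 32768 : 32767;          // asymmetric scaling, as in the class above
  view.setInt16(2 * i, (s * scale) | 0, true);  // little-endian 16-bit integer
});
// view now holds the Int16 values 0, 16383, 32767, -32768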

VoiceComponent.tsx

"use client";

import VoiceWebSocket from "../utils/VoiceWebSocket";

import AudioRecorder from "../utils/VoiceRecorder";

import React, {useState} from "react";

const VoiceComponent = () => {

const [isRecording, setIsRecording] = useState(false);

const [voiceHandle, setVoiceHandle] = useState(null);

const [socket, setSocket] = useState(null);

const [inputVal, setInputVal] = useState('')

const startListening = async () => {

if (!isRecording) {

const ws = new VoiceWebSocket();

ws.init()

.then((res) => {

console.log("socket init", res);

ws.initSocket()

.then((res) => {

console.log("web socket init", res);

setSocket(ws);

setIsRecording(true);

})

.then((next) => {

console.log(next)

if (ws.socket) {

const voice = new AudioRecorder(ws.socket);

setVoiceHandle(voice);

voice.startMicrophone().then(() => {

voice.startRecording();

});

// voice.startRecording();

ws.setVoice(voice);

ws.setCallback(setInputVal);

ws.setStopFlagFun(setIsRecording);

}

})

.catch((err) => {

alert(err);

});

})

.catch((err) => {

alert(err);

});

} else {

voiceHandle?.stopRecording();

setIsRecording(false);

}

};

return (