• 大模型交互-超拟人合成


    1、超拟人合成:将文字转化为自然流畅的人声,在实时语音合成的基础上,精准模拟人类的副语言现象,如呼吸、叹气、语速变化等,使得语音不仅流畅自然,更富有情感和生命力。
    2、整体流程:唤醒能力持续运行--->合成能力加持(唤醒成功后语音答复:“主人,我在”)--->调用在线或离线听写能力(建议使用讯飞在线听写,效果更好)--->将用户的语音识别成文字后发给大模型(建议调用讯飞星火认知大模型)--->获取大模型答案后调用语音合成(在线、离线均可)输出答案。这样就顺利实现了用纯语音与大模型进行交互!
    3、在获取大模型答案后调用语音合成(合成在线离线均可)进行答案输出环节,讯飞推出超拟人合成,交互更像真人。
    4、通过合理的逻辑编排,将大模型返回的及时性与语音合成的及时性结合起来,使模拟真人交互成为可能。
    5、下面给出超拟人合成的 Java 调用示例,供大家参考!

    1. package com.iflytek;
    2. import com.google.gson.Gson;
    3. import okhttp3.HttpUrl;
    4. import org.java_websocket.WebSocket;
    5. import org.java_websocket.client.WebSocketClient;
    6. import org.java_websocket.handshake.ServerHandshake;
    7. import javax.crypto.Mac;
    8. import javax.crypto.spec.SecretKeySpec;
    9. import java.io.*;
    10. import java.net.URI;
    11. import java.net.URL;
    12. import java.nio.charset.StandardCharsets;
    13. import java.text.SimpleDateFormat;
    14. import java.util.*;
    15. /**
    16. * 语音合成流式 WebAPI 接口调用示例 接口文档(必看):https://www.xfyun.cn/doc/tts/online_tts/API.html
    17. * 发音人使用方式:登陆开放平台https://www.xfyun.cn/后,到控制台-我的应用-语音合成-添加试用或购买发音人,添加后即显示该发音人参数值
    18. * 错误码链接:https://www.xfyun.cn/document/error-code (code返回错误码时必看)
    19. * 小语种需要传输小语种文本、使用小语种发音人vcn、tte=unicode以及修改文本编码方式
    20. *
    21. */
    22. public class WebTtsWs {
    23. // 地址与鉴权信息
    24. // public static final String hostUrl = "https://tts-api.xfyun.cn/v2/tts"; // 普通合成
    25. public static final String hostUrl = "https://cbm01.cn-huabei-1.xf-yun.com/v1/private/medd90fec"; // 超拟人合成
    26. // 均到控制台-语音合成页面获取
    27. public static final String appid = "";
    28. public static final String apiSecret = "";
    29. public static final String apiKey = "";
    30. // 合成文本
    31. public static final String TEXT = "商鞅(约公元前390年—前338年),卫国人,公孙氏,姬姓,名鞅。他是一位重要的战国时期政治家、改革家、思想家和军事家。作为法家派别的代表人物,商鞅积极实行变法。在秦孝公的请求下,他提出并实施了一系列深远影响的改革措施,这被称为“商鞅变法”。这些措施包括废除井田制、重视农业、奖励军功、统一度量衡和建立县制等。虽然在他的领导下,秦国强大并繁荣,但他也因其严苛的法律而饱受争议。总的来说,商鞅是一位影响力深远的历史人物。";
    32. // 合成文本编码格式
    33. public static final String TTE = "UTF8"; // 小语种必须使用UNICODE编码作为值
    34. // 发音人参数。到控制台-我的应用-语音合成-添加试用或购买发音人,添加后即显示该发音人参数值,若试用未添加的发音人会报错11200
    35. public static final String VCN = "xiaoyan";
    36. // 合成文件名称
    37. public static final String OUTPUT_FILE_PATH = "src/main/resources/tts/" + System.currentTimeMillis() + ".pcm";
    38. // json
    39. public static final Gson gson = new Gson();
    40. public static boolean wsCloseFlag = false;
    41. public static long startTime = System.currentTimeMillis();
    42. public static long endTime = System.currentTimeMillis();
    43. public static boolean ttsWorkingFlag = false;
    44. public static void main(String[] args) throws Exception {
    45. String wsUrl = getAuthUrl(hostUrl, apiKey, apiSecret).replace("https://", "wss://");
    46. OutputStream outputStream = new FileOutputStream(OUTPUT_FILE_PATH);
    47. websocketWork(wsUrl, outputStream);
    48. }
    49. // Websocket方法
    50. public static void websocketWork(String wsUrl, OutputStream outputStream) {
    51. try {
    52. URI uri = new URI(wsUrl);
    53. WebSocketClient webSocketClient = new WebSocketClient(uri) {
    54. @Override
    55. public void onOpen(ServerHandshake serverHandshake) {
    56. try { //实时播放
    57. Constants.TTS_SOURCE_DATA_LINE.open(Constants.TTS_AUDIO_FORMAT);
    58. Constants.TTS_SOURCE_DATA_LINE.start();
    59. } catch (Exception e) {
    60. e.printStackTrace();
    61. }
    62. ttsWorkingFlag = true;
    63. startTime = System.currentTimeMillis();
    64. System.out.println("ws建立连接成功...");
    65. }
    66. @Override
    67. public void onMessage(String text) {
    68. System.out.println("返回结果打印:" + text);
    69. JsonParse myJsonParse = gson.fromJson(text, JsonParse.class);
    70. if (myJsonParse.header.code != 0) {
    71. System.out.println("发生错误,错误码为:" + myJsonParse.header.code);
    72. System.out.println("本次请求的sid为:" + myJsonParse.header.sid);
    73. }
    74. /* if (myJsonParse.header.status == 2) {
    75. try {
    76. outputStream.close();
    77. } catch (IOException e) {
    78. e.printStackTrace();
    79. }
    80. endTime = System.currentTimeMillis();
    81. System.out.println("本次合成耗时:" + (endTime - startTime) + "ms");
    82. System.out.println("本次请求的sid==>" + myJsonParse.header.sid);
    83. System.out.println("合成成功,文件保存路径为==>" + OUTPUT_FILE_PATH);
    84. // 可以关闭连接,释放资源
    85. ttsWorkingFlag = false;
    86. wsCloseFlag = true;
    87. Constants.TTS_SOURCE_DATA_LINE.stop();
    88. Constants.TTS_SOURCE_DATA_LINE.close();
    89. }*/
    90. if (myJsonParse.payload.audio.audio != null) {
    91. try {
    92. byte[] textBase64Decode = Base64.getDecoder().decode(myJsonParse.payload.audio.audio);
    93. outputStream.write(textBase64Decode);
    94. outputStream.flush();
    95. } catch (Exception e) {
    96. e.printStackTrace();
    97. }
    98. // 实时播报
    99. try {
    100. byte[] textBase64Decode = Base64.getDecoder().decode(myJsonParse.payload.audio.audio);
    101. /* outputStream.write(textBase64Decode);
    102. outputStream.flush();*/
    103. if (ttsWorkingFlag) {
    104. Constants.TTS_SOURCE_DATA_LINE.write(textBase64Decode, 0, textBase64Decode.length); //实时写音频流
    105. // System.err.println("写入");
    106. } else {
    107. Constants.TTS_SOURCE_DATA_LINE.stop();
    108. Constants.TTS_SOURCE_DATA_LINE.close();
    109. }
    110. } catch (Exception e) {
    111. e.printStackTrace();
    112. }
    113. }
    114. }
    115. @Override
    116. public void onClose(int i, String s, boolean b) {
    117. System.out.println("ws链接已关闭,本次请求完成...");
    118. }
    119. @Override
    120. public void onError(Exception e) {
    121. if (e.getMessage() != null) {
    122. System.out.println("发生错误 " + e.getMessage());
    123. }
    124. }
    125. };
    126. // 建立连接
    127. webSocketClient.connect();
    128. while (!webSocketClient.getReadyState().equals(WebSocket.READYSTATE.OPEN)) {
    129. //System.out.println("正在连接...");
    130. Thread.sleep(100);
    131. }
    132. MyThread webSocketThread = new MyThread(webSocketClient);
    133. webSocketThread.start();
    134. } catch (Exception e) {
    135. System.out.println(e.getMessage());
    136. }
    137. }
    138. // 线程来发送音频与参数
    139. static class MyThread extends Thread {
    140. WebSocketClient webSocketClient;
    141. public MyThread(WebSocketClient webSocketClient) {
    142. this.webSocketClient = webSocketClient;
    143. }
    144. public void run() {
    145. String requestJson;//请求参数json串
    146. try {
    147. requestJson = "{\n" + " \"common\": {\n" + " \"app_id\": \"" + appid + "\"\n" + " },\n" + " \"business\": {\n" + " \"aue\": \"raw\",\n" + " \"tte\": \"" + TTE + "\",\n" + " \"ent\": \"intp65\",\n" + " \"vcn\": \"" + VCN + "\",\n" + " \"pitch\": 50,\n" + " \"speed\": 50\n" + " },\n" + " \"data\": {\n" + " \"status\": 2,\n" + " \"text\": \"" + Base64.getEncoder().encodeToString(TEXT.getBytes(StandardCharsets.UTF_8)) + "\"\n" +
    148. //" \"text\": \"" + Base64.getEncoder().encodeToString(TEXT.getBytes("UTF-16LE")) + "\"\n" +
    149. " }\n" + "}";
    150. requestJson = "{\n" + " \"header\": {\n" + " \"app_id\": \"" + appid + "\",\n" + " \"status\": 0\n" + " },\n" + " \"parameter\": {\n" + " \"oral\": {\n" + " \"spark_assist\": 1,\n" + " \"oral_level\": \"mid\"\n" + " },\n" + " \"tts\": {\n" + " \"vcn\": \"x4_lingxiaoxuan_oral\",\n" + " \"speed\": 50,\n" + " \"volume\": 50,\n" + " \"pitch\": 50,\n" + " \"bgs\": 0,\n" + " \"reg\": 0,\n" + " \"rdn\": 0,\n" + " \"rhy\": 0,\n" + " \"scn\": 0,\n" + " \"version\": 0,\n" + " \"L5SilLen\": 1000,\n" + " \"ParagraphSilLen\": 0,\n" + " \"audio\": {\n" + " \"encoding\": \"raw\",\n" + " \"sample_rate\": 16000,\n" + " \"channels\": 1,\n" + " \"bit_depth\": 16,\n" + " \"frame_size\": 0\n" + " },\n" + " \"pybuf\": {\n" + " \"encoding\": \"utf8\",\n" + " \"compress\": \"raw\",\n" + " \"format\": \"plain\"\n" + " }\n" + " }\n" + " },\n" + " \"payload\": {\n" + " \"text\": {\n" + " \"encoding\": \"utf8\",\n" + " \"compress\": \"raw\",\n" + " \"format\": \"json\",\n" + " \"status\": 0,\n" + " \"seq\": 0,\n" + " \"text\": \"" + Base64.getEncoder().encodeToString(TEXT.getBytes(StandardCharsets.UTF_8)) + "\"\n" + " },\n" + " \"user_text\": {\n" + " \"encoding\": \"utf8\",\n" + " \"compress\": \"raw\",\n" + " \"format\": \"json\",\n" + " \"status\": 0,\n" + " \"seq\": 0,\n" + " \"text\": \"" + Base64.getEncoder().encodeToString(TEXT.getBytes(StandardCharsets.UTF_8)) + "\"\n" + " }\n" + " }\n" + "}";
    151. System.err.println(requestJson);
    152. webSocketClient.send(requestJson);
    153. // 等待服务端返回完毕后关闭
    154. while (!wsCloseFlag) {
    155. Thread.sleep(200);
    156. }
    157. webSocketClient.close();
    158. } catch (Exception e) {
    159. e.printStackTrace();
    160. }
    161. }
    162. }
    163. // 鉴权方法
    164. public static String getAuthUrl(String hostUrl, String apiKey, String apiSecret) throws Exception {
    165. URL url = new URL(hostUrl);
    166. // 时间
    167. SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
    168. format.setTimeZone(TimeZone.getTimeZone("GMT"));
    169. String date = format.format(new Date());
    170. // 拼接
    171. String preStr = "host: " + url.getHost() + "\n" + "date: " + date + "\n" + "GET " + url.getPath() + " HTTP/1.1";
    172. //System.out.println(preStr);
    173. // SHA256加密
    174. Mac mac = Mac.getInstance("hmacsha256");
    175. SecretKeySpec spec = new SecretKeySpec(apiSecret.getBytes(StandardCharsets.UTF_8), "hmacsha256");
    176. mac.init(spec);
    177. byte[] hexDigits = mac.doFinal(preStr.getBytes(StandardCharsets.UTF_8));
    178. // Base64加密
    179. String sha = Base64.getEncoder().encodeToString(hexDigits);
    180. // 拼接
    181. String authorization = String.format("api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey, "hmac-sha256", "host date request-line", sha);
    182. // 拼接地址
    183. HttpUrl httpUrl = Objects.requireNonNull(HttpUrl.parse("https://" + url.getHost() + url.getPath())).newBuilder().//
    184. addQueryParameter("authorization", Base64.getEncoder().encodeToString(authorization.getBytes(StandardCharsets.UTF_8))).//
    185. addQueryParameter("date", date).//
    186. addQueryParameter("host", url.getHost()).//
    187. build();
    188. return httpUrl.toString();
    189. }
    190. //返回的json结果拆解
    191. class JsonParse {
    192. Header header;
    193. Payload payload;
    194. }
    195. class Header {
    196. int code;
    197. String sid;
    198. int status;
    199. }
    200. class Payload {
    201. Audio audio;
    202. }
    203. class Audio {
    204. String audio;
    205. int seq;
    206. }
    207. }

  • 相关阅读:
    安全性第一!OpenWRT配置SFTP远程文件传输,实现数据安全保护
    进阶JAVA篇-深入了解内部类
    【论文写作】RSA算法的实现总体设计参考
    史上第一款AOSP开发的IDE (支持Java/Kotlin/C++/Jni/Native/Shell/Python)
    MySQL深度剖析及面试秘籍(必知必会30题全)
    pre-commit 提交检查代码 检查暂存区
    VR点亮元宇宙丨酷雷曼与你相约2022世界VR产业大会
    python 爱心代码
    计算机组成原理 | 总线
    【WSN】无线传感器网络 X-Y 坐标到图形视图和位字符串前缀嵌入方法研究(Matlab代码实现)
  • 原文地址:https://blog.csdn.net/p6448777/article/details/136518960