• JAVA麦克风实时录音调用听写并存储音频到本地


    一、麦克风传流可以实现对着电脑说话转成文字

    语音听写是一种将说话内容转换为可读文字的技术,广泛应用于各种语音输入和语音操控的场景中。随着人工智能和语音识别技术的发展,语音听写服务具备了较高的识别率和准确率,为用户带来便捷的交互体验。语音听写在各行各业都有广泛应用,从聊天输入到游戏娱乐,再到人机交互,它使人们的生活更加便利和高效。本方案不仅可以把麦克风音频实时送流给听写服务,而且还能把录音的音频文件存储到本地,方便验证产生的音频格式是否正确、设备录音效果是否符合听写能力的要求等等。

    二、具体实现录音的代码

    1. package day;
    2. import com.google.gson.Gson;
    3. import com.google.gson.JsonObject;
    4. import day.constants.Constants;
    5. import okhttp3.*;
    6. import javax.crypto.Mac;
    7. import javax.crypto.spec.SecretKeySpec;
    8. import javax.sound.sampled.AudioInputStream;
    9. import java.io.FileNotFoundException;
    10. import java.io.FileOutputStream;
    11. import java.io.IOException;
    12. import java.io.OutputStream;
    13. import java.net.URL;
    14. import java.nio.charset.Charset;
    15. import java.text.SimpleDateFormat;
    16. import java.util.*;
    17. // 麦克风传流听写
    18. public class IatMic extends WebSocketListener {
    // Dictation endpoint for Chinese/English. It is written as an https URL because the
    // http URL parser used below does not support the ws/wss schema.
    private static final String hostUrl = "https://iat-api.xfyun.cn/v2/iat";
    // Minor-language endpoint alternative: "https://iat-niche-api.xfyun.cn/v2/iat"
    private static final String appid = ""; // from the console: "My applications"
    private static final String apiSecret = ""; // from the console: My applications -> Speech dictation (streaming)
    private static final String apiKey = ""; // from the console: My applications -> Speech dictation (streaming)

    // Values of the "status" field carried by each audio frame.
    public static final int StatusFirstFrame = 0;
    public static final int StatusContinueFrame = 1;
    public static final int StatusLastFrame = 2;

    public static final Gson json = new Gson();

    // Accumulates the partial results delivered to onMessage.
    Decoder decoder = new Decoder();

    // Session start time.
    private static Date dateBegin = new Date();
    // Session end time (refreshed whenever a result arrives).
    private static Date dateEnd = new Date();
    private static final SimpleDateFormat sdf = new SimpleDateFormat("yyy-MM-dd HH:mm:ss.SSS");

    static int status = 0; // status of the next audio frame to send
    // While true the sender loop keeps streaming; cleared to make the loop stop.
    public static boolean IAT_FLAG = true;
    // Number of bytes obtained by the most recent microphone read.
    public static int len;
    // Buffer holding the most recent microphone read.
    public static byte[] audioDataByteArray;

    // Local file that receives a raw copy of everything read from the microphone.
    public static final String RECORD_FILE_PATH = "src/main/resources/record/" + System.currentTimeMillis() + ".pcm";
    public static final OutputStream outputStream; // recording file sink

    static {
        try {
            // FileOutputStream does not create missing directories, so make sure the
            // record directory exists; otherwise class initialization would fail.
            java.io.File recordDir = new java.io.File(RECORD_FILE_PATH).getParentFile();
            if (recordDir != null && !recordDir.isDirectory() && !recordDir.mkdirs()) {
                throw new FileNotFoundException("cannot create record directory: " + recordDir);
            }
            outputStream = new FileOutputStream(RECORD_FILE_PATH);
        } catch (FileNotFoundException e) {
            throw new RuntimeException(e);
        }
    }
    48. public static void main(String[] args) throws Exception {
    49. // 用线程方式启动听写
    50. Constants.IVW_ASR_TARGET_DATA_LINE.open(Constants.IVW_ASR_AUDIO_FORMAT);
    51. Constants.IVW_ASR_TARGET_DATA_LINE.start();
    52. MyThread myThread = new MyThread();
    53. myThread.start();
    54. }
    55. static class MyThread extends Thread {
    56. public void run() {
    57. // 需要初始化的参数都在这里添加
    58. IatMic.IAT_FLAG = true;
    59. status = 0;
    60. // 结束初始化
    61. IatMic iatMic = new IatMic();
    62. // 构建鉴权url
    63. String authUrl = null;
    64. try {
    65. authUrl = getAuthUrl(hostUrl, apiKey, apiSecret);
    66. } catch (Exception e) {
    67. throw new RuntimeException(e);
    68. }
    69. OkHttpClient client = new OkHttpClient.Builder().build();
    70. //将url中的 schema http://和https://分别替换为ws:// 和 wss://
    71. String url = authUrl.toString().replace("http://", "ws://").replace("https://", "wss://");
    72. // System.err.println(url);
    73. Request request = new Request.Builder().url(url).build();
    74. WebSocket webSocket = client.newWebSocket(request, iatMic);
    75. }
    76. }
    77. @Override
    78. public void onOpen(WebSocket webSocket, Response response) {
    79. // System.out.println("建立连接成功");
    80. System.out.println(Constants.YELLOW_BACKGROUND + "机器人正在听,您请说:" + Constants.RESET);
    81. super.onOpen(webSocket, response);
    82. new Thread(() -> {
    83. //连接成功,开始发送数据
    84. // int interval = 200;
    85. try {
    86. /* Constants.IVW_ASR_TARGET_DATA_LINE.open(Constants.IVW_ASR_AUDIO_FORMAT);
    87. Constants.IVW_ASR_TARGET_DATA_LINE.start();*/
    88. while (true) {
    89. audioDataByteArray = new byte[Constants.IVW_FRAME_SIZE];
    90. len = new AudioInputStream(Constants.IVW_ASR_TARGET_DATA_LINE).read(audioDataByteArray);
    91. outputStream.write(Arrays.copyOf(audioDataByteArray, len));
    92. outputStream.flush();
    93. // System.err.println(AIMain.len + "" + AIMain.audioDataByteArray);
    94. if (len == -1) {
    95. status = 2;// 标志读取完毕
    96. }
    97. switch (status) {
    98. case StatusFirstFrame: // 第一帧音频status = 0
    99. JsonObject frame = new JsonObject();
    100. JsonObject business = new JsonObject(); //第一帧必须发送
    101. JsonObject common = new JsonObject(); //第一帧必须发送
    102. JsonObject data = new JsonObject(); //每一帧都要发送
    103. // 填充common
    104. common.addProperty("app_id", appid);
    105. //填充business
    106. business.addProperty("language", "zh_cn");//
    107. //business.addProperty("language", "en_us");//英文
    108. //business.addProperty("language", "ja_jp");//日语,在控制台可添加试用或购买
    109. //business.addProperty("language", "ko_kr");//韩语,在控制台可添加试用或购买
    110. //business.addProperty("language", "ru-ru");//俄语,在控制台可添加试用或购买
    111. //business.addProperty("ptt", 1);
    112. business.addProperty("domain", "iat");
    113. //mandarin中文普通话 广东话cantonese
    114. business.addProperty("accent", "mandarin");//中文方言请在控制台添加试用,添加后即展示相应参数值cantonese//mandarin
    115. //business.addProperty("nunum", 0);
    116. //business.addProperty("ptt", 1);//标点符号
    117. //business.addProperty("rlang", "zh-hk"); // zh-cn :简体中文(默认值)zh-hk :繁体香港(若未授权不生效,在控制台可免费开通)
    118. business.addProperty("vinfo", 1);
    119. business.addProperty("dwa", "wpgs");//动态修正(若未授权不生效,在控制台可免费开通)
    120. business.addProperty("vad_eos", 3000);
    121. //business.addProperty("fa_nbest", true);
    122. //business.addProperty("fa_sch", true);
    123. //business.addProperty("vinfo", 1);
    124. //business.addProperty("speex_size", 70);
    125. //business.addProperty("nbest", 5);// 句子多候选(若未授权不生效,在控制台可免费开通)
    126. //business.addProperty("wbest", 3);// 词级多候选(若未授权不生效,在控制台可免费开通)
    127. //填充data
    128. data.addProperty("status", StatusFirstFrame);
    129. data.addProperty("format", "audio/L16;rate=16000");
    130. //data.addProperty("encoding", "speex-wb");
    131. data.addProperty("encoding", "raw");
    132. data.addProperty("audio", Base64.getEncoder().encodeToString(Arrays.copyOf(audioDataByteArray, len)));
    133. //填充frame
    134. frame.add("common", common);
    135. frame.add("business", business);
    136. frame.add("data", data);
    137. // System.out.println("即将发送第一帧数据...");
    138. // System.err.println(frame.toString());
    139. webSocket.send(frame.toString());
    140. status = StatusContinueFrame; // 发送完第一帧改变status 为 1
    141. break;
    142. case StatusContinueFrame: //中间帧status = 1
    143. JsonObject frame1 = new JsonObject();
    144. JsonObject data1 = new JsonObject();
    145. data1.addProperty("status", StatusContinueFrame);
    146. data1.addProperty("format", "audio/L16;rate=16000");
    147. //data1.addProperty("encoding", "speex-wb");
    148. data1.addProperty("encoding", "raw");
    149. String temp = Base64.getEncoder().encodeToString(Arrays.copyOf(audioDataByteArray, len));
    150. data1.addProperty("audio", temp);
    151. frame1.add("data", data1);
    152. //System.out.println(temp);
    153. webSocket.send(frame1.toString());
    154. break;
    155. }
    156. try {
    157. Thread.sleep(200);
    158. if (!IAT_FLAG) {
    159. //System.out.println("本次会话结束");
    160. break;
    161. }
    162. } catch (Exception e) {
    163. e.printStackTrace();
    164. }
    165. }
    166. //说明读完了
    167. status = StatusLastFrame;
    168. JsonObject frame2 = new JsonObject();
    169. JsonObject data2 = new JsonObject();
    170. data2.addProperty("status", StatusLastFrame);
    171. data2.addProperty("audio", "");
    172. data2.addProperty("format", "audio/L16;rate=16000");
    173. //data2.addProperty("encoding", "speex-wb");
    174. data2.addProperty("encoding", "raw");
    175. frame2.add("data", data2);
    176. webSocket.send(frame2.toString());
    177. // System.err.println(frame2.toString());
    178. // System.out.println("all data is send");
    179. } catch (Exception e) {
    180. // TODO Auto-generated catch block
    181. e.printStackTrace();
    182. }
    183. }).start();
    184. }
    185. @Override
    186. public void onMessage(WebSocket webSocket, String text) {
    187. // System.out.println(text);
    188. super.onMessage(webSocket, text);
    189. ResponseData resp = json.fromJson(text, ResponseData.class);
    190. if (resp != null) {
    191. if (resp.getCode() != 0) {
    192. System.out.println("code=>" + resp.getCode() + " error=>" + resp.getMessage() + " sid=" + resp.getSid());
    193. System.out.println("错误码查询链接:https://www.xfyun.cn/document/error-code");
    194. return;
    195. }
    196. if (resp.getData() != null) {
    197. if (resp.getData().getResult() != null) {
    198. Text te = resp.getData().getResult().getText();
    199. //System.out.println(te.toString());
    200. try {
    201. decoder.decode(te);
    202. dateEnd = new Date();
    203. // System.out.println("耗时:" + (dateEnd.getTime() - dateBegin.getTime()) + "ms");
    204. System.out.println(Constants.YELLOW_BACKGROUND + "用户说话识别中:" + decoder.toString() + Constants.RESET);
    205. //System.err.println("中间识别JSON结果 ----" + text);
    206. } catch (Exception e) {
    207. e.printStackTrace();
    208. }
    209. }
    210. if (resp.getData().getStatus() == 2) {
    211. // todo resp.data.status ==2 说明数据全部返回完毕,可以关闭连接,释放资源
    212. //System.err.println("我的getStatus() == 2");
    213. // System.out.println("session end ");
    214. dateEnd = new Date();
    215. // System.out.println(sdf.format(dateBegin) + "开始");
    216. // System.out.println(sdf.format(dateEnd) + "结束");
    217. // System.out.println("耗时:" + (dateEnd.getTime() - dateBegin.getTime()) + "ms");
    218. System.out.println(Constants.YELLOW_BACKGROUND + "用户说话识别最终结果:" + decoder.toString() + Constants.RESET);
    219. // System.out.println("本次识别sid ==》" + resp.getSid());
    220. decoder.discard();
    221. webSocket.close(1000, "");
    222. IatMic.IAT_FLAG = false;
    223. // System.exit(0);
    224. } else {
    225. // todo 根据返回的数据处理
    226. }
    227. }
    228. }
    229. }
    230. @Override
    231. public void onFailure(WebSocket webSocket, Throwable t, Response response) {
    232. super.onFailure(webSocket, t, response);
    233. try {
    234. if (null != response) {
    235. int code = response.code();
    236. System.out.println("onFailure code:" + code);
    237. System.out.println("onFailure body:" + response.body().string());
    238. if (101 != code) {
    239. System.out.println("connection failed");
    240. System.exit(0);
    241. }
    242. }
    243. } catch (IOException e) {
    244. // TODO Auto-generated catch block
    245. e.printStackTrace();
    246. }
    247. }
    248. public static String getAuthUrl(String hostUrl, String apiKey, String apiSecret) throws Exception {
    249. URL url = new URL(hostUrl);
    250. SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
    251. format.setTimeZone(TimeZone.getTimeZone("GMT"));
    252. String date = format.format(new Date());
    253. //String date = format.format(new Date());
    254. //System.err.println(date);
    255. StringBuilder builder = new StringBuilder("host: ").append(url.getHost()).append("\n").//
    256. append("date: ").append(date).append("\n").//
    257. append("GET ").append(url.getPath()).append(" HTTP/1.1");
    258. //System.err.println(builder);
    259. Charset charset = Charset.forName("UTF-8");
    260. Mac mac = Mac.getInstance("hmacsha256");
    261. SecretKeySpec spec = new SecretKeySpec(apiSecret.getBytes(charset), "hmacsha256");
    262. mac.init(spec);
    263. byte[] hexDigits = mac.doFinal(builder.toString().getBytes(charset));
    264. String sha = Base64.getEncoder().encodeToString(hexDigits);
    265. //System.err.println(sha);
    266. String authorization = String.format("api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey, "hmac-sha256", "host date request-line", sha);
    267. //System.err.println(authorization);
    268. HttpUrl httpUrl = HttpUrl.parse("https://" + url.getHost() + url.getPath()).newBuilder().//
    269. addQueryParameter("authorization", Base64.getEncoder().encodeToString(authorization.getBytes(charset))).//
    270. addQueryParameter("date", date).//
    271. addQueryParameter("host", url.getHost()).//
    272. build();
    273. return httpUrl.toString();
    274. }
    275. public static class ResponseData {
    276. private int code;
    277. private String message;
    278. private String sid;
    279. private Data data;
    280. public int getCode() {
    281. return code;
    282. }
    283. public String getMessage() {
    284. return this.message;
    285. }
    286. public String getSid() {
    287. return sid;
    288. }
    289. public Data getData() {
    290. return data;
    291. }
    292. }
    293. public static class Data {
    294. private int status;
    295. private Result result;
    296. public int getStatus() {
    297. return status;
    298. }
    299. public Result getResult() {
    300. return result;
    301. }
    302. }
    303. public static class Result {
    304. int bg;
    305. int ed;
    306. String pgs;
    307. int[] rg;
    308. int sn;
    309. Ws[] ws;
    310. boolean ls;
    311. JsonObject vad;
    312. public Text getText() {
    313. Text text = new Text();
    314. StringBuilder sb = new StringBuilder();
    315. for (Ws ws : this.ws) {
    316. sb.append(ws.cw[0].w);
    317. }
    318. text.sn = this.sn;
    319. text.text = sb.toString();
    320. text.sn = this.sn;
    321. text.rg = this.rg;
    322. text.pgs = this.pgs;
    323. text.bg = this.bg;
    324. text.ed = this.ed;
    325. text.ls = this.ls;
    326. text.vad = this.vad == null ? null : this.vad;
    327. return text;
    328. }
    329. }
    330. public static class Ws {
    331. Cw[] cw;
    332. int bg;
    333. int ed;
    334. }
    335. public static class Cw {
    336. int sc;
    337. String w;
    338. }
    339. public static class Text {
    340. int sn;
    341. int bg;
    342. int ed;
    343. String text;
    344. String pgs;
    345. int[] rg;
    346. boolean deleted;
    347. boolean ls;
    348. JsonObject vad;
    349. @Override
    350. public String toString() {
    351. return "Text{" + "bg=" + bg + ", ed=" + ed + ", ls=" + ls + ", sn=" + sn + ", text='" + text + '\'' + ", pgs=" + pgs + ", rg=" + Arrays.toString(rg) + ", deleted=" + deleted + ", vad=" + (vad == null ? "null" : vad.getAsJsonArray("ws").toString()) + '}';
    352. }
    353. }
    354. //解析返回数据,仅供参考
    355. public static class Decoder {
    356. private Text[] texts;
    357. private int defc = 10;
    358. public Decoder() {
    359. this.texts = new Text[this.defc];
    360. }
    361. public synchronized void decode(Text text) {
    362. if (text.sn >= this.defc) {
    363. this.resize();
    364. }
    365. if ("rpl".equals(text.pgs)) {
    366. for (int i = text.rg[0]; i <= text.rg[1]; i++) {
    367. this.texts[i].deleted = true;
    368. }
    369. }
    370. this.texts[text.sn] = text;
    371. }
    372. public String toString() {
    373. StringBuilder sb = new StringBuilder();
    374. for (Text t : this.texts) {
    375. if (t != null && !t.deleted) {
    376. sb.append(t.text);
    377. }
    378. }
    379. return sb.toString();
    380. }
    381. public void resize() {
    382. int oc = this.defc;
    383. this.defc <<= 1;
    384. Text[] old = this.texts;
    385. this.texts = new Text[this.defc];
    386. for (int i = 0; i < oc; i++) {
    387. this.texts[i] = old[i];
    388. }
    389. }
    390. public void discard() {
    391. for (int i = 0; i < this.texts.length; i++) {
    392. this.texts[i] = null;
    393. }
    394. }
    395. }
    396. }

  • 相关阅读:
    【yolov7系列三】实战从0构建训练自己的数据集
    P2432 zxbsmk爱查错,字符串线性dp
    01 初识FPGA
    C语言学习-数组应用-三子棋(4.1)
    猿创征文|【Maven】分模块开发、依赖管理、聚合、继承、属性
    2022年6月21日cloudflare 故障宕机事件记录 恭喜CF凭借一己之力,拉闸全球50%网络分发
    解密 JavaScript 中的 this:作用、行为和陷阱
    JuiceFS 在多云存储架构中的应用 | 深势科技分享
    [面试篇]Mysql 索引 BTree 与 B+Tree 的区别
    C++ Reference: Standard C++ Library reference: C Library: cmath: atan
  • 原文地址:https://blog.csdn.net/p6448777/article/details/140094131