• Java离线视频提取音频+音频提取文案


    需引入javacv、vosk相关依赖。

    至于javacv依赖,网上有很多缩减方案,注释部分是可行的缩减方案。视频提取音频这里无需安装ffmpeg,只需引入依赖。而vosk需要下载模型方可使用,并且下载比较慢,可先用小模型跑通。

    1. <properties>
    2. <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    3. <javacv.version>1.5.6</javacv.version>
    4. <system.windowsx64>windows-x86_64</system.windowsx64>
    5. </properties>
    6. <dependencies>
    7. <dependency>
    8. <groupId>org.bytedeco</groupId>
    9. <artifactId>javacv-platform</artifactId>
    10. <version>1.5.10</version>
    11. </dependency>
    12. <dependency>
    13. <groupId>org</groupId>
    14. <artifactId>jaudiotagger</artifactId>
    15. <version>2.0.3</version>
    16. </dependency>
    17. <dependency>
    18. <groupId>net.java.dev.jna</groupId>
    19. <artifactId>jna</artifactId>
    20. <version>5.13.0</version>
    21. </dependency>
    22. <dependency>
    23. <groupId>com.alphacephei</groupId>
    24. <artifactId>vosk</artifactId>
    25. <version>0.3.45</version>
    26. </dependency>
    27. <dependency>
    28. <groupId>ws.schild</groupId>
    29. <artifactId>jave-core</artifactId>
    30. <version>3.1.1</version>
    31. </dependency>
    32. <dependency>
    33. <groupId>com.alibaba</groupId>
    34. <artifactId>fastjson</artifactId>
    35. <version>1.2.83</version>
    36. </dependency>
    37. </dependencies>

    视频提取音频

    1. package org.example;
    2. import org.bytedeco.ffmpeg.global.avcodec;
    3. import org.bytedeco.javacv.FFmpegFrameGrabber;
    4. import org.bytedeco.javacv.FFmpegFrameRecorder;
    5. import org.bytedeco.javacv.Frame;
    6. public class Test {
    7. public static void extractVoice(String sourceFileName, String audioUrl) throws FFmpegFrameGrabber.Exception, FFmpegFrameRecorder.Exception {
    8. //抓取资源
    9. FFmpegFrameGrabber frameGrabber = new FFmpegFrameGrabber(sourceFileName);
    10. Frame frame = null;
    11. FFmpegFrameRecorder recorder = null;
    12. frameGrabber.start();
    13. //转录为单轨, 16K采样率, wav格式
    14. recorder = new FFmpegFrameRecorder(audioUrl, frameGrabber.getAudioChannels());
    15. recorder.setFormat(frameGrabber.getFormat());
    16. recorder.setSampleRate(frameGrabber.getSampleRate());//frameGrabber.getSampleRate()
    17. //recorder.setAudioBitrate(128000);// 音频比特率
    18. recorder.setTimestamp(frameGrabber.getTimestamp());
    19. recorder.setVideoCodec(avcodec.AV_CODEC_ID_NONE); // 不录制视频
    20. recorder.start();
    21. int index = 0;
    22. while (true) {
    23. frame = frameGrabber.grabSamples();
    24. if (frame == null) break;
    25. if (frame.samples != null) {
    26. recorder.recordSamples(frame.sampleRate, frame.audioChannels, frame.samples);
    27. recorder.setTimestamp(frameGrabber.getTimestamp());
    28. }
    29. index++;
    30. }
    31. recorder.stop();
    32. recorder.release();
    33. frameGrabber.stop();
    34. frameGrabber.release();
    35. }
    36. public static void main(String[] args) throws FFmpegFrameGrabber.Exception, FFmpegFrameRecorder.Exception {
    37. String videoFilePath = "I:\\workspace\\test.mp4"; // 视频文件路径
    38. String audioOutputPath = "I:\\workspace\\test_audio.wav"; // 输出的音频文件路径
    39. long s = System.currentTimeMillis();
    40. extractVoice(videoFilePath, audioOutputPath);
    41. System.out.println(System.currentTimeMillis() - s);
    42. }
    43. }

    音频提取文字

    至于model可去此网站下载,解压使用。大模型下载较慢

    VOSK Models:https://alphacephei.com/vosk/models

    1. package org.example;
    2. import com.alibaba.fastjson.JSON;
    3. import org.vosk.LibVosk;
    4. import org.vosk.LogLevel;
    5. import org.vosk.Model;
    6. import org.vosk.Recognizer;
    7. import javax.sound.sampled.*;
    8. import java.io.*;
    9. import java.util.Optional;
    10. public class Test3 {
    11. public static void main(String[] args) {
    12. StringBuilder result = new StringBuilder();
    13. LibVosk.setLogLevel(LogLevel.DEBUG);
    14. AudioFormat format = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 44100, 16, 2, 4, 44100, false);
    15. DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
    16. TargetDataLine microphone;
    17. SourceDataLine speakers;
    18. try (Model model = new Model("I:\\workspace\\vosk-model-small-cn-0.22");
    19. InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("I:\\workspace\\test_audio.wav")));
    20. Recognizer recognizer = new Recognizer(model, 120000)) {
    21. try {
    22. microphone = (TargetDataLine) AudioSystem.getLine(info);
    23. microphone.open(format);
    24. microphone.start();
    25. ByteArrayOutputStream out = new ByteArrayOutputStream();
    26. int numBytesRead;
    27. int CHUNK_SIZE = 1024;
    28. int bytesRead = 0;
    29. DataLine.Info dataLineInfo = new DataLine.Info(SourceDataLine.class, format);
    30. speakers = (SourceDataLine) AudioSystem.getLine(dataLineInfo);
    31. speakers.open(format);
    32. speakers.start();
    33. byte[] b = new byte[4096];
    34. while (bytesRead <= 100000000) {
    35. byte[] audioData = new byte[CHUNK_SIZE];
    36. numBytesRead = ais.read(audioData, 0, CHUNK_SIZE);
    37. bytesRead += numBytesRead;
    38. out.write(audioData, 0, numBytesRead);
    39. speakers.write(audioData, 0, numBytesRead);
    40. if (recognizer.acceptWaveForm(audioData, numBytesRead)) {
    41. result.append(getResult(recognizer.getResult()));
    42. } else {
    43. result.append(getResult(recognizer.getPartialResult()));
    44. }
    45. }
    46. result.append(getResult(recognizer.getFinalResult()));
    47. speakers.drain();
    48. speakers.close();
    49. microphone.close();
    50. } catch (Exception e) {
    51. e.printStackTrace();
    52. }
    53. System.out.println(result.toString());
    54. } catch (IOException e) {
    55. throw new RuntimeException(e);
    56. } catch (UnsupportedAudioFileException e) {
    57. throw new RuntimeException(e);
    58. }
    59. }
    60. /**
    61. * 获取返回结果
    62. *
    63. * @param result
    64. * @return
    65. */
    66. private static String getResult(String result) {
    67. VoskResult vr = JSON.parseObject(result,VoskResult.class);
    68. return Optional.ofNullable(vr).map(VoskResult::getText).orElse("");
    69. }
    70. public static void main1(String[] argv) throws IOException, UnsupportedAudioFileException {
    71. LibVosk.setLogLevel(LogLevel.DEBUG);
    72. StringBuilder result = new StringBuilder();
    73. try (Model model = new Model("I:\\workspace\\vosk-model-small-cn-0.22");
    74. InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("I:\\workspace\\test_audio.wav")));
    75. Recognizer recognizer = new Recognizer(model, 120000)) {
    76. int nbytes;
    77. byte[] b = new byte[4096];
    78. while ((nbytes = ais.read(b)) >= 0) {
    79. if (recognizer.acceptWaveForm(b, nbytes)) {
    80. result.append(getResult(recognizer.getResult()));
    81. } else {
    82. result.append(getResult(recognizer.getPartialResult()));
    83. }
    84. }
    85. result.append(getResult(recognizer.getFinalResult()));
    86. }
    87. System.out.println(result);
    88. }
    89. }

    感谢网上各位大佬能分享这些信息

    测试可行,识别率没有做过对比、大模型也没有试过。这里也就提供一种可行的离线解决方案。

  • 相关阅读:
    新型超导Fluxonium量子比特正加速量子计算机的创建
    知乎问题:如何说服技术老大用 Redis ?
    Unity3D学习笔记11——后处理
    Vue + Element-UI —— 项目实战(八)(完结)
    springcloud+nacos+dubbo服务器部署问题
    数据结构---课后习题(第一章)
    为什么Transformer模型中使用Layer Normalization(Layer Norm)而不是Batch Normalization(BN)
    利用python找出偏序集中极大元、极小元、最大元和最小元
    QGIS下载在线地图(Google 卫星、esri 卫星)
    应用服务器部署:安装Docker及摘取镜像
  • 原文地址:https://blog.csdn.net/z275598733/article/details/138043006