至于 javacv 依赖,网上有很多缩减方案,注释部分是可行的缩减方案;从视频中提取音频无需安装 ffmpeg,只需引入依赖即可。而 vosk 需要先下载模型方可使用,大模型下载比较慢,可先用小模型跑通。
-
-
- <properties>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- <javacv.version>1.5.6</javacv.version>
- <system.windowsx64>windows-x86_64</system.windowsx64>
- </properties>
-
-
- <dependencies>
-
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>javacv-platform</artifactId>
- <version>1.5.10</version>
- </dependency>
-
-
- <dependency>
- <groupId>org</groupId>
- <artifactId>jaudiotagger</artifactId>
- <version>2.0.3</version>
- </dependency>
- <dependency>
- <groupId>net.java.dev.jna</groupId>
- <artifactId>jna</artifactId>
- <version>5.13.0</version>
- </dependency>
- <dependency>
- <groupId>com.alphacephei</groupId>
- <artifactId>vosk</artifactId>
- <version>0.3.45</version>
- </dependency>
-
- <dependency>
- <groupId>ws.schild</groupId>
- <artifactId>jave-core</artifactId>
- <version>3.1.1</version>
- </dependency>
-
- <dependency>
- <groupId>com.alibaba</groupId>
- <artifactId>fastjson</artifactId>
- <version>1.2.83</version>
- </dependency>
- </dependencies>
- package org.example;
-
- import org.bytedeco.ffmpeg.global.avcodec;
- import org.bytedeco.javacv.FFmpegFrameGrabber;
- import org.bytedeco.javacv.FFmpegFrameRecorder;
- import org.bytedeco.javacv.Frame;
-
-
- public class Test {
-
- public static void extractVoice(String sourceFileName, String audioUrl) throws FFmpegFrameGrabber.Exception, FFmpegFrameRecorder.Exception {
- //抓取资源
- FFmpegFrameGrabber frameGrabber = new FFmpegFrameGrabber(sourceFileName);
- Frame frame = null;
- FFmpegFrameRecorder recorder = null;
- frameGrabber.start();
- //转录为单轨, 16K采样率, wav格式
- recorder = new FFmpegFrameRecorder(audioUrl, frameGrabber.getAudioChannels());
- recorder.setFormat(frameGrabber.getFormat());
- recorder.setSampleRate(frameGrabber.getSampleRate());//frameGrabber.getSampleRate()
- //recorder.setAudioBitrate(128000);// 音频比特率
- recorder.setTimestamp(frameGrabber.getTimestamp());
- recorder.setVideoCodec(avcodec.AV_CODEC_ID_NONE); // 不录制视频
-
- recorder.start();
- int index = 0;
- while (true) {
- frame = frameGrabber.grabSamples();
- if (frame == null) break;
- if (frame.samples != null) {
- recorder.recordSamples(frame.sampleRate, frame.audioChannels, frame.samples);
- recorder.setTimestamp(frameGrabber.getTimestamp());
- }
- index++;
- }
- recorder.stop();
- recorder.release();
- frameGrabber.stop();
- frameGrabber.release();
- }
-
- public static void main(String[] args) throws FFmpegFrameGrabber.Exception, FFmpegFrameRecorder.Exception {
- String videoFilePath = "I:\\workspace\\test.mp4"; // 视频文件路径
- String audioOutputPath = "I:\\workspace\\test_audio.wav"; // 输出的音频文件路径
- long s = System.currentTimeMillis();
- extractVoice(videoFilePath, audioOutputPath);
- System.out.println(System.currentTimeMillis() - s);
- }
-
- }
至于 model,可去 Vosk 官方模型页面(https://alphacephei.com/vosk/models)下载,解压后使用。大模型下载较慢。
- package org.example;
-
- import com.alibaba.fastjson.JSON;
- import org.vosk.LibVosk;
- import org.vosk.LogLevel;
- import org.vosk.Model;
- import org.vosk.Recognizer;
-
- import javax.sound.sampled.*;
- import java.io.*;
- import java.util.Optional;
-
- public class Test3 {
- public static void main(String[] args) {
- StringBuilder result = new StringBuilder();
- LibVosk.setLogLevel(LogLevel.DEBUG);
-
- AudioFormat format = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 44100, 16, 2, 4, 44100, false);
- DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
- TargetDataLine microphone;
- SourceDataLine speakers;
-
- try (Model model = new Model("I:\\workspace\\vosk-model-small-cn-0.22");
- InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("I:\\workspace\\test_audio.wav")));
- Recognizer recognizer = new Recognizer(model, 120000)) {
- try {
-
- microphone = (TargetDataLine) AudioSystem.getLine(info);
- microphone.open(format);
- microphone.start();
-
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- int numBytesRead;
- int CHUNK_SIZE = 1024;
- int bytesRead = 0;
-
- DataLine.Info dataLineInfo = new DataLine.Info(SourceDataLine.class, format);
- speakers = (SourceDataLine) AudioSystem.getLine(dataLineInfo);
- speakers.open(format);
- speakers.start();
- byte[] b = new byte[4096];
-
- while (bytesRead <= 100000000) {
- byte[] audioData = new byte[CHUNK_SIZE];
- numBytesRead = ais.read(audioData, 0, CHUNK_SIZE);
- bytesRead += numBytesRead;
-
- out.write(audioData, 0, numBytesRead);
-
- speakers.write(audioData, 0, numBytesRead);
-
- if (recognizer.acceptWaveForm(audioData, numBytesRead)) {
- result.append(getResult(recognizer.getResult()));
- } else {
- result.append(getResult(recognizer.getPartialResult()));
- }
- }
- result.append(getResult(recognizer.getFinalResult()));
- speakers.drain();
- speakers.close();
- microphone.close();
- } catch (Exception e) {
- e.printStackTrace();
- }
- System.out.println(result.toString());
- } catch (IOException e) {
- throw new RuntimeException(e);
- } catch (UnsupportedAudioFileException e) {
- throw new RuntimeException(e);
- }
- }
-
- /**
- * 获取返回结果
- *
- * @param result
- * @return
- */
- private static String getResult(String result) {
- VoskResult vr = JSON.parseObject(result,VoskResult.class);
- return Optional.ofNullable(vr).map(VoskResult::getText).orElse("");
-
- }
-
- public static void main1(String[] argv) throws IOException, UnsupportedAudioFileException {
- LibVosk.setLogLevel(LogLevel.DEBUG);
- StringBuilder result = new StringBuilder();
- try (Model model = new Model("I:\\workspace\\vosk-model-small-cn-0.22");
- InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("I:\\workspace\\test_audio.wav")));
- Recognizer recognizer = new Recognizer(model, 120000)) {
-
- int nbytes;
- byte[] b = new byte[4096];
- while ((nbytes = ais.read(b)) >= 0) {
- if (recognizer.acceptWaveForm(b, nbytes)) {
- result.append(getResult(recognizer.getResult()));
- } else {
- result.append(getResult(recognizer.getPartialResult()));
- }
- }
- result.append(getResult(recognizer.getFinalResult()));
- }
- System.out.println(result);
- }
- }
测试可行,但识别率没有做过对比,大模型也没有试过。这里仅提供一种可行的离线解决方案。