有次上班时,小伙伴和我吐槽用Java实现Word转PDF太麻烦。我灵机一动:让Java调用Python,由Python完成转换操作不就行了?
Java:JDK1.8
python:3.12
- import docx2pdf
- import sys
- import glob
- import os
-
-
def w2ps(d):
    """Convert one Word document to a PDF written alongside it.

    Args:
        d: Path of the Word file to convert.

    The output path is ``d`` with its final extension replaced by ``.pdf``;
    that path is printed once ``docx2pdf.convert`` returns.
    """
    # Swap only the trailing extension. Chained str.replace() would also
    # rewrite ".doc"/".docx" appearing in a directory name, and would leave
    # the path unchanged for upper-case extensions such as ".DOCX".
    stem, _ = os.path.splitext(d)
    pdf_file = stem + '.pdf'
    docx2pdf.convert(d, pdf_file)
    print(f"转换完成,PDF文件已保存为:{pdf_file}")
-
-
def each():
    """Run ``w2ps`` on every ``*.doc`` and then every ``*.docx`` file in the current working directory."""
    cwd = os.getcwd()

    # Collect all matches up front, .doc files first, then .docx files.
    word_paths = []
    for pattern in ('*.doc', '*.docx'):
        word_paths.extend(glob.glob(os.path.join(cwd, pattern)))

    for word_path in word_paths:
        w2ps(word_path)
-
-
if __name__ == "__main__":
    # With a command-line argument, convert just that file; otherwise
    # convert every Word file found in the current working directory.
    cli_args = sys.argv[1:]
    if cli_args:
        w2ps(cli_args[0])
    else:
        each()
- import java.io.IOException;
-
/**
 * Launches the external Word-to-PDF converter, either as a Python script
 * ({@link #main111}) or as a packaged executable ({@link #main}).
 */
public class PythonCaller {

    /**
     * Starts the given command and blocks until it exits.
     *
     * <p>The command is passed as an argument list, so paths containing
     * spaces reach the child process as single arguments. The child's
     * stdin/stdout/stderr are inherited from this JVM, so its output is
     * visible and it cannot stall on a full, unread pipe.
     *
     * @param command program followed by its arguments
     * @return the exit code of the process
     * @throws IOException if the process cannot be started
     * @throws InterruptedException if the current thread is interrupted while waiting
     */
    private static int runAndWait(String... command) throws IOException, InterruptedException {
        Process process = new ProcessBuilder(command)
                .inheritIO()
                .start();
        return process.waitFor();
    }

    /**
     * Runs the command, prints its exit code, and reports failures on stderr.
     *
     * @param command program followed by its arguments
     */
    private static void runAndReport(String... command) {
        try {
            int exitCode = runAndWait(command);
            System.out.println("Process exited with code " + exitCode);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /**
     * Variant that invokes the Python script through the {@code python} interpreter.
     *
     * @param args unused
     */
    public static void main111(String[] args) {
        String pythonScriptPath = "D:\\WorkSpace\\python\\pycorrector-master\\shany\\W2P.py"; // path of the Python script
        String wordFilePath = "E:\\新建文件夹 (22)\\问题排查.docx"; // path of the Word file to convert

        // Pass each part as its own argument instead of concatenating one
        // command string: Runtime.exec(String) splits on whitespace and does
        // not honour quotes.
        runAndReport("python", pythonScriptPath, wordFilePath);
    }

    /**
     * Invokes the packaged converter executable on a single Word file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String executablePath = "D:\\新建文件夹\\a\\W2P.exe"; // replace with the actual path of the executable
        String wordFilePath = "E:\\新建文件夹 (22)\\问题排查.docx"; // replace with the path of your Word file

        // The Word file path is handed to the executable as its only argument.
        runAndReport(executablePath, wordFilePath);
    }
}
后来为了避免运行环境缺少第三方依赖,这里的Python脚本被单独打包成了exe文件,Java代码中也相应增加了对exe文件的调用。
顺手还写了一个PDF转Word的脚本,功能和用法与上面的基本一样。
- from pdf2docx import Converter
- import sys
- import glob
- import os
-
-
def pdf_to_word(pdf_path, word_path):
    """Convert the PDF at ``pdf_path`` into a Word document at ``word_path``.

    Args:
        pdf_path: Path of the source PDF file.
        word_path: Path where the converted document is written.

    Conversion is requested from page index 0 with no end page given.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(word_path, start=0, end=None)
    finally:
        # Close the converter even when convert() raises, so the opened PDF
        # is not left dangling.
        cv.close()
-
-
def p2ws(file_path):
    """Convert one PDF file to a ``.docx`` file written alongside it.

    Args:
        file_path: Path of the PDF file to convert.

    The output path is ``file_path`` with its final extension replaced by
    ``.docx``; that path is printed after the conversion call returns.
    """
    # Swap only the trailing extension: str.replace() would also rewrite a
    # ".pdf" occurring elsewhere in the path.
    stem, _ = os.path.splitext(file_path)
    word_file = stem + '.docx'
    pdf_to_word(file_path, word_file)
    # Report the generated Word file (the original message printed the input PDF path).
    print(f"转换完成,WORD文件已保存为:{word_file}")
-
-
def each():
    """Run ``p2ws`` on every ``*.pdf`` file in the current working directory."""
    pdf_pattern = os.path.join(os.getcwd(), '*.pdf')

    # Convert each matching PDF in the order glob returns them.
    for pdf_path in glob.glob(pdf_pattern):
        p2ws(pdf_path)
-
-
if __name__ == "__main__":
    # With a command-line argument, convert just that PDF; otherwise
    # convert every PDF found in the current working directory.
    cli_args = sys.argv[1:]
    if cli_args:
        p2ws(cli_args[0])
    else:
        each()