Aspose.Words 是一款专门用于文档处理的 API 工具,可用于创建、读取、编辑、打印和保存 Word、OpenDocument、Markdown、HTML、PDF 等流行格式的文件。
除了可以在这些流行的文档格式之间相互转换外,Aspose.Words 还支持使用文档对象模型 (DOM) 对任何文档元素进行渲染、打印、报告生成、邮件合并和高级格式化。
DOC, DOCX, DOT, DOTX, DOCM, DOTM, Word 6.0 or Word 95
XML, WordML, XAML, Flat OPC, Flat OPC Macro-Enabled, Flat OPC Template, Flat OPC Macro-Enabled Template
HTML, MHTML, MD
EPUB, MOBI, CHM, AZW3
SVG, TIFF, PNG, BMP, JPEG, GIF, EMF
XPS, OpenXPS
TXT
RTF
ODT, OTT
PS
PCL
jar 包引入
下载 jar 包,在 src 同级目录下新建 lib 文件夹,将下载的 jar 包拷贝进去
在 pom 文件中引入刚下载的 jar 包
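<dependency>
    <groupId>com.aspose</groupId>
    <artifactId>aspose-words</artifactId>
    <version>19.1</version>
    <scope>system</scope>
    <systemPath>${project.basedir}/lib/aspose-words-19.1.jar</systemPath>
</dependency>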
使用代码
转换操作工具类代码
import com.aspose.words.*;
import com.google.common.collect.ImmutableMap;
import lombok.extern.slf4j.Slf4j;

import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import java.awt.image.BufferedImage;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

- /**
- * aspose words 操作工具类
- *
- * @author wuxianglong
- */
- @Slf4j
- public class WordUtils {
- private static final String OS_NAME_STR = "os.name";
- private static final String WINDOWS_STR = "windows";
- private static final String FORM_TEXT = "FORMTEXT";
-
- /**
- * linux系统下pdf操作需要指定字体库
- * Centos8 字体库文件目录
- */
- private static final String LINUX_FONTS_PATH = "/usr/share/fonts";
-
- public static void main(String[] args) throws Exception {
- checkLicense();
- String inPath = "C:\\Users\\username\\Desktop\\test.docx";
- String outPath = "C:\\Users\\username\\Desktop\\test.html";
- docToPdf(inPath, outPath);
- }
-
- /**
- * word转html
- *
- * @param inPath 输入文件路径
- * @param outPath 输出文件路径
- * @throws Exception 操作异常
- */
- public static void docToHtml(String inPath, String outPath) throws Exception {
- long start = System.currentTimeMillis();
- Document doc = new Document(inPath);
- HtmlSaveOptions opts = new HtmlSaveOptions(SaveFormat.HTML);
- opts.setHtmlVersion(HtmlVersion.XHTML);
- opts.setExportImagesAsBase64(true);
- opts.setExportPageMargins(true);
- opts.setExportXhtmlTransitional(true);
- opts.setExportDocumentProperties(true);
- doc.save(outPath, opts);
- log.info("WORD转HTML成功,耗时:{}", System.currentTimeMillis() - start);
- }
-
- /**
- * word转pdf
- *
- * @param inPath 输入文件路径
- * @param outPath 输出文件路径
- * @throws Exception 操作异常
- */
- public static void docToPdf(String inPath, String outPath) throws Exception {
- long start = System.currentTimeMillis();
- log.info("WORD转PDF保存路径:{}", outPath);
- FileOutputStream os = getFileOutputStream(outPath);
- Document doc = new Document(inPath);
- doc.save(os, SaveFormat.PDF);
- os.close();
- log.info("WORD转PDF成功,耗时:{}", System.currentTimeMillis() - start);
- }
-
- /**
- * word转pdf
- *
- * @param inputStream 文件输入流
- * @param outPath 输出文件路径
- * @throws Exception 操作异常
- */
- public static void docToPdf(InputStream inputStream, String outPath) throws Exception {
- long start = System.currentTimeMillis();
- FileOutputStream os = getFileOutputStream(outPath);
- Document doc = new Document(inputStream);
- doc.save(os, SaveFormat.PDF);
- os.close();
- log.info("WORD转PDF成功,耗时:{}", System.currentTimeMillis() - start);
- }
-
- /**
- * word转换为图片,每页一张图片
- *
- * @param inPath word文件路径
- * @throws Exception 操作异常
- */
- public static void docToImage(String inPath) throws Exception {
- long start = System.currentTimeMillis();
- log.info("根据WORD页数转换多张图片");
- InputStream inputStream = Files.newInputStream(Paths.get(inPath));
- File file = new File(inPath);
- String name = file.getName();
- String fileName = name.substring(0, name.lastIndexOf("."));
- // 文件父级路径
- String parent = file.getParent();
- log.info("parent:{}", parent);
- // 创建目录
- boolean mkdir = new File(parent + "/" + fileName).mkdir();
- log.info("mkdir:{}", mkdir);
- List
bufferedImages = wordToImg(inputStream); - for (int i = 0; i < bufferedImages.size(); i++) {
- // 写入文件
- ImageIO.write(bufferedImages.get(i), "png", new File(parent + "/" + fileName + "/" + "第" + i + "页" + fileName + ".png"));
- }
- inputStream.close();
- log.info("WORD转图片成功,耗时:{}", System.currentTimeMillis() - start);
- }
-
- /**
- * word转换为图片,合并为一张图片
- *
- * @param inPath word文件路径
- * @throws Exception 操作异常
- */
- public static void docToOneImage(String inPath) throws Exception {
- long start = System.currentTimeMillis();
- log.info("WORD转换为一张图片");
- InputStream inputStream = Files.newInputStream(Paths.get(inPath));
- File file = new File(inPath);
- String name = file.getName();
- String fileName = name.substring(0, name.lastIndexOf("."));
- String parent = file.getParent();
- List
bufferedImages = wordToImg(inputStream); - // 合并为一张图片
- BufferedImage image = MergeImage.mergeImage(false, bufferedImages);
- ImageIO.write(image, "png", new File(parent + "/" + fileName + ".png"));
- inputStream.close();
- log.info("WORD转图片成功,耗时:{}", System.currentTimeMillis() - start);
- }
-
- /**
- * html转word
- *
- * @param inPath 输入文件路径
- * @param outPath 输出文件路径
- * @throws Exception 操作异常
- */
- public static void htmlToWord(String inPath, String outPath) throws Exception {
- Document wordDoc = new Document(inPath);
- DocumentBuilder builder = new DocumentBuilder(wordDoc);
- for (Field field : wordDoc.getRange().getFields()) {
- if (field.getFieldCode().contains(FORM_TEXT)) {
- // 去除掉文字型窗体域
- builder.moveToField(field, true);
- builder.write(field.getResult());
- field.remove();
- }
- }
- wordDoc.save(outPath, SaveFormat.DOCX);
- }
-
- /**
- * html转word,并替换指定字段内容
- *
- * @param inPath 输入文件路径
- * @param outPath 输出文件路径
- * @throws Exception 操作异常
- */
- public static void htmlToWordAndReplaceField(String inPath, String outPath) throws Exception {
- Document wordDoc = new Document(inPath);
- Range range = wordDoc.getRange();
- // 把张三替换成李四,把20替换成40
- ImmutableMap
map = ImmutableMap.of("张三", "李四", "20", "40"); - for (Map.Entry
str : map.entrySet()) { - range.replace(str.getKey(), str.getValue(), new FindReplaceOptions());
- }
- wordDoc.save(outPath, SaveFormat.DOCX);
- }
-
- /**
- * word转pdf,linux下设置字体库文件路径,并返回FileOutputStream
- *
- * @param outPath pdf输出路径
- * @return pdf输出路径 -> FileOutputStream
- * @throws FileNotFoundException FileNotFoundException
- */
- private static FileOutputStream getFileOutputStream(String outPath) throws FileNotFoundException {
- if (!System.getProperty(OS_NAME_STR).toLowerCase().startsWith(WINDOWS_STR)) {
- // linux 需要配置字体库
- log.info("【WordUtils -> docToPdf】linux字体库文件路径:{}", LINUX_FONTS_PATH);
- FontSettings.getDefaultInstance().setFontsFolder(LINUX_FONTS_PATH, false);
- }
- return new FileOutputStream(outPath);
- }
-
- /**
- * word转图片
- *
- * @param inputStream word input stream
- * @return BufferedImage list
- * @throws Exception exception
- */
- private static List
wordToImg(InputStream inputStream) throws Exception { - Document doc = new Document(inputStream);
- ImageSaveOptions options = new ImageSaveOptions(SaveFormat.PNG);
- options.setPrettyFormat(true);
- options.setUseAntiAliasing(true);
- options.setUseHighQualityRendering(true);
- int pageCount = doc.getPageCount();
- List
imageList = new ArrayList<>(); - for (int i = 0; i < pageCount; i++) {
- OutputStream output = new ByteArrayOutputStream();
- options.setPageIndex(i);
- doc.save(output, options);
- ImageInputStream imageInputStream = ImageIO.createImageInputStream(parse(output));
- imageList.add(ImageIO.read(imageInputStream));
- }
- return imageList;
- }
-
- /**
- * outputStream转inputStream
- *
- * @param out OutputStream
- * @return inputStream
- */
- private static ByteArrayInputStream parse(OutputStream out) {
- return new ByteArrayInputStream(((ByteArrayOutputStream) out).toByteArray());
- }
-
- /**
- * 校验许可文件
- */
- private static void checkLicense() {
- try {
- InputStream is = com.aspose.words.Document.class.getResourceAsStream("/com.aspose.words.lic_2999.xml");
- if (is == null) {
- return;
- }
- License asposeLicense = new License();
- asposeLicense.setLicense(is);
- is.close();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- }
图片合并工具类
- import java.awt.image.BufferedImage;
- import java.util.List;
-
/**
 * Utility for stitching several images into a single image.
 *
 * @author wuxianglong
 */
public class MergeImage {

    /** Height in pixels of the blank strip left between images when stacking vertically. */
    private static final int VERTICAL_GAP = 2;

    /**
     * Merges any number of images into one {@code TYPE_INT_RGB} image.
     *
     * <p>Horizontal merging places the images left to right with no spacing;
     * the result is as wide as the sum of the widths and as tall as the tallest
     * image. Vertical merging places them top to bottom with a
     * {@value #VERTICAL_GAP}-pixel gap between consecutive images; the result is
     * as wide as the widest image. Areas not covered by any source image keep
     * the new image's initial (black) pixels.
     *
     * @param isHorizontal {@code true} to merge horizontally, {@code false} to merge vertically
     * @param images       images to merge, in order; must contain at least one image
     * @return the merged image
     * @throws NullPointerException     if {@code images} is {@code null}
     * @throws IllegalArgumentException if {@code images} is empty
     */
    public static BufferedImage mergeImage(boolean isHorizontal, List<BufferedImage> images) {
        if (images == null) {
            throw new NullPointerException("images must not be null");
        }
        if (images.isEmpty()) {
            // Fail with a clear message instead of BufferedImage's zero-dimension error.
            throw new IllegalArgumentException("images must contain at least one image");
        }

        // Total and maximum extents across all source images.
        int totalWidth = 0;
        int totalHeight = 0;
        int maxWidth = 0;
        int maxHeight = 0;
        for (BufferedImage img : images) {
            totalWidth += img.getWidth();
            totalHeight += img.getHeight();
            maxWidth = Math.max(maxWidth, img.getWidth());
            maxHeight = Math.max(maxHeight, img.getHeight());
        }
        // One gap between each pair of neighbours, none after the last image.
        totalHeight += VERTICAL_GAP * (images.size() - 1);

        BufferedImage destImage = isHorizontal
                ? new BufferedImage(totalWidth, maxHeight, BufferedImage.TYPE_INT_RGB)
                : new BufferedImage(maxWidth, totalHeight, BufferedImage.TYPE_INT_RGB);

        // Copy each source image into place, advancing along the merge axis.
        int offset = 0;
        for (BufferedImage img : images) {
            int width = img.getWidth();
            int height = img.getHeight();
            int[] pixels = img.getRGB(0, 0, width, height, null, 0, width);
            if (isHorizontal) {
                destImage.setRGB(offset, 0, width, height, pixels, 0, width);
                offset += width;
            } else {
                destImage.setRGB(0, offset, width, height, pixels, 0, width);
                offset += height + VERTICAL_GAP;
            }
        }
        return destImage;
    }

}