• Python实现Word、Excel、PPT批量转为PDF


    今天看见了一个有意思的脚本:用Python批量实现Word、Excel、PPT转PDF文件。

    因为我平时Word用得比较多,所以对此深有体会。具体是怎么实现的我们就不讨论了,因为学了这个也没什么提升,不然也不会把它当作脚本了。这里我将其放入了pyzjr库中,方便大家进行调用。

    你可以去下载pyzjr:

    pip install pyzjr -i https://pypi.tuna.tsinghua.edu.cn/simple

    调用方法:

    1. import pyzjr as pz
    2. # Instantiate the converter (an empty filePath means the current working directory)
    3. Mpdf = pz.Microsoft2PDF()
    4. # Call the conversion methods of the class
    5. Mpdf.Word2Pdf() # word -> pdf
    6. Mpdf.Excel2Pdf() # excel -> pdf
    7. Mpdf.PPt2Pdf() # ppt -> pdf
    8. Mpdf.WEP2Pdf() # word,excel,ppt -> pdf

    上面就是API的调用方式,转换后的文件会统一存放在目标文件夹下新建的名为pdf的文件夹中。

    pyzjr中的源码:

    1. import win32com.client, gc, os
    class Microsoft2PDF:
        """Convert Microsoft Office documents (Word, Excel, PowerPoint) to PDF format.

        The Office applications are driven through COM via ``win32com.client``,
        so the conversion methods need that package and the corresponding
        Office application to be available.  Converted files are written to a
        ``pdf`` sub-folder of the scanned directory.

        Attributes:
            filePath: Absolute path of the directory that was scanned.
            folder: Whatever ``CreateFolder`` returned for the output folder.
            words / ppts / excels: File names (not full paths) found per type.
            flagW / flagP / flagE: 1 when at least one Word / PowerPoint /
                Excel file was found, otherwise 0.
        """

        # Recognised file-name suffixes; matched case-insensitively.
        _WORD_SUFFIXES = ('.doc', '.docx')
        _PPT_SUFFIXES = ('.ppt', '.pptx')
        _EXCEL_SUFFIXES = ('.xls', '.xlsx')

        def __init__(self, filePath=""):
            """Scan ``filePath`` for Office documents and prepare the output folder.

            :param filePath: Directory to scan. An empty string (the default)
                means the current working directory.
            """
            self.flagW = self.flagE = self.flagP = 1
            if filePath == "":
                filePath = os.getcwd()
            else:
                # The Office applications run as separate processes, so a
                # relative path would not be resolved against our own
                # working directory.
                filePath = os.path.abspath(filePath)
            folder = filePath + '\\pdf\\'
            # CreateFolder is defined elsewhere in the package; its return
            # value is kept only for the final message printed by WEP2Pdf.
            self.folder = CreateFolder(folder, debug=False)
            self.filePath = filePath
            self.words, self.ppts, self.excels = self._classify(
                os.listdir(self.filePath))
            if not self.words:
                print("\n[pyzjr]:No Word files\n")
                self.flagW = 0
            if not self.ppts:
                print("\n[pyzjr]:No PPT file\n")
                self.flagP = 0
            if not self.excels:
                print("\n[pyzjr]:No Excel file\n")
                self.flagE = 0

        @classmethod
        def _classify(cls, names):
            """Split file names into (words, ppts, excels) lists by suffix."""
            words, ppts, excels = [], [], []
            for name in names:
                lowered = name.lower()
                if lowered.endswith(cls._WORD_SUFFIXES):
                    words.append(name)
                elif lowered.endswith(cls._PPT_SUFFIXES):
                    ppts.append(name)
                elif lowered.endswith(cls._EXCEL_SUFFIXES):
                    excels.append(name)
            return words, ppts, excels

        @staticmethod
        def _release(com_obj, method):
            """Call ``com_obj.<method>()`` if the object exists; print any error."""
            if com_obj is None:
                return
            try:
                getattr(com_obj, method)()
            except Exception as e:
                print(e)

        def Word2Pdf(self):
            """Convert every Word file that was found to PDF.

            :return: 0 when there is no Word file to convert, otherwise None.
            """
            if self.flagW == 0:
                return 0
            print("\n[Start Word ->PDF conversion]")
            word = None
            try:
                print("Open Word Process...")
                word = win32com.client.Dispatch("Word.Application")
                word.Visible = 0
                word.DisplayAlerts = False
                for i, fileName in enumerate(self.words):
                    print(i)
                    fromFile = os.path.join(self.filePath, fileName)  # file address
                    toFileName = self.changeSufix2Pdf(fileName)  # Generated file name
                    toFile = self.toFileJoin(toFileName)  # Generated file address
                    print("Conversion:" + fileName + "in files...")
                    doc = None
                    try:
                        doc = word.Documents.Open(fromFile)
                        doc.SaveAs(toFile, 17)  # 17 == wdFormatPDF
                        print("Convert to:" + toFileName + "file completion")
                    except Exception as e:
                        # One broken document must not stop the whole batch.
                        print(e)
                    finally:
                        # Close every document that was opened, not only the last.
                        self._release(doc, "Close")
                        doc = None
                print("All Word files have been printed")
                print("End Word Process...\n")
            except Exception as e:
                print(e)
            finally:
                # Always quit, so no hidden Word process is left behind.
                self._release(word, "Quit")
                word = None
                gc.collect()

        def Excel2Pdf(self):
            """Convert every worksheet of every Excel file that was found to PDF.

            Each worksheet is exported to its own PDF file, named by
            ``addWorksheetsOrder``.

            :return: 0 when there is no Excel file to convert, otherwise None.
            """
            if self.flagE == 0:
                return 0
            print("\n[Start Excel -> PDF conversion]")
            excel = None
            try:
                print("open Excel Process...")
                excel = win32com.client.Dispatch("Excel.Application")
                excel.Visible = 0
                excel.DisplayAlerts = False
                for i, fileName in enumerate(self.excels):
                    print(i)
                    fromFile = os.path.join(self.filePath, fileName)
                    print("Conversion:" + fileName + "in files...")
                    wb = None
                    ws = None
                    try:
                        wb = excel.Workbooks.Open(fromFile)
                        # One workbook may have multiple worksheets; COM
                        # collections are indexed from 1.
                        for j in range(1, wb.Worksheets.Count + 1):
                            toFileName = self.addWorksheetsOrder(fileName, j)
                            toFile = self.toFileJoin(toFileName)
                            ws = wb.Worksheets(j)
                            ws.ExportAsFixedFormat(0, toFile)  # 0 == xlTypePDF
                            print("Convert to:" + toFileName + "file completion")
                    except Exception as e:
                        # One broken workbook must not stop the whole batch.
                        print(e)
                    finally:
                        ws = None
                        self._release(wb, "Close")
                        wb = None
                print("All Excel files have been printed")
                print("Ending Excel process...\n")
            except Exception as e:
                print(e)
            finally:
                # Always quit, so no hidden Excel process is left behind.
                self._release(excel, "Quit")
                excel = None
                gc.collect()

        def PPt2Pdf(self):
            """Convert every PowerPoint file that was found to PDF.

            Presentations without slides are reported and skipped.

            :return: 0 when there is no PowerPoint file to convert, otherwise None.
            """
            if self.flagP == 0:
                return 0
            print("\n[Start PPT ->PDF conversion]")
            powerpoint = None
            try:
                print("Opening PowerPoint process...")
                powerpoint = win32com.client.Dispatch("PowerPoint.Application")
                for i, fileName in enumerate(self.ppts):
                    print(i)
                    fromFile = os.path.join(self.filePath, fileName)
                    toFileName = self.changeSufix2Pdf(fileName)
                    toFile = self.toFileJoin(toFileName)
                    print("Conversion:" + fileName + "in files...")
                    ppt = None
                    try:
                        ppt = powerpoint.Presentations.Open(fromFile, WithWindow=False)
                        if ppt.Slides.Count > 0:
                            ppt.SaveAs(toFile, 32)  # 32 == ppSaveAsPDF
                            print("Convert to:" + toFileName + "file completion")
                        else:
                            print("Error, unexpected: This file is empty, skipping this file")
                    except Exception as e:
                        # One broken presentation must not stop the whole batch.
                        print(e)
                    finally:
                        self._release(ppt, "Close")
                        ppt = None
                print("All PPT files have been printed")
                print("Ending PowerPoint process...\n")
            except Exception as e:
                print(e)
            finally:
                # Always quit, so no hidden PowerPoint process is left behind.
                self._release(powerpoint, "Quit")
                powerpoint = None
                gc.collect()

        def WEP2Pdf(self):
            """
            Word, Excel and PPt are all converted to PDF.
            If there are many files, it may take some time
            """
            print("Convert Microsoft Three Musketeers to PDF")
            self.Word2Pdf()
            self.Excel2Pdf()
            self.PPt2Pdf()
            print(f"All files have been converted, you can find them in the {self.folder}")

        def changeSufix2Pdf(self, file):
            """Return ``file`` with its extension replaced by ``.pdf``."""
            return os.path.splitext(file)[0] + ".pdf"

        def addWorksheetsOrder(self, file, i):
            """Return the PDF file name for worksheet number ``i`` of ``file``."""
            return os.path.splitext(file)[0] + "_worksheet" + str(i) + ".pdf"

        def toFileJoin(self, file):
            """Return the full output path of ``file`` inside the ``pdf`` folder.

            The extension of ``file`` is replaced by ``.pdf``.
            """
            return os.path.join(self.filePath, 'pdf', os.path.splitext(file)[0] + ".pdf")

     这里我对原先博主的代码进行了一定的优化,使其可供我们调用。

    这是控制台打印出来的信息。可以发现,在调用WEP2Pdf时,即使当前文件夹中没有Word文件,也会继续转换其余类型的文件。

  • 相关阅读:
    Vue3表单组件el-form校验规则rules属性
    什么是ETLT?他是新一代数据集成平台?
    memcpy和memmove的模拟实现,思路详解+代码实现
    python考研志愿填报模拟系统vue
    【服务器数据恢复】raid5崩溃导致lvm信息和VXFS文件系统损坏的数据恢复案例
    图像分割数据集的相关操作(二)—— albumentations 数据增强
    人工智能、深度学习、机器学习常见面试题83~100
    Ribbon 添加右侧区域菜单项
    这应该是Linux用户与用户组最详细的知识了吧!
    系统架构设计师(第二版)学习笔记----计算机系统基础
  • 原文地址:https://blog.csdn.net/m0_62919535/article/details/132679590