代码环境基于python3
引用:
https://www.cnblogs.com/loveprogramme/p/11247037.html
https://blog.csdn.net/weixin_46426157/article/details/108110894
https://github.com/phasedOut/pdf2pptx
https://github.com/Derekchen147/pdf2ppt
直接上代码
下载需要的包
# vim requirements.txt
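# os、time、datetime 属于 Python 标准库,无需安装
# import fitz 对应的包是 PyMuPDF
PyMuPDF
tqdm
# from PIL import Image 对应的包是 Pillow
Pillow
# from pptx import Presentation / from pptx.util import Inches 对应的包是 python-pptx
python-pptx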
# pip3 install -r requirements.txt -i https://pypi.douban.com/simple
如果没有pip3命令,就用pip命令。
安装fitz报错
running install
running build
running build_py
running build_ext
building 'fitz._fitz' extension
swigging fitz/fitz.i to fitz/fitz_wrap.c
swig -python -o fitz/fitz_wrap.c fitz/fitz.i
unable to execute 'swig': No such file or directory
error: command 'swig' failed with exit status 1
----------------------------------------
Command "/usr/bin/python3 -u -c "import setuptools, tokenize;__file__='/tmp/pip-install-ijzpcyaz/PyMuPDF/setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" install --record /tmp/pip-record-atf0rquy/install-record.txt --single-version-externally-managed --compile" failed with error code 1 in /tmp/pip-install-ijzpcyaz/PyMuPDF/
------
# 安装swig 然后重试
yum install swig
或
apt install swig
------
Failed building wheel for scipy
Running setup.py clean for scipy
Complete output from command /usr/bin/python3 -u -c "import setuptools, tokenize;__file__='/tmp/pip-install-pykkl320/scipy/setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" clean --all:
`setup.py clean` is not supported, use one of the following instead:
- `git clean -xdf` (cleans all files)
- `git clean -Xdf` (cleans all versioned files, doesn't touch
files that aren't checked into the git repo)
Add `--force` to your command to use it anyway if you must (unsupported).
# 升级pip wheel setuptools
pip3 install --upgrade pip setuptools wheel -i https://pypi.douban.com/simple
安装完后,重试
安装pandas报错
ERROR: Cannot unpack file /tmp/pip-unpack-xq_hmo_x/simple.htm (downloaded from /tmp/pip-req-build-df6dsm0s, content-type: text/html); cannot detect archive format
ERROR: Cannot determine archive format of /tmp/pip-req-build-df6dsm0s
解决方法:
指定 --trusted-host pypi.tuna.tsinghua.edu.cn 然后重试
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn 模块名
安装完成后,执行下面的py程序
import fitz
import time
from tqdm import tqdm
import os
from PIL import Image
from pptx import Presentation
from pptx.util import Inches
# Convert every PDF in a user-supplied directory into a PPTX file.
#
# For each "<name>.pdf" found directly inside the directory the script
#   1. renders every page to "<dir>/<name>/<name>-<page index>.jpg", and
#   2. builds "<dir>/<name>.pptx" containing one full-slide picture per page.
base_path = input("请输入要转换的文件路径:")

# os.listdir() also returns sub-directories (including the image folders this
# script creates) and previously generated .pptx files, so keep only regular
# files with a .pdf extension before handing them to fitz.open().
filenames = sorted(
    name
    for name in os.listdir(base_path)
    if name.lower().endswith(".pdf")
    and os.path.isfile(os.path.join(base_path, name))
)

for filename in filenames:
    # ---- Step 1: render each PDF page to a JPG ----
    timestamp = time.time()
    print('切割pdf为jpg...')
    full_path = os.path.join(base_path, filename)
    print(full_path)

    # splitext() only strips the final extension, so "a.b.pdf" -> "a.b".
    stem = os.path.splitext(filename)[0]
    # Folder that receives the rendered page images for this PDF.
    jpg_path = os.path.join(base_path, stem)
    os.makedirs(jpg_path, exist_ok=True)

    rotate = 0    # rotation angle in degrees; change it if pages come out sideways
    zoom_x = 2.0  # horizontal scale of the image relative to the PDF page
    zoom_y = 2.0  # vertical scale of the image relative to the PDF page
    trans = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)

    print("%s开始转换..." % filename)
    doc = fitz.open(full_path)
    try:
        page_count = doc.page_count
        # One loop covers single-page and multi-page documents alike, so the
        # page index used in the file name is always defined.
        for pg in tqdm(range(page_count)):
            pm = doc[pg].get_pixmap(matrix=trans, alpha=False)
            # NOTE(review): the output format is chosen from the extension;
            # confirm the installed PyMuPDF version can write JPG.
            pm.save(os.path.join(jpg_path, "%s-%s.jpg" % (stem, pg)))
    finally:
        # Release the file handle even if rendering fails part-way.
        doc.close()
    print("%s转换jpg完成!" % filename)
    print('耗时:', time.time() - timestamp, 's')
    print('\n\n')

    # ---- Step 2: assemble the JPGs into a PPTX ----
    timestamp = time.time()
    print('整合jpg为ppt...')
    prs = Presentation()
    # Slide size; edit these two lines for a different aspect ratio.
    prs.slide_width = Inches(16)
    prs.slide_height = Inches(9)
    title_slide_layout = prs.slide_layouts[0]
    # Iterate over the number of pages just rendered rather than the folder
    # listing, so leftover files in the image folder cannot shift the indices.
    for index in tqdm(range(page_count)):
        jpg_file = os.path.join(jpg_path, "%s-%d.jpg" % (stem, index))
        slide = prs.slides.add_slide(title_slide_layout)
        # left/top are the picture's offsets from the slide's edges; passing
        # the slide's own width and height stretches it over the whole slide.
        left = top = 0
        slide.shapes.add_picture(
            jpg_file, left, top,
            height=prs.slide_height, width=prs.slide_width,
        )
    prs.save(os.path.join(base_path, "%s.pptx" % stem))
    print("成功保存ppt文件 %s.pptx" % stem)
    print('耗时:', time.time() - timestamp, 's')
    print('\n\n')
**使用方法**
运行程序后,在提示"请输入要转换的文件路径:"处输入存放 PDF 文件的目录,例如:/home/zclinux/Desktop/pdf2ppt。注意:路径末尾不要多加一个 /,否则会报错。