# 工具代码:某门课的 PPT 按章节拆分得过细(例如 1.01),每份 PDF 文件只有几页,因此借助 PyPDF2 将同一章的 PDF 文件合并为一个文件。
import os
import copy
from PyPDF2 import PdfMerger
# Directory that holds the per-section lecture PDFs (named like "1.01....pdf").
target_path = 'D:\\study\\大四上\\生物信息\\ppt'


def _chapter_prefix(filename):
    """Return the leading run of decimal digits of *filename* ('' if none).

    For a name such as "12.03 intro.pdf" this is "12", i.e. the chapter
    number in front of the first non-digit character.
    """
    digits = []
    for ch in filename:
        # isdecimal() (not isdigit()) so that int() is guaranteed to accept it.
        if not ch.isdecimal():
            break
        digits.append(ch)
    return ''.join(digits)


# Case-insensitive match so that "X.PDF" is picked up as well.
pdf_lst = [f for f in os.listdir(target_path) if f.lower().endswith('.pdf')]

# os.listdir() returns names in arbitrary order, but the grouping below relies
# on files of one chapter being adjacent.  Sort by (chapter number, file name);
# the numeric chapter key keeps chapter 10 after chapter 9 instead of right
# after chapter 1.  Files that do not start with a digit are skipped.
numbered_pdfs = sorted(
    (int(_chapter_prefix(f)), f) for f in pdf_lst if _chapter_prefix(f)
)

# classified_lst[i] is the list of file names belonging to the i-th chapter.
# Every group is a fresh list, so no copying is needed, and no empty group is
# produced when the directory contains no numbered PDFs.
classified_lst = []
prefix = None
for chapter, filename in numbered_pdfs:
    if chapter != prefix:
        prefix = chapter
        classified_lst.append([])
    classified_lst[-1].append(filename)
# Turn every grouped file name into a full path under target_path while
# keeping the per-chapter grouping of classified_lst intact.
fclassified_lst = [
    [os.path.join(target_path, name) for name in group]
    for group in classified_lst
]
# Merge all PDFs that belong to the same chapter into a single PDF,
# written to <target_path>/sum/chapter<N>.pdf.
output_dir = os.path.join(target_path, "sum")
# Create the output directory up front; writing to a path inside a missing
# directory would otherwise fail.
os.makedirs(output_dir, exist_ok=True)

num = 1
for chapter in fclassified_lst:
    if not chapter:
        # Nothing to merge for this group; do not emit an empty PDF.
        continue
    # One merger per chapter, created only when it is actually needed.
    file_merger = PdfMerger()
    try:
        for pdf in chapter:
            file_merger.append(pdf)
        file_merger.write(os.path.join(output_dir, "chapter" + str(num) + ".pdf"))
    finally:
        # Release the file handles held by the merger, even if a source PDF
        # could not be read or the output could not be written.
        file_merger.close()
    num += 1
print("done!")