download 文件夹中已经获取到了大量 PDF 文件,但我需要的是 txt 文件和 Word 文件。
pip install pdf2docx pdfminer.six
# pip install pdf2docx pdfminer.six
import os
from pdf2docx import Converter
from pdfminer.high_level import extract_text
# Ignore warnings of category UserWarning whose originating module name
# matches "pdf2docx", so they are not printed.
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="pdf2docx")
# pdf转txt
def pdf_to_txt(pdf_path, txt_path):
    """Extract the text of a PDF and save it as a UTF-8 text file.

    Args:
        pdf_path: Path of the PDF file to read.
        txt_path: Path of the text file to write. An existing file at this
            path is overwritten.

    Raises:
        OSError: If the output directory or file cannot be created.
    """
    text = extract_text(pdf_path)
    # Create the destination folder on demand so that a not-yet-existing
    # output directory does not abort the conversion with FileNotFoundError.
    out_dir = os.path.dirname(txt_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(text)
# pdf转word
def pdf_to_docx(pdf_path, docx_path):
cv = Con