.docx文件是Microsoft Word使用的一种基于XML的文档格式,取代了早期版本的二进制.doc格式。它可以包含文本、图像、表格和其他文档元素,是Microsoft Office Word 2007及更高版本的默认文档格式。
.docx文件通常使用Office Open XML标准(ISO/IEC 29500)进行编码,并使用ZIP存档格式进行封装。可以使用Microsoft Word或兼容的第三方应用程序打开和编辑.docx文件。
from docx import Document
from docx.shared import Pt
from docx.shared import RGBColor
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

# Demo script: build a document that exercises headings, formatted runs,
# a page break, paragraph alignment, a picture, a table and a styled,
# colored paragraph, then save it as demo.docx.

# Create a new document.
doc = Document()

# Add the title; ``level`` is the heading level.
doc.add_heading('Document Title', level=0)

# Add a paragraph made of several runs with different formatting.
p = doc.add_paragraph('A plain paragraph having some ')
# Bold run.
p.add_run('bold').bold = True
p.add_run(' and some ')
# Italic run.
p.add_run('italic.').italic = True

# Page break.
doc.add_page_break()

# Add a bold, justified paragraph.
styled_para = doc.add_paragraph()
styled_para.add_run('This paragraph has a style.').bold = True
styled_para.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY

# Add a picture.
# NOTE(review): absolute, machine-specific path -- point it at an image that
# exists on the machine running this script.
doc.add_picture(r'C:\Users\yh\Pictures\rm.png', width=Pt(120))

# Add a 3x3 table and label every cell with its 1-based row/column position.
table = doc.add_table(rows=3, cols=3)
for i in range(3):
    for j in range(3):
        table.cell(i, j).text = f'Cell {i + 1}-{j + 1}'

# Add colored text.
# The style is referenced by its name ('Intense Quote'); the previous
# 'IntenseQuote' spelling is the style id, and lookup by id is deprecated
# in python-docx.
pcolor = doc.add_paragraph('This text is colored.', style='Intense Quote')
pcolor.add_run('This text is also colored and in bold.').bold = True
pcolor.add_run('This text is also colored and in italic.').italic = True
pcolor.add_run('This text is also colored and underlined.').underline = True
# Actually apply a font color to every run of the paragraph; before this the
# runs only carried bold/italic/underline and RGBColor was never used.
text_color = RGBColor(0xC0, 0x00, 0x00)
for run in pcolor.runs:
    run.font.color.rgb = text_color

# Save the document.
doc.save('demo.docx')


import docx


def getText(fileName):
    """Return the text of all paragraphs of a .docx file as one string.

    Args:
        fileName: Document to open; handed unchanged to ``docx.Document``.

    Returns:
        The ``text`` of each entry in the document's ``paragraphs``, in
        order, joined with newline characters.
    """
    document = docx.Document(fileName)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
# Build a small document from four quotations and save it.
p1 = '''子曰:“学而时习之,不亦说乎?”'''
p2 = '''子曰:“温故而知新,可以为师。”'''
p3 = '''子曰:“学而不思则罔,思而不学则殆。”'''
p4 = '''子曰:“由,诲女知之乎!知之为知之,不知为不知,是知也”'''

doc = docx.Document()
# Report how many paragraphs the freshly created document contains.
print(len(doc.paragraphs))

# Add the title first so it sits above the quotations; it used to be added
# after them, which placed the heading at the very end of the document.
doc.add_heading("论语十则", 0)
for quotation in (p1, p2, p3, p4):
    doc.add_paragraph(quotation)

# NOTE(review): this is a raw string, so the doubled backslashes are kept
# literally in the path; the literal is left exactly as it was written.
doc.save(r'C:\\Users\\user\\Documents\\1.docx')
print("over!")
import docx
import matplotlib.pyplot as plt
import base64
import io

# Re-open the saved demo document and print its text content.
doc = docx.Document('demo.docx')

# One printed line per paragraph, in document order.
for paragraph in doc.paragraphs:
    print(paragraph.text)

# Then the text of every table cell: tables in order, rows within each
# table, cells within each row.
every_cell = (
    cell
    for tbl in doc.tables
    for tbl_row in tbl.rows
    for cell in tbl_row.cells
)
for cell in every_cell:
    print(cell.text)
-
Document Title
A plain paragraph having some bold and some italic.
This paragraph has a style.
This text is colored.This text is also colored and in bold.This text is also colored and in italic.This text is also colored and underlined.
Cell 1-1
Cell 1-2
Cell 1-3
Cell 2-1
Cell 2-2
Cell 2-3
Cell 3-1
Cell 3-2
Cell 3-3