1. 程式人生 > >python-docx操作word文件(*.docx)

python-docx操作word文件(*.docx)

tin comment turn spl end nta document .sh font

目錄

  • 基礎操作
  • 對象關系
  • 添加樣式
    • 中文字體微軟雅黑,西文字體Times New Roman
    • 首行縮進
    • 單獨設置標題樣式
    • 設置超鏈接
  • 參考文檔

基礎操作

from docx import Document
from docx.shared import Inches

# Create an empty document
document = Document()

# Add a heading; the second argument is the level: 0 is Title, 1 (or omitted)
# is Heading 1, and the allowed range is 0 <= level <= 9
document.add_heading('Document Title', 0)
# Add a paragraph; the parameters are text='' and style=None
p = document.add_paragraph('A plain paragraph having some ')
# Add run objects to the paragraph; the parameters are text=None and style=None.
# A run has the two attributes bold and italic, set here on the new runs.
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('Heading, level 1', level=1)
document.add_paragraph('Intense quote', style='Intense Quote')

document.add_paragraph(
    'first item in unordered list', style='List Bullet'
)
document.add_paragraph(
    'first item in ordered list', style='List Number'
)
# Add a picture from a file in the working directory, 1.25 inches wide
document.add_picture('monty-truth.png', width=Inches(1.25))

# Add a table: one header row, then one row per record
records = (
    (3, '101', 'Spam'),
    (7, '422', 'Eggs'),
    (4, '631', 'Spam, spam, eggs, and spam')
)

table = document.add_table(rows=1, cols=3)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Qty'
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'
# The loop variable is named `item_id` rather than `id` so that the builtin
# id() is not shadowed at module level for the rest of the script.
for qty, item_id, desc in records:
    row_cells = table.add_row().cells
    # qty is an int in `records`, so it is converted before being assigned
    row_cells[0].text = str(qty)
    row_cells[1].text = item_id
    row_cells[2].text = desc

document.add_page_break()

對象關系

技術分享圖片

調用document.add_paragraph(text)之後,傳入的文本默認會被放進該paragraph的第一個run對象中。

添加樣式

中文字體微軟雅黑,西文字體Times New Roman

import docx
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Cm, Pt

# NOTE(review): this snippet's own imports only bring in `docx`; `Document`
# comes from the earlier `from docx import Document` — confirm it is in scope.
document = Document()
# Fetch the 'Normal' style from the document's style collection
style = document.styles['Normal']
# Set the Western (Latin) font
style.font.name = 'Times New Roman'
# Set the East Asian (Chinese) font via the w:eastAsia attribute of rFonts.
# NOTE(review): presumably relies on the font.name assignment above having
# created the rPr/rFonts elements — confirm before reordering these lines.
style.element.rPr.rFonts.set(qn('w:eastAsia'), '微軟雅黑')

首行縮進

# Get the paragraph formatting attached to `style`
paragraph_format = style.paragraph_format
# First-line indent of 0.74 cm, which the author equates to two characters
paragraph_format.first_line_indent = Cm(0.74)

單獨設置標題樣式

# Add a level-0 heading with no text yet
title_ = document.add_heading(level=0)
# Center the heading
title_.alignment = WD_ALIGN_PARAGRAPH.CENTER
# Add the heading text as a run.
# NOTE(review): `title` is not defined in the visible snippets — the caller
# must supply the heading string before this line runs.
title_run = title_.add_run(title)
# Set the heading font size
title_run.font.size = Pt(14)
# Set the heading's Western (Latin) font
title_run.font.name = 'Times New Roman'
# Set the heading's East Asian (Chinese) font via the w:eastAsia attribute
title_run.element.rPr.rFonts.set(qn('w:eastAsia'), '微軟雅黑')

設置超鏈接

def add_hyperlink(paragraph, url, text, color, underline):
    """
    Append an external hyperlink to the end of a paragraph.

    :param paragraph: The paragraph we are adding the hyperlink to.
    :param url: A string containing the required url
    :param text: The text displayed for the url
    :param color: Value written to the run's ``w:color`` element (the demo
        passes a hex string such as ``'FF8822'``); ``None`` adds no color
        element at all
    :param underline: When falsy, a ``w:u`` element with value ``'none'`` is
        added to switch underlining off; when truthy nothing is added
    :return: The ``w:hyperlink`` XML element that was appended
    """
    make_element = docx.oxml.shared.OxmlElement
    qualified = docx.oxml.shared.qn

    # Register the url as an external relationship of the paragraph's part
    # (document.xml.rels) and get the new relationship id
    part = paragraph.part
    r_id = part.relate_to(url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True)

    # Create the w:hyperlink tag and point it at the relationship
    hyperlink = make_element('w:hyperlink')
    hyperlink.set(qualified('r:id'), r_id)

    # Create a w:r element and the w:rPr element holding its formatting
    new_run = make_element('w:r')
    rPr = make_element('w:rPr')

    # Add color if it is given
    if color is not None:
        c = make_element('w:color')
        c.set(qualified('w:val'), color)
        rPr.append(c)

    # Remove underlining if it is requested
    if not underline:
        u = make_element('w:u')
        u.set(qualified('w:val'), 'none')
        rPr.append(u)

    # Join all the xml elements together and add the required text to the
    # w:r element
    new_run.append(rPr)
    new_run.text = text
    hyperlink.append(new_run)

    # Appending to the underlying w:p element puts the link after any
    # content already in the paragraph
    paragraph._p.append(hyperlink)

    return hyperlink

# Build a one-paragraph demo document containing two hyperlinks.
document = docx.Document()
p = document.add_paragraph()

# Each entry is (color, underline) exactly as passed to add_hyperlink:
#   (None, True)      -> no color element and no underline override
#   ('FF8822', False) -> custom color with underlining switched off
for link_color, keep_underline in ((None, True), ('FF8822', False)):
    hyperlink = add_hyperlink(p, 'http://www.google.com', 'Google', link_color, keep_underline)

document.save('demo.docx')

上面的函數每次調用都會把整個text作為一個超鏈接追加到段落末尾。日常使用的時候,超鏈接多半只是正文中的某個關鍵詞,或以<a>標簽的格式出現;這時可以利用paragraph和run這兩個對象的關系來解決:普通文本用run加入段落,鏈接部分用add_hyperlink加入同一個段落。

比如有文本內容如下,將其中的<a>標簽換為超鏈接:

"""I am trying to add an hyperlink in a MS Word document using docx module for <a href="python.org">Python</a>. Just do it."""

# 判斷字段是否為鏈接
def is_text_link(text):
    """Return True if *text* contains any URL-like marker, otherwise False.

    Every marker is checked. The previous version returned from inside the
    first loop iteration, so only 'http' was ever tested and a value such as
    'python.org' was reported as not being a link.

    :param text: The string to inspect
    :return: True when at least one marker occurs in *text*
    """
    markers = ('http', '://', 'www.', '.com', '.org', '.cn', '.xyz', '.htm')
    return any(marker in text for marker in markers)

# 對段落中的鏈接加上超鏈接
def add_text_link(document, text):
    """Add a paragraph to *document*, turning ``<a href="...">`` tags into hyperlinks.

    The text is scanned for ``<a href="URL">KEYWORD</a>`` anchors. Text
    outside the anchors is added to the new paragraph as ordinary runs, and
    each anchor is added with ``add_hyperlink`` using URL as the target and
    KEYWORD as the displayed text. Anchors are recognised by tag structure
    rather than by guessing whether a fragment looks like a URL, so plain
    text that happens to contain e.g. '.com' is kept as text, and text with
    no anchors at all is still added instead of being dropped.

    :param document: The document the paragraph is added to
    :param text: The source string, optionally containing ``<a>`` tags
    :return: None
    """
    # Imported locally because no module-level `import re` is visible in
    # this file.
    import re

    paragraph = document.add_paragraph()
    cursor = 0  # index in `text` just past the last anchor handled
    for anchor in re.finditer(r'<a href="(.*?)">(.*?)</a>', text):
        # Plain text between the previous anchor and this one
        leading = text[cursor:anchor.start()]
        if leading:
            paragraph.add_run(leading)
        url, keyword = anchor.groups()
        # Same formatting arguments as before: no color, underline kept
        add_hyperlink(paragraph, url, keyword, None, True)
        cursor = anchor.end()

    # Whatever follows the last anchor (or the whole text if there were none)
    trailing = text[cursor:]
    if trailing:
        paragraph.add_run(trailing)

參考文檔

  1. https://python-docx.readthedocs.io/en/latest/index.html
  2. https://github.com/python-openxml/python-docx/issues/74
  3. http://www.warmeng.com/2018/12/02/auto_report/

python-docx操作word文件(*.docx)