Fundamentals 6 min read

Python‑docx Guide: Extract, Create, and Manipulate Word Documents

This tutorial demonstrates how to use the python‑docx library to extract text and images, create new documents, replace content, add pictures, build and fill tables, delete paragraphs, set styles, and insert headers and footers in Word files, providing ready‑to‑run code examples for each operation.

Test Development Learning Exchange
Test Development Learning Exchange
Test Development Learning Exchange
Python‑docx Guide: Extract, Create, and Manipulate Word Documents

This guide shows how to work with Microsoft Word files using the python-docx library, covering common tasks such as extracting text, creating documents, modifying content, and customizing layout.

Extract all text from a Word document

from docx import Document
def extract_text_from_docx(docx_path):
    """Return the text of the document's paragraphs as a single string.

    Args:
        docx_path: Location of the .docx file; passed straight to ``Document``.

    Returns:
        The ``text`` of every item in ``doc.paragraphs``, in order, separated
        by a newline character. An empty string when there are no paragraphs.
    """
    doc = Document(docx_path)
    # The separator must be the escape sequence; a literal line break inside
    # a single-quoted string is a syntax error.
    return '\n'.join(para.text for para in doc.paragraphs)

# Example usage: point docx_path at a real .docx file, then print its text.
docx_path = 'path/to/your/document.docx'
text = extract_text_from_docx(docx_path)
print(text)

Create a new Word document and add text

from docx import Document
def create_new_docx_with_text(file_path, text):
    """Create a fresh document holding ``text`` and save it.

    Args:
        file_path: Destination handed to ``save``.
        text: Content of the single paragraph that is added.
    """
    new_document = Document()
    new_document.add_paragraph(text)
    new_document.save(file_path)

# Example usage: write a one-paragraph document to the placeholder path below.
file_path = 'path/to/new_document.docx'
text = "Hello, this is a new document."
create_new_docx_with_text(file_path, text)

Replace text in a Word document

from docx import Document
def replace_text_in_docx(docx_path, old_text, new_text):
    """Replace ``old_text`` with ``new_text`` and save over the same file.

    Matching is done run by run: a paragraph is examined only when its full
    text contains ``old_text``, and a run is rewritten only when that run's
    own text contains it. An occurrence that is split across several runs is
    therefore left untouched.

    Args:
        docx_path: File to load and to overwrite with the result.
        old_text: Substring to look for.
        new_text: Replacement substring.
    """
    document = Document(docx_path)
    for para in document.paragraphs:
        if old_text not in para.text:
            continue
        for run in para.runs:
            if old_text in run.text:
                run.text = run.text.replace(old_text, new_text)
    document.save(docx_path)

# Example usage: the file at docx_path is loaded, edited, and saved back in place.
docx_path = 'path/to/your/document.docx'
replace_text_in_docx(docx_path, "old text", "new text")

Add an image to a Word document

from docx import Document
def add_image_to_docx(file_path, image_path):
    """Create a new document containing one picture and save it.

    Args:
        file_path: Destination handed to ``save``.
        image_path: Image to insert.
    """
    document = Document()
    # No explicit dimensions are requested: both width and height are None.
    document.add_picture(image_path, width=None, height=None)
    document.save(file_path)

# Example usage: both paths are placeholders to be replaced with real locations.
file_path = 'path/to/new_document_with_image.docx'
image_path = 'path/to/your/image.png'
add_image_to_docx(file_path, image_path)

Extract all images from a Word document

from docx import Document
def extract_images_from_docx(docx_path, output_dir):
    """Write every image embedded in the main document part to ``output_dir``.

    Images are found through the relationships of ``doc.part`` whose type
    contains ``"image"``. Each one is written as ``image_<index><ext>``,
    where ``<ext>`` is taken from the image part's name inside the package
    (falling back to ``.png`` when the part name has no extension).

    Args:
        docx_path: Document to read.
        output_dir: Directory that receives the files; created if missing.
    """
    import os  # local import: the snippet's import header only provides Document

    doc = Document(docx_path)
    os.makedirs(output_dir, exist_ok=True)

    image_parts = [
        rel.target_part
        for rel in doc.part.rels.values()
        # Linked (external) pictures have no part in the package to read from.
        if "image" in rel.reltype and not rel.is_external
    ]
    for index, part in enumerate(image_parts):
        # Keep the real format's extension instead of labelling everything PNG.
        extension = os.path.splitext(str(part.partname))[1] or ".png"
        target = os.path.join(output_dir, f"image_{index}{extension}")
        with open(target, "wb") as f:
            f.write(part.blob)


# Example usage: read images from docx_path and write them under output_dir.
docx_path = 'path/to/your/document.docx'
output_dir = 'path/to/output/images'
extract_images_from_docx(docx_path, output_dir)

Create a table in a Word document

from docx import Document
def create_table_in_docx(file_path):
    """Create a new document with a one-row, three-column header table.

    The header cells are labelled ``Qty``, ``Id`` and ``Desc``.

    Args:
        file_path: Destination handed to ``save``.
    """
    document = Document()
    header_row = document.add_table(rows=1, cols=3).rows[0]
    for cell, title in zip(header_row.cells, ('Qty', 'Id', 'Desc')):
        cell.text = title
    document.save(file_path)

# Example usage: write the header-only table document to the placeholder path.
file_path = 'path/to/new_document_with_table.docx'
create_table_in_docx(file_path)

Fill a table with data

from docx import Document
def fill_table_in_docx(file_path, data):
    """Append one row per item to the first table and save in place.

    Args:
        file_path: Document to load and overwrite. Its first table
            (``doc.tables[0]``) receives the new rows.
        data: Iterable of mappings with the keys ``'Qty'``, ``'Id'`` and
            ``'Desc'``. Values may be of any type; each is converted with
            ``str()`` before being written.

    Raises:
        IndexError: If the document contains no table.
        KeyError: If an item lacks one of the three keys.
    """
    doc = Document(file_path)
    table = doc.tables[0]
    for item in data:
        row_cells = table.add_row().cells
        # Convert every column, not just Qty, so numeric ids or descriptions
        # are handled the same way.
        for cell, key in zip(row_cells, ('Qty', 'Id', 'Desc')):
            cell.text = str(item[key])
    doc.save(file_path)

# Example usage: each dict becomes one table row; keys match the columns
# written by fill_table_in_docx ('Qty', 'Id', 'Desc').
file_path = 'path/to/document_with_table.docx'
data = [
    {'Qty': 1, 'Id': '123', 'Desc': 'First item'},
    {'Qty': 2, 'Id': '456', 'Desc': 'Second item'}
]
fill_table_in_docx(file_path, data)

Delete specific paragraphs

from docx import Document
def delete_paragraph(paragraph):
    """Remove a paragraph from its parent and invalidate the wrapper object.

    Args:
        paragraph: Object whose ``_element`` is the underlying XML element.
            After the call its ``_p`` and ``_element`` attributes are ``None``.
    """
    element = paragraph._element
    element.getparent().remove(element)
    # Clear the references held by the paragraph wrapper itself (not by the
    # detached element) so the stale wrapper cannot be used by mistake.
    paragraph._p = paragraph._element = None

def remove_paragraphs_in_docx(docx_path, to_delete):
    """Delete paragraphs whose full text is listed in ``to_delete``.

    A paragraph is removed only when its entire text is a member of
    ``to_delete``; partial matches are ignored. The document is saved back
    to ``docx_path``.

    Args:
        docx_path: File to load and overwrite.
        to_delete: Container of exact paragraph texts to remove.
    """
    document = Document(docx_path)
    for candidate in document.paragraphs:
        if candidate.text not in to_delete:
            continue
        delete_paragraph(candidate)
    document.save(docx_path)


# Example usage: entries in to_delete are compared against whole paragraph texts.
docx_path = 'path/to/your/document.docx'
to_delete = ["This is the text to be deleted"]
remove_paragraphs_in_docx(docx_path, to_delete)

Set document style

from docx import Document
from docx.shared import Pt

def set_style_in_docx(docx_path):
    """Set the font of the 'Normal' style to 12 pt Times New Roman.

    The document is loaded from and saved back to ``docx_path``.

    Args:
        docx_path: File to load and overwrite.
    """
    document = Document(docx_path)
    normal_font = document.styles['Normal'].font
    normal_font.name = 'Times New Roman'
    normal_font.size = Pt(12)
    document.save(docx_path)


# Example usage: the file at docx_path is restyled and saved in place.
docx_path = 'path/to/your/document.docx'
set_style_in_docx(docx_path)

Add header and footer

from docx import Document
def add_header_footer(docx_path):
    """Set the header and footer text of the document's first section.

    The first paragraph of the header and of the footer is overwritten, and
    the document is saved back to ``docx_path``.

    Args:
        docx_path: File to load and overwrite.
    """
    document = Document(docx_path)
    first_section = document.sections[0]
    first_section.header.paragraphs[0].text = "This is the header"
    first_section.footer.paragraphs[0].text = "This is the footer"
    document.save(docx_path)


# Example usage: the file at docx_path gets the header/footer and is saved in place.
docx_path = 'path/to/your/document.docx'
add_header_footer(docx_path)
Original Source

Signed-in readers can open the original source through BestHub's protected redirect.

Sign in to view source
Republication Notice

This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact admin@besthub.dev and we will review it promptly.

Automation · file-io · docx · word processing
Test Development Learning Exchange
Written by

Test Development Learning Exchange

Test Development Learning Exchange

0 followers
Reader feedback

How this landed with the community

Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.