Python‑docx Guide: Extract, Create, and Manipulate Word Documents
This tutorial demonstrates how to use the python‑docx library to extract text and images, create new documents, replace content, add pictures, build and fill tables, delete paragraphs, set styles, and insert headers and footers in Word files, providing ready‑to‑run code examples for each operation.
This guide shows how to work with Microsoft Word files using the python-docx library, covering common tasks such as extracting text, creating documents, modifying content, and customizing layout.
Extract all text from a Word document
from docx import Document
def extract_text_from_docx(docx_path):
    """Return the text of a Word document, one paragraph per line.

    Only the paragraphs exposed by ``Document.paragraphs`` are read, so
    text that lives elsewhere in the file (for example inside table cells)
    is not part of the result.

    Args:
        docx_path: Path of the ``.docx`` file to read.

    Returns:
        The paragraph texts joined with newline characters.
    """
    doc = Document(docx_path)
    # The separator must be the escaped newline; a literal line break
    # inside the quotes is a syntax error.
    return '\n'.join(para.text for para in doc.paragraphs)
docx_path = 'path/to/your/document.docx'
text = extract_text_from_docx(docx_path)
print(text)

Create a new Word document and add text
from docx import Document
def create_new_docx_with_text(file_path, text):
    """Create a new Word document holding a single paragraph and save it.

    Args:
        file_path: Destination path of the ``.docx`` file to write.
        text: Content of the paragraph added to the new document.
    """
    new_document = Document()
    new_document.add_paragraph(text)
    new_document.save(file_path)
file_path = 'path/to/new_document.docx'
text = "Hello, this is a new document."
create_new_docx_with_text(file_path, text)

Replace text in a Word document
from docx import Document
def replace_text_in_docx(docx_path, old_text, new_text):
    """Replace ``old_text`` with ``new_text`` in a document, saving in place.

    The replacement is applied run by run, so every run keeps its own
    character formatting. The trade-off is that an occurrence of
    ``old_text`` that is split across two or more runs is not replaced.
    Only the paragraphs returned by ``Document.paragraphs`` are visited.

    Args:
        docx_path: Path of the ``.docx`` file; it is overwritten on save.
        old_text: Non-empty text to search for.
        new_text: Text that replaces each occurrence of ``old_text``.

    Raises:
        ValueError: If ``old_text`` is empty.
    """
    if not old_text:
        # An empty needle is "found" between every pair of characters, so
        # str.replace would splice new_text throughout every run and the
        # damaged document would then be saved over the original.
        raise ValueError("old_text must be a non-empty string")

    doc = Document(docx_path)
    for paragraph in doc.paragraphs:
        # Cheap paragraph-level check first to skip untouched paragraphs.
        if old_text not in paragraph.text:
            continue
        for run in paragraph.runs:
            if old_text in run.text:
                run.text = run.text.replace(old_text, new_text)
    doc.save(docx_path)
docx_path = 'path/to/your/document.docx'
replace_text_in_docx(docx_path, "old text", "new text")

Add an image to a Word document
from docx import Document
def add_image_to_docx(file_path, image_path):
    """Create a new Word document containing one picture and save it.

    No width or height is requested, so python-docx chooses the size of
    the inserted picture.

    Args:
        file_path: Destination path of the ``.docx`` file to write.
        image_path: Path of the image file to insert.
    """
    document = Document()
    # width and height are left at their defaults (None).
    document.add_picture(image_path)
    document.save(file_path)
file_path = 'path/to/new_document_with_image.docx'
image_path = 'path/to/your/image.png'
add_image_to_docx(file_path, image_path)

Extract all images from a Word document
from docx import Document
def extract_images_from_docx(docx_path, output_dir):
    """Save every image embedded in a Word document into ``output_dir``.

    Files are named ``image_<index><ext>``. ``<ext>`` is the extension of
    the image part inside the document package, with ``.png`` as the
    fallback when the part name has no extension. ``output_dir`` is
    created if it does not exist yet.

    Args:
        docx_path: Path of the ``.docx`` file to read.
        output_dir: Directory that receives the extracted image files.

    Returns:
        The paths of the files written, in the order they were found.
    """
    import os

    doc = Document(docx_path)
    os.makedirs(output_dir, exist_ok=True)

    written_paths = []
    for rel in doc.part.rels.values():
        if "image" not in rel.reltype:
            continue
        if rel.is_external:
            # A linked (not embedded) image has no part in the package,
            # so there are no bytes to extract.
            continue
        image_part = rel.target_part
        # Keep the real format instead of labelling every file as PNG.
        extension = os.path.splitext(str(image_part.partname))[1] or ".png"
        out_path = f"{output_dir}/image_{len(written_paths)}{extension}"
        with open(out_path, "wb") as f:
            f.write(image_part.blob)
        written_paths.append(out_path)
    return written_paths
docx_path = 'path/to/your/document.docx'
output_dir = 'path/to/output/images'
extract_images_from_docx(docx_path, output_dir)

Create a table in a Word document
from docx import Document
def create_table_in_docx(file_path):
    """Create a new Word document with a one-row, three-column table.

    The single row is filled with the labels ``Qty``, ``Id`` and ``Desc``.

    Args:
        file_path: Destination path of the ``.docx`` file to write.
    """
    document = Document()
    header_cells = document.add_table(rows=1, cols=3).rows[0].cells
    for cell, label in zip(header_cells, ('Qty', 'Id', 'Desc')):
        cell.text = label
    document.save(file_path)
file_path = 'path/to/new_document_with_table.docx'
create_table_in_docx(file_path)

Fill a table with data
from docx import Document
def fill_table_in_docx(file_path, data):
    """Append one row per item to the first table of a document and save it.

    Args:
        file_path: Path of the ``.docx`` file; it is overwritten on save.
        data: Iterable of mappings, each providing the keys ``'Qty'``,
            ``'Id'`` and ``'Desc'``. Values are converted with ``str()``
            before being written to the cells.

    Raises:
        IndexError: If the document contains no table.
        KeyError: If an item lacks one of the three keys.
    """
    doc = Document(file_path)
    table = doc.tables[0]
    for item in data:
        row_cells = table.add_row().cells
        # Convert every value, not only 'Qty', so numeric ids or
        # descriptions are accepted as well.
        for cell, key in zip(row_cells, ('Qty', 'Id', 'Desc')):
            cell.text = str(item[key])
    doc.save(file_path)
file_path = 'path/to/document_with_table.docx'
data = [
{'Qty': 1, 'Id': '123', 'Desc': 'First item'},
{'Qty': 2, 'Id': '456', 'Desc': 'Second item'}
]
fill_table_in_docx(file_path, data)

Delete specific paragraphs
from docx import Document
def delete_paragraph(paragraph):
    """Remove a paragraph from its document.

    The paragraph's XML element is detached from its parent, and the
    wrapper's references to that element are cleared so the stale object
    cannot be used by accident afterwards.

    Args:
        paragraph: The paragraph object to remove. It must expose its XML
            element as ``_element``, and that element must provide
            ``getparent()``.
    """
    element = paragraph._element
    parent = element.getparent()
    if parent is not None:
        # An element that is already detached has no parent to remove from.
        parent.remove(element)
    # Clear the references on the paragraph wrapper itself, not on the
    # XML element that was just detached.
    paragraph._p = paragraph._element = None
def remove_paragraphs_in_docx(docx_path, to_delete):
    """Delete every paragraph whose text appears in ``to_delete``.

    Args:
        docx_path: Path of the ``.docx`` file; it is overwritten on save.
        to_delete: Container of paragraph texts; a paragraph is removed
            when ``paragraph.text in to_delete`` is true.
    """
    document = Document(docx_path)
    # Select first, then delete: the membership test reads each paragraph
    # before any of them is detached.
    matching = [para for para in document.paragraphs if para.text in to_delete]
    for para in matching:
        delete_paragraph(para)
    document.save(docx_path)
docx_path = 'path/to/your/document.docx'
to_delete = ["This is the text to be deleted"]
remove_paragraphs_in_docx(docx_path, to_delete)

Set document style
from docx import Document
from docx.shared import Pt
def set_style_in_docx(docx_path):
    """Set the font of the ``Normal`` style to 12 pt Times New Roman.

    Args:
        docx_path: Path of the ``.docx`` file; it is overwritten on save.
    """
    document = Document(docx_path)
    normal_font = document.styles['Normal'].font
    normal_font.name = 'Times New Roman'
    normal_font.size = Pt(12)
    document.save(docx_path)
docx_path = 'path/to/your/document.docx'
set_style_in_docx(docx_path)

Add header and footer
from docx import Document
def add_header_footer(docx_path):
    """Set the header and footer text of the document's first section.

    The first paragraph of the header becomes ``"This is the header"`` and
    the first paragraph of the footer becomes ``"This is the footer"``.

    Args:
        docx_path: Path of the ``.docx`` file; it is overwritten on save.
    """
    document = Document(docx_path)
    first_section = document.sections[0]
    first_section.header.paragraphs[0].text = "This is the header"
    first_section.footer.paragraphs[0].text = "This is the footer"
    document.save(docx_path)
docx_path = 'path/to/your/document.docx'
add_header_footer(docx_path)

Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
