Backend Development 8 min read

Python PDF Manipulation Guide: Merging, Splitting, Encrypting, Decrypting, Text Extraction, Adding Text, Watermarking, Page Removal, Rotation, and HTML‑to‑PDF Conversion

This tutorial demonstrates how to use the Python libraries PyPDF2, ReportLab, and WeasyPrint to merge, split, encrypt, and decrypt PDF files; extract text from them; add text and watermarks to pages; delete and rotate pages; and convert HTML files into PDFs, with a complete code example for each operation.

Test Development Learning Exchange
Test Development Learning Exchange
Test Development Learning Exchange
Python PDF Manipulation Guide: Merging, Splitting, Encrypting, Decrypting, Text Extraction, Adding Text, Watermarking, Page Removal, Rotation, and HTML‑to‑PDF Conversion

Merge multiple PDF files

from PyPDF2 import PdfMerger

def merge_pdfs(paths, output):
    """Concatenate several PDF files into a single output file.

    Args:
        paths: Iterable of input PDFs (anything ``PdfMerger.append``
            accepts), merged in iteration order.
        output: Destination handed to ``PdfMerger.write``.
    """
    merger = PdfMerger()
    try:
        for pdf in paths:
            merger.append(pdf)
        merger.write(output)
    finally:
        # Always close the merger, even when an append or the write fails,
        # so it does not keep its resources open after an error.
        merger.close()

# Example usage: merge two PDFs. The '/path/to/...' values are placeholders.
paths = ['/path/to/file1.pdf', '/path/to/file2.pdf']
output = '/path/to/merged.pdf'
merge_pdfs(paths, output)

Split a PDF file

from PyPDF2 import PdfReader, PdfWriter

def split_pdf(input_path, start_page, end_page, output_path):
    """Copy an inclusive, 1-based page range of a PDF into a new file.

    Args:
        input_path: Path of the source PDF.
        start_page: First page to keep (1-based).
        end_page: Last page to keep (1-based, inclusive).
        output_path: Path of the PDF to create.

    Raises:
        ValueError: If ``start_page`` is below 1 or ``end_page`` is before
            ``start_page``.
    """
    if start_page < 1:
        # Without this check ``start_page - 1`` becomes a negative index and
        # silently selects pages counted from the end of the document.
        raise ValueError(f"start_page must be >= 1, got {start_page}")
    if end_page < start_page:
        # An empty range would otherwise produce a PDF with no pages.
        raise ValueError(
            f"end_page ({end_page}) must be >= start_page ({start_page})"
        )
    with open(input_path, 'rb') as file:
        pdf = PdfReader(file)
        writer = PdfWriter()
        # Convert the 1-based inclusive range to 0-based indices.
        for i in range(start_page - 1, end_page):
            writer.add_page(pdf.pages[i])
        # The output is opened only after every page was collected, so an
        # out-of-range page does not leave a partial file behind.
        with open(output_path, 'wb') as output:
            writer.write(output)

# Example usage: copy pages 1 through 5 into a new file (placeholder paths).
input_path = '/path/to/input.pdf'
start_page = 1
end_page = 5
output_path = '/path/to/output.pdf'
split_pdf(input_path, start_page, end_page, output_path)

Encrypt a PDF file

from PyPDF2 import PdfWriter, PdfReader

def encrypt_pdf(input_path, output_path, password):
    """Write a password-protected copy of a PDF.

    Every page of ``input_path`` is copied into a new writer, the writer is
    encrypted with ``password``, and the result is saved to ``output_path``.
    """
    with open(input_path, 'rb') as source_stream:
        reader = PdfReader(source_stream)
        protected = PdfWriter()
        for source_page in reader.pages:
            protected.add_page(source_page)
        # Encryption is applied once, after all pages have been added.
        protected.encrypt(password)
        with open(output_path, 'wb') as target_stream:
            protected.write(target_stream)

# Example usage: encrypt a PDF with a password (placeholder paths and password).
input_path = '/path/to/input.pdf'
output_path = '/path/to/encrypted.pdf'
password = 'mysecretpassword'
encrypt_pdf(input_path, output_path, password)

Decrypt a PDF file

from PyPDF2 import PdfReader, PdfWriter

def decrypt_pdf(input_path, output_path, password):
    """Save an unencrypted copy of a password-protected PDF.

    Raises:
        ValueError: If the input is not encrypted, or if ``decrypt`` reports
            failure (a result equal to 0) for ``password``.
    """
    with open(input_path, 'rb') as source_stream:
        reader = PdfReader(source_stream)
        # ``and`` short-circuits, so decrypt() is only attempted when the
        # document is actually encrypted.
        unlocked = reader.is_encrypted and reader.decrypt(password) != 0
        if not unlocked:
            raise ValueError("Password incorrect or file not encrypted.")
        plain = PdfWriter()
        for source_page in reader.pages:
            plain.add_page(source_page)
        with open(output_path, 'wb') as target_stream:
            plain.write(target_stream)

# Example usage: decrypt a password-protected PDF (placeholder paths and password).
input_path = '/path/to/encrypted.pdf'
output_path = '/path/to/decrypted.pdf'
password = 'mysecretpassword'
decrypt_pdf(input_path, output_path, password)

Extract text from a PDF

from PyPDF2 import PdfReader

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF to read.

    Returns:
        One string containing each page's extracted text back to back; no
        separator is inserted between pages.
    """
    with open(pdf_path, 'rb') as file:
        pdf = PdfReader(file)
        # join() builds the result in a single pass instead of re-copying the
        # accumulated string for every page; ``or ''`` keeps a page that
        # yields no text from breaking the join.
        return ''.join(page.extract_text() or '' for page in pdf.pages)

# Example usage: print all text extracted from a PDF (placeholder path).
pdf_path = '/path/to/input.pdf'
text = extract_text_from_pdf(pdf_path)
print(text)

Add text to a PDF page (using ReportLab)

import io
from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

def add_text_to_pdf(pdf_path, output_path, text, x, y):
    """Stamp a line of text onto the first page of a PDF.

    The text is drawn on an in-memory, letter-sized ReportLab canvas, which
    is then merged over the first page of the input.

    NOTE: only that first page is written to ``output_path``; any remaining
    pages of the input are not copied.

    Args:
        pdf_path: Path of the PDF whose first page receives the text.
        output_path: Path of the single-page PDF to create.
        text: String drawn on the page.
        x: Horizontal position passed to ``Canvas.drawString``.
        y: Vertical position passed to ``Canvas.drawString``.
    """
    # Render the text as a one-page PDF held entirely in memory.
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.drawString(x, y, text)
    can.save()
    packet.seek(0)  # rewind so the reader starts at the beginning
    new_pdf = PdfReader(packet)
    # Keep the source open until the output has been written (the writer may
    # still need to read page data from it), then close it deterministically
    # instead of leaking the file handle.
    with open(pdf_path, "rb") as source:
        existing_pdf = PdfReader(source)
        output = PdfWriter()
        page = existing_pdf.pages[0]
        page.merge_page(new_pdf.pages[0])
        output.add_page(page)
        with open(output_path, "wb") as out_stream:
            output.write(out_stream)

# Example usage: draw a short string at (50, 750) on a PDF (placeholder paths).
pdf_path = '/path/to/input.pdf'
output_path = '/path/to/output_with_text.pdf'
text = 'This is some text.'
x = 50
y = 750
add_text_to_pdf(pdf_path, output_path, text, x, y)

Add a watermark to a PDF (using ReportLab)

from PyPDF2 import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter

def add_watermark(input_pdf, output_pdf, watermark_text):
    """Overlay a rotated, mostly transparent text watermark on every page.

    The watermark is rendered once into an in-memory buffer rather than a
    fixed temporary file, so nothing is left on disk and the function does
    not depend on a ``/tmp`` directory existing.

    Args:
        input_pdf: Path of the PDF to watermark.
        output_pdf: Path of the watermarked PDF to create.
        watermark_text: Text drawn on each page.
    """
    import io  # local import keeps this snippet self-contained

    # Draw the watermark on a single letter-sized page held in memory.
    buffer = io.BytesIO()
    watermark = canvas.Canvas(buffer, pagesize=letter)
    watermark.saveState()
    watermark.setFont("Helvetica", 80)
    watermark.rotate(45)
    watermark.setFillAlpha(0.1)
    watermark.drawString(150, 300, watermark_text)
    watermark.restoreState()
    watermark.save()
    buffer.seek(0)  # rewind so the reader starts at the beginning
    watermark_pdf = PdfReader(buffer)
    watermark_page = watermark_pdf.pages[0]
    # Keep the input open until the output has been written, then close it
    # deterministically instead of leaking the file handle.
    with open(input_pdf, "rb") as source:
        input_reader = PdfReader(source)
        output_writer = PdfWriter()
        for page in input_reader.pages:
            page.merge_page(watermark_page)
            output_writer.add_page(page)
        with open(output_pdf, "wb") as out_stream:
            output_writer.write(out_stream)

# Example usage: watermark every page with 'Confidential' (placeholder paths).
input_pdf = '/path/to/input.pdf'
output_pdf = '/path/to/output_with_watermark.pdf'
watermark_text = 'Confidential'
add_watermark(input_pdf, output_pdf, watermark_text)

Remove specific pages from a PDF

from PyPDF2 import PdfReader, PdfWriter

def remove_pages(input_pdf, output_pdf, page_numbers):
    """Write a copy of a PDF that omits the given pages.

    Args:
        input_pdf: Path of the source PDF.
        output_pdf: Path of the PDF to create.
        page_numbers: 1-based page numbers to leave out. Numbers that do not
            match any page are ignored.
    """
    # A set gives constant-time membership tests for each page.
    pages_to_drop = set(page_numbers)
    # Keep the input open until the output has been written, then close it
    # deterministically instead of leaking the file handle.
    with open(input_pdf, "rb") as source:
        reader = PdfReader(source)
        writer = PdfWriter()
        for number, page in enumerate(reader.pages, start=1):
            if number not in pages_to_drop:
                writer.add_page(page)
        with open(output_pdf, "wb") as out_stream:
            writer.write(out_stream)

# Example usage: drop the first and third pages (placeholder paths).
input_pdf = '/path/to/input.pdf'
output_pdf = '/path/to/output_without_pages.pdf'
page_numbers = [1, 3]  # pages to remove (1-based)
remove_pages(input_pdf, output_pdf, page_numbers)

Rotate selected PDF pages

from PyPDF2 import PdfReader, PdfWriter

def rotate_pages(input_pdf, output_pdf, page_numbers, angle):
    """Write a copy of a PDF with the selected pages rotated.

    Args:
        input_pdf: Path of the source PDF.
        output_pdf: Path of the PDF to create.
        page_numbers: 1-based numbers of the pages to rotate; all other
            pages are copied unchanged.
        angle: Rotation passed to ``page.rotate``.
    """
    # A set gives constant-time membership tests for each page.
    pages_to_rotate = set(page_numbers)
    # Keep the input open until the output has been written, then close it
    # deterministically instead of leaking the file handle.
    with open(input_pdf, "rb") as source:
        reader = PdfReader(source)
        writer = PdfWriter()
        for number, page in enumerate(reader.pages, start=1):
            if number in pages_to_rotate:
                page.rotate(angle)
            writer.add_page(page)
        with open(output_pdf, "wb") as out_stream:
            writer.write(out_stream)

# Example usage: rotate pages 2 and 4 by 90 degrees (placeholder paths).
input_pdf = '/path/to/input.pdf'
output_pdf = '/path/to/output_rotated.pdf'
page_numbers = [2, 4]  # pages to rotate (1-based)
angle = 90  # rotation angle (90, 180, 270)
rotate_pages(input_pdf, output_pdf, page_numbers, angle)

Convert HTML to PDF using WeasyPrint

from weasyprint import HTML

def html_to_pdf(html_path, output_path):
    """Render the HTML document at ``html_path`` to a PDF at ``output_path``."""
    document = HTML(html_path)
    document.write_pdf(output_path)

# Example usage: convert an HTML file to a PDF (placeholder paths).
html_path = '/path/to/input.html'
output_path = '/path/to/output.pdf'
html_to_pdf(html_path, output_path)
Tags: Tutorial, pdf-manipulation, PyPDF2, ReportLab, weasyprint
Test Development Learning Exchange
Written by

Test Development Learning Exchange

Test Development Learning Exchange

0 followers
Reader feedback

How this landed with the community

login Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.