Fundamentals 12 min read

10 Essential Python Libraries for Image and PDF Processing with Ready‑to‑Use Code

This guide presents ten widely used Python libraries—Pillow, imageio, scikit‑image, OpenCV, pytesseract, pdfminer.six, ReportLab, WeasyPrint, PyPDF2, and PyMuPDF—each illustrated with practical code snippets for tasks such as image manipulation, OCR, PDF creation, merging, and extraction.

Test Development Learning Exchange

Oct 21, 2024

10 Essential Python Libraries for Image and PDF Processing with Ready‑to‑Use Code

Pillow (PIL)

Basic image manipulation: open, crop, rotate and add a semi‑transparent watermark.

from PIL import Image, ImageDraw, ImageFont

# Open an image file
image = Image.open("example.jpg")

# Crop to a rectangle (left, upper, right, lower)
cropped_image = image.crop((100, 100, 400, 400))
cropped_image.save("cropped_example.jpg")

# Rotate clockwise by 90 degrees. Pillow interprets positive angles as
# counter-clockwise, so a clockwise turn needs a negative angle.
# The default expand=False keeps the original canvas size.
rotated_image = image.rotate(-90)
rotated_image.save("rotated_example.jpg")

# Add a semi-transparent watermark. Alpha only takes effect when compositing
# RGBA layers, so the text is drawn on a fully transparent overlay and then
# blended onto an RGBA copy of the photo.
base = image.convert("RGBA")
overlay = Image.new("RGBA", base.size, (255, 255, 255, 0))
draw = ImageDraw.Draw(overlay)
try:
    font = ImageFont.truetype("arial.ttf", 36)
except OSError:
    # arial.ttf is not installed everywhere; fall back to Pillow's built-in font
    font = ImageFont.load_default()
# Position (10,10), red colour with 50 % opacity (alpha=128)
draw.text((10, 10), "Sample Watermark", font=font, fill=(255, 0, 0, 128))
watermarked = Image.alpha_composite(base, overlay)
# JPEG cannot store an alpha channel, so flatten back to RGB before saving
watermarked.convert("RGB").save("watermarked_example.jpg")

imageio

Read and write images in many formats and create a simple animated GIF from random frames.

import imageio
import numpy as np

# Load a JPEG image as a NumPy array (shape: H×W×C)
image = imageio.imread('example.jpg')
print('Image shape:', image.shape)

# Write the same data to PNG (lossless)
imageio.imwrite('output.png', image)

# Build an animated GIF from 10 random frames (100×100 RGB).
# randint's upper bound is exclusive, so 256 is needed to cover the full
# 0–255 range of uint8.
frames = [
    np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
    for _ in range(10)
]
# duration is the display time per frame.
# NOTE(review): intended as 0.5 seconds; some imageio versions may expect
# milliseconds here — confirm against the installed version.
imageio.mimsave('animation.gif', frames, duration=0.5)

scikit‑image

Enhance contrast with histogram equalisation and detect edges using the Canny algorithm.

from skimage import io, exposure, feature
import matplotlib.pyplot as plt

# Read the file as a single-channel greyscale array
image = io.imread('example.jpg', as_gray=True)

# Histogram equalisation to boost contrast
equalized = exposure.equalize_hist(image)

# Canny edge map computed on the equalised image (sigma sets the smoothing)
edges = feature.canny(equalized, sigma=3)

# Show the three stages next to each other, one panel per stage
panels = (
    (image, 'Original'),
    (equalized, 'Equalised'),
    (edges, 'Canny Edges'),
)
fig, ax = plt.subplots(1, 3, figsize=(12, 4))
for axis, (picture, title) in zip(ax, panels):
    axis.imshow(picture, cmap='gray')
    axis.set_title(title)
    axis.axis('off')
plt.show()

opencv‑python

Detect faces with a Haar cascade, draw bounding boxes and blur each face region using a Gaussian kernel.

import cv2

# Load colour image and convert to grayscale for detection.
# cv2.imread returns None instead of raising when the file cannot be read,
# so fail early with a clear message rather than a cryptic cvtColor error.
image = cv2.imread('example.jpg')
if image is None:
    raise FileNotFoundError("Could not read image 'example.jpg'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Haar cascade provided by OpenCV (path is platform‑independent)
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

# Detect faces – scaleFactor controls image pyramid, minNeighbors reduces false positives
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

for (x, y, w, h) in faces:
    # Blur the face region first (kernel size must be odd) ...
    face_roi = image[y:y + h, x:x + w]
    image[y:y + h, x:x + w] = cv2.GaussianBlur(face_roi, (99, 99), sigmaX=30)
    # ... then draw the blue (BGR) rectangle, so the outline stays sharp
    # instead of being smeared by the blur.
    cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 2)

cv2.imshow('Face Detection & Blur', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

pytesseract

Perform OCR on an image file using the Tesseract engine.

import pytesseract
from PIL import Image

import os

# Path to the Tesseract executable (required on Windows). Only override the
# default when running on Windows and the executable is really there; on other
# platforms pytesseract's default command is left untouched.
windows_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.name == 'nt' and os.path.isfile(windows_tesseract):
    pytesseract.pytesseract.tesseract_cmd = windows_tesseract

# Load the image containing printed text; the context manager closes the file
with Image.open('text_image.png') as image:
    # Extract text; additional options can be passed via the config argument
    text = pytesseract.image_to_string(image)
print('Extracted Text:', text)

pdfminer.six

Extract plain text from a PDF document while preserving line breaks.

from pdfminer.high_level import extract_text

# Run pdfminer's high-level extractor on the file and keep the result
pdf_path = 'example.pdf'
text = extract_text(pdf_path)
print('Extracted Text:', text)

ReportLab

Generate a PDF containing a styled table. The example demonstrates page size, style sheets and table formatting.

from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors

# Stock style sheet (used for the heading) and a US Letter output document
styles = getSampleStyleSheet()
doc = SimpleDocTemplate('report.pdf', pagesize=letter)

# Table contents: one header row followed by three data rows
header = ['Name', 'Age', 'Country']
rows = [
    ['Alice', 30, 'USA'],
    ['Bob', 25, 'Canada'],
    ['Charlie', 35, 'UK'],
]
data = [header] + rows

# Cell ranges are (column, row) pairs; -1 means "last"
top_left = (0, 0)
header_end = (-1, 0)
body_start = (0, 1)
bottom_right = (-1, -1)

# Commands are applied in order: header look first, then body fill and grid
style_commands = [
    ('BACKGROUND', top_left, header_end, colors.grey),
    ('TEXTCOLOR', top_left, header_end, colors.whitesmoke),
    ('ALIGN', top_left, bottom_right, 'CENTER'),
    ('FONTNAME', top_left, header_end, 'Helvetica-Bold'),
    ('FONTSIZE', top_left, header_end, 14),
    ('BOTTOMPADDING', top_left, header_end, 12),
    ('BACKGROUND', body_start, bottom_right, colors.beige),
    ('GRID', top_left, bottom_right, 1, colors.black),
]
table = Table(data)
table.setStyle(TableStyle(style_commands))

# Document flow: a title paragraph followed by the table
title = Paragraph('Report Title', styles['Heading1'])
content = [title, table]

doc.build(content)

WeasyPrint

Convert an HTML string with CSS styling into a PDF file. The HTML markup is supplied as a plain Python string, and the CSS rules are passed separately as a stylesheet.

from weasyprint import HTML, CSS

# Stylesheet rules applied on top of the inline styles in the markup
css_content = """
body {font-family: Arial, sans-serif;}
h1 {color: #333;}
table, th, td {border: 1px solid black; padding: 5px;}
th {background-color: #f2f2f2;}
"""

# The report itself: a heading, a paragraph and a small table
html_content = """
<!DOCTYPE html>
<html>
<head>
    <title>Sample Report</title>
</head>
<body>
    <h1>Report Title</h1>
    <p>This is a sample report generated using WeasyPrint.</p>
    <table style='border: 1px solid black; border-collapse: collapse;'>
        <tr><th>Name</th><th>Age</th><th>Country</th></tr>
        <tr><td>Alice</td><td>30</td><td>USA</td></tr>
        <tr><td>Bob</td><td>25</td><td>Canada</td></tr>
        <tr><td>Charlie</td><td>35</td><td>UK</td></tr>
    </table>
</body>
</html>
"""

# Parse the markup, then render it to PDF with the stylesheet attached
document = HTML(string=html_content)
stylesheet = CSS(string=css_content)
document.write_pdf('weasyprint_report.pdf', stylesheets=[stylesheet])

PyPDF2

Merge two PDF files and overlay a single‑page watermark on every page of the combined document.

import PyPDF2

# Merge file1.pdf and file2.pdf, stamp watermark.pdf's first page onto every
# merged page, and write the result to merged_with_watermark.pdf.
#
# All input files stay open until the output has been written: a reader may
# pull page data from its stream lazily, so closing a source file before
# writer.write() can fail. The single `with` also closes the watermark file.
#
# Uses the PdfReader / PdfWriter API; the older camelCase names
# (PdfFileReader, addPage, mergePage, ...) were removed in PyPDF2 3.x.
with open('file1.pdf', 'rb') as f1, \
        open('file2.pdf', 'rb') as f2, \
        open('watermark.pdf', 'rb') as watermark_file:
    writer = PyPDF2.PdfWriter()

    # Append pages from the first, then the second source PDF
    for source in (f1, f2):
        for source_page in PyPDF2.PdfReader(source).pages:
            writer.add_page(source_page)

    # Load the watermark (single‑page PDF)
    watermark = PyPDF2.PdfReader(watermark_file).pages[0]

    # Apply the watermark to each page of the merged document
    for page in writer.pages:
        page.merge_page(watermark)

    # Write the final PDF to disk
    with open('merged_with_watermark.pdf', 'wb') as out:
        writer.write(out)

PyMuPDF (fitz)

Extract both textual content and embedded images from each page of a PDF.

import fitz  # PyMuPDF

# Open the PDF document; the context manager closes it when done
with fitz.open('example.pdf') as doc:
    # ----- Text extraction -----
    for page_number, page in enumerate(doc, start=1):
        text = page.get_text('text')
        # Newlines must be written as escapes inside a single-quoted f-string
        print(f'Page {page_number} Text:\n{text}\n')

    # ----- Image extraction -----
    for page_number, page in enumerate(doc, start=1):
        for img_index, img in enumerate(page.get_images(full=True), start=1):
            xref = img[0]  # first item of the image entry is its xref number
            base = doc.extract_image(xref)
            img_bytes = base['image']
            img_ext = base['ext']
            # One output file per image, named by page and position on the page
            filename = f'page{page_number}_img{img_index}.{img_ext}'
            with open(filename, 'wb') as img_file:
                img_file.write(img_bytes)

Original Source

Signed-in readers can open the original source through BestHub's protected redirect.

Republication Notice

This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.

Python Image processing open source libraries Code examples tutorial pdf-generation

Written by

Test Development Learning Exchange

0 followers

Reader feedback

How this landed with the community

Rate this article

Was this worth your time?

Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.