Backend Development 8 min read

Python Library Examples for YAML, TOML, JSON Schema, XML, HTML, and CSS Processing

This article provides concise Python code examples demonstrating how to read, write, and manipulate data using popular libraries such as PyYAML, toml, jsonschema, xmltodict, lxml, BeautifulSoup4, html5lib, and cssutils, covering scenarios from configuration files to XML/HTML parsing and CSS styling.

Test Development Learning Exchange

Oct 20, 2024

Python Library Examples for YAML, TOML, JSON Schema, XML, HTML, and CSS Processing

1. PyYAML

Scenario: Use PyYAML to read and write YAML files, handling complex nested structures.

import yaml

# Nested sample data: a list of employee records plus a company mapping.
data = {
    'employees': [
        {'name': 'Alice', 'age': 30, 'department': 'HR'},
        {'name': 'Bob', 'age': 25, 'department': 'Engineering'},
    ],
    'company': {
        'name': 'Example Corp',
        'location': 'New York',
    },
}

# Write YAML file.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's locale default. safe_dump is the counterpart of the safe_load
# call used below and only emits standard YAML tags.
with open('data.yaml', 'w', encoding='utf-8') as file:
    yaml.safe_dump(data, file, default_flow_style=False)

# Read YAML file back using the same encoding it was written with.
with open('data.yaml', 'r', encoding='utf-8') as file:
    loaded_data = yaml.safe_load(file)
print(loaded_data)

2. toml

Scenario: Use the toml library to read and write TOML configuration files with nested data structures.

import toml

# Nested sample configuration with two tables: [server] and [database].
data = {
    'server': {
        'host': 'localhost',
        'port': 8080,
        'ssl': True,
    },
    'database': {
        'host': 'db.example.com',
        'port': 5432,
        'user': 'admin',
        # Placeholder value for the example; do not store real credentials
        # in plain-text configuration files.
        'password': 'secret',
    },
}

# Write TOML file.
# The TOML specification requires documents to be UTF-8, so the encoding is
# set explicitly instead of relying on the platform's locale default.
with open('config.toml', 'w', encoding='utf-8') as file:
    toml.dump(data, file)

# Read TOML file back using the same encoding.
with open('config.toml', 'r', encoding='utf-8') as file:
    loaded_data = toml.load(file)
print(loaded_data)

3. jsonschema

Scenario: Validate JSON data against a predefined schema using the jsonschema library.

import json
import jsonschema
from jsonschema import validate

# Sub-schema for the nested "address" object; both fields are mandatory.
address_schema = {
    "type": "object",
    "properties": {
        "street": {"type": "string"},
        "city": {"type": "string"},
    },
    "required": ["street", "city"],
}

# Top-level schema: a person with a name, a non-negative age and an address.
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer", "minimum": 0},
        "address": address_schema,
    },
    "required": ["name", "age", "address"],
}

# Instance to check against the schema.
data = {
    "name": "Alice",
    "age": 30,
    "address": {"street": "123 Main St", "city": "Anytown"},
}

# Validate the instance and report the outcome.
try:
    validate(instance=data, schema=schema)
except jsonschema.exceptions.ValidationError as err:
    print(f"Data is invalid: {err}")
else:
    print("Data is valid.")

4. xmltodict

Scenario: Convert XML to a dictionary and back to XML using xmltodict and dicttoxml.

import xmltodict
import dicttoxml
from xml.dom.minidom import parseString

# XML string
xml_string = """
<root>
    <person>
        <name>Alice</name>
        <age>30</age>
        <address>
            <street>123 Main St</street>
            <city>Anytown</city>
        </address>
    </person>
</root>
"""
# Convert XML to dict. The result is keyed by the document's root tag,
# i.e. it has the shape {'root': {'person': {...}}}.
data_dict = xmltodict.parse(xml_string)
print(data_dict)
# Modify dict
data_dict['root']['person']['age'] = 31
# Convert dict back to XML.
# dicttoxml wraps whatever it is given in a new <custom_root> element, so
# pass the *contents* of 'root' rather than the whole dict; passing
# data_dict itself would emit a doubled <root><root>...</root></root>.
new_xml = dicttoxml.dicttoxml(data_dict['root'], custom_root='root', attr_type=False)
# dicttoxml returns a single-line document; re-parse it to pretty-print.
pretty_xml = parseString(new_xml).toprettyxml()
print(pretty_xml)

5. lxml

Scenario: Parse and modify an XML document with lxml, performing XPath queries.

from lxml import etree

# Sample document: one <person> with a name, an age and a nested address.
xml_string = """
<root>
    <person>
        <name>Alice</name>
        <age>30</age>
        <address>
            <street>123 Main St</street>
            <city>Anytown</city>
        </address>
    </person>
</root>
"""
root = etree.fromstring(xml_string)

# XPath query: collect the text of every <name> element in the document.
names = root.xpath('//name/text()')
print("Names:", names)

# Modify XML: overwrite the text of each <name> element in place.
name_elements = root.xpath('//name')
for name_element in name_elements:
    name_element.text = "Bob"

# Serialize the modified tree back to an indented string.
modified_xml = etree.tostring(root, encoding='unicode', pretty_print=True)
print(modified_xml)

6. beautifulsoup4

Scenario: Parse an HTML document and extract specific information using BeautifulSoup.

from bs4 import BeautifulSoup

html_string = """
<html>
<head>
    <title>Sample Page</title>
</head>
<body>
    <h1>Welcome to the Sample Page</h1>
    <p>This is a sample paragraph.</p>
    <div class="content">
        <p>Content 1</p>
        <p>Content 2</p>
    </div>
</body>
</html>
"""


def paragraph_texts(node):
    """Return the text of every <p> element found beneath *node*."""
    return [paragraph.get_text() for paragraph in node.find_all('p')]


# Parse the document with the parser bundled with the standard library.
soup = BeautifulSoup(html_string, 'html.parser')

# Page title.
title = soup.title.string
print("Title:", title)

# Text of all paragraphs anywhere in the page.
paragraphs = paragraph_texts(soup)
print("Paragraphs:", paragraphs)

# Text of the paragraphs inside <div class="content"> only.
content_div = soup.find('div', class_='content')
content_paragraphs = paragraph_texts(content_div)
print("Content Paragraphs:", content_paragraphs)

7. html5lib

Scenario: Parse an HTML5 document with BeautifulSoup using the html5lib parser, and extract the title, main content, and article details.

import html5lib
from bs4 import BeautifulSoup

html_string = """
<!DOCTYPE html>
<html>
<head>
    <title>Sample Page</title>
</head>
<body>
    <header>
        <h1>Welcome to the Sample Page</h1>
    </header>
    <main>
        <section>
            <p>This is a sample paragraph.</p>
        </section>
        <article>
            <h2>Article Title</h2>
            <p>Article content.</p>
        </article>
    </main>
</body>
</html>
"""
# Parse the markup, selecting html5lib as BeautifulSoup's parser backend.
soup = BeautifulSoup(html_string, 'html5lib')

# Page title.
title = soup.title.string
print("Title:", title)

# All text inside <main>, with surrounding whitespace stripped from each piece.
main_element = soup.main
main_content = main_element.get_text(strip=True)
print("Main Content:", main_content)

# Heading and first paragraph of the first <article>.
article = soup.article
article_title = article.h2.get_text()
article_content = article.p.get_text()
print("Article Title:", article_title)
print("Article Content:", article_content)

8. cssutils

Scenario: Parse and modify a CSS stylesheet, then add a new rule using cssutils.

import cssutils

css_string = """
body {
    font-family: Arial, sans-serif;
    background-color: #f0f0f0;
}

h1 {
    color: #333;
}

.content {
    margin: 20px;
    padding: 10px;
    border: 1px solid #ccc;
}
"""
# Parse the stylesheet text into a rule-by-rule object model.
css = cssutils.parseString(css_string)

# Style overrides to apply, keyed by selector:
# selector -> (style attribute name, new value).
overrides = {
    'body': ('backgroundColor', '#ffffff'),  # change background
    'h1': ('color', '#0000ff'),  # change heading color
}
for rule in css:
    # Only style rules carry a selector and a declaration block.
    if rule.type != rule.STYLE_RULE:
        continue
    override = overrides.get(rule.selectorText)
    if override is None:
        continue
    attribute, value = override
    setattr(rule.style, attribute, value)

# Build a new rule for the .highlight class and append it to the sheet.
new_rule = cssutils.css.CSSStyleRule()
new_rule.selectorText = '.highlight'
new_rule.style.color = '#ff0000'
new_rule.style.fontWeight = 'bold'
css.add(new_rule)

# Output the modified CSS; cssText is decoded before printing.
serialized_css = css.cssText
print(serialized_css.decode())

Original Source

Signed-in readers can open the original source through BestHub's protected redirect.

Republication Notice

This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.

Python JSON Schema XML YAML CSS HTML TOML

Written by

Test Development Learning Exchange

0 followers

Reader feedback

How this landed with the community

Rate this article

Was this worth your time?

Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.