How to Convert XML to JSON in Python: A Step‑by‑Step Guide
This article explains the structure of XML, outlines the general rules for converting XML to JSON using Python's ElementTree, highlights key conversion nuances such as attributes, text nodes, repeated tags, CDATA and namespaces, and provides a complete Tkinter‑based GUI implementation.
Original XML Overview
XML is a tree‑structured data format ideal for representing hierarchical information such as a company's organization, employee details, and project data.
Basic Rules for XML to JSON Conversion
After parsing XML with Python's xml.etree.ElementTree, each element is recursively transformed into a dictionary, ultimately producing a JSON object. The conversion follows these general rules:
✅ Example: XML snippet and its resulting JSON structure.
<?xml version="1.0" encoding="UTF-8"?>
<department id="D101" name="研发部">
<manager id="EMP2001">张伟</manager>
<employees>
<employee id="EMP1001" role="高级开发工程师">
<fullname>李娜</fullname>
<skills>
<skill level="expert">Python</skill>
</skills>
</employee>
</employees>
</department>
{
"department": {
"@attributes": {
"id": "D101",
"name": "研发部"
},
"manager": {
"@attributes": {"id": "EMP2001"},
"text": "张伟"
},
"employees": {
"employee": {
"@attributes": {"id": "EMP1001", "role": "高级开发工程师"},
"fullname": {"text": "李娜"},
"skills": {
"skill": {
"@attributes": {"level": "expert"},
"text": "Python"
}
}
}
}
}
}
Key Points Explained
1. Root Element – Becomes the outermost JSON object that wraps all content.
2. Attributes – Converted to an @attributes object. Example:
"@attributes": {
"id": "D101",
"name": "研发部"
}
3. Text Content – Stored under a text key, e.g., "ABC科技有限公司" becomes: "name": {"text": "ABC科技有限公司"}
4. Repeated Tags – A tag that occurs once stays a single object; as soon as the same tag appears again under the same parent, all of its occurrences are collected into an array.
"projects": {
"project": [
{"@attributes": {"status": "进行中"}, ...},
{"@attributes": {"status": "已完成"}, ...}
]
}
5. CDATA Sections – Treated as ordinary text.
"description": {"text": "一个基于深度学习的对话式 AI 系统"}
6. Namespaces – Preserved in full form, e.g., {http://example.com/hr}id. They can be stripped beforehand if desired.
Code Implementation
The following Python script provides a Tkinter GUI that displays XML input on the left, JSON output on the right, highlights parsing errors, and includes utility buttons for conversion and clearing.
# xml_str_to_json_gui_final.py
import tkinter as tk
from tkinter import messagebox, ttk
import xml.etree.ElementTree as ET
import json
def xml_to_dict(element):
    """Recursively convert an ElementTree element into a plain dictionary.

    Layout of the returned dict:

    * ``'@attributes'`` -- a copy of the element's attributes; present only
      when the element has at least one attribute.
    * ``'text'`` -- the element's own text with surrounding whitespace
      stripped; present only when that text is non-blank.
    * one key per child tag -- a tag that occurs once maps to that child's
      dict, a tag that repeats maps to a list of dicts in document order.

    Tail text (text that follows a child's closing tag) is never read, so
    mixed content is only partially represented in the result.
    """
    node = {}
    if element.attrib:
        # Copy the mapping: storing ``element.attrib`` itself would make the
        # result alias the element, so editing one would silently edit the other.
        node['@attributes'] = dict(element.attrib)
    text = (element.text or '').strip()
    if text:
        node['text'] = text
    for child in element:
        child_data = xml_to_dict(child)
        if child.tag not in node:
            node[child.tag] = child_data
            continue
        existing = node[child.tag]
        if isinstance(existing, list):
            existing.append(child_data)
        else:
            # Second occurrence of this tag: promote the single value to a list.
            node[child.tag] = [existing, child_data]
    return node
def convert_xml_str_to_json(xml_str):
    """Parse an XML string and render it as pretty-printed JSON text.

    On success the return value is a JSON string (4-space indent, non-ASCII
    characters kept verbatim) whose single top-level key is the root tag.

    On failure a dict is returned instead of raising:

    * XML syntax errors -> ``{'error': <message>, 'exception': <ParseError>}``
    * any other exception -> ``{'error': '转换失败: <message>'}``
    """
    try:
        root_element = ET.fromstring(xml_str)
        payload = {}
        payload[root_element.tag] = xml_to_dict(root_element)
        rendered = json.dumps(payload, ensure_ascii=False, indent=4)
    except ET.ParseError as parse_error:
        # Keep the exception object so callers can read its position.
        failure = {'error': str(parse_error)}
        failure['exception'] = parse_error
        return failure
    except Exception as other_error:
        return {'error': f"转换失败: {other_error}"}
    return rendered
def _error_index_in_widget(raw_text, line, column):
    """Translate a parser position in the stripped text to a Tk text index.

    ``line`` (1-based) and ``column`` (0-based) refer to ``raw_text.strip()``;
    the returned index refers to ``raw_text`` as it sits in the widget, so any
    leading whitespace removed by ``strip()`` is added back.
    """
    lead = len(raw_text) - len(raw_text.lstrip())
    prefix = raw_text[:lead]
    skipped_lines = prefix.count("\n")
    if line == 1:
        # Only the first parsed line lost leading characters on its own row.
        column += lead - (prefix.rfind("\n") + 1)
    return f"{line + skipped_lines}.0 + {column} chars"


def on_convert():
    """Handle the convert button: XML pane -> JSON pane, or report the error.

    Reads the XML pane, warns if it is blank, and otherwise converts it.
    On success the error highlight is cleared and the JSON pane is replaced
    with the result. On failure an error dialog is shown and, for XML syntax
    errors, the offending character is highlighted and scrolled into view;
    the JSON pane is left untouched.
    """
    raw_text = xml_text.get("1.0", tk.END)
    xml_input = raw_text.strip()
    if not xml_input:
        messagebox.showwarning("警告", "请输入有效的 XML 内容")
        return

    result = convert_xml_str_to_json(xml_input)
    if isinstance(result, dict) and 'error' in result:
        messagebox.showerror("解析错误", result['error'])
        exc = result.get('exception')
        if isinstance(exc, ET.ParseError):
            line, column = exc.position
            try:
                # The parser saw the stripped text; map its position back onto
                # the unstripped widget content so the right character is marked.
                start_index = _error_index_in_widget(raw_text, line, column)
                end_index = f"{start_index} + 1 chars"
                xml_text.tag_remove("error", "1.0", tk.END)
                xml_text.tag_add("error", start_index, end_index)
                xml_text.tag_config("error", background="yellow", foreground="red")
                xml_text.see(start_index)
            except tk.TclError:
                # Highlighting is best-effort; the dialog already reported the error.
                pass
        return

    xml_text.tag_remove("error", "1.0", tk.END)
    json_text.delete("1.0", tk.END)
    json_text.insert(tk.END, result)
def on_clear():
    """Empty both text panes and drop any error highlight from the XML pane."""
    for pane in (xml_text, json_text):
        pane.delete("1.0", tk.END)
    xml_text.tag_remove("error", "1.0", tk.END)
# --- Main window -----------------------------------------------------------
app = tk.Tk()
app.title("XML 转 JSON 工具 - 完整版")
app.geometry("1200x700")
# Named ttk style used by the convert button created further down.
# NOTE(review): some native ttk themes may ignore button background/foreground
# settings -- confirm the colours show up on the target platform.
style = ttk.Style()
style.configure("Convert.TButton", font=("微软雅黑", 12, "bold"), foreground="white", background="#2E7D32", padding=10, relief="flat")
style.map("Convert.TButton", background=[('active', '#1B5E20')], foreground=[('active', 'white')])
# --- Horizontal split: XML input is added first, JSON output second ----------
paned_window = tk.PanedWindow(app, orient=tk.HORIZONTAL, sashrelief=tk.SUNKEN)
paned_window.pack(fill="both", expand=True, padx=10, pady=10)
# First pane: label plus the editable XML text area (undo enabled).
left_frame = ttk.Frame(paned_window)
tk.Label(left_frame, text="输入 XML 内容:", font=("微软雅黑", 12)).pack(anchor="w", padx=5, pady=5)
xml_text = tk.Text(left_frame, wrap="word", font=("Consolas", 12), undo=True)
xml_text.pack(fill="both", expand=True, padx=5, pady=5)
paned_window.add(left_frame, width=600)
# Second pane: label plus the text area that on_convert fills with JSON.
right_frame = ttk.Frame(paned_window)
tk.Label(right_frame, text="JSON 输出内容:", font=("微软雅黑", 12)).pack(anchor="w", padx=5, pady=5)
json_text = tk.Text(right_frame, wrap="word", font=("Consolas", 12))
json_text.pack(fill="both", expand=True, padx=5, pady=5)
paned_window.add(right_frame, width=600)
# --- Button row, packed after the paned window -------------------------------
btn_frame = ttk.Frame(app)
btn_frame.pack(pady=10)
# Convert button runs on_convert and uses the custom style defined above.
convert_btn = ttk.Button(btn_frame, text="🔄 转换为 JSON", width=20, command=on_convert, style="Convert.TButton")
convert_btn.grid(row=0, column=0, padx=10)
# Clear button runs on_clear.
clear_btn = ttk.Button(btn_frame, text="🧹 清空内容", width=15, command=on_clear)
clear_btn.grid(row=0, column=1, padx=10)
app.mainloop()
The GUI presents a side‑by‑side layout for clear input/output visualization, highlights errors directly in the XML editor, and includes additional features such as content clearing and automatic resizing.
Summary
This guide provides a comprehensive overview of XML structure, detailed conversion rules to JSON, explanations of handling attributes, text, repeated elements, CDATA, and namespaces, and supplies a ready‑to‑run Python/Tkinter application that demonstrates the entire conversion process.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
