Build an Extensible Python Test Data Factory with Faker and Strategy Pattern
This guide presents a Python‑based, object‑oriented test data factory that leverages the Faker library and the strategy pattern to generate business‑rule‑aware, globally unique, and scenario‑driven data such as users and orders, with support for concurrency safety, extensibility, and future AI‑driven natural‑language commands.
Background and Motivation
In fast‑iteration projects, reliable test data is essential for automation stability. Traditional Faker generates random data but cannot satisfy business rule constraints, cross‑field relationships, global uniqueness, or dynamic abnormal scenarios.
Core Design Goals
Business‑aware: Data follows real business rules instead of being purely random.
Global Uniqueness: Guarantees no duplicate values even under concurrent generation.
Scenario‑driven: One‑click generation of normal, boundary, and abnormal data.
Flexible Extensibility: New data types can be added without modifying core logic.
AI‑friendly: Placeholder for natural‑language command generation.
Overall Architecture
[DataFactory]
│
├── [UserGenerator] → generate user data (with risk tags)
├── [OrderGenerator] → generate orders (linked to user level)
├── [PaymentGenerator] → generate payments (linked to order risk)
│
└── [UniquenessManager] → global uniqueness control
    └── [RedisStorage] (optional) → distributed deduplication
Step 1: Basic Components
Global Uniqueness Manager
# utils/uniqueness.py
import threading
import hashlib
from typing import Set
class UniquenessManager:
    """Shared registry of values that have already been handed out.

    ``UniquenessManager()`` always returns the same instance, and the set of
    used values lives on the class, so every caller shares one pool.
    """

    _instance = None
    # A single lock guards singleton creation and every access to _used_values.
    _lock = threading.Lock()
    _used_values: Set[str] = set()

    def __new__(cls):
        # Re-check under the lock so two threads racing on first use cannot
        # both create an instance.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def ensure_unique(self, value: str, max_retries: int = 5) -> str:
        """Reserve ``value`` and return it, appending ``_<n>`` if it is taken.

        :param value: desired value; it is converted with ``str()`` first
        :param max_retries: highest numeric suffix to try before giving up
        :return: the reserved string (``value`` itself or ``value_<n>``)
        :raises RuntimeError: if the value and all suffixes 1..max_retries
            are already taken
        """
        base_value = str(value)
        # The membership test and the insert must be one atomic step;
        # otherwise two threads can both see the value as free and both
        # return it.
        with self._lock:
            if base_value not in self._used_values:
                self._used_values.add(base_value)
                return base_value
            for i in range(1, max_retries + 1):
                candidate = f"{base_value}_{i}"
                if candidate not in self._used_values:
                    self._used_values.add(candidate)
                    return candidate
        raise RuntimeError(f"Unable to generate unique value: {base_value}")

    def reset(self):
        """Clear stored values (useful between test runs)."""
        with self._lock:
            self._used_values.clear()
# Base Generator Abstract Class
# generators/base.py
from abc import ABC, abstractmethod
from typing import Dict, Any
from faker import Faker
from utils.uniqueness import UniquenessManager
class BaseGenerator(ABC):
    """Abstract parent of every data generator.

    Each instance carries a ``Faker`` for raw random values and a
    ``UniquenessManager`` for de-duplicating them.
    """

    def __init__(self, locale: str = "zh_CN"):
        """Create the Faker for ``locale`` and obtain the uniqueness manager."""
        self.faker = Faker(locale)
        self.uniqueness = UniquenessManager()

    @abstractmethod
    def generate(self, scenario: str = "normal", **kwargs) -> Dict[str, Any]:
        """Produce one record; concrete generators must override this.

        :param scenario: normal | boundary | abnormal
        :param kwargs: business parameters (e.g., user_level="VIP")
        :return: the generated record as a dictionary
        """
        ...
# Step 2: Business‑Aware Generators
User Generator
# generators/user.py
from generators.base import BaseGenerator
import random
class UserGenerator(BaseGenerator):
    """Generate user records whose risk fields follow the user's level."""

    USER_LEVELS = ["NORMAL", "VIP", "BLACKLIST"]
    # Risk score and per-order spending cap for each level.
    RISK_PROFILES = {
        "NORMAL": {"risk_score": 10, "max_order": 5000},
        "VIP": {"risk_score": 30, "max_order": 50000},
        "BLACKLIST": {"risk_score": 90, "max_order": 0},
    }

    def generate(self, scenario: str = "normal", **kwargs) -> dict:
        """Build one user record.

        :param scenario: normal | boundary | abnormal; only used to pick a
            level when no ``level`` keyword is supplied
        :param kwargs: optional ``level`` (a key of ``RISK_PROFILES``)
        :return: user dict; ``user_id`` is ``None`` until registration
        :raises KeyError: if ``level`` is not a key of ``RISK_PROFILES``
        """
        level = kwargs.get("level") or self._determine_level_by_scenario(scenario)

        raw_username = self.faker.user_name()
        raw_email = self.faker.free_email()

        username = self.uniqueness.ensure_unique(
            f"auto_{level.lower()}_{raw_username}"
        )
        # Keep only the domain of the fake address; the local part is the
        # already-unique username.
        domain = raw_email.split('@')[1]
        email = self.uniqueness.ensure_unique(f"{username}@{domain}")

        id_card = self.faker.ssn()
        profile = self.RISK_PROFILES[level]
        phone = self.faker.phone_number()

        return {
            "user_id": None,  # to be filled after registration
            "username": username,
            "email": email,
            "phone": phone,
            "id_card": id_card,
            "level": level,
            "risk_score": profile["risk_score"],
            "max_order_amount": profile["max_order"],
        }

    def _determine_level_by_scenario(self, scenario: str) -> str:
        """Map a scenario name to a default user level."""
        if scenario == "abnormal":
            # high‑risk users
            return random.choice(["BLACKLIST", "VIP"])
        if scenario == "boundary":
            return "VIP"
        return "NORMAL"
# Order Generator
# generators/order.py
from generators.base import BaseGenerator
from typing import Dict
import random
class OrderGenerator(BaseGenerator):
    """Generate orders whose amount and risk level derive from a user record."""

    PRODUCTS = ["LAPTOP", "PHONE", "BOOK"]

    def generate(self, scenario: str = "normal", user: Dict = None, **kwargs) -> dict:
        """Build one order for ``user``.

        :param scenario: normal | boundary | abnormal; controls the amount
        :param user: user dict providing ``user_id``, ``risk_score`` and
            ``max_order_amount``
        :param kwargs: optional ``amount`` that overrides the computed amount
        :return: order dict
        :raises ValueError: if ``user`` is missing or empty
        """
        if not user:
            raise ValueError("Order must be associated with a user")

        amount = self._calculate_amount(scenario, user, kwargs.get("amount"))
        raw_order_no = f"ORD{self.faker.random_int(100000, 999999)}"
        order_no = self.uniqueness.ensure_unique(raw_order_no)

        owner_id = user["user_id"]
        product = random.choice(self.PRODUCTS)
        # Users with a risk score above 50 produce high-risk orders.
        risk_level = "HIGH" if user["risk_score"] > 50 else "LOW"

        return {
            "order_no": order_no,
            "user_id": owner_id,
            "amount": amount,
            "product": product,
            "risk_level": risk_level,
        }

    def _calculate_amount(self, scenario: str, user: Dict, override: float = None) -> float:
        """Return the order amount for ``scenario``, or ``override`` if given."""
        if override is not None:
            return override

        # A falsy limit (0 or None) falls back to 1000.
        limit = user["max_order_amount"] or 1000
        if scenario == "boundary":
            return limit
        if scenario == "abnormal":
            low, high = 1.0, 2.0  # exceed limit
        else:
            low, high = 0.1, 0.8  # normal range
        return limit * random.uniform(low, high)
# Step 3: DataFactory Entry Point
# factory/data_factory.py
from generators.user import UserGenerator
from generators.order import OrderGenerator
from typing import Dict, Any
class DataFactory:
    """Single entry point that dispatches to the registered generators."""

    # One generator instance per supported data type, created once when the
    # class body is executed.
    _generators = {
        "user": UserGenerator(),
        "order": OrderGenerator(),
    }

    @classmethod
    def create(cls, data_type: str, scenario: str = "normal", **kwargs) -> Dict[str, Any]:
        """Generate one record of ``data_type``.

        Example:
            user = DataFactory.create("user", scenario="abnormal", level="VIP")
            order = DataFactory.create("order", user=user, scenario="boundary")

        :param data_type: key of a registered generator ("user" or "order")
        :param scenario: forwarded to the generator
        :param kwargs: forwarded to the generator
        :raises ValueError: if ``data_type`` has no registered generator
        """
        generator = cls._generators.get(data_type)
        if generator is None:
            raise ValueError(f"Unsupported data type: {data_type}")
        return generator.generate(scenario=scenario, **kwargs)

    @classmethod
    def create_scenario(cls, scenario_name: str) -> Dict[str, Any]:
        """Generate a user and a linked order for a named scenario (future AI hook).

        :param scenario_name: "high_risk_payment" or "normal_checkout"
        :return: dict with keys "user" and "order"
        :raises ValueError: if ``scenario_name`` is not defined
        """
        scenarios = {
            "high_risk_payment": {
                "user": {"scenario": "abnormal", "level": "VIP"},
                "order": {"scenario": "abnormal"},
            },
            "normal_checkout": {
                "user": {"scenario": "normal"},
                "order": {"scenario": "normal"},
            },
        }
        plan = scenarios.get(scenario_name)
        if not plan:
            raise ValueError(f"Unknown scenario: {scenario_name}")

        # The order generator needs the user, so the user is created first.
        user = cls.create("user", **plan["user"])
        order = cls.create("order", user=user, **plan["order"])
        return {"user": user, "order": order}
# Step 4: Using the Factory in Tests
Example 1 – High‑Risk Payment Blocked
# test_payment.py
def test_high_risk_payment_blocked():
    """A payment for the high-risk scenario must be rejected by risk control."""
    # One‑line full scenario generation
    scenario = DataFactory.create_scenario("high_risk_payment")
    # Register user (assume it returns a user_id)
    user_resp = register_user(scenario["user"])
    user_id = user_resp["user_id"]
    scenario["user"]["user_id"] = user_id
    # The order was generated before registration, so it captured
    # user_id=None; copy the real id into it before submitting.
    scenario["order"]["user_id"] = user_id
    # Create large order
    order_resp = create_order(scenario["order"])
    # Attempt payment – should be blocked by risk control
    pay_resp = pay_order(order_resp["order_id"])
    assert pay_resp["code"] == "RISK_BLOCKED"
# Example 2 – Boundary Value Order
# test_order_boundary.py
def test_order_amount_boundary():
    """A boundary order for a VIP user costs exactly the user's limit."""
    vip_user = DataFactory.create("user", level="VIP")
    # Generate an order whose amount equals the user's max limit
    boundary_order = DataFactory.create("order", user=vip_user, scenario="boundary")
    expected_amount = vip_user["max_order_amount"]
    assert boundary_order["amount"] == expected_amount
# Advanced Capability – AI‑Era Extension Point
The factory reserves an interface for natural‑language prompts that could be parsed by a large language model (LLM) to produce a scenario configuration.
# pseudo‑code for LLM‑driven creation
def create_from_nlp(prompt: str):
    """Turn a natural-language prompt into a generated scenario (pseudo-code).

    ``llm_parse`` is a placeholder for an LLM call; it is expected to return
    a mapping that contains the scenario name under ``"name"``.
    """
    # Call LLM: "Generate a black‑list user with an over‑limit order"
    # → returns {"data_type": "scenario", "name": "high_risk_payment"}
    parsed = llm_parse(prompt)
    scenario_name = parsed["name"]
    return DataFactory.create_scenario(scenario_name)
# Uniqueness and Concurrency Validation
# test_uniqueness.py
import threading
def test_concurrent_uniqueness():
    """Usernames generated from several threads at once must all differ."""
    usernames = []
    lock = threading.Lock()

    def worker():
        for _ in range(10):
            data = DataFactory.create("user")
            with lock:
                usernames.append(data["username"])

    threads = [threading.Thread(target=worker) for _ in range(5)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    # Assert in the main thread: an AssertionError raised inside a worker
    # thread is not propagated to the test runner.
    assert len(usernames) == 50  # 5 threads * 10 creations
    assert len(set(usernames)) == 50  # no duplicates
# Conclusion
The presented data factory moves beyond random Faker output by embedding business rules, cross‑entity relationships, global deduplication, and scenario awareness. Its plugin‑style architecture makes adding new generators trivial, while the built‑in uniqueness manager ensures safe concurrent use. Future extensions can expose natural‑language interfaces for AI‑driven data creation, turning test data into truly intelligent ammunition for automated testing.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
