Fundamentals 8 min read

Common Data Validation Functions in Python Using Regular Expressions

This article provides a collection of Python functions that use regular expressions to validate common data types such as phone numbers, ID cards, names, emails, URLs, IP addresses, credit cards, dates, passwords, filenames, numeric strings, postal codes, HTML content, domains, repeated words, IPv6 addresses, and also demonstrates scheduled tasks with the schedule library.

Test Development Learning Exchange
Test Development Learning Exchange
Test Development Learning Exchange
Common Data Validation Functions in Python Using Regular Expressions

1. Phone number validation

import re

def validate_phone_number(phone: str) -> bool:
    """Return True if *phone* is an 11-digit mobile number of the form 1[3-9]XXXXXXXXX.

    The whole string must consist of exactly 11 ASCII digits: a leading
    ``1``, a second digit between 3 and 9, and nine further digits.

    Args:
        phone: Candidate phone number, without spaces, dashes or country code.

    Returns:
        True when the entire string matches the format, False otherwise.
    """
    # fullmatch (instead of match + "$") rejects a trailing newline, and
    # [0-9] (instead of \d) rejects non-ASCII Unicode digits.
    return re.fullmatch(r'1[3-9][0-9]{9}', phone) is not None
# Example
print(validate_phone_number('13800138000'))  # True

2. ID card validation

def validate_id_card(id_card: str) -> bool:
    """Return True if *id_card* has the shape of a 15- or 18-character ID number.

    Accepted shapes are 15 ASCII digits, or 17 ASCII digits followed by
    either a digit or an uppercase ``X``. Only the shape is checked; the
    embedded region code, birth date and check character are not verified.

    Args:
        id_card: Candidate ID number.

    Returns:
        True when the entire string has one of the accepted shapes.
    """
    # fullmatch rejects a trailing newline that "$" would tolerate, and
    # [0-9] keeps non-ASCII Unicode digits out.
    pattern = r'[0-9]{15}|[0-9]{17}[0-9X]'
    return re.fullmatch(pattern, id_card) is not None
# Example
print(validate_id_card('11010519491231002X'))  # True

3. Chinese/English name validation

def validate_name(name: str) -> bool:
    """Return True if *name* is made only of CJK ideographs, ASCII letters and whitespace.

    CJK ideographs are limited to the range U+4E00-U+9FA5. The name must
    contain at least one non-whitespace character.

    Args:
        name: Candidate personal name, e.g. a Chinese or English name.

    Returns:
        True when every character is allowed and the name is not blank.
    """
    # The character class includes \s, so without this guard a string made
    # only of spaces or newlines would be accepted as a name.
    if not name.strip():
        return False
    return re.fullmatch(r'[\u4e00-\u9fa5a-zA-Z\s]+', name) is not None
# Example
print(validate_name('张三'))  # True
print(validate_name('Zhang San'))  # True

4. Email address validation

def validate_email(email: str) -> bool:
    """Return True if *email* looks like ``local@label.label[.label...]``.

    The local part may contain ASCII letters, digits and ``_ . + -``. The
    domain must be two or more dot-separated labels of ASCII letters,
    digits and hyphens. This is a pragmatic shape check, not a full
    RFC 5322 validator.

    Args:
        email: Candidate e-mail address.

    Returns:
        True when the entire string has the expected shape.
    """
    # Each domain label is matched separately so consecutive dots
    # ("a@b..com") and a trailing dot ("a@b.com.") are rejected; fullmatch
    # also rejects a trailing newline that "$" would tolerate.
    pattern = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+'
    return re.fullmatch(pattern, email) is not None
# Example
print(validate_email('user@example.com'))  # True

5. URL validation

def validate_url(url: str) -> bool:
    """Return True if *url* looks like an (optionally http/https) web address.

    The accepted shape is an optional ``http://`` or ``https://`` prefix, a
    lowercase host, a dot, a 2-6 character lowercase suffix, and an
    optional path made of word characters, ``/``, ``.``, ``-`` and spaces.
    Uppercase hosts, ports, query strings and fragments are not accepted.

    Args:
        url: Candidate URL.

    Returns:
        True when the entire string has the expected shape.
    """
    # The path is a single character-class repetition. The original nested
    # quantifier "([...]*)*" matches the same strings but backtracks
    # exponentially on inputs such as a long path followed by "!".
    # fullmatch also rejects a trailing newline that "$" would tolerate.
    pattern = r'(?:https?://)?[\da-z.-]+\.[a-z.]{2,6}[/\w .-]*'
    return re.fullmatch(pattern, url) is not None
# Example
print(validate_url('http://www.example.com'))  # True

6. IP address validation

def validate_ip(ip: str) -> bool:
    """Return True if *ip* is a dotted-quad IPv4 address with octets 0-255.

    Octets may carry leading zeros (``01``, ``001``), as in the original
    pattern.

    Args:
        ip: Candidate IPv4 address in dotted-decimal notation.

    Returns:
        True when the entire string is four in-range octets joined by dots.
    """
    # [0-9] instead of \d keeps non-ASCII Unicode digits out; fullmatch
    # rejects a trailing newline that "$" would tolerate.
    octet = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    pattern = r'(?:' + octet + r'\.){3}' + octet
    return re.fullmatch(pattern, ip) is not None
# Example
print(validate_ip('192.168.0.1'))  # True

7. Scheduled task using the schedule library

import schedule
import time

def job():
    """Print a fixed status line to standard output."""
    message = "I'm working..."
    print(message)

# Register job with the schedule library to run every 10 minutes.
schedule.every(10).minutes.do(job)

# Poll once per second; run_pending() executes any registered job that is
# due. NOTE: this loop never exits, so nothing placed after it in the same
# script will run.
while True:
    schedule.run_pending()
    time.sleep(1)

8. Credit card number validation

def validate_credit_card(card_number: str) -> bool:
    """Return True if *card_number* matches one of the known prefix/length shapes.

    The string must be ASCII digits only, with no spaces or dashes. Only the
    leading digits and total length are checked against the alternatives in
    the pattern; the Luhn checksum is NOT verified.

    Args:
        card_number: Candidate card number as a digit string.

    Returns:
        True when the entire string matches one of the accepted shapes.
    """
    pattern = (
        r'(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}'
        r'|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}'
        r'|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35)\d{3}\d{11})'
    )
    # re.ASCII limits \d to 0-9; fullmatch rejects a trailing newline that
    # "$" would tolerate.
    return re.fullmatch(pattern, card_number, flags=re.ASCII) is not None
# Example
print(validate_credit_card('4111111111111111'))  # True

9. Date format (YYYY-MM-DD) validation

def validate_date(date_text: str) -> bool:
    """Return True if *date_text* is a real calendar date written as YYYY-MM-DD.

    The string must be exactly four, two and two ASCII digits separated by
    hyphens, and must name an existing date (``2025-02-30`` is rejected).

    Args:
        date_text: Candidate date string.

    Returns:
        True when the format matches and the date exists, False otherwise.
    """
    # Imported locally: the snippet never imported datetime, so the original
    # raised NameError for every string that passed the regex.
    import datetime

    # Cheap shape check first; strptime alone would also accept "2025-5-5".
    if re.fullmatch(r'[0-9]{4}-[0-9]{2}-[0-9]{2}', date_text) is None:
        return False
    try:
        datetime.datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        return False
    return True
# Example
print(validate_date('2025-05-05'))  # True

10. Password strength validation (at least 8 characters, with at least one uppercase letter, one lowercase letter, one digit, and one special character from @$!%*?&)

def validate_password(password: str) -> bool:
    """Return True if *password* meets the strength rules.

    Rules: at least 8 characters, drawn only from ASCII letters, ASCII
    digits and ``@$!%*?&``, with at least one lowercase letter, one
    uppercase letter, one digit and one of the listed special characters.

    Args:
        password: Candidate password.

    Returns:
        True when the entire string satisfies every rule.
    """
    pattern = (
        r'(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])'
        r'[A-Za-z\d@$!%*?&]{8,}'
    )
    # re.ASCII limits \d to 0-9, so a non-ASCII digit cannot satisfy the
    # digit rule; fullmatch rejects a trailing newline that "$" would allow.
    return re.fullmatch(pattern, password, flags=re.ASCII) is not None
# Example
print(validate_password('Password@123'))  # True

11. Filename validation (no special characters)

def validate_filename(filename: str) -> bool:
    """Return True if *filename* is ``<stem>.<extension>`` with a plain stem.

    The stem may contain word characters, commas, whitespace and hyphens.
    The extension must be at least two ASCII letters. Stems containing dots
    (``archive.tar.gz``) and one-letter extensions (``main.c``) are rejected.

    Args:
        filename: Candidate file name (not a path).

    Returns:
        True when the entire string has the expected shape.
    """
    # fullmatch rejects a newline after the extension, which "$" tolerates.
    return re.fullmatch(r'[\w,\s-]+\.[A-Za-z]{2,}', filename) is not None
# Example
print(validate_filename('example.txt'))  # True

12. Integer validation

def validate_integer(integer_str: str) -> bool:
    """Return True if *integer_str* is an optional minus sign followed by digits.

    A leading ``+``, surrounding whitespace and digit separators are not
    accepted.

    Args:
        integer_str: Candidate integer literal.

    Returns:
        True when the entire string is ``-?`` followed by one or more digits.
    """
    # fullmatch rejects a trailing newline that match + "$" would tolerate.
    return re.fullmatch(r'-?\d+', integer_str) is not None
# Example
print(validate_integer('123'))  # True
print(validate_integer('-123'))  # True

13. Float validation

def validate_float(float_str: str) -> bool:
    """Return True if *float_str* is a plain decimal number.

    The accepted shape is an optional minus sign, one or more digits, and an
    optional fractional part of a dot followed by one or more digits.
    Exponents (``1e5``), a bare leading dot (``.5``) and a bare trailing dot
    (``5.``) are not accepted.

    Args:
        float_str: Candidate decimal literal.

    Returns:
        True when the entire string has the expected shape.
    """
    # fullmatch rejects a trailing newline that match + "$" would tolerate.
    return re.fullmatch(r'-?\d+(?:\.\d+)?', float_str) is not None
# Example
print(validate_float('123.45'))  # True
print(validate_float('-123.45'))  # True

14. Chinese postal code validation

def validate_postal_code(postal_code: str) -> bool:
    """Return True if *postal_code* is exactly six ASCII digits.

    Args:
        postal_code: Candidate postal code.

    Returns:
        True when the entire string is six digits in the range 0-9.
    """
    # fullmatch rejects a trailing newline that "$" would tolerate, and
    # [0-9] keeps non-ASCII Unicode digits out.
    return re.fullmatch(r'[0-9]{6}', postal_code) is not None
# Example
print(validate_postal_code('100000'))  # True

15. HTML tag removal

def remove_html_tags(text: str) -> str:
    """Return *text* with every ``<...>`` tag removed.

    This is a simple text clean-up helper, not an HTML sanitizer: entities
    are left as-is and the content of ``<script>``/``<style>`` elements is
    kept.

    Args:
        text: Text that may contain HTML tags.

    Returns:
        The text with each ``<`` ... ``>`` span deleted.
    """
    # [^>]* also crosses newlines, so tags whose attributes wrap onto several
    # lines are removed; the original "." stopped at the first newline.
    return re.sub(r'<[^>]*>', '', text)
# Example
html_content = "<p>Hello, <b>world</b>!</p>"
print(remove_html_tags(html_content))  # Hello, world!

16. Extract domain from URL

def extract_domain(url: str):
    """Return the host name of an http/https URL, or None if there is none.

    The URL must start with ``http://`` or ``https://`` (any letter case).
    An optional ``user:password@`` prefix is skipped, and the port, path,
    query and fragment are not part of the result.

    Args:
        url: URL to inspect.

    Returns:
        The host as written in the URL, or None when *url* does not start
        with an http/https scheme followed by a host.
    """
    # The optional "(?:...@)?" group skips userinfo; without it
    # "http://user@example.com" yielded "user" instead of the host.
    # IGNORECASE is needed because URL schemes are case-insensitive.
    pattern = r'https?://(?:[^/?#@]*@)?([A-Za-z_0-9.-]+)'
    match = re.match(pattern, url, flags=re.IGNORECASE)
    if match is None:
        return None
    return match.group(1)
# Example
print(extract_domain("http://www.example.com/path"))  # www.example.com

17. Check for repeated words in a string

def has_repeated_words(text):
    """Tell whether *text* contains the same word twice in a row.

    Two whole words count as a repeat when they are separated only by
    whitespace and are equal ignoring letter case.
    """
    doubled_word = re.compile(r'\b(\w+)\s+\1\b', re.IGNORECASE)
    found = doubled_word.search(text)
    return found is not None
# Example
print(has_repeated_words("This is is a test."))  # True

18. IPv6 address validation

def validate_ipv6(ipv6: str) -> bool:
    """Return True if *ipv6* is a valid textual IPv6 address.

    Both the full eight-group form and the compressed ``::`` forms are
    accepted, as is a trailing embedded IPv4 address
    (``::ffff:192.168.0.1``). Zone identifiers (``%eth0``) and prefix
    lengths (``/64``) are rejected.

    Args:
        ipv6: Candidate IPv6 address.

    Returns:
        True when the string is a valid IPv6 address, False otherwise.

    Raises:
        TypeError: If *ipv6* is not a string (as the regex version did).
    """
    # Imported locally so the snippet stays self-contained.
    import ipaddress

    # IPv6Address also accepts ints and packed bytes; only text is an
    # address string here.
    if not isinstance(ipv6, str):
        raise TypeError('expected a string, got ' + type(ipv6).__name__)
    # Newer Python versions accept a "%zone" suffix; reject it explicitly so
    # the result does not depend on the interpreter version.
    if '%' in ipv6:
        return False
    try:
        ipaddress.IPv6Address(ipv6)
    except ValueError:
        return False
    return True
# Example
print(validate_ipv6('2001:0db8:85a3:0000:0000:8a2e:0370:7334'))  # True

19. Clean phone number by removing non-digit characters

def clean_phone_number(phone):
    """Return only the digit characters of *phone*, in their original order."""
    # Collect every digit and glue them back together; everything else
    # (plus signs, dashes, spaces, letters) is dropped.
    digits = re.findall(r'\d', phone)
    return ''.join(digits)
# Example
print(clean_phone_number('+1-800-123-4567'))  # 18001234567

20. Extract all email addresses from text

def extract_emails(text: str) -> list:
    """Return every e-mail-like substring of *text*, in order of appearance.

    An address is a local part of ASCII letters, digits and ``_ . + -``,
    an ``@``, and two or more dot-separated domain labels of ASCII letters,
    digits and hyphens.

    Args:
        text: Free text to scan.

    Returns:
        A list of matched address strings; empty when none are found.
    """
    # Each domain label is matched separately so a sentence-ending full stop
    # ("write to a@b.com.") is not swallowed into the address.
    pattern = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+'
    return re.findall(pattern, text)
# Example
text = "Contact us at support@example.com or sales@example.com"
print(extract_emails(text))  # ['support@example.com', 'sales@example.com']
Tags: Python, data validation, Validation Functions
Test Development Learning Exchange
Written by

Test Development Learning Exchange

Test Development Learning Exchange

0 followers
Reader feedback

How this landed with the community

login Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.