Python · Intermediate

Data Validation with Pydantic

Validate and parse data records using Pydantic models with custom validators and error reporting.

python
from datetime import date
from pydantic import BaseModel, field_validator, ValidationError


class SalesRecord(BaseModel):
    """One sales row, validated and normalised when the model is built.

    Beyond the type coercion implied by the field annotations, three
    field validators apply:

    * ``amount`` is rounded to two decimals and must then be positive.
    * ``quantity`` must lie in the inclusive range 1..10000.
    * ``region`` is upper-cased and must be one of US, EU, APAC, LATAM.
    """

    id: int
    product: str
    amount: float  # stored rounded to 2 decimal places
    quantity: int
    sale_date: date
    region: str  # stored upper-cased

    @field_validator("amount")
    @classmethod
    def amount_positive(cls, v: float) -> float:
        """Return ``v`` rounded to two decimals; reject non-positive results.

        Raises:
            ValueError: if the rounded amount is not greater than zero.
        """
        # Round *before* checking: a tiny positive input such as 0.004 would
        # otherwise pass the check and then be stored as 0.0.
        rounded = round(v, 2)
        # ``not rounded > 0`` (instead of ``rounded <= 0``) also rejects NaN,
        # for which every ordering comparison evaluates to False.
        if not rounded > 0:
            raise ValueError("amount must be positive")
        return rounded

    @field_validator("quantity")
    @classmethod
    def quantity_range(cls, v: int) -> int:
        """Return ``v`` unchanged if it is within 1..10000 inclusive.

        Raises:
            ValueError: if ``v`` is outside that range.
        """
        if not 1 <= v <= 10000:
            raise ValueError("quantity must be between 1 and 10000")
        return v

    @field_validator("region")
    @classmethod
    def valid_region(cls, v: str) -> str:
        """Return ``v`` upper-cased if it names an allowed region.

        The comparison is case-insensitive; the stored value is upper case.

        Raises:
            ValueError: if the upper-cased value is not an allowed region.
        """
        allowed = {"US", "EU", "APAC", "LATAM"}
        region = v.upper()
        if region not in allowed:
            # Sort the choices so the message is identical on every run;
            # formatting the set directly yields an arbitrary order.
            choices = ", ".join(sorted(allowed))
            raise ValueError(f"region must be one of {choices}")
        return region


def validate_batch(records: list[dict]) -> tuple[list[SalesRecord], list[dict]]:
    """Validate each raw record and split the batch into successes and failures.

    Args:
        records: Raw rows, each expected to be a mapping of field name to value.

    Returns:
        A ``(valid, errors)`` pair. ``valid`` holds the ``SalesRecord``
        instances that passed validation, in input order. ``errors`` holds
        one dict per rejected row with the keys ``"row"`` (zero-based index
        into ``records``), ``"data"`` (the original row) and ``"errors"``
        (the list returned by ``ValidationError.errors()``).
    """
    valid: list[SalesRecord] = []
    errors: list[dict] = []
    for i, record in enumerate(records):
        try:
            # model_validate (rather than SalesRecord(**record)) reports a
            # row that is not a mapping as a ValidationError instead of
            # raising TypeError, so one malformed row cannot abort the batch.
            parsed = SalesRecord.model_validate(record)
        except ValidationError as e:
            errors.append({"row": i, "data": record, "errors": e.errors()})
        else:
            valid.append(parsed)
    return valid, errors


# Sample input: the first row is well-formed, the second breaks four rules at
# once (negative amount, zero quantity, unparseable date, unknown region).
raw_data = [
    {
        "id": 1,
        "product": "Widget",
        "amount": 29.99,
        "quantity": 5,
        "sale_date": "2024-06-15",
        "region": "US",
    },
    {
        "id": 2,
        "product": "Gadget",
        "amount": -10,
        "quantity": 0,
        "sale_date": "invalid",
        "region": "XX",
    },
]

# Run the batch through validation and keep both halves of the result.
valid_records, error_records = validate_batch(raw_data)

# Summary line first, then the details of every rejected row.
print(f"Valid: {len(valid_records)}, Errors: {len(error_records)}")
for err in error_records:
    print(f"Row {err['row']}: {err['errors']}")

Use Cases

  • Validating incoming data before warehouse loading
  • Data quality checks in ETL pipelines
  • Schema enforcement for API data ingestion

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.