Python · Intermediate

Nested JSON Flattening in Python

Flatten deeply nested JSON structures into flat dictionaries suitable for DataFrames or CSV export.

python
import json
import pandas as pd
from typing import Any


def flatten_json(obj: Any, prefix: str = "", sep: str = "_") -> dict:
    """Flatten nested dicts and lists into a single-level dict.

    Dict keys and list indices along the path to each leaf value are joined
    with ``sep`` to form the flat key, e.g. ``{"a": {"b": [10]}}`` becomes
    ``{"a_b_0": 10}``.

    Args:
        obj: The value to flatten. Dicts and lists are walked recursively;
            anything else is treated as a leaf.
        prefix: Key path accumulated so far. Callers normally leave this
            empty; it is filled in by the recursive calls.
        sep: Separator placed between path components.

    Returns:
        A flat dict mapping joined key paths to leaf values. Empty dicts and
        empty lists contribute no keys. A non-container ``obj`` is stored
        under ``prefix`` itself (the empty string by default). If two paths
        join to the same flat key, the one visited later wins.
    """
    items: dict = {}
    if isinstance(obj, dict):
        for key, value in obj.items():
            new_key = f"{prefix}{sep}{key}" if prefix else key
            items.update(flatten_json(value, new_key, sep))
    elif isinstance(obj, list):
        for i, value in enumerate(obj):
            # Mirror the dict branch: only prepend the separator when there
            # is a parent path, so a top-level list yields "0", not "_0".
            new_key = f"{prefix}{sep}{i}" if prefix else str(i)
            items.update(flatten_json(value, new_key, sep))
    else:
        items[prefix] = obj
    return items


# Sample input: a single record that mixes nested objects with a list of objects
nested_data = [
    {
        "id": 1,
        "user": {
            "name": "Alice",
            "address": {
                "city": "NYC",
                "zip": "10001",
            },
        },
        "orders": [
            {
                "product": "Widget",
                "amount": 29.99,
            },
            {
                "product": "Gadget",
                "amount": 49.99,
            },
        ],
    },
]

# Build one flat dict per record; each flat dict becomes one DataFrame row
flat_records = list(map(flatten_json, nested_data))
df = pd.DataFrame(flat_records)

print(list(df.columns))
# ['id', 'user_name', 'user_address_city', 'user_address_zip',
#  'orders_0_product', 'orders_0_amount', 'orders_1_product', 'orders_1_amount']

# Alternative when every record has the same shape: let pandas expand the
# "orders" list into one row per order and carry selected parent fields along
df_normalized = pd.json_normalize(
    data=nested_data,
    record_path="orders",
    record_prefix="order_",
    meta=["id", ["user", "name"]],
)
print(df_normalized)

Use Cases

  • Converting API responses to flat tables
  • Preparing nested data for database import
  • Transforming document store data for analytics

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.