Python · Intermediate

Pandas Memory Reduction via Dtypes

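Reduce DataFrame memory, often by 60-80%, by downcasting numeric columns to the smallest dtype that holds their values and converting low-cardinality string columns to categoricals. Actual savings depend on the data; float downcasting to float32 trades precision for space.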

python
import pandas as pd
import numpy as np

def reduce_mem(df: pd.DataFrame) -> pd.DataFrame:
    """Shrink a DataFrame's memory footprint by choosing smaller dtypes.

    Each column is handled according to its dtype:

    * object / pandas string columns become ``category`` when fewer than
      half of the rows hold distinct values;
    * NumPy integer columns are downcast with
      ``pd.to_numeric(..., downcast='integer')``;
    * NumPy float columns are downcast with
      ``pd.to_numeric(..., downcast='float')`` (float32 at the smallest,
      so precision may be reduced);
    * every other dtype (category, bool, datetime, nullable/extension
      dtypes, ...) is left untouched.

    The function is safe to call repeatedly on its own output and on
    frames with zero rows.

    Args:
        df: Frame to optimise. Its columns are reassigned IN PLACE; pass
            ``df.copy()`` if the original must be preserved.

    Returns:
        The same ``df`` object, for call chaining.
    """
    n_rows = len(df)
    for col in df.columns:
        dtype = df[col].dtype

        if dtype == object or isinstance(dtype, pd.StringDtype):
            # A uniqueness ratio is undefined for an empty frame; the
            # original division raised ZeroDivisionError here.
            if n_rows == 0:
                continue
            try:
                n_unique = df[col].nunique()
            except TypeError:
                # Unhashable values (lists, dicts) cannot be categorised.
                continue
            if n_unique / n_rows < 0.5:
                df[col] = df[col].astype('category')
            continue

        # np.issubdtype raises TypeError for pandas extension dtypes such
        # as the 'category' dtype this function itself produces, so only
        # plain NumPy dtypes are considered for downcasting.
        if not isinstance(dtype, np.dtype):
            continue

        if np.issubdtype(dtype, np.integer):
            df[col] = pd.to_numeric(df[col], downcast='integer')
        elif np.issubdtype(dtype, np.floating):
            df[col] = pd.to_numeric(df[col], downcast='float')
    return df

# Demo: build a 10,000-row frame with one integer and one float column,
# then report its memory footprint (in MB) before and after reduce_mem.
df = pd.DataFrame(
    {
        'x': np.random.randint(0, 100, 10000),
        'y': np.random.rand(10000),
    }
)
# deep=True asks pandas to include the memory of referenced Python objects.
before = df.memory_usage(deep=True).sum() / 1e6
df = reduce_mem(df)
after = df.memory_usage(deep=True).sum() / 1e6
print(f'Memory: {before:.1f} MB -> {after:.1f} MB')

Use Cases

  • large dataset loading
  • memory-constrained environments
  • Pandas optimization

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.