python · beginner

Pandas Null Handling Strategies

Strategies for detecting, dropping, filling, and interpolating missing values in pandas DataFrames.

python
import pandas as pd
import numpy as np

# Walkthrough of common null-handling techniques on a single DataFrame.
# NOTE: the column names used below ("category", "score", "amount", ...) are
# assumed to exist in data.csv; adjust them to match your dataset.
df = pd.read_csv("data.csv")

# Replace sentinel values with NaN FIRST. Doing this before anything else
# ensures that:
#   * the null counts below include sentinel-encoded missing values,
#   * medians/means/interpolation are not skewed by placeholders like -999,
#   * the fills below actually fill those cells (replacing afterwards would
#     re-introduce nulls that never get filled).
df = df.replace({-999: np.nan, "N/A": np.nan, "": np.nan})

# Detect nulls
print("Null counts:")
print(df.isna().sum())
print(f"\nTotal null cells: {df.isna().sum().sum()}")
print(f"Rows with any null: {df.isna().any(axis=1).sum()}")

# Drop rows/columns with nulls.
# Each call returns a new DataFrame; `df` itself is left untouched.
df_clean = df.dropna()                          # drop any row with nulls
df_thresh = df.dropna(thresh=3)                  # keep rows with >= 3 non-null values
df_subset = df.dropna(subset=["email", "name"])  # only check specific columns

# Fill with constants
df["category"] = df["category"].fillna("unknown")
df["score"] = df["score"].fillna(0)

# Fill with statistics (computed on non-null values only)
df["amount"] = df["amount"].fillna(df["amount"].median())
df["rating"] = df["rating"].fillna(df["rating"].mean())

# Forward/backward fill (time series):
# carry the last known value forward, then back-fill whatever is still
# missing at the start of the series (where there is no earlier value).
df["price"] = df["price"].ffill().bfill()

# Group-specific fill: use each department's own median salary.
# A department whose salaries are all null has a NaN median and stays null.
df["salary"] = df.groupby("department")["salary"].transform(
    lambda x: x.fillna(x.median())
)

# Interpolate numeric values
df["temperature"] = df["temperature"].interpolate(method="linear")

print(f"\nAfter cleaning: {df.isna().sum().sum()} nulls remaining")

Use Cases

  • Cleaning datasets with missing values
  • Preparing data for machine learning models
  • Handling incomplete records in ETL pipelines

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.