sqladvanced

Slowly Changing Dimension Type 2 in SQL

Implement SCD Type 2 to track historical changes in dimension tables with effective date ranges.

sql
-- SCD Type 2 dimension table
CREATE TABLE dim_customer (
    surrogate_key SERIAL PRIMARY KEY,
    customer_id INT NOT NULL,
    name TEXT NOT NULL,
    email TEXT,
    region TEXT,
    effective_from DATE NOT NULL,
    effective_to DATE DEFAULT '9999-12-31',
    is_current BOOLEAN DEFAULT TRUE
);

CREATE INDEX idx_dim_customer_current ON dim_customer (customer_id) WHERE is_current;

-- Merge procedure: expire old row, insert new row
WITH changes AS (
    SELECT s.customer_id, s.name, s.email, s.region
    FROM staging.customers s
    JOIN dim_customer d ON s.customer_id = d.customer_id AND d.is_current
    WHERE s.name != d.name OR s.email != d.email OR s.region != d.region
)
-- Step 1: Expire current rows
UPDATE dim_customer d
SET effective_to = CURRENT_DATE - 1, is_current = FALSE
FROM changes c
WHERE d.customer_id = c.customer_id AND d.is_current;

-- Step 2: Insert new versions
INSERT INTO dim_customer (customer_id, name, email, region, effective_from)
SELECT customer_id, name, email, region, CURRENT_DATE
FROM changes;

-- Step 3: Insert brand new customers
INSERT INTO dim_customer (customer_id, name, email, region, effective_from)
SELECT s.customer_id, s.name, s.email, s.region, CURRENT_DATE
FROM staging.customers s
LEFT JOIN dim_customer d ON s.customer_id = d.customer_id
WHERE d.customer_id IS NULL;

-- Query: get customer state at a specific date
SELECT * FROM dim_customer
WHERE customer_id = 123
  AND '2024-06-15' BETWEEN effective_from AND effective_to;

Use Cases

  • Tracking customer attribute changes over time
  • Data warehouse dimension management
  • Historical reporting and point-in-time queries

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.