sql · intermediate

BigQuery — Partitioned and Clustered Tables

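Create BigQuery tables that use partitioning (by time column, ingestion time, or integer range) together with clustering to improve query performance and reduce the amount of data scanned, and therefore cost.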

sql
-- Build the events table from raw_events.
-- Partitioning: one partition per calendar day of event_timestamp.
-- Clustering:   rows within a partition are organized by user_id, then event_type.
CREATE TABLE `project.dataset.events`
PARTITION BY
    DATE(event_timestamp)
CLUSTER BY
    user_id,
    event_type
AS
SELECT
    src.event_id,
    src.user_id,
    src.event_type,
    src.event_timestamp,
    src.properties
FROM `project.dataset.raw_events` AS src;

-- Event count and distinct users per event type for 2024-01-01 through 2024-03-31.
-- The range filter is on the partitioning column (event_timestamp), so only
-- the matching daily partitions are scanned.
-- A half-open range [2024-01-01, 2024-04-01) is used instead of BETWEEN:
-- '2024-03-31' is coerced to midnight at the start of that day, so a closed
-- BETWEEN upper bound would silently drop almost all events from March 31.
SELECT
    event_type,
    COUNT(*) AS event_count,
    COUNT(DISTINCT user_id) AS unique_users
FROM `project.dataset.events`
WHERE event_timestamp >= '2024-01-01'
  AND event_timestamp < '2024-04-01'
  AND event_type IN ('page_view', 'purchase')
GROUP BY event_type;

-- Build the users table with integer-range partitioning on user_id.
-- Bucket boundaries run from 0 to 1,000,000 in steps of 10,000.
-- Rows are clustered by country, then signup_date.
CREATE TABLE `project.dataset.users`
PARTITION BY
    RANGE_BUCKET(user_id, GENERATE_ARRAY(0, 1000000, 10000))
CLUSTER BY
    country,
    signup_date
AS
SELECT *  -- copies every column of raw_users unchanged
FROM `project.dataset.raw_users`;

-- Empty log table using ingestion-time partitioning: the partition is chosen
-- through the _PARTITIONDATE pseudo-column rather than a declared column.
CREATE TABLE `project.dataset.logs`
(
    message   STRING,
    severity  STRING,
    timestamp TIMESTAMP
)
PARTITION BY
    _PARTITIONDATE;

-- Inspect partition metadata for the events table: the ten partitions with
-- the highest partition_id, with their row counts and logical size.
SELECT
    p.table_name,
    p.partition_id,
    p.total_rows,
    p.total_logical_bytes / (1024 * 1024) AS size_mb  -- bytes -> MiB
FROM `project.dataset.INFORMATION_SCHEMA.PARTITIONS` AS p
WHERE p.table_name = 'events'
ORDER BY p.partition_id DESC
LIMIT 10;

-- Set a 365-day partition expiration on the events table so that old
-- partitions are removed automatically.
ALTER TABLE `project.dataset.events`
SET OPTIONS (
    partition_expiration_days = 365
);

Use Cases

  • Optimizing BigQuery costs with partition pruning
  • High-performance analytics on time-series data
  • Data warehouse table design best practices

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.