scala · intermediate
GroupBy, Partition, and Aggregate
Group, partition, and aggregate collections: groupBy, groupMapReduce, partition, and sliding aggregations.
scala · Press ⌘/Ctrl + Shift + C to copy
/** One sale record in the sample data used by the grouping examples.
  *
  * @param product  product name; used as a grouping key when counting sales per product
  * @param category category name; used as the primary grouping key
  * @param amount   sale amount; summed per group and compared against a threshold
  * @param quarter  quarter number; combined with `category` for two-level grouping
  */
final case class Sale(product: String, category: String, amount: Double, quarter: Int)
/** One student record in the sample data used by the per-grade aggregations.
  *
  * @param name  student name
  * @param grade grade the student belongs to; used as the grouping key
  * @param score score that is averaged and maximised per grade
  */
final case class Student(name: String, grade: Int, score: Double)
/** Prints a series of grouping, partitioning and aggregation examples
  * computed over small in-memory sample lists.
  */
@main def run(): Unit =
  val sales = List(
    Sale("Widget", "Electronics", 299.99, 1),
    Sale("Gadget", "Electronics", 149.99, 1),
    Sale("Widget", "Electronics", 299.99, 2),
    Sale("Book", "Media", 19.99, 1),
    Sale("Movie", "Media", 14.99, 1),
    Sale("Book", "Media", 24.99, 2),
    Sale("Shirt", "Clothing", 39.99, 1),
    Sale("Pants", "Clothing", 59.99, 2),
    Sale("Widget", "Electronics", 279.99, 3),
    Sale("Book", "Media", 29.99, 3)
  )

  // groupBy keeps whole elements: Map[category, List[Sale]]
  val byCategory = sales.groupBy(_.category)
  println("By Category:")
  byCategory.foreach { (cat, items) =>
    println(f" $cat: ${items.size} sales, $$${items.map(_.amount).sum}%.2f")
  }

  // groupMap: group by the key, then map each grouped element
  val productsByCategory = sales.groupMap(_.category)(_.product)
  println(s"\nProducts by category: $productsByCategory")

  // groupMapReduce: group, map and reduce without building intermediate lists
  val totalByCategory = sales.groupMapReduce(_.category)(_.amount)(_ + _)
  println(s"\nTotal by category: $totalByCategory")
  val countByProduct = sales.groupMapReduce(_.product)(_ => 1)(_ + _)
  println(s"Count by product: $countByProduct")

  // partition: split into (matching, non-matching) by a predicate
  val (highValue, lowValue) = sales.partition(_.amount > 100)
  println(s"\nHigh value: ${highValue.size}, Low value: ${lowValue.size}")

  // Multi-level grouping via a tuple key
  val byCategoryAndQuarter = sales.groupBy(s => (s.category, s.quarter))
  println("\nBy Category & Quarter:")
  // Sort by the (category, quarter) key so the printed order is stable.
  byCategoryAndQuarter.toList.sortBy(_._1).foreach { (key, items) =>
    println(f" ${key._1}-Q${key._2}: $$${items.map(_.amount).sum}%.2f")
  }

  // Students example
  val students = List(
    Student("Alice", 10, 92.5),
    Student("Bob", 10, 85.0),
    Student("Carol", 11, 95.0),
    Student("Dave", 11, 78.0),
    Student("Eve", 10, 88.5),
    Student("Frank", 11, 91.0)
  )

  // Average score by grade: reduce to (sum, count) per grade, then divide.
  val avgByGrade = students
    .groupMapReduce(_.grade)(s => (s.score, 1)) {
      case ((sum1, c1), (sum2, c2)) => (sum1 + sum2, c1 + c2)
    }
    .view.mapValues((sum, count) => sum / count).toMap
  println(s"\nAvg by grade: $avgByGrade")

  // Top student per grade
  val topByGrade = students.groupBy(_.grade)
    .view.mapValues(_.maxBy(_.score)).toMap
  println(s"Top by grade: $topByGrade")

  // Sliding aggregation (moving average over windows of 3 consecutive values)
  val values = List(10.0, 20.0, 30.0, 25.0, 35.0, 40.0, 30.0, 45.0)
  val movingAvg = values.sliding(3).map(w => w.sum / w.size).toList
  println(s"\nMoving avg (window=3): ${movingAvg.map(v => f"$v%.1f")}")

  // Running total: scanLeft emits the seed first, so drop it with tail.
  val running = values.scanLeft(0.0)(_ + _).tail
  println(s"Running total: $running")

  // Percentiles
  val sorted = values.sorted

  /** Returns the element of `data` at index `p / 100 * (data.size - 1)`,
    * truncated toward zero (no interpolation between neighbours).
    *
    * This function does not sort; the call sites below pass an
    * ascending-sorted list.
    *
    * @param data non-empty list of values
    * @param p    percentile in the closed range [0, 100]
    * @throws IllegalArgumentException if `data` is empty or `p` is outside [0, 100]
    */
  def percentile(data: List[Double], p: Double): Double =
    // Fail with a clear message instead of an index-out-of-bounds error.
    require(data.nonEmpty, "percentile of an empty list is undefined")
    require(p >= 0.0 && p <= 100.0, s"p must be within [0, 100], got $p")
    val idx = (p / 100.0 * (data.size - 1)).toInt
    data(idx)

  println(f"\nP50: ${percentile(sorted, 50)}%.1f")
  println(f"P90: ${percentile(sorted, 90)}%.1f")

  // Frequency distribution: bucket scores by their tens digit.
  val scores = List(85, 90, 78, 92, 88, 76, 95, 82, 91, 87)
  val distribution = scores
    .groupBy(s => s / 10 * 10)
    .view.mapValues(_.size)
    .toList.sortBy(_._1)
  println(s"\nScore distribution:")
  distribution.foreach { (range, count) =>
    println(s" $range-${range + 9}: ${"█" * count} ($count)")
  }
Use Cases
- Data analysis and reporting
- Statistical aggregations
- Multi-dimensional data grouping
Tags
Related Snippets
Similar patterns you can reuse in the same workflow.
scala · beginner
Collections Map Filter Fold Operations
Master Scala collections: map, flatMap, filter, fold, groupBy, partition, and zip operations.
Best for: Data transformation and aggregation
#scala #collections
scala · beginner
Map and HashMap Operations
Work with Scala Maps: create, update, merge, transform, and use default values.
Best for: Configuration management
#scala #map
scala · beginner
Set Operations and Algorithms
Perform set operations: union, intersection, difference, subsets, and practical set algorithms.
Best for: Deduplication and uniqueness
#scala #set
scala · intermediate
Thread-Safe Concurrent Collections
Use concurrent collections for thread-safe access: TrieMap, concurrent queues, and synchronized wrappers.
Best for: Thread-safe service registries
#scala #concurrent