scala · intermediate

GroupBy, Partition, and Aggregate

Group, partition, and aggregate collections with groupBy, groupMap, groupMapReduce, and partition, plus sliding-window and running-total aggregations.

scala
/** A single sale record used by the grouping and aggregation examples.
  *
  * @param product  name of the product sold
  * @param category category label the sale is grouped under
  * @param amount   monetary amount of the sale
  * @param quarter  quarter number in which the sale occurred
  */
final case class Sale(product: String, category: String, amount: Double, quarter: Int)
/** A student record used by the per-grade aggregation examples.
  *
  * @param name  the student's name
  * @param grade grade level the student is grouped by
  * @param score numeric score that is averaged and ranked within each grade
  */
final case class Student(name: String, grade: Int, score: Double)

/** Demonstrates grouping, partitioning, and aggregation on in-memory collections.
  *
  * Builds small sample data sets and prints the result of each technique to
  * standard output: `groupBy`, `groupMap`, `groupMapReduce`, `partition`,
  * composite-key grouping, `sliding` moving averages, `scanLeft` running
  * totals, a truncating percentile lookup, and a bucketed frequency
  * distribution.
  */
@main def run(): Unit =
  val sales = List(
    Sale("Widget", "Electronics", 299.99, 1),
    Sale("Gadget", "Electronics", 149.99, 1),
    Sale("Widget", "Electronics", 299.99, 2),
    Sale("Book", "Media", 19.99, 1),
    Sale("Movie", "Media", 14.99, 1),
    Sale("Book", "Media", 24.99, 2),
    Sale("Shirt", "Clothing", 39.99, 1),
    Sale("Pants", "Clothing", 59.99, 2),
    Sale("Widget", "Electronics", 279.99, 3),
    Sale("Book", "Media", 29.99, 3)
  )

  // groupBy: Map[K, List[V]] -- keeps the full records under each key
  val byCategory = sales.groupBy(_.category)
  println("By Category:")
  byCategory.foreach { (cat, items) =>
    println(f"  $cat: ${items.size} sales, $$${items.map(_.amount).sum}%.2f")
  }

  // groupMap: group then map values
  val productsByCategory = sales.groupMap(_.category)(_.product)
  println(s"\nProducts by category: $productsByCategory")

  // groupMapReduce: group, map, reduce in one pass
  val totalByCategory = sales.groupMapReduce(_.category)(_.amount)(_ + _)
  println(s"\nTotal by category: $totalByCategory")

  // Counting is groupMapReduce with every element mapped to 1 and summed
  val countByProduct = sales.groupMapReduce(_.product)(_ => 1)(_ + _)
  println(s"Count by product: $countByProduct")

  // Partition: split into two (elements matching the predicate, then the rest)
  val (highValue, lowValue) = sales.partition(_.amount > 100)
  println(s"\nHigh value: ${highValue.size}, Low value: ${lowValue.size}")

  // Multi-level grouping: a tuple key groups on both fields at once
  val byCategoryAndQuarter = sales.groupBy(s => (s.category, s.quarter))
  println("\nBy Category & Quarter:")
  // Sorted by (category, quarter) so the report lines come out in a stable order
  byCategoryAndQuarter.toList.sortBy(_._1).foreach { (key, items) =>
    println(f"  ${key._1}-Q${key._2}: $$${items.map(_.amount).sum}%.2f")
  }

  // Students example
  val students = List(
    Student("Alice", 10, 92.5),
    Student("Bob", 10, 85.0),
    Student("Carol", 11, 95.0),
    Student("Dave", 11, 78.0),
    Student("Eve", 10, 88.5),
    Student("Frank", 11, 91.0)
  )

  // Average score by grade: reduce (sum, count) pairs per grade, then divide
  val avgByGrade = students
    .groupMapReduce(_.grade)(s => (s.score, 1)) {
      case ((sum1, c1), (sum2, c2)) => (sum1 + sum2, c1 + c2)
    }
    .view.mapValues((sum, count) => sum / count).toMap
  println(s"\nAvg by grade: $avgByGrade")

  // Top student per grade
  val topByGrade = students.groupBy(_.grade)
    .view.mapValues(_.maxBy(_.score)).toMap
  println(s"Top by grade: $topByGrade")

  // Sliding aggregation (moving average)
  val values = List(10.0, 20.0, 30.0, 25.0, 35.0, 40.0, 30.0, 45.0)
  val movingAvg = values.sliding(3).map(w => w.sum / w.size).toList
  println(s"\nMoving avg (window=3): ${movingAvg.map(v => f"$v%.1f")}")

  // Running total: scanLeft emits the seed first, so drop it with tail
  val running = values.scanLeft(0.0)(_ + _).tail
  println(s"Running total: $running")

  // Percentiles
  val sorted = values.sorted

  /** Returns the element of `data` located `p` percent of the way through it.
    *
    * The fractional position `p / 100 * (size - 1)` is truncated toward zero,
    * so no interpolation between neighbouring elements is performed.
    *
    * @param data values already sorted in ascending order; must be non-empty
    * @param p    percentile in the inclusive range 0 to 100
    * @throws IllegalArgumentException if `data` is empty or `p` is outside 0..100
    */
  def percentile(data: List[Double], p: Double): Double =
    // Fail with a clear message instead of an opaque index error further down
    require(data.nonEmpty, "percentile requires at least one value")
    require(p >= 0.0 && p <= 100.0, s"percentile must be between 0 and 100, got $p")
    val idx = (p / 100.0 * (data.size - 1)).toInt
    data(idx)

  println(f"\nP50: ${percentile(sorted, 50)}%.1f")
  println(f"P90: ${percentile(sorted, 90)}%.1f")

  // Frequency distribution: integer division buckets each score by its tens digit
  val scores = List(85, 90, 78, 92, 88, 76, 95, 82, 91, 87)
  val distribution = scores
    .groupMapReduce(s => s / 10 * 10)(_ => 1)(_ + _)
    .toList.sortBy(_._1)
  println("\nScore distribution:")
  distribution.foreach { (range, count) =>
    println(s"  $range-${range + 9}: ${"█" * count} ($count)")
  }

Use Cases

  • Data analysis and reporting
  • Statistical aggregations
  • Multi-dimensional data grouping

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.