First/Last reduction and cleanup of agg APIs (#839)

revans2 · web-flow · commit 1a2b17e02f8b · 2020-09-23T15:14:46.000-05:00
Signed-off-by: Robert (Bobby) Evans &lt;bobby@apache.org&gt;
diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -348,3 +348,51 @@ def test_count_distinct_with_nan_floats(data_gen):
 
 # TODO: Literal tests
 # TODO: First and Last tests
+
+# REDUCTIONS
+
+non_nan_all_basic_gens = [byte_gen, short_gen, int_gen, long_gen,
+        # nans and -0.0 cannot work because of nan support in min/max, -0.0 == 0.0 in cudf for distinct and
+        # https://github.com/NVIDIA/spark-rapids/issues/84 in the ordering
+        FloatGen(no_nans=True, special_cases=[]), DoubleGen(no_nans=True, special_cases=[]),
+        string_gen, boolean_gen, date_gen, timestamp_gen]
+
+
+@pytest.mark.parametrize('data_gen', non_nan_all_basic_gens, ids=idfn)
+def test_generic_reductions(data_gen):
+    assert_gpu_and_cpu_are_equal_collect(
+            # Coalesce and sort are to make sure that first and last, which are non-deterministic
+            # become deterministic
+            lambda spark : binary_op_df(spark, data_gen)\
+                    .coalesce(1)\
+                    .sortWithinPartitions('b').selectExpr(
+                'min(a)',
+                'max(a)',
+                'first(a)',
+                'last(a)',
+                'count(a)',
+                'count(1)'),
+            conf = _no_nans_float_conf)
+
+@pytest.mark.parametrize('data_gen', non_nan_all_basic_gens, ids=idfn)
+def test_distinct_count_reductions(data_gen):
+    assert_gpu_and_cpu_are_equal_collect(
+            lambda spark : binary_op_df(spark, data_gen).selectExpr(
+                'count(DISTINCT a)'))
+
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/837')
+@pytest.mark.parametrize('data_gen', [float_gen, double_gen], ids=idfn)
+def test_distinct_float_count_reductions(data_gen):
+    assert_gpu_and_cpu_are_equal_collect(
+            lambda spark : binary_op_df(spark, data_gen).selectExpr(
+                'count(DISTINCT a)'))
+
+@approximate_float
+@pytest.mark.parametrize('data_gen', numeric_gens, ids=idfn)
+def test_arithmetic_reductions(data_gen):
+    assert_gpu_and_cpu_are_equal_collect(
+            lambda spark : unary_op_df(spark, data_gen).selectExpr(
+                'sum(a)',
+                'avg(a)'),
+            conf = _no_nans_float_conf)
+
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuWindowExpression.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuWindowExpression.scala
@@ -16,7 +16,7 @@
 
 package com.nvidia.spark.rapids
 
-import ai.rapids.cudf.{DType, Table, WindowAggregate, WindowOptions}
+import ai.rapids.cudf.{Aggregation, AggregationOverWindow, DType, Table, WindowOptions}
 import com.nvidia.spark.rapids.GpuOverrides.wrapExpr
 
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
@@ -243,11 +243,11 @@ object GpuWindowExpression {
   def getRowBasedWindowFrame(columnIndex : Int,
                              aggExpression : Expression,
                              windowSpec : GpuSpecifiedWindowFrame)
-  : WindowAggregate = {
+  : AggregationOverWindow = {
 
     // FIXME: Currently, only negative or 0 values are supported.
     var lower = getBoundaryValue(windowSpec.lower)
-    if(lower > 0) {
+    if (lower > 0) {
       throw new IllegalStateException(
         s"Lower-bounds ahead of current row is not supported. Found $lower")
     }
@@ -272,32 +272,33 @@ object GpuWindowExpression {
     val windowOption = WindowOptions.builder().minPeriods(1)
       .window(lower, upper).build()
 
-    aggExpression match {
+    val agg: Aggregation = aggExpression match {
       case gpuAggregateExpression : GpuAggregateExpression =>
         gpuAggregateExpression.aggregateFunction match {
-          case GpuCount(_) => WindowAggregate.count(columnIndex, windowOption)
-          case GpuSum(_) => WindowAggregate.sum(columnIndex, windowOption)
-          case GpuMin(_) => WindowAggregate.min(columnIndex, windowOption)
-          case GpuMax(_) => WindowAggregate.max(columnIndex, windowOption)
+          case GpuCount(_) => Aggregation.count()
+          case GpuSum(_) => Aggregation.sum()
+          case GpuMin(_) => Aggregation.min()
+          case GpuMax(_) => Aggregation.max()
           case anythingElse =>
             throw new UnsupportedOperationException(
               s"Unsupported aggregation: ${anythingElse.prettyName}")
         }
       case _: GpuRowNumber =>
-        // ROW_NUMBER does not depend on input column values.
-        WindowAggregate.row_number(0, windowOption)
+        // ROW_NUMBER does not depend on input column values, but it still should be fine
+        Aggregation.rowNumber()
       case anythingElse =>
         throw new UnsupportedOperationException(
           s"Unsupported window aggregation: ${anythingElse.prettyName}")
     }
+    agg.onColumn(columnIndex).overWindow(windowOption)
   }
 
   def getRangeBasedWindowFrame(aggColumnIndex : Int,
                                timeColumnIndex : Int,
                                aggExpression : Expression,
                                windowSpec : GpuSpecifiedWindowFrame,
                                timestampIsAscending : Boolean)
-  : WindowAggregate = {
+  : AggregationOverWindow = {
 
     // FIXME: Currently, only negative or 0 values are supported.
     var lower = getBoundaryValue(windowSpec.lower)
@@ -332,12 +333,12 @@ object GpuWindowExpression {
 
     val windowOption = windowOptionBuilder.build()
 
-    aggExpression match {
+    val agg: Aggregation = aggExpression match {
       case gpuAggExpression : GpuAggregateExpression => gpuAggExpression.aggregateFunction match {
-        case GpuCount(_) => WindowAggregate.count(aggColumnIndex, windowOption)
-        case GpuSum(_) => WindowAggregate.sum(aggColumnIndex, windowOption)
-        case GpuMin(_) => WindowAggregate.min(aggColumnIndex, windowOption)
-        case GpuMax(_) => WindowAggregate.max(aggColumnIndex, windowOption)
+        case GpuCount(_) => Aggregation.count()
+        case GpuSum(_) => Aggregation.sum()
+        case GpuMin(_) => Aggregation.min()
+        case GpuMax(_) => Aggregation.max()
         case anythingElse =>
           throw new UnsupportedOperationException(
             s"Unsupported aggregation: ${anythingElse.prettyName}")
@@ -346,6 +347,7 @@ object GpuWindowExpression {
         throw new UnsupportedOperationException(
           s"Unsupported window aggregation: ${anythingElse.prettyName}")
     }
+    agg.onColumn(aggColumnIndex).overWindow(windowOption)
   }
 
   def getBoundaryValue(boundary : Expression) : Int = boundary match {
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/aggregate.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/aggregate.scala
@@ -98,13 +98,6 @@ class GpuHashAggregateMeta(
       resultExpressions
 
   override def tagPlanForGpu(): Unit = {
-    if (agg.groupingExpressions.isEmpty) {
-      // first/last reductions not supported yet
-      if (agg.aggregateExpressions.exists(e => e.aggregateFunction.isInstanceOf[First] ||
-        e.aggregateFunction.isInstanceOf[Last])) {
-        willNotWorkOnGpu("First/Last reductions are not supported on GPU")
-      }
-    }
     if (agg.resultExpressions.isEmpty) {
       willNotWorkOnGpu("result expressions is empty")
     }
@@ -192,13 +185,6 @@ class GpuSortAggregateMeta(
       resultExpressions
 
   override def tagPlanForGpu(): Unit = {
-    if (agg.groupingExpressions.isEmpty) {
-      // first/last reductions not supported yet
-      if (agg.aggregateExpressions.exists(e => e.aggregateFunction.isInstanceOf[First] ||
-        e.aggregateFunction.isInstanceOf[Last])) {
-        willNotWorkOnGpu("First/Last reductions are not supported on GPU")
-      }
-    }
     if (GpuOverrides.isAnyStringLit(agg.groupingExpressions)) {
       willNotWorkOnGpu("string literal values are not supported in a hash aggregate")
     }
@@ -842,9 +828,9 @@ case class GpuHashAggregateExec(
           val aggregates = aggModeCudfAggregates.flatMap(_._2)
           val cudfAggregates = aggModeCudfAggregates.flatMap { case (mode, aggregates) =>
             if ((mode == Partial || mode == Complete) && !merge) {
-              aggregates.map(a => a.updateAggregate)
+              aggregates.map(a => a.updateAggregate.onColumn(a.getOrdinal(a.ref)))
             } else {
-              aggregates.map(a => a.mergeAggregate)
+              aggregates.map(a => a.mergeAggregate.onColumn(a.getOrdinal(a.ref)))
             }
           }
           tbl = new cudf.Table(toAggregateCvs.map(_.getBase): _*)
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/AggregateFunctions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/AggregateFunctions.scala
@@ -17,6 +17,7 @@
 package org.apache.spark.sql.rapids
 
 import ai.rapids.cudf
+import ai.rapids.cudf.Aggregation
 import com.nvidia.spark.rapids._
 
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
@@ -111,7 +112,7 @@ case class GpuAggregateExpression(origAggregateFunction: GpuAggregateFunction,
                                   resultId: ExprId)
   extends GpuExpression with GpuUnevaluable {
 
-  val aggregateFunction = if (filter.isDefined) {
+  val aggregateFunction: GpuAggregateFunction = if (filter.isDefined) {
     WrappedAggFunction(origAggregateFunction, filter.get)
   } else {
     origAggregateFunction
@@ -170,8 +171,8 @@ abstract case class CudfAggregate(ref: Expression) extends GpuUnevaluable {
   def getOrdinal(ref: Expression): Int = ref.asInstanceOf[GpuBoundReference].ordinal
   val updateReductionAggregate: cudf.ColumnVector => cudf.Scalar
   val mergeReductionAggregate: cudf.ColumnVector => cudf.Scalar
-  val updateAggregate: cudf.Aggregate
-  val mergeAggregate: cudf.Aggregate
+  val updateAggregate: Aggregation
+  val mergeAggregate: Aggregation
 
   def dataType: DataType = ref.dataType
   def nullable: Boolean = ref.nullable
@@ -185,9 +186,8 @@ class CudfCount(ref: Expression) extends CudfAggregate(ref) {
     (col: cudf.ColumnVector) => cudf.Scalar.fromLong(col.getRowCount - col.getNullCount)
   override val mergeReductionAggregate: cudf.ColumnVector => cudf.Scalar =
     (col: cudf.ColumnVector) => col.sum
-  override lazy val updateAggregate: cudf.Aggregate =
-    cudf.Table.count(getOrdinal(ref), includeNulls)
-  override lazy val mergeAggregate: cudf.Aggregate = cudf.Table.sum(getOrdinal(ref))
+  override lazy val updateAggregate: Aggregation = Aggregation.count(includeNulls)
+  override lazy val mergeAggregate: Aggregation = Aggregation.sum()
   override def toString(): String = "CudfCount"
 }
 
@@ -196,8 +196,8 @@ class CudfSum(ref: Expression) extends CudfAggregate(ref) {
     (col: cudf.ColumnVector) => col.sum
   override val mergeReductionAggregate: cudf.ColumnVector => cudf.Scalar =
     (col: cudf.ColumnVector) => col.sum
-  override lazy val updateAggregate: cudf.Aggregate = cudf.Table.sum(getOrdinal(ref))
-  override lazy val mergeAggregate: cudf.Aggregate = cudf.Table.sum(getOrdinal(ref))
+  override lazy val updateAggregate: Aggregation = Aggregation.sum()
+  override lazy val mergeAggregate: Aggregation = Aggregation.sum()
   override def toString(): String = "CudfSum"
 }
 
@@ -206,8 +206,8 @@ class CudfMax(ref: Expression) extends CudfAggregate(ref) {
     (col: cudf.ColumnVector) => col.max
   override val mergeReductionAggregate: cudf.ColumnVector => cudf.Scalar =
     (col: cudf.ColumnVector) => col.max
-  override lazy val updateAggregate: cudf.Aggregate = cudf.Table.max(getOrdinal(ref))
-  override lazy val mergeAggregate: cudf.Aggregate = cudf.Table.max(getOrdinal(ref))
+  override lazy val updateAggregate: Aggregation = Aggregation.max()
+  override lazy val mergeAggregate: Aggregation = Aggregation.max()
   override def toString(): String = "CudfMax"
 }
 
@@ -216,48 +216,41 @@ class CudfMin(ref: Expression) extends CudfAggregate(ref) {
     (col: cudf.ColumnVector) => col.min
   override val mergeReductionAggregate: cudf.ColumnVector => cudf.Scalar =
     (col: cudf.ColumnVector) => col.min
-  override lazy val updateAggregate: cudf.Aggregate = cudf.Table.min(getOrdinal(ref))
-  override lazy val mergeAggregate: cudf.Aggregate = cudf.Table.min(getOrdinal(ref))
+  override lazy val updateAggregate: Aggregation = Aggregation.min()
+  override lazy val mergeAggregate: Aggregation = Aggregation.min()
   override def toString(): String = "CudfMin"
 }
 
 abstract class CudfFirstLastBase(ref: Expression) extends CudfAggregate(ref) {
+  val includeNulls: Boolean
+  val offset: Int
+
   override val updateReductionAggregate: cudf.ColumnVector => cudf.Scalar =
-    (col: cudf.ColumnVector) =>
-      throw new UnsupportedOperationException("first/last reduction not supported on GPU")
+    (col: cudf.ColumnVector) => col.reduce(Aggregation.nth(offset, includeNulls))
   override val mergeReductionAggregate: cudf.ColumnVector => cudf.Scalar =
-    (col: cudf.ColumnVector) =>
-      throw new UnsupportedOperationException("first/last reduction not supported on GPU")
+    (col: cudf.ColumnVector) => col.reduce(Aggregation.nth(offset, includeNulls))
+  override lazy val updateAggregate: Aggregation = Aggregation.nth(offset, includeNulls)
+  override lazy val mergeAggregate: Aggregation = Aggregation.nth(offset, includeNulls)
 }
 
 class CudfFirstIncludeNulls(ref: Expression) extends CudfFirstLastBase(ref) {
-  val includeNulls = true
-  override lazy val updateAggregate: cudf.Aggregate =
-    cudf.Table.first(getOrdinal(ref), includeNulls)
-  override lazy val mergeAggregate: cudf.Aggregate =
-    cudf.Table.first(getOrdinal(ref), includeNulls)
+  override val includeNulls: Boolean = true
+  override val offset: Int = 0
 }
 
 class CudfFirstExcludeNulls(ref: Expression) extends CudfFirstLastBase(ref) {
-  val includeNulls = false
-  override lazy val updateAggregate: cudf.Aggregate =
-    cudf.Table.first(getOrdinal(ref), includeNulls)
-  override lazy val mergeAggregate: cudf.Aggregate =
-    cudf.Table.first(getOrdinal(ref), includeNulls)
+  override val includeNulls: Boolean = false
+  override val offset: Int = 0
 }
 
 class CudfLastIncludeNulls(ref: Expression) extends CudfFirstLastBase(ref) {
-  val includeNulls = true
-  override lazy val updateAggregate: cudf.Aggregate =
-    cudf.Table.last(getOrdinal(ref), includeNulls)
-  override lazy val mergeAggregate: cudf.Aggregate = cudf.Table.last(getOrdinal(ref), includeNulls)
+  override val includeNulls: Boolean = true
+  override val offset: Int = -1
 }
 
 class CudfLastExcludeNulls(ref: Expression) extends CudfFirstLastBase(ref) {
-  val includeNulls = false
-  override lazy val updateAggregate: cudf.Aggregate =
-    cudf.Table.last(getOrdinal(ref), includeNulls)
-  override lazy val mergeAggregate: cudf.Aggregate = cudf.Table.last(getOrdinal(ref), includeNulls)
+  override val includeNulls: Boolean = false
+  override val offset: Int = -1
 }
 
 abstract class GpuDeclarativeAggregate extends GpuAggregateFunction with GpuUnevaluable {
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/HashAggregatesSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/HashAggregatesSuite.scala
@@ -90,23 +90,6 @@ class HashAggregatesSuite extends SparkQueryCompareTestSuite {
       .agg(first(col("c0"), ignoreNulls = true), last(col("c0"), ignoreNulls = true))
   }
 
-  testExpectedExceptionStartsWith("test unsorted agg with first and last no grouping",
-    classOf[IllegalArgumentException],
-    "Part of the plan is not columnar", firstDf, repart = 2) {
-    frame => frame
-      .coalesce(1)
-      .agg(first(col("c0"), ignoreNulls = true), last(col("c0"), ignoreNulls = true))
-  }
-
-  testExpectedExceptionStartsWith("test sorted agg with first and last no grouping",
-    classOf[IllegalArgumentException],
-    "Part of the plan is not columnar", firstDf, repart = 2) {
-    frame => frame
-      .coalesce(1)
-      .sort(col("c2").asc, col("c0").asc) // force deterministic use case
-      .agg(first(col("c0"), ignoreNulls = true), last(col("c0"), ignoreNulls = true))
-  }
-
   IGNORE_ORDER_testSparkResultsAreEqualWithCapture(
       "nullable aggregate with not null filter",
       firstDf,
@@ -733,20 +716,6 @@ class HashAggregatesSuite extends SparkQueryCompareTestSuite {
     frame => frame.groupBy(col("more_longs") + col("longs")).agg(min("longs"))
   }
 
-  testExpectedExceptionStartsWith("first without grouping",
-    classOf[IllegalArgumentException],
-    "Part of the plan is not columnar",
-    intCsvDf) {
-    frame => frame.agg(first("ints", false))
-  }
-
-  testExpectedExceptionStartsWith("last without grouping",
-    classOf[IllegalArgumentException],
-    "Part of the plan is not columnar",
-    intCsvDf) {
-    frame => frame.agg(first("ints", false))
-  }
-
   IGNORE_ORDER_testSparkResultsAreEqual("first ignoreNulls=false", intCsvDf) {
     frame => frame.groupBy(col("more_ints")).agg(first("ints", false))
   }