JuliaData · bkamins · Sep 18, 2022 · Sep 16, 2022 · Sep 16, 2022 · Sep 17, 2022
diff --git a/NEWS.md b/NEWS.md
@@ -76,6 +76,11 @@
 * Make `transform!` on `SubDataFrame` faster
   ([#3070](https://github.com/JuliaData/DataFrames.jl/pull/3070))
 
+## Integration changes
+
+* Support `Tables.subset` and move `ByRow` definition to Tables.jl
+  ([#3158](https://github.com/JuliaData/DataFrames.jl/pull/3158))
+
 # DataFrames.jl v1.3.4 Patch Release Notes
 
 ## Bug fixes

diff --git a/Project.toml b/Project.toml
@@ -36,7 +36,7 @@ Reexport = "0.1, 0.2, 1"
 ShiftedArrays = "1"
 SortingAlgorithms = "0.1, 0.2, 0.3, 1"
 TableTraits = "0.4, 1"
-Tables = "1.2"
+Tables = "1.8"
 Unitful = "1"
 julia = "1"
 

diff --git a/src/DataFrames.jl b/src/DataFrames.jl
@@ -9,6 +9,7 @@ import LinearAlgebra: norm
 using Markdown
 using PrettyTables
 using Random
+using Tables: ByRow
 
 import DataAPI,
        DataAPI.allcombinations,

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
@@ -309,33 +309,6 @@ function broadcast_pair(df::AbstractDataFrame, @nospecialize(p::AbstractVecOrMat
     end
 end
 
-"""
-    ByRow
-
-A type used for selection operations to signal that the wrapped function should
-be applied to each element (row) of the selection.
-
-The wrapped function is called exactly once for each element.
-This differs from `map` and `broadcast`, which assume for some types of
-source vectors (e.g. `SparseVector`) that the wrapped function is pure,
-allowing them to call the function only once for multiple equal values.
-When using such types, for maximal performance with pure functions
-which are relatively costly, use `x -> map(f, x)` instead of `ByRow(f)`.
-
-Note that `ByRow` always collects values returned by `fun` in a vector.
-"""
-struct ByRow{T} <: Function
-    fun::T
-end
-
-# invoke the generic AbstractVector function to ensure function is called
-# exactly once for each element
-(f::ByRow)(cols::AbstractVector...) =
-    invoke(map,
-           Tuple{typeof(f.fun), ntuple(i -> AbstractVector, length(cols))...},
-           f.fun, cols...)
-(f::ByRow)(table::NamedTuple) = [f.fun(nt) for nt in Tables.namedtupleiterator(table)]
-
 # add a method to funname defined in other/utils.jl
 funname(row::ByRow) = funname(row.fun)
 

diff --git a/src/other/tables.jl b/src/other/tables.jl
@@ -103,3 +103,12 @@ IteratorInterfaceExtensions.getiterator(df::AbstractDataFrame) =
     Tables.datavaluerows(Tables.columntable(df))
 IteratorInterfaceExtensions.isiterable(x::AbstractDataFrame) = true
 TableTraits.isiterabletable(x::AbstractDataFrame) = true
+
+# Tables.jl `subset` for data frames: `view=true` asks for a view of the rows `inds`,
+# `view=false` for a copy; with `view=nothing` the result is whatever `df[inds, :]` gives.
+function Tables.subset(df::AbstractDataFrame, inds; view::Union{Bool, Nothing}=nothing)
+    # the `view` keyword shadows the `view` function, hence the qualified call
+    res = view === true ? DataFrames.view(df, inds, :) : df[inds, :]
+    # `view` may be `nothing`, so compare by identity exactly as on the line above
+    return (res isa DataFrameRow && view === false) ? copy(res) : res
+end
diff --git a/test/tables.jl b/test/tables.jl
@@ -323,4 +323,48 @@ end
     @test DataFrame === @inferred Tables.materializer(DataFrames.DataFrameColumns)
 end
 
+@testset "Tables.subset" begin
+    df = DataFrame(a=1:3, b=4:6)
+    # Colon: all rows; DataFrame by default and for view=false, SubDataFrame for view=true
+    res = Tables.subset(df, :)
+    @test res isa DataFrame
+    @test res == DataFrame(a=1:3, b=4:6)
+    res = Tables.subset(df, :, view=false)
+    @test res isa DataFrame
+    @test res == DataFrame(a=1:3, b=4:6)
+    res = Tables.subset(df, :, view=true)
+    @test res isa SubDataFrame
+    @test res == DataFrame(a=1:3, b=4:6)
+    # integer vector: rows are returned in the requested order
+    res = Tables.subset(df, [3, 1])
+    @test res isa DataFrame
+    @test res == DataFrame(a=[3, 1], b=[6, 4])
+    res = Tables.subset(df, [3, 1], view=false)
+    @test res isa DataFrame
+    @test res == DataFrame(a=[3, 1], b=[6, 4])
+    res = Tables.subset(df, [3, 1], view=true)
+    @test res isa SubDataFrame
+    @test res == DataFrame(a=[3, 1], b=[6, 4])
+    # Boolean mask: exercised for every value of `view`, not only the default
+    res = Tables.subset(df, [true, false, true])
+    @test res isa DataFrame
+    @test res == DataFrame(a=[1, 3], b=[4, 6])
+    res = Tables.subset(df, [true, false, true], view=false)
+    @test res isa DataFrame
+    @test res == DataFrame(a=[1, 3], b=[4, 6])
+    res = Tables.subset(df, [true, false, true], view=true)
+    @test res isa SubDataFrame
+    @test res == DataFrame(a=[1, 3], b=[4, 6])
+    # single integer: DataFrameRow by default and for view=true, NamedTuple for view=false
+    res = Tables.subset(df, 2)
+    @test res isa DataFrameRow
+    @test res == DataFrame(a=2, b=5)[1, :]
+    res = Tables.subset(df, 2, view=false)
+    @test res isa NamedTuple{(:a, :b), Tuple{Int, Int}}
+    @test res == (a=2, b=5)
+    res = Tables.subset(df, 2, view=true)
+    @test res isa DataFrameRow
+    @test res == DataFrame(a=2, b=5)[1, :]
+end
+
 end # module