Skip to content

Commit 23a28b1

Browse files
authored
add Iterators.partition for DataFrameRows (#3299)
1 parent e52b9a3 commit 23a28b1

3 files changed

Lines changed: 83 additions & 0 deletions

File tree

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
## New functionalities
44

5+
* Add `Iterators.partition` support for `DataFrameRows`
6+
([#3299](https://github.com/JuliaData/DataFrames.jl/pull/3299))
57
* `DataFrameRows` and `DataFrameColumns` now support
68
`nrow`, `ncol`, and `Tables.subset`
79
([#3311](https://github.com/JuliaData/DataFrames.jl/pull/3311))

src/abstractdataframe/iteration.jl

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,66 @@ Compat.hasproperty(itr::DataFrameRows, s::AbstractString) = haskey(index(parent(
9595
# Private fields are never exposed since they can conflict with column names
9696
Base.propertynames(itr::DataFrameRows, private::Bool=false) = propertynames(parent(itr))
9797

98+
"""
99+
Iterators.partition(dfr::DataFrameRows, n::Integer)
100+
101+
Iterate over `DataFrameRows` `dfr` `n` rows at a time, returning each block
102+
as a `DataFrameRows` over a view of the rows of the parent of `dfr`.
103+
104+
# Examples
105+
106+
```jldoctest
107+
julia> collect(Iterators.partition(eachrow(DataFrame(x=1:5)), 2))
108+
3-element Vector{DataFrames.DataFrameRows{SubDataFrame{DataFrame, DataFrames.Index, UnitRange{Int64}}}}:
109+
2×1 DataFrameRows
110+
Row │ x
111+
│ Int64
112+
─────┼───────
113+
1 │ 1
114+
2 │ 2
115+
2×1 DataFrameRows
116+
Row │ x
117+
│ Int64
118+
─────┼───────
119+
1 │ 3
120+
2 │ 4
121+
1×1 DataFrameRows
122+
Row │ x
123+
│ Int64
124+
─────┼───────
125+
1 │ 5
126+
```
127+
"""
128+
function Iterators.partition(dfr::DataFrameRows, n::Integer)
    # a block must hold at least one row; reject anything smaller up front
    if n < 1
        throw(ArgumentError("cannot create partitions of length $n"))
    end
    # the block size is stored as an `Int`, whatever integer type was passed
    blocksize = Int(n)
    return Iterators.PartitionIterator(dfr, blocksize)
end
132+
133+
# Declare the element type as unknown so that it is autodetected
# instead of being taken from `eltype`.
function Base.IteratorEltype(::Type{<:Iterators.PartitionIterator{<:DataFrameRows}})
    return Base.EltypeUnknown()
end
136+
137+
# The unparameterized `DataFrameRows` is returned on purpose: the element type
# is autodetected, so this method exists only to replace the generic
# `PartitionIterator` fallback and does not have to be more specific.
function Base.eltype(::Type{<:Iterators.PartitionIterator{<:DataFrameRows}})
    return DataFrameRows
end
141+
142+
# Partitions of `DataFrameRows` report a known length.
function Base.IteratorSize(::Type{<:Iterators.PartitionIterator{<:DataFrameRows}})
    return Base.HasLength()
end
144+
145+
# Number of partitions: the row count of the parent of the wrapped
# `DataFrameRows` divided by the block size, rounded up so that a shorter
# trailing block is counted as well.
function Base.length(itr::Iterators.PartitionIterator{<:DataFrameRows})
    return cld(nrow(parent(itr.c)), itr.n)
end
149+
150+
# Produce the next block of rows. `state` is the index (in the parent of the
# wrapped `DataFrameRows`) of the first row of the block to return; iteration
# ends once it moves past the last row.
function Base.iterate(itr::Iterators.PartitionIterator{<:DataFrameRows}, state::Int=1)
    parent_df = parent(itr.c)
    total_rows = nrow(parent_df)
    if state > total_rows
        return nothing
    end
    # the final block may be shorter than `itr.n`, so clamp its end
    stop = min(state + itr.n - 1, total_rows)
    block = view(parent_df, state:stop, :)
    return eachrow(block), stop + 1
end
157+
98158
# Iteration by columns
99159

100160
const DATAFRAMECOLUMNS_DOCSTR = """

test/dataframe.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2311,13 +2311,34 @@ end
23112311
@test all(v -> v isa SubDataFrame, res)
23122312
@test_throws ArgumentError Iterators.partition(df, false)
23132313
@test_throws ArgumentError Iterators.partition(df, -1)
2314+
2315+
# `Iterators.partition` applied to the rows of `df`: every block must be a
# `DataFrameRows`, and the iterator must advertise its length and eltype traits.
dfr = eachrow(df)
p = Iterators.partition(dfr, 2)
@test p isa Iterators.PartitionIterator
@test Tables.partitions(p) === p
@test eltype(p) === DataFrames.DataFrameRows
@test Base.IteratorEltype(typeof(p)) === Base.EltypeUnknown()
@test length(p) == 3
@test Base.IteratorSize(typeof(p)) === Base.HasLength()
res = collect(p)
@test res == eachrow.([DataFrame(x=1:2), DataFrame(x=3:4), DataFrame(x=5)])
@test all(v -> v isa DataFrames.DataFrameRows, res)
# invalid block sizes must be rejected by the `DataFrameRows` method itself,
# so pass `dfr` (not `df`, which would re-test the `DataFrame` method)
@test_throws ArgumentError Iterators.partition(dfr, false)
@test_throws ArgumentError Iterators.partition(dfr, -1)
23142328
end
23152329
# Partitioning an empty data frame gives an empty iterator whose collected
# result still has a `SubDataFrame` element type.
empty_df = DataFrame()
p = Iterators.partition(empty_df, 1)
@test p isa Iterators.PartitionIterator
@test Tables.partitions(p) === p
@test isempty(p)
@test length(p) == 0
@test eltype(collect(p)) <: SubDataFrame
2335+
2336+
# Partitioning the rows of an empty data frame gives an empty iterator whose
# collected result still has a `DataFrameRows` element type.
empty_rows = eachrow(DataFrame())
p = Iterators.partition(empty_rows, 1)
@test p isa Iterators.PartitionIterator
@test Tables.partitions(p) === p
@test isempty(p)
@test length(p) == 0
@test eltype(collect(p)) <: DataFrames.DataFrameRows
23212342
end
23222343

23232344
end # module

0 commit comments

Comments
 (0)