Skip to content

Commit 243de3f

Browse files
authored
add chunk_ranges function to iterutils (#312)
1 parent 40a7b47 commit 243de3f

File tree

3 files changed

+75
-3
lines changed

3 files changed

+75
-3
lines changed

boltons/iterutils.py

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,13 @@ def chunked(src, size, count=None, **kw):
323323
return list(itertools.islice(chunk_iter, count))
324324

325325

326+
def _validate_positive_int(value, name, strictly_positive=True):
327+
value = int(value)
328+
if value < 0 or (strictly_positive and value == 0):
329+
raise ValueError('expected a positive integer ' + name)
330+
return value
331+
332+
326333
def chunked_iter(src, size, **kw):
327334
"""Generates *size*-sized chunks from *src* iterable. Unless the
328335
optional *fill* keyword argument is provided, iterables not evenly
@@ -339,9 +346,7 @@ def chunked_iter(src, size, **kw):
339346
# TODO: add count kwarg?
340347
if not is_iterable(src):
341348
raise TypeError('expected an iterable')
342-
size = int(size)
343-
if size <= 0:
344-
raise ValueError('expected a positive integer chunk size')
349+
size = _validate_positive_int(size, 'chunk size')
345350
do_fill = True
346351
try:
347352
fill_val = kw.pop('fill')
@@ -369,6 +374,56 @@ def chunked_iter(src, size, **kw):
369374
return
370375

371376

377+
def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):
    """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*.
    Optionally, the start of the input can be set via *input_offset*, and
    an overlap between the chunks may be specified via *overlap_size*.
    Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0*
    are always at the beginning of the chunk.

    Returns an iterator of (start, end) tuples, one tuple per chunk.
    An empty input (*input_size* of 0) produces no chunks.

    Raises :exc:`ValueError` (on first iteration, since this is a
    generator) if any argument is out of range, or if *overlap_size*
    is not smaller than *chunk_size*.

    >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5))
    [(10, 15), (15, 20)]
    >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1))
    [(10, 15), (14, 19), (18, 20)]
    >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2))
    [(10, 15), (13, 18), (16, 20)]

    >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False))
    [(4, 9), (9, 14), (14, 19)]
    >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True))
    [(4, 5), (5, 10), (10, 15), (15, 19)]

    >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False))
    [(2, 7), (6, 11), (10, 15), (14, 17)]
    >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True))
    [(2, 5), (4, 9), (8, 13), (12, 17)]
    >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True))
    [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]

    >>> list(chunk_ranges(input_offset=3, input_size=0, chunk_size=5, align=True))
    []
    """
    input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False)
    chunk_size = _validate_positive_int(chunk_size, 'chunk_size')
    input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False)
    overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False)

    # Distance between the starts of two consecutive chunks. It must be
    # positive: a zero step would make range() raise an opaque error (or
    # the modulo below divide by zero), and a negative step would
    # silently produce wrong or empty output.
    step = chunk_size - overlap_size
    if step <= 0:
        raise ValueError('expected overlap_size to be smaller than chunk_size')

    # Nothing to chunk. Without this guard the aligned path would emit a
    # degenerate (offset, offset) range while the unaligned path emits
    # nothing.
    if input_size == 0:
        return

    input_stop = input_offset + input_size

    if align:
        # Shortened first chunk that ends where the first aligned chunk
        # would end. The remainder is always < step, so this length is
        # always > overlap_size and the chunk is never redundant.
        initial_chunk_len = chunk_size - input_offset % step
        yield (input_offset, min(input_offset + initial_chunk_len, input_stop))
        if input_offset + initial_chunk_len >= input_stop:
            return
        input_offset = input_offset + initial_chunk_len - overlap_size

    for i in range(input_offset, input_stop, step):
        yield (i, min(i + chunk_size, input_stop))

        # Stop once a chunk reaches the end of the input; with an
        # overlap, range() would otherwise start one more chunk that is
        # entirely contained in the one just emitted.
        if i + chunk_size >= input_stop:
            return
425+
426+
372427
def pairwise(src):
373428
"""Convenience function for calling :func:`windowed` on *src*, with
374429
*size* set to 2.

docs/iterutils.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ present in the standard library.
1818

1919
.. autofunction:: chunked
2020
.. autofunction:: chunked_iter
21+
.. autofunction:: chunk_ranges
2122
.. autofunction:: pairwise
2223
.. autofunction:: pairwise_iter
2324
.. autofunction:: windowed

tests/test_iterutils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,22 @@ def test_chunked_bytes():
511511
assert chunked(b'123', 2) in (['12', '3'], [b'12', b'3'])
512512

513513

514+
def test_chunk_ranges():
    """Check chunk_ranges against known outputs, with and without
    overlap and alignment."""
    from boltons.iterutils import chunk_ranges

    # Each entry pairs the keyword arguments for one call with the
    # exact list of (start, end) ranges it must produce.
    cases = [
        # plain chunking, then increasing overlap
        (dict(input_offset=10, input_size=10, chunk_size=5),
         [(10, 15), (15, 20)]),
        (dict(input_offset=10, input_size=10, chunk_size=5, overlap_size=1),
         [(10, 15), (14, 19), (18, 20)]),
        (dict(input_offset=10, input_size=10, chunk_size=5, overlap_size=2),
         [(10, 15), (13, 18), (16, 20)]),

        # alignment without overlap
        (dict(input_offset=4, input_size=15, chunk_size=5, align=False),
         [(4, 9), (9, 14), (14, 19)]),
        (dict(input_offset=4, input_size=15, chunk_size=5, align=True),
         [(4, 5), (5, 10), (10, 15), (15, 19)]),

        # alignment combined with overlap
        (dict(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False),
         [(2, 7), (6, 11), (10, 15), (14, 17)]),
        (dict(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True),
         [(2, 5), (4, 9), (8, 13), (12, 17)]),
        (dict(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True),
         [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]),
        # input ends inside the shortened first aligned chunk
        (dict(input_offset=3, input_size=2, chunk_size=5, overlap_size=1, align=True),
         [(3, 5)]),
    ]

    for kwargs, expected in cases:
        assert list(chunk_ranges(**kwargs)) == expected
528+
529+
514530
def test_lstrip():
515531
from boltons.iterutils import lstrip
516532

0 commit comments

Comments
 (0)