@@ -323,6 +323,13 @@ def chunked(src, size, count=None, **kw):
323323 return list (itertools .islice (chunk_iter , count ))
324324
325325
326+ def _validate_positive_int (value , name , strictly_positive = True ):
327+ value = int (value )
328+ if value < 0 or (strictly_positive and value == 0 ):
329+ raise ValueError ('expected a positive integer ' + name )
330+ return value
331+
332+
326333def chunked_iter (src , size , ** kw ):
327334 """Generates *size*-sized chunks from *src* iterable. Unless the
328335 optional *fill* keyword argument is provided, iterables not evenly
@@ -339,9 +346,7 @@ def chunked_iter(src, size, **kw):
339346 # TODO: add count kwarg?
340347 if not is_iterable (src ):
341348 raise TypeError ('expected an iterable' )
342- size = int (size )
343- if size <= 0 :
344- raise ValueError ('expected a positive integer chunk size' )
349+ size = _validate_positive_int (size , 'chunk size' )
345350 do_fill = True
346351 try :
347352 fill_val = kw .pop ('fill' )
@@ -369,6 +374,56 @@ def chunked_iter(src, size, **kw):
369374 return
370375
371376
def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):
    """Generates *chunk_size*-sized chunk ranges for an input with length
    *input_size*. Optionally, a start of the input can be set via
    *input_offset*, and an overlap between the chunks may be specified via
    *overlap_size*. Also, if *align* is set to *True*, any items with
    *i % (chunk_size-overlap_size) == 0* are always at the beginning of
    the chunk.

    Returns an iterator of (start, end) tuples, one tuple per chunk. The
    last chunk is clipped to the end of the input, and an empty input
    (*input_size* of 0) produces no chunks at all.

    Raises ValueError if *chunk_size* is not a positive integer, if any of
    the other sizes is negative, or if *overlap_size* is not smaller than
    *chunk_size*. Because this is a generator, the arguments are only
    validated once iteration starts.

    >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5))
    [(10, 15), (15, 20)]
    >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1))
    [(10, 15), (14, 19), (18, 20)]
    >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2))
    [(10, 15), (13, 18), (16, 20)]

    >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False))
    [(4, 9), (9, 14), (14, 19)]
    >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True))
    [(4, 5), (5, 10), (10, 15), (15, 19)]

    >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False))
    [(2, 7), (6, 11), (10, 15), (14, 17)]
    >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True))
    [(2, 5), (4, 9), (8, 13), (12, 17)]
    >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True))
    [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]
    """
    input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False)
    chunk_size = _validate_positive_int(chunk_size, 'chunk_size')
    input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False)
    overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False)
    if overlap_size >= chunk_size:
        # Each chunk must advance by at least one item; otherwise the step
        # below is zero or negative and the modulo / range() calls either
        # blow up or silently produce nothing.
        raise ValueError('expected overlap_size to be smaller than chunk_size')

    if input_size == 0:
        # Nothing to cover: yield no ranges, whether aligned or not.
        return

    input_stop = input_offset + input_size
    step = chunk_size - overlap_size

    if align:
        # Shorten the first chunk so that every following chunk starts on
        # a multiple of *step*. The remainder is in [0, step), so the
        # initial length is always in (overlap_size, chunk_size].
        initial_chunk_len = chunk_size - input_offset % step
        initial_stop = input_offset + initial_chunk_len
        yield (input_offset, min(initial_stop, input_stop))
        if initial_stop >= input_stop:
            return
        input_offset = initial_stop - overlap_size

    for start in range(input_offset, input_stop, step):
        stop = start + chunk_size
        yield (start, min(stop, input_stop))

        # Once a chunk reaches the end of the input, any further start
        # positions would only produce ranges already covered by it.
        if stop >= input_stop:
            return
425+
426+
372427def pairwise (src ):
373428 """Convenience function for calling :func:`windowed` on *src*, with
374429 *size* set to 2.
0 commit comments