Skip to content

Commit 1b0ae4e

Browse files
committed
refactor and tests for interval helper method
1 parent 2284f7b commit 1b0ae4e

File tree

2 files changed

+174
-30
lines changed

2 files changed

+174
-30
lines changed

python_modules/dagster/dagster/_utils/schedules.py

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -862,17 +862,35 @@ def get_next_cron_tick(
862862
return next(cron_iter)
863863

864864

865+
def _get_smallest_gap(values: list[int], wrap_at: Optional[int] = None) -> Optional[int]:
866+
"""Get the smallest gap between consecutive values in a sorted list.
867+
868+
Args:
869+
values: List of integer values
870+
wrap_at: If provided, also considers wrap-around gap (e.g., 60 for minutes)
871+
"""
872+
if len(values) < 2:
873+
return None
874+
sorted_values = sorted(values)
875+
876+
# Calculate gaps between consecutive values
877+
gaps = [sorted_values[i + 1] - sorted_values[i] for i in range(len(sorted_values) - 1)]
878+
879+
# If wrap_at is provided, also consider the wrap-around gap
880+
if wrap_at is not None:
881+
wrap_gap = (wrap_at - sorted_values[-1]) + sorted_values[0]
882+
gaps.append(wrap_gap)
883+
884+
return min(gaps)
885+
886+
865887
def get_smallest_cron_interval(
866888
cron_string: str,
867889
execution_timezone: Optional[str] = None,
868890
) -> datetime.timedelta:
869891
"""Find the smallest interval between cron ticks for a given cron schedule using deterministic
870892
analysis of the cron pattern.
871893
872-
This function parses the cron string and algebraically determines the minimum interval without
873-
sampling. This is more efficient and deterministic than get_smallest_cron_interval() for most
874-
common patterns.
875-
876894
For complex patterns that cannot be analyzed deterministically (e.g., patterns with both
877895
day-of-month AND day-of-week constraints, or irregular intervals), this falls back to the
878896
sampling-based approach.
@@ -892,7 +910,6 @@ def get_smallest_cron_interval(
892910
)
893911

894912
# Parse the cron string into its components: [minutes, hours, day_of_month, month, day_of_week]
895-
# Each component is a list of int or '*'
896913
cron_parts, nth_weekday_of_month, *_ = CroniterShim.expand(cron_string)
897914

898915
# If nth_weekday_of_month is used (e.g., "first Monday of the month"), fall back to sampling
@@ -901,28 +918,6 @@ def get_smallest_cron_interval(
901918

902919
minutes, hours, days_of_month, months, days_of_week = cron_parts
903920

904-
# Helper function to get smallest gap in a sorted list of integers
905-
def get_smallest_gap(values: list[int], wrap_at: Optional[int] = None) -> Optional[int]:
906-
"""Get the smallest gap between consecutive values in a sorted list.
907-
908-
Args:
909-
values: List of integer values
910-
wrap_at: If provided, also considers wrap-around gap (e.g., 60 for minutes)
911-
"""
912-
if len(values) < 2:
913-
return None
914-
sorted_values = sorted(values)
915-
916-
# Calculate gaps between consecutive values
917-
gaps = [sorted_values[i + 1] - sorted_values[i] for i in range(len(sorted_values) - 1)]
918-
919-
# If wrap_at is provided, also consider the wrap-around gap
920-
if wrap_at is not None:
921-
wrap_gap = (wrap_at - sorted_values[-1]) + sorted_values[0]
922-
gaps.append(wrap_gap)
923-
924-
return min(gaps)
925-
926921
# Determine if each field is constrained or wildcarded
927922
minutes_is_wildcard = len(minutes) == 1 and minutes[0] == "*"
928923
hours_is_wildcard = len(hours) == 1 and hours[0] == "*"
@@ -948,7 +943,7 @@ def get_smallest_gap(values: list[int], wrap_at: Optional[int] = None) -> Option
948943
# Case 2: Multiple minute values specified (e.g., "0,15,30,45")
949944
# The smallest interval is the minimum gap between minute values
950945
if len(minute_values) > 1:
951-
min_minute_gap = get_smallest_gap(minute_values, wrap_at=60)
946+
min_minute_gap = _get_smallest_gap(minute_values, wrap_at=60)
952947
if min_minute_gap is not None:
953948
# If hours/days/months/weekdays are all wildcarded, this is the answer
954949
if (
@@ -975,7 +970,7 @@ def get_smallest_gap(values: list[int], wrap_at: Optional[int] = None) -> Option
975970

976971
# Multiple hour values specified
977972
if len(hour_values) > 1:
978-
min_hour_gap = get_smallest_gap(hour_values, wrap_at=24)
973+
min_hour_gap = _get_smallest_gap(hour_values, wrap_at=24)
979974
if min_hour_gap is not None:
980975
# If days/months/weekdays are all wildcarded, the interval is based on hours
981976
if days_of_month_is_wildcard and months_is_wildcard and days_of_week_is_wildcard:
@@ -995,7 +990,7 @@ def get_smallest_gap(values: list[int], wrap_at: Optional[int] = None) -> Option
995990

996991
# Multiple days of week (e.g., Mon, Wed, Fri)
997992
if days_of_month_is_wildcard and months_is_wildcard and len(day_of_week_values) > 1:
998-
min_dow_gap = get_smallest_gap(day_of_week_values, wrap_at=7)
993+
min_dow_gap = _get_smallest_gap(day_of_week_values, wrap_at=7)
999994
if min_dow_gap is not None:
1000995
return datetime.timedelta(days=min_dow_gap)
1001996

python_modules/dagster/dagster_tests/scheduler_tests/test_get_smallest_cron_interval.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,155 @@ def test_comparison_with_sampling_leap_year():
509509
assert deterministic.days >= 365
510510

511511

512+
# ==============================================================================
513+
# Test 12: _get_smallest_gap helper function tests
514+
# ==============================================================================
515+
516+
517+
def test_get_smallest_gap_two_values():
    """Check _get_smallest_gap on two-element inputs without wrap-around."""
    from dagster._utils.schedules import _get_smallest_gap

    # Each entry is (input values, expected smallest gap)
    cases = [
        ([5, 10], 5),  # already ascending
        ([10, 5], 5),  # descending input is sorted internally
        ([0, 50], 50),  # wide gap
    ]
    for values, expected in cases:
        assert _get_smallest_gap(values) == expected
529+
530+
531+
def test_get_smallest_gap_multiple_values():
    """Check _get_smallest_gap on inputs with more than two values."""
    from dagster._utils.schedules import _get_smallest_gap

    # Each entry is (input values, expected smallest gap)
    cases = [
        ([0, 15, 30], 15),  # even spacing: gaps 15, 15
        ([0, 5, 15], 5),  # uneven spacing: gaps 5, 10
        ([0, 10, 20, 25], 5),  # smallest gap is the last one: gaps 10, 10, 5
        ([30, 0, 15], 15),  # unsorted input, sorted to [0, 15, 30]
    ]
    for values, expected in cases:
        assert _get_smallest_gap(values) == expected
546+
547+
548+
def test_get_smallest_gap_with_wrap_around():
    """Check _get_smallest_gap when the wrap-around gap is taken into account."""
    from dagster._utils.schedules import _get_smallest_gap

    # Each entry is (input values, wrap_at, expected smallest gap)
    cases = [
        # Minutes (wrap_at=60)
        ([50, 5, 20], 60, 15),  # sorted [5, 20, 50]: gaps 15, 30, wrap (60-50)+5=15
        ([55, 5], 60, 10),  # sorted [5, 55]: gap 50, wrap (60-55)+5=10
        ([0, 59], 60, 1),  # sorted [0, 59]: gap 59, wrap (60-59)+0=1
        # Hours (wrap_at=24)
        ([22, 2], 24, 4),  # sorted [2, 22]: gap 20, wrap (24-22)+2=4
        ([20, 4, 12], 24, 8),  # sorted [4, 12, 20]: gaps 8, 8, wrap (24-20)+4=8
        # Days of week (wrap_at=7)
        ([6, 1], 7, 2),  # Saturday and Monday: gap 5, wrap (7-6)+1=2
        ([0, 5], 7, 2),  # Sunday and Friday: gap 5, wrap (7-5)+0=2
        ([4, 0], 7, 3),  # Thursday and Sunday: gap 4, wrap (7-4)+0=3
    ]
    for values, boundary, expected in cases:
        assert _get_smallest_gap(values, wrap_at=boundary) == expected
597+
598+
599+
def test_get_smallest_gap_edge_cases():
    """Check _get_smallest_gap on degenerate and boundary inputs."""
    from dagster._utils.schedules import _get_smallest_gap

    # Fewer than two values means there is no gap to measure
    for too_short in ([5], []):
        assert _get_smallest_gap(too_short) is None

    # Each entry is (input values, expected smallest gap)
    cases = [
        ([0, 1, 2, 3], 1),  # consecutive integers: every gap is 1
        ([5, 5], 0),  # duplicate values: gap is 0
    ]
    for values, expected in cases:
        assert _get_smallest_gap(values) == expected
614+
615+
616+
def test_get_smallest_gap_wrap_around_equal_to_non_wrap():
    """Check inputs whose wrap-around gap ties with the in-range gap."""
    from dagster._utils.schedules import _get_smallest_gap

    # Each entry is (input values, wrap_at, expected smallest gap)
    cases = [
        ([0, 30], 60, 30),  # gap 30-0=30, wrap (60-30)+0=30
        ([0, 12], 24, 12),  # gap 12-0=12, wrap (24-12)+0=12
    ]
    for values, boundary, expected in cases:
        assert _get_smallest_gap(values, wrap_at=boundary) == expected
629+
630+
631+
def test_get_smallest_gap_real_world_cron_scenarios():
    """Check _get_smallest_gap on value sets taken from common cron schedules."""
    from dagster._utils.schedules import _get_smallest_gap

    # Each entry is (input values, wrap_at, expected smallest gap).
    # A wrap_at of None is the parameter's default, i.e. no wrap-around gap is considered.
    cases = [
        ([0, 15, 30, 45], 60, 15),  # every 15 minutes
        (list(range(0, 60, 5)), 60, 5),  # every 5 minutes: 0, 5, ..., 55
        (list(range(9, 18)), None, 1),  # business hours, 9 AM to 5 PM
        ([0, 12], 24, 12),  # twice daily: midnight and noon
        ([1, 2, 3, 4, 5], 7, 1),  # weekdays: gaps all 1, Fri->Mon wrap (7-5)+1=3
        ([1, 3, 5], 7, 2),  # Mon, Wed, Fri: gaps 2, 2, wrap (7-5)+1=3
        ([0, 6], 7, 1),  # weekend, Sun(0) and Sat(6): gap 6, wrap (7-6)+0=1
    ]
    for values, boundary, expected in cases:
        assert _get_smallest_gap(values, wrap_at=boundary) == expected
659+
660+
512661
def test_comparison_with_sampling_complex_patterns():
513662
"""Test complex patterns match between methods."""
514663
complex_patterns = [

0 commit comments

Comments
 (0)