def get_smallest_cron_interval(
    cron_string: str,
    execution_timezone: Optional[str] = None,
) -> datetime.timedelta:
    """Find the smallest interval between cron ticks for a given cron schedule.

    The expanded cron fields are analyzed directly, so common patterns are answered without
    generating any ticks. This is cheaper and more deterministic than the sampling-based
    ``_get_smallest_cron_interval_with_sampling()``.

    Patterns answered analytically:
      - wildcard minutes -> 1 minute
      - several minute values with every other field wildcarded -> smallest minute gap
      - one minute value, wildcard hours, no day/month constraints -> 1 hour
      - one minute value, several hour values, no day/month constraints -> smallest hour gap
      - one minute and one hour value, no day/month constraints -> 1 day
      - one minute and one hour value, one day of week -> 7 days
      - one minute and one hour value, several days of week -> smallest day-of-week gap

    Everything else (nth-weekday-of-month expressions, schedules constraining both day-of-month
    and day-of-week, day-of-month or month constraints, and any other combination not listed
    above) is delegated to ``_get_smallest_cron_interval_with_sampling()``.

    Args:
        cron_string: A cron string
        execution_timezone: Timezone to use for cron evaluation (only used for fallback)

    Returns:
        The smallest timedelta between any two consecutive cron ticks

    Raises:
        CheckError: If the cron string is invalid or not recognized by Dagster
    """
    check.invariant(
        is_valid_cron_string(cron_string), desc=f"{cron_string} must be a valid cron string"
    )

    def sample() -> datetime.timedelta:
        """Delegate to the sampling-based implementation."""
        return _get_smallest_cron_interval_with_sampling(cron_string, execution_timezone)

    def is_wildcard(field: list) -> bool:
        """Return True when an expanded cron field is the bare ``*``."""
        return len(field) == 1 and field[0] == "*"

    def smallest_gap(values: list[int], wrap_at: int) -> Optional[int]:
        """Return the smallest cyclic gap between distinct values, or None if fewer than two.

        ``wrap_at`` is the size of the cycle (60 for minutes, 24 for hours, 7 for weekdays); the
        gap from the largest value around to the smallest one is included.
        """
        # Collapse repeats so a duplicated value can never be reported as a zero-length gap.
        distinct = sorted(set(values))
        if len(distinct) < 2:
            return None
        gaps = [later - earlier for earlier, later in zip(distinct, distinct[1:])]
        gaps.append(wrap_at - distinct[-1] + distinct[0])
        return min(gaps)

    # cron_parts is [minutes, hours, day_of_month, month, day_of_week]; each entry is a list of
    # values or the single element "*".
    cron_parts, nth_weekday_of_month, *_ = CroniterShim.expand(cron_string)

    # Expressions such as "first Monday of the month" are not analyzed here.
    if nth_weekday_of_month:
        return sample()

    minutes, hours, days_of_month, months, days_of_week = cron_parts

    any_day_of_month = is_wildcard(days_of_month)
    any_day_of_week = is_wildcard(days_of_week)

    # When both day fields are constrained they combine with OR logic; leave that to sampling.
    if not any_day_of_month and not any_day_of_week:
        return sample()

    # A wildcard minute field ticks on consecutive minutes whenever the schedule is active.
    if is_wildcard(minutes):
        return datetime.timedelta(minutes=1)

    any_hour = is_wildcard(hours)
    no_calendar_constraint = any_day_of_month and is_wildcard(months) and any_day_of_week

    # NOTE(review): the analytic answers below never consult execution_timezone, so they do not
    # reflect shortened wall-clock gaps around DST transitions - confirm this is acceptable.

    minute_values = [m for m in minutes if m != "*"]

    # Several minute values: the answer is the minute gap only if nothing else is constrained.
    if len(minute_values) > 1:
        minute_gap = smallest_gap(minute_values, wrap_at=60)
        if minute_gap is not None:
            if any_hour and no_calendar_constraint:
                return datetime.timedelta(minutes=minute_gap)
            return sample()

    if len(minute_values) != 1:
        return sample()

    # From here on exactly one minute value is specified, so the hour field decides.
    if any_hour:
        return datetime.timedelta(hours=1) if no_calendar_constraint else sample()

    hour_values = [h for h in hours if h != "*"]

    if len(hour_values) > 1:
        hour_gap = smallest_gap(hour_values, wrap_at=24)
        if hour_gap is not None and no_calendar_constraint:
            return datetime.timedelta(hours=hour_gap)
        return sample()

    if len(hour_values) == 1:
        # One tick per matching day.
        if no_calendar_constraint:
            return datetime.timedelta(days=1)

        # Only the day-of-week field is constrained.
        if any_day_of_month and is_wildcard(months):
            day_of_week_values = [d for d in days_of_week if d != "*"]
            if len(day_of_week_values) == 1:
                return datetime.timedelta(days=7)
            weekday_gap = smallest_gap(day_of_week_values, wrap_at=7)
            if weekday_gap is not None:
                return datetime.timedelta(days=weekday_gap)

    # Day-of-month or month constraints (month lengths vary) and anything else unhandled.
    return sample()
1014+
1015+
1016+ def _get_smallest_cron_interval_with_sampling (
1017+ cron_string : str ,
1018+ execution_timezone : Optional [str ] = None ,
1019+ ) -> datetime .timedelta :
1020+ """Find the smallest interval between cron ticks for a given cron schedule,
1021+ using a sampling-based approach to find the minimum interval by generating
8721022 consecutive cron ticks and measuring the gaps between them. Sampling stops
8731023 early if either of these limits is reached:
8741024 - A maximum of 1000 generated ticks
875- - A time horizon of 20 years past the sampling start
1025+ - A time horizon of 20 years past the sampling start.
1026+
1027+ This is a fallback for complex patterns that cannot be analyzed deterministically,
1028+ and shouldn't be used for common patterns.
8761029
8771030 Args:
8781031 cron_string: A cron string
@@ -937,6 +1090,12 @@ def get_smallest_cron_interval(
9371090 # We've encountered a genuine zero interval (which shouldn't happen)
9381091 raise Exception ("Encountered a genuine zero interval" )
9391092
1093+ if interval < datetime .timedelta (seconds = 0 ):
1094+ # This happens when the sampling encounters a daylight savings transition where the clocks roll back
1095+ # Just skip this interval and continue sampling
1096+ prev_tick = current_tick
1097+ continue
1098+
9401099 # Update minimum interval
9411100 if min_interval is None or interval < min_interval :
9421101 min_interval = interval
0 commit comments