11"""
22Representation of a regular expression
33"""
4- from typing import Iterable
5-
6- from pyformlang import finite_automaton
7- # pylint: disable=cyclic-import
8- import pyformlang .regular_expression .regex_objects
9- from pyformlang import cfg
10- from pyformlang .finite_automaton import State
11- # pylint: disable=cyclic-import
4+
5+ from typing import List , Iterable , Tuple , Any
6+
7+ from pyformlang .finite_automaton import Epsilon as FAEpsilon
8+ from pyformlang .finite_automaton import EpsilonNFA , State , Symbol
9+ from pyformlang .cfg .cfg import CFG , Production
10+ from pyformlang .cfg .utils import to_variable
1211from pyformlang .regular_expression .regex_reader import RegexReader
13- from pyformlang import regular_expression
12+ from pyformlang .regular_expression .python_regex import PythonRegex
13+ from pyformlang .regular_expression .regex_objects import \
14+ Epsilon as RegexEpsilon , Empty , Concatenation , Union , KleeneStar
1415
1516
1617class Regex (RegexReader ):
@@ -85,16 +86,11 @@ class Regex(RegexReader):
8586
8687 """
8788
88- def __init__ (self , regex ):
89- self .head = None
90- self .sons = None
89+ def __init__ (self , regex : str ) -> None :
9190 super ().__init__ (regex )
91+ self .sons : List [Regex ] = []
9292 self ._counter = 0
93- self ._initialize_enfa ()
94- self ._enfa = None
95-
96- def _initialize_enfa (self ):
97- self ._enfa = finite_automaton .EpsilonNFA ()
93+ self ._enfa = EpsilonNFA ()
9894
9995 def get_number_symbols (self ) -> int :
10096 """ Gives the number of symbols in the regex
@@ -139,7 +135,7 @@ def get_number_operators(self) -> int:
139135 return 1 + sum (son .get_number_operators () for son in self .sons )
140136 return 0
141137
142- def to_epsilon_nfa (self ):
138+ def to_epsilon_nfa (self ) -> EpsilonNFA :
143139 """ Transforms the regular expression into an epsilon NFA
144140
145141 Returns
@@ -154,28 +150,28 @@ def to_epsilon_nfa(self):
154150 >>> regex.to_epsilon_nfa()
155151
156152 """
157- self ._initialize_enfa ()
153+ self ._enfa = EpsilonNFA ()
158154 s_initial = self ._set_and_get_initial_state_in_enfa ()
159155 s_final = self ._set_and_get_final_state_in_enfa ()
160156 self ._process_to_enfa (s_initial , s_final )
161157 return self ._enfa
162158
163- def _set_and_get_final_state_in_enfa (self ):
159+ def _set_and_get_final_state_in_enfa (self ) -> State :
164160 s_final = self ._get_next_state_enfa ()
165161 self ._enfa .add_final_state (s_final )
166162 return s_final
167163
168- def _get_next_state_enfa (self ):
169- s_final = finite_automaton . State (self ._counter )
164+ def _get_next_state_enfa (self ) -> State :
165+ s_final = State (self ._counter )
170166 self ._counter += 1
171167 return s_final
172168
173- def _set_and_get_initial_state_in_enfa (self ):
169+ def _set_and_get_initial_state_in_enfa (self ) -> State :
174170 s_initial = self ._get_next_state_enfa ()
175171 self ._enfa .add_start_state (s_initial )
176172 return s_initial
177173
178- def _process_to_enfa (self , s_from : State , s_to : State ):
174+ def _process_to_enfa (self , s_from : State , s_to : State ) -> None :
179175 """ Internal function to add a regex to a given epsilon NFA
180176
181177 Parameters
@@ -190,29 +186,24 @@ def _process_to_enfa(self, s_from: State, s_to: State):
190186 else :
191187 self ._process_to_enfa_when_no_son (s_from , s_to )
192188
193- def _process_to_enfa_when_no_son (self , s_from , s_to ):
194- if isinstance (self .head ,
195- pyformlang .regular_expression .regex_objects .Epsilon ):
189+ def _process_to_enfa_when_no_son (self , s_from : State , s_to : State ) -> None :
190+ if isinstance (self .head , RegexEpsilon ):
196191 self ._add_epsilon_transition_in_enfa_between (s_from , s_to )
197- elif not isinstance (self .head ,
198- pyformlang .regular_expression .regex_objects .Empty ):
199- symbol = finite_automaton .Symbol (self .head .value )
192+ elif not isinstance (self .head , Empty ):
193+ symbol = Symbol (self .head .value )
200194 self ._enfa .add_transition (s_from , symbol , s_to )
201195
202- def _process_to_enfa_when_sons (self , s_from , s_to ) :
196+ def _process_to_enfa_when_sons (self , s_from : State , s_to : State ) -> None :
203197 if isinstance (
204- self .head ,
205- pyformlang .regular_expression .regex_objects .Concatenation ):
198+ self .head , Concatenation ):
206199 self ._process_to_enfa_concatenation (s_from , s_to )
207- elif isinstance (self .head ,
208- pyformlang .regular_expression .regex_objects .Union ):
200+ elif isinstance (self .head , Union ):
209201 self ._process_to_enfa_union (s_from , s_to )
210202 elif isinstance (
211- self .head ,
212- pyformlang .regular_expression .regex_objects .KleeneStar ):
203+ self .head , KleeneStar ):
213204 self ._process_to_enfa_kleene_star (s_from , s_to )
214205
215- def _process_to_enfa_kleene_star (self , s_from , s_to ) :
206+ def _process_to_enfa_kleene_star (self , s_from : State , s_to : State ) -> None :
216207 # pylint: disable=protected-access
217208 state_first = self ._get_next_state_enfa ()
218209 state_second = self ._get_next_state_enfa ()
@@ -222,30 +213,40 @@ def _process_to_enfa_kleene_star(self, s_from, s_to):
222213 self ._add_epsilon_transition_in_enfa_between (state_second , s_to )
223214 self ._process_to_enfa_son (state_first , state_second , 0 )
224215
225- def _process_to_enfa_union (self , s_from , s_to ) :
216+ def _process_to_enfa_union (self , s_from : State , s_to : State ) -> None :
226217 son_number = 0
227218 self ._create_union_branch_in_enfa (s_from , s_to , son_number )
228219 son_number = 1
229220 self ._create_union_branch_in_enfa (s_from , s_to , son_number )
230221
231- def _create_union_branch_in_enfa (self , s_from , s_to , son_number ):
222+ def _create_union_branch_in_enfa (self ,
223+ s_from : State ,
224+ s_to : State ,
225+ son_number : int ) -> None :
232226 state0 = self ._get_next_state_enfa ()
233227 state2 = self ._get_next_state_enfa ()
234228 self ._add_epsilon_transition_in_enfa_between (s_from , state0 )
235229 self ._add_epsilon_transition_in_enfa_between (state2 , s_to )
236230 self ._process_to_enfa_son (state0 , state2 , son_number )
237231
238- def _process_to_enfa_concatenation (self , s_from , s_to ):
232+ def _process_to_enfa_concatenation (self ,
233+ s_from : State ,
234+ s_to : State ) -> None :
239235 state0 = self ._get_next_state_enfa ()
240236 state1 = self ._get_next_state_enfa ()
241237 self ._add_epsilon_transition_in_enfa_between (state0 , state1 )
242238 self ._process_to_enfa_son (s_from , state0 , 0 )
243239 self ._process_to_enfa_son (state1 , s_to , 1 )
244240
245- def _add_epsilon_transition_in_enfa_between (self , state0 , state1 ):
246- self ._enfa .add_transition (state0 , finite_automaton .Epsilon (), state1 )
241+ def _add_epsilon_transition_in_enfa_between (self ,
242+ state0 : State ,
243+ state1 : State ) -> None :
244+ self ._enfa .add_transition (state0 , FAEpsilon (), state1 )
247245
248- def _process_to_enfa_son (self , s_from , s_to , index_son ):
246+ def _process_to_enfa_son (self ,
247+ s_from : State ,
248+ s_to : State ,
249+ index_son : int ) -> None :
249250 # pylint: disable=protected-access
250251 self .sons [index_son ]._counter = self ._counter
251252 self .sons [index_son ]._enfa = self ._enfa
@@ -280,7 +281,7 @@ def get_tree_str(self, depth: int = 0) -> str:
280281 temp += son .get_tree_str (depth + 1 )
281282 return temp
282283
283- def to_cfg (self , starting_symbol = "S" ) -> " CFG" :
284+ def to_cfg (self , starting_symbol : str = "S" ) -> CFG :
284285 """
285286 Turns the regex into a context-free grammar
286287
@@ -304,11 +305,12 @@ def to_cfg(self, starting_symbol="S") -> "CFG":
304305
305306 """
306307 productions , _ = self ._get_production (starting_symbol )
307- cfg_res = cfg . CFG (start_symbol = cfg . utils . to_variable (starting_symbol ),
308+ cfg_res = CFG (start_symbol = to_variable (starting_symbol ),
308309 productions = set (productions ))
309310 return cfg_res
310311
311- def _get_production (self , current_symbol , count = 0 ):
312+ def _get_production (self , current_symbol : Any , count : int = 0 ) \
313+ -> Tuple [List [Production ], int ]:
312314 next_symbols = []
313315 next_productions = []
314316 for son in self .sons :
@@ -322,7 +324,7 @@ def _get_production(self, current_symbol, count=0):
322324 next_productions += new_prods
323325 return next_productions , count
324326
325- def __repr__ (self ):
327+ def __repr__ (self ) -> str :
326328 return self .head .get_str_repr ([str (son ) for son in self .sons ])
327329
328330 def union (self , other : "Regex" ) -> "Regex" :
@@ -357,11 +359,11 @@ def union(self, other: "Regex") -> "Regex":
357359
358360 """
359361 regex = Regex ("" )
360- regex .head = pyformlang . regular_expression . regex_objects . Union ()
362+ regex .head = Union ()
361363 regex .sons = [self , other ]
362364 return regex
363365
364- def __or__ (self , other ) :
366+ def __or__ (self , other : "Regex" ) -> "Regex" :
365367 """ Makes the union with another regex
366368
367369 Parameters
@@ -427,12 +429,11 @@ def concatenate(self, other: "Regex") -> "Regex":
427429 True
428430 """
429431 regex = Regex ("" )
430- regex .head = \
431- pyformlang .regular_expression .regex_objects .Concatenation ()
432+ regex .head = Concatenation ()
432433 regex .sons = [self , other ]
433434 return regex
434435
435- def __add__ (self , other ) :
436+ def __add__ (self , other : "Regex" ) -> "Regex" :
436437 """ Concatenates a regular expression with another one
437438
438439 Parameters
@@ -485,11 +486,11 @@ def kleene_star(self) -> "Regex":
485486
486487 """
487488 regex = Regex ("" )
488- regex .head = pyformlang . regular_expression . regex_objects . KleeneStar ()
489+ regex .head = KleeneStar ()
489490 regex .sons = [self ]
490491 return regex
491492
492- def from_string (self , regex_str : str ):
493+ def from_string (self , regex_str : str ) -> "Regex" :
493494 """ Construct a regex from a string. For internal usage.
494495
495496 Equivalent to the constructor of Regex
@@ -515,7 +516,7 @@ def from_string(self, regex_str: str):
515516 """
516517 return Regex (regex_str )
517518
518- def accepts (self , word : Iterable [str ]) -> bool :
519+ def accepts (self , word : Iterable [Any ]) -> bool :
519520 """
520521 Check if a word matches (completely) the regex
521522
@@ -545,7 +546,7 @@ def accepts(self, word: Iterable[str]) -> bool:
545546 return self ._enfa .accepts (word )
546547
547548 @classmethod
548- def from_python_regex (cls , regex ) :
549+ def from_python_regex (cls , regex : str ) -> PythonRegex :
549550 """
550551 Creates a regex from a string using the python way to write it.
551552
@@ -570,4 +571,4 @@ def from_python_regex(cls, regex):
570571 >>> Regex.from_python_regex("a+[cd]")
571572
572573 """
573- return regular_expression . PythonRegex (regex )
574+ return PythonRegex (regex )
0 commit comments