11"""Feature Context-Free Grammar"""
2- import string
3- from typing import Iterable , AbstractSet
42
5- from pyformlang .cfg import CFG , Terminal , Epsilon , Variable
6- from pyformlang .cfg .cfg import is_special_text , EPSILON_SYMBOLS , NotParsableException
3+ from typing import List , Set , Tuple , AbstractSet , Iterable , Optional , Any
4+ from string import ascii_uppercase
5+
6+ from pyformlang .cfg import CFG , Terminal , Variable , Production , Epsilon
7+ from pyformlang .cfg .cfg import is_special_text , \
8+ EPSILON_SYMBOLS , NotParsableException
9+ from pyformlang .cfg .cfg_object import CFGObject
710from pyformlang .cfg .parse_tree import ParseTree
811from pyformlang .cfg .utils import to_terminal
912from pyformlang .fcfg .feature_production import FeatureProduction
10- from pyformlang .fcfg .feature_structure import FeatureStructure , FeatureStructuresNotCompatibleException
13+ from pyformlang .fcfg .feature_structure import FeatureStructure , \
14+ FeatureStructuresNotCompatibleException
1115from pyformlang .fcfg .state import State , StateProcessed
1216
1317
@@ -58,21 +62,28 @@ def __init__(self,
5862 variables : AbstractSet [Variable ] = None ,
5963 terminals : AbstractSet [Terminal ] = None ,
6064 start_symbol : Variable = None ,
61- productions : Iterable [FeatureProduction ] = None ):
65+ productions : Iterable [FeatureProduction ] = None ) -> None :
6266 super ().__init__ (variables , terminals , start_symbol , productions )
67+ self ._productions : Set [FeatureProduction ] # type: ignore
6368
def __predictor(self,
                state: State,
                chart: List[List[State]],
                processed: StateProcessed) -> None:
    """ Predictor step of the chart parser

    The given state is incomplete and the symbol right after its dot is
    a variable.  Every production of the grammar whose head is that
    variable gets a fresh state (dot at position 0, empty span located
    at the end index of the given state).

    Parameters
    ----------
    state : State
        The incomplete state whose next body symbol has to be expanded
    chart : list of list of State
        The chart. New states are appended to the column located at
        the end index of the given state
    processed : StateProcessed
        The registry of states. A new state is appended to the chart
        only when ``processed.add`` accepts it
    """
    column = state.positions[1]
    expected_head = state.production.body[state.positions[2]]
    # Only the productions rewriting the expected variable are relevant.
    matching_rules = (rule for rule in self._productions
                      if rule.head == expected_head)
    for rule in matching_rules:
        predicted = State(rule,
                          (column, column, 0),
                          rule.features,
                          ParseTree(rule.head))
        if processed.add(column, predicted):
            chart[column].append(predicted)
7485
75- def contains (self , word : Iterable [Terminal ]) -> bool :
86+ def contains (self , word : Iterable [Any ]) -> bool :
7687 """ Gives the membership of a word to the grammar
7788
7889 Parameters
@@ -85,9 +96,10 @@ def contains(self, word: Iterable[Terminal]) -> bool:
8596 contains : bool
8697 Whether word is in the FCFG or not
8798 """
99+ word = [to_terminal (x ) for x in word if x != Epsilon ()]
88100 return self ._get_final_state (word ) is not None
89101
90- def get_parse_tree (self , word : Iterable [Terminal ]) -> ParseTree :
102+ def get_parse_tree (self , word : Iterable [Any ]) -> ParseTree :
91103 """ Gives the parse tree for a sentence, if possible
92104
93105 Parameters
@@ -105,20 +117,30 @@ def get_parse_tree(self, word: Iterable[Terminal]) -> ParseTree:
105117 NotParsableException
106118 When the word is not parsable.
107119 """
120+ word = [to_terminal (x ) for x in word if x != Epsilon ()]
108121 final_state = self ._get_final_state (word )
109122 if final_state is None :
110123 raise NotParsableException ()
111124 return final_state .parse_tree
112125
113- def _get_final_state (self , word : Iterable [Terminal ]):
114- word = [to_terminal (x ) for x in word if x != Epsilon ()]
126+ def _get_final_state (self , word : List [Terminal ]) -> Optional [State ]:
115127 chart = [[] for _ in range (len (word ) + 1 )]
116- # Processed[i] contains all production rule that are currently working until i.
128+ # Processed[i] contains all production rules
129+ # that are currently working until i.
117130 processed = StateProcessed (len (word ) + 1 )
118131 gamma = Variable ("Gamma" )
119- dummy_rule = FeatureProduction (gamma , [self .start_symbol ], FeatureStructure (), [FeatureStructure ()])
132+ production_body : List [CFGObject ] = []
133+ if self .start_symbol is not None :
134+ production_body .append (self .start_symbol )
135+ dummy_rule = FeatureProduction (gamma ,
136+ production_body ,
137+ FeatureStructure (),
138+ [FeatureStructure ()])
120139 # State = (rule, [begin, end, dot position, diag)
121- first_state = State (dummy_rule , (0 , 0 , 0 ), dummy_rule .features , ParseTree ("BEGIN" ))
140+ first_state = State (dummy_rule ,
141+ (0 , 0 , 0 ),
142+ dummy_rule .features ,
143+ ParseTree ("BEGIN" ))
122144 chart [0 ].append (first_state )
123145 processed .add (0 , first_state )
124146 for i in range (len (chart ) - 1 ):
@@ -136,19 +158,26 @@ def _get_final_state(self, word: Iterable[Terminal]):
136158 if not state .is_incomplete ():
137159 _completer (state , chart , processed )
138160 for state in processed .generator (len (word )):
139- if state .positions [0 ] == 0 and not state .is_incomplete () and state .production .head == self .start_symbol :
161+ if state .positions [0 ] == 0 \
162+ and not state .is_incomplete () \
163+ and state .production .head == self .start_symbol :
140164 return state
141165 return None
142166
143167 @classmethod
144- def _read_line (cls , line , productions , terminals , variables ):
168+ def _read_line (cls ,
169+ line : str ,
170+ productions : Set [Production ],
171+ terminals : Set [Terminal ],
172+ variables : Set [Variable ]) -> None :
145173 structure_variables = {}
146174 head_s , body_s = line .split ("->" )
147175 head_text = head_s .strip ()
148176 if is_special_text (head_text ):
149177 head_text = head_text [5 :- 1 ]
150178 head_text , head_conditions = _split_text_conditions (head_text )
151- head_fs = FeatureStructure .from_text (head_conditions , structure_variables )
179+ head_fs = FeatureStructure .from_text (
180+ head_conditions , structure_variables )
152181 head = Variable (head_text )
153182 variables .add (head )
154183 all_body_fs = []
@@ -160,10 +189,12 @@ def _read_line(cls, line, productions, terminals, variables):
160189 body_component = body_component [5 :- 1 ]
161190 else :
162191 type_component = ""
163- if body_component [0 ] in string . ascii_uppercase or \
192+ if body_component [0 ] in ascii_uppercase or \
164193 type_component == "VAR" :
165- body_component , body_conditions = _split_text_conditions (body_component )
166- body_fs = FeatureStructure .from_text (body_conditions , structure_variables )
194+ body_component , body_conditions = \
195+ _split_text_conditions (body_component )
196+ body_fs = FeatureStructure .from_text (
197+ body_conditions , structure_variables )
167198 all_body_fs .append (body_fs )
168199 body_var = Variable (body_component )
169200 variables .add (body_var )
@@ -178,7 +209,7 @@ def _read_line(cls, line, productions, terminals, variables):
178209 productions .add (production )
179210
180211
181- def _split_text_conditions (head_text ) :
212+ def _split_text_conditions (head_text : str ) -> Tuple [ str , str ] :
182213 if head_text [- 1 ] != "]" :
183214 return head_text , ""
184215 idx = head_text .find ("[" )
@@ -187,36 +218,48 @@ def _split_text_conditions(head_text):
187218 return head_text [:idx ], head_text [idx + 1 :- 1 ]
188219
189220
def _scanner(state: State,
             chart: List[List[State]],
             processed: StateProcessed) -> None:
    """ Scanner step of the chart parser

    The given state is incomplete and the symbol right after its dot is
    the next token of the input.  The token is attached to the parse
    tree of the state, then a state with both the end index and the dot
    moved by one is registered.

    Parameters
    ----------
    state : State
        The incomplete state to move forward
    chart : list of list of State
        The chart. The new state is appended to the column following
        the end index of the given state
    processed : StateProcessed
        The registry of states. The new state is appended to the chart
        only when ``processed.add`` accepts it
    """
    current_end = state.positions[1]
    next_column = current_end + 1
    # The parse tree of the given state is extended in place and the very
    # same object is handed over to the new state.
    scanned_symbol = state.production.body[state.positions[2]]
    state.parse_tree.sons.append(ParseTree(scanned_symbol))
    advanced_positions = (state.positions[0],
                          next_column,
                          state.positions[2] + 1)
    advanced = State(state.production,
                     advanced_positions,
                     state.feature_stucture,
                     state.parse_tree)
    if processed.add(next_column, advanced):
        chart[next_column].append(advanced)
199235
200236
def _completer(state: State,
               chart: List[List[State]],
               processed: StateProcessed) -> None:
    """ Completer step of the chart parser

    The given state is complete.  Every incomplete state registered at
    the begin index of the given state and waiting for the head of its
    production is moved forward by one symbol, provided the feature
    structures can be unified.

    Parameters
    ----------
    state : State
        The complete state
    chart : list of list of State
        The chart. New states are appended to the column located at
        the end index of the given state
    processed : StateProcessed
        The registry of states. It provides the candidates to move
        forward, and a new state is appended to the chart only when
        ``processed.add`` accepts it
    """
    origin = state.positions[0]
    completed_head = state.production.head
    for waiting in processed.generator(origin):
        # Only states still expecting a symbol can be moved forward.
        if not waiting.is_incomplete():
            continue
        dot = waiting.positions[2]
        # Written with "==" on purpose: this is the comparison the
        # objects are known to support.
        if not waiting.production.body[dot] == completed_head:
            continue
        try:
            # Work on copies so that a failed unification leaves the
            # feature structures of both states untouched.
            head_features = state.feature_stucture.copy()
            head_features = head_features.get_feature_by_path(["head"])
            merged_features = waiting.feature_stucture.copy()
            slot_features = merged_features.get_feature_by_path([str(dot)])
            slot_features.unify(head_features)
        except FeatureStructuresNotCompatibleException:
            continue
        # NOTE(review): the parse tree of the waiting state is extended in
        # place and shared with the new state - confirm this is intended
        # when several complete states advance the same waiting state.
        tree = waiting.parse_tree
        tree.sons.append(state.parse_tree)
        end_column = state.positions[1]
        advanced = State(waiting.production,
                         (waiting.positions[0], end_column, dot + 1),
                         merged_features,
                         tree)
        if processed.add(end_column, advanced):
            chart[end_column].append(advanced)
0 commit comments