Skip to content

Commit 98ffd66

Browse files
committed
add fcfg annotations
1 parent 557adde commit 98ffd66

File tree

4 files changed

+170
-84
lines changed

4 files changed

+170
-84
lines changed

pyformlang/fcfg/fcfg.py

Lines changed: 75 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
"""Feature Context-Free Grammar"""
2-
import string
3-
from typing import Iterable, AbstractSet
42

5-
from pyformlang.cfg import CFG, Terminal, Epsilon, Variable
6-
from pyformlang.cfg.cfg import is_special_text, EPSILON_SYMBOLS, NotParsableException
3+
from typing import List, Set, Tuple, AbstractSet, Iterable, Optional, Any
4+
from string import ascii_uppercase
5+
6+
from pyformlang.cfg import CFG, Terminal, Variable, Production, Epsilon
7+
from pyformlang.cfg.cfg import is_special_text, \
8+
EPSILON_SYMBOLS, NotParsableException
9+
from pyformlang.cfg.cfg_object import CFGObject
710
from pyformlang.cfg.parse_tree import ParseTree
811
from pyformlang.cfg.utils import to_terminal
912
from pyformlang.fcfg.feature_production import FeatureProduction
10-
from pyformlang.fcfg.feature_structure import FeatureStructure, FeatureStructuresNotCompatibleException
13+
from pyformlang.fcfg.feature_structure import FeatureStructure, \
14+
FeatureStructuresNotCompatibleException
1115
from pyformlang.fcfg.state import State, StateProcessed
1216

1317

@@ -58,21 +62,28 @@ def __init__(self,
5862
variables: AbstractSet[Variable] = None,
5963
terminals: AbstractSet[Terminal] = None,
6064
start_symbol: Variable = None,
61-
productions: Iterable[FeatureProduction] = None):
65+
productions: Iterable[FeatureProduction] = None) -> None:
6266
super().__init__(variables, terminals, start_symbol, productions)
67+
self._productions: Set[FeatureProduction] # type: ignore
6368

def __predictor(self,
                state: State,
                chart: List[List[State]],
                processed: StateProcessed) -> None:
    """Predictor step of the chart parser.

    Called for an incomplete state whose next body symbol is a variable.
    For every production of the grammar whose head equals that symbol,
    a fresh state with the dot at position 0 is created at the current
    end index.

    Parameters
    ----------
    state : State
        The incomplete state whose next symbol must be expanded
    chart : list of list of State
        The chart; new states are appended to the column at the end
        index of ``state``
    processed : StateProcessed
        Registry of known states; a new state is appended to the chart
        only when ``processed.add`` returns a truthy value
    """
    column = state.positions[1]
    expected_symbol = state.production.body[state.positions[2]]
    for rule in self._productions:
        if not rule.head == expected_symbol:
            continue
        # The new state spans nothing yet: it begins and ends at `column`
        # and its parse tree only contains the head of the rule.
        predicted = State(rule,
                          (column, column, 0),
                          rule.features,
                          ParseTree(rule.head))
        if processed.add(column, predicted):
            chart[column].append(predicted)
7485

75-
def contains(self, word: Iterable[Terminal]) -> bool:
86+
def contains(self, word: Iterable[Any]) -> bool:
7687
""" Gives the membership of a word to the grammar
7788
7889
Parameters
@@ -85,9 +96,10 @@ def contains(self, word: Iterable[Terminal]) -> bool:
8596
contains : bool
8697
Whether word is in the FCFG or not
8798
"""
99+
word = [to_terminal(x) for x in word if x != Epsilon()]
88100
return self._get_final_state(word) is not None
89101

90-
def get_parse_tree(self, word: Iterable[Terminal]) -> ParseTree:
102+
def get_parse_tree(self, word: Iterable[Any]) -> ParseTree:
91103
""" Gives the parse tree for a sentence, if possible
92104
93105
Parameters
@@ -105,20 +117,30 @@ def get_parse_tree(self, word: Iterable[Terminal]) -> ParseTree:
105117
NotParsableException
106118
When the word is not parsable.
107119
"""
120+
word = [to_terminal(x) for x in word if x != Epsilon()]
108121
final_state = self._get_final_state(word)
109122
if final_state is None:
110123
raise NotParsableException()
111124
return final_state.parse_tree
112125

113-
def _get_final_state(self, word: Iterable[Terminal]):
114-
word = [to_terminal(x) for x in word if x != Epsilon()]
126+
def _get_final_state(self, word: List[Terminal]) -> Optional[State]:
115127
chart = [[] for _ in range(len(word) + 1)]
116-
# Processed[i] contains all production rule that are currently working until i.
128+
# Processed[i] contains all production rules
129+
# that are currently working until i.
117130
processed = StateProcessed(len(word) + 1)
118131
gamma = Variable("Gamma")
119-
dummy_rule = FeatureProduction(gamma, [self.start_symbol], FeatureStructure(), [FeatureStructure()])
132+
production_body: List[CFGObject] = []
133+
if self.start_symbol is not None:
134+
production_body.append(self.start_symbol)
135+
dummy_rule = FeatureProduction(gamma,
136+
production_body,
137+
FeatureStructure(),
138+
[FeatureStructure()])
120139
# State = (rule, (begin, end, dot position), features, parse tree)
121-
first_state = State(dummy_rule, (0, 0, 0), dummy_rule.features, ParseTree("BEGIN"))
140+
first_state = State(dummy_rule,
141+
(0, 0, 0),
142+
dummy_rule.features,
143+
ParseTree("BEGIN"))
122144
chart[0].append(first_state)
123145
processed.add(0, first_state)
124146
for i in range(len(chart) - 1):
@@ -136,19 +158,26 @@ def _get_final_state(self, word: Iterable[Terminal]):
136158
if not state.is_incomplete():
137159
_completer(state, chart, processed)
138160
for state in processed.generator(len(word)):
139-
if state.positions[0] == 0 and not state.is_incomplete() and state.production.head == self.start_symbol:
161+
if state.positions[0] == 0 \
162+
and not state.is_incomplete() \
163+
and state.production.head == self.start_symbol:
140164
return state
141165
return None
142166

143167
@classmethod
144-
def _read_line(cls, line, productions, terminals, variables):
168+
def _read_line(cls,
169+
line: str,
170+
productions: Set[Production],
171+
terminals: Set[Terminal],
172+
variables: Set[Variable]) -> None:
145173
structure_variables = {}
146174
head_s, body_s = line.split("->")
147175
head_text = head_s.strip()
148176
if is_special_text(head_text):
149177
head_text = head_text[5:-1]
150178
head_text, head_conditions = _split_text_conditions(head_text)
151-
head_fs = FeatureStructure.from_text(head_conditions, structure_variables)
179+
head_fs = FeatureStructure.from_text(
180+
head_conditions, structure_variables)
152181
head = Variable(head_text)
153182
variables.add(head)
154183
all_body_fs = []
@@ -160,10 +189,12 @@ def _read_line(cls, line, productions, terminals, variables):
160189
body_component = body_component[5:-1]
161190
else:
162191
type_component = ""
163-
if body_component[0] in string.ascii_uppercase or \
192+
if body_component[0] in ascii_uppercase or \
164193
type_component == "VAR":
165-
body_component, body_conditions = _split_text_conditions(body_component)
166-
body_fs = FeatureStructure.from_text(body_conditions, structure_variables)
194+
body_component, body_conditions = \
195+
_split_text_conditions(body_component)
196+
body_fs = FeatureStructure.from_text(
197+
body_conditions, structure_variables)
167198
all_body_fs.append(body_fs)
168199
body_var = Variable(body_component)
169200
variables.add(body_var)
@@ -178,7 +209,7 @@ def _read_line(cls, line, productions, terminals, variables):
178209
productions.add(production)
179210

180211

181-
def _split_text_conditions(head_text):
212+
def _split_text_conditions(head_text: str) -> Tuple[str, str]:
182213
if head_text[-1] != "]":
183214
return head_text, ""
184215
idx = head_text.find("[")
@@ -187,36 +218,48 @@ def _split_text_conditions(head_text):
187218
return head_text[:idx], head_text[idx+1:-1]
188219

189220

def _scanner(state: State,
             chart: List[List[State]],
             processed: StateProcessed) -> None:
    """Scanner step of the chart parser.

    The caller is expected to invoke this for an incomplete state whose
    next body symbol matches the current input token. The dot and the
    end index are both moved forward by one.

    Parameters
    ----------
    state : State
        The state to advance; its parse tree is extended in place with
        a leaf for the consumed symbol
    chart : list of list of State
        The chart; the advanced state is appended to the next column
    processed : StateProcessed
        Registry of known states; the advanced state is appended to the
        chart only when ``processed.add`` returns a truthy value
    """
    end_idx = state.positions[1]
    dot_idx = state.positions[2]
    # The consumed symbol becomes a new child of the current parse tree.
    # The tree object is shared with the advanced state created below.
    consumed = ParseTree(state.production.body[dot_idx])
    state.parse_tree.sons.append(consumed)
    next_column = end_idx + 1
    advanced = State(state.production,
                     (state.positions[0], next_column, state.positions[2] + 1),
                     state.feature_stucture,
                     state.parse_tree)
    if processed.add(next_column, advanced):
        chart[next_column].append(advanced)
199235

200236

def _completer(state: State,
               chart: List[List[State]],
               processed: StateProcessed) -> None:
    """Completer step of the chart parser.

    Called for a complete state. Every incomplete state registered at
    the begin index of ``state`` and waiting for the head of its
    production is a candidate to be moved forward, provided that the
    feature structures unify.

    Parameters
    ----------
    state : State
        The complete state
    chart : list of list of State
        The chart; advanced states are appended to the column at the
        end index of ``state``
    processed : StateProcessed
        Registry of known states; it provides the candidates and an
        advanced state is appended to the chart only when
        ``processed.add`` returns a truthy value
    """
    completed_head = state.production.head
    for waiting in processed.generator(state.positions[0]):
        if not waiting.is_incomplete():
            continue
        if not waiting.production.body[waiting.positions[2]] == completed_head:
            continue
        try:
            # Work on copies so that a failed unification leaves the
            # feature structures of both states untouched.
            completed_features = state.feature_stucture.copy()
            completed_features = completed_features.get_feature_by_path(
                ["head"])
            waiting_features = waiting.feature_stucture.copy()
            slot = waiting_features.get_feature_by_path(
                [str(waiting.positions[2])])
            slot.unify(completed_features)
        except FeatureStructuresNotCompatibleException:
            # Incompatible features: this candidate cannot be advanced.
            continue
        # The completed sub-tree is attached, in place, to the tree of the
        # waiting state; the same tree object is reused by the new state.
        tree = waiting.parse_tree
        tree.sons.append(state.parse_tree)
        advanced = State(waiting.production,
                         (waiting.positions[0],
                          state.positions[1],
                          waiting.positions[2] + 1),
                         waiting_features,
                         tree)
        if processed.add(state.positions[1], advanced):
            chart[state.positions[1]].append(advanced)

pyformlang/fcfg/feature_production.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Production rules with features"""
2-
from typing import List
2+
3+
from typing import List, Iterable
34

45
from pyformlang.cfg import Production, Variable
56
from pyformlang.cfg.cfg_object import CFGObject
@@ -18,22 +19,28 @@ class FeatureProduction(Production):
1819
head_feature : :class:`~pyformlang.fcfg.FeatureStructure`
1920
The feature structure of the head
2021
body_features : Iterable of :class:`~pyformlang.fcfg.FeatureStructure`
21-
The feature structures of the elements of the body. Must be the same size as the body.
22+
The feature structures of the elements of the body.
23+
Must be the same size as the body.
2224
"""
2325

def __init__(self,
             head: Variable,
             body: List[CFGObject],
             head_feature: FeatureStructure,
             body_features: Iterable[FeatureStructure],
             filtering: bool = True) -> None:
    """Build the production and merge its feature structures.

    ``head``, ``body`` and ``filtering`` are forwarded unchanged to the
    parent constructor. The merged feature structure stores the head
    features under the key ``"head"`` and the features of the i-th
    element of ``body_features`` under the key ``str(i)``.
    """
    super().__init__(head, body, filtering)
    merged = FeatureStructure()
    self._features = merged
    merged.add_content("head", head_feature)
    # Body features are keyed by their position, written as a string.
    for position, body_feature in enumerate(body_features):
        merged.add_content(str(position), body_feature)
3037

3138
@property
def features(self) -> FeatureStructure:
    """The merged features of the production rule

    Returns
    ----------
    features : :class:`~pyformlang.fcfg.FeatureStructure`
        The feature structure stored on the production, i.e. the
        ``_features`` attribute
    """
    merged_features = self._features
    return merged_features
3542

36-
def __repr__(self):
43+
def __repr__(self) -> str:
3744
res = [self.head.to_text()]
3845
cond_head = str(self._features.get_feature_by_path(["head"]))
3946
if cond_head:

0 commit comments

Comments
 (0)