From ecf34778d595eb1d6782dbe02ea16867902c9eee Mon Sep 17 00:00:00 2001 From: Igor Dejanovic <igor.dejanovic@gmail.com> Date: Thu, 7 Aug 2014 22:31:19 +0200 Subject: [PATCH] Changed PTNode.rule->rule_name. rule is now a reference to ParsingExpression. --- arpeggio/__init__.py | 116 +++++++++--------- arpeggio/peg.py | 14 +-- tests/unit/test_peg_parser.py | 2 +- .../test_ptnode_navigation_expressions.py | 6 +- tests/unit/test_python_parser.py | 2 +- tests/unit/test_reduce_tree.py | 14 +-- 6 files changed, 80 insertions(+), 74 deletions(-) diff --git a/arpeggio/__init__.py b/arpeggio/__init__.py index 0f373a0..3a47101 100644 --- a/arpeggio/__init__.py +++ b/arpeggio/__init__.py @@ -51,13 +51,13 @@ class NoMatch(Exception): match is not successful. Args: - rule (str): A name of the rule. + rule_name (str): A name of the rule. position (int): A position in the input stream where exception occurred. parser (Parser): An instance of a parser. """ - def __init__(self, rule, position, parser): - self.rule = rule + def __init__(self, rule_name, position, parser): + self.rule_name = rule_name self.position = position self.parser = parser @@ -65,7 +65,7 @@ class NoMatch(Exception): self._up = True def __str__(self): - return "Expected '{}' at position {} => '{}'.".format(self.rule, + return "Expected '{}' at position {} => '{}'.".format(self.rule_name, str(self.parser.pos_to_linecol(self.position)), self.parser.context(position=self.position)) @@ -94,7 +94,7 @@ class ParsingExpression(object): elements: A list (or other python object) used as a staging structure for python based grammar definition. Used in _from_python for building nodes list of child parser expressions. - rule (str): The name of the parser rule if this is the root rule. + rule_name (str): The name of the parser rule if this is the root rule. root (bool): Does this parser expression represents the root of the parser rule? The root parser rule will create non-terminal node of the parse tree during parsing. @@ -106,7 +106,7 @@ class ParsingExpression(object): elements = elements[0] self.elements = elements - self.rule = kwargs.get('rule') + self.rule_name = kwargs.get('rule_name', '') self.root = kwargs.get('root', False) nodes = kwargs.get('nodes', []) @@ -125,14 +125,14 @@ class ParsingExpression(object): @property def name(self): if self.root: - return "%s=%s" % (self.rule, self.__class__.__name__) + return "%s=%s" % (self.rule_name, self.__class__.__name__) else: return self.__class__.__name__ @property def id(self): if self.root: - return self.rule + return self.rule_name else: return id(self) @@ -241,9 +241,9 @@ class ParsingExpression(object): if len(result) == 1: result = result[0] else: - result = NonTerminal(self.rule, c_pos, result) + result = NonTerminal(self, c_pos, result) else: - result = NonTerminal(self.rule, c_pos, result) + result = NonTerminal(self, c_pos, result) # Result caching for use by memoization. self.result_cache[c_pos] = (result, parser.position) @@ -260,7 +260,7 @@ class ParsingExpression(object): place of the NoMatch exception. """ if self.root and parser.position == nm.position and nm._up: - nm.rule = self.rule + nm.rule_name = self.rule_name class Sequence(ParsingExpression): @@ -446,7 +446,7 @@ class Combine(Decorator): results = flatten(results) # Create terminal from result - return Terminal(self.rule if self.root else '', c_pos, \ + return Terminal(self, c_pos, \ "".join([str(result) for result in results])) except NoMatch: parser.position = c_pos # Backtracking @@ -461,13 +461,13 @@ class Match(ParsingExpression): """ Base class for all classes that will try to match something from the input. """ - def __init__(self, rule, root=False): - super(Match, self).__init__(rule=rule, root=root) + def __init__(self, rule_name, root=False): + super(Match, self).__init__(rule_name=rule_name, root=root) @property def name(self): if self.root: - return "%s=%s(%s)" % (self.rule, self.__class__.__name__, self.to_match) + return "%s=%s(%s)" % (self.rule_name, self.__class__.__name__, self.to_match) else: return "%s(%s)" % (self.__class__.__name__, self.to_match) @@ -495,8 +495,8 @@ class RegExMatch(Match): Default is None to support propagation from global parser setting. ''' - def __init__(self, to_match, rule=None, root=False, ignore_case=None): - super(RegExMatch, self).__init__(rule, root) + def __init__(self, to_match, rule_name='', root=False, ignore_case=None): + super(RegExMatch, self).__init__(rule_name, root) self.to_match = to_match self.ignore_case = ignore_case @@ -517,8 +517,7 @@ class RegExMatch(Match): print("++ Match '%s' at %d => '%s'" % (m.group(), \ c_pos, parser.context(len(m.group())))) parser.position += len(m.group()) - return Terminal(self.rule if self.root else '', c_pos, - m.group()) + return Terminal(self, c_pos, m.group()) else: if parser.debug: print("-- NoMatch at {}".format(c_pos)) @@ -534,8 +533,8 @@ class StrMatch(Match): ignore_case(bool): If case insensitive match is needed. Default is None to support propagation from global parser setting. """ - def __init__(self, to_match, rule=None, root=False, ignore_case=None): - super(StrMatch, self).__init__(rule, root) + def __init__(self, to_match, rule_name='', root=False, ignore_case=None): + super(StrMatch, self).__init__(rule_name, root) self.to_match = to_match self.ignore_case = ignore_case @@ -557,8 +556,7 @@ class StrMatch(Match): # If this match is inside sequence than mark for suppression suppress = type(parser._last_pexpression) is Sequence - return Terminal(self.rule if self.root else '', c_pos, - self.to_match, suppress=suppress) + return Terminal(self, c_pos, self.to_match, suppress=suppress) else: if parser.debug: print("-- NoMatch at {}".format(c_pos)) @@ -584,15 +582,15 @@ class Kwd(StrMatch): super(Kwd, self).__init__(to_match, rule=None) self.to_match = to_match self.root = True - self.rule = 'keyword' + self.rule_name = 'keyword' class EndOfFile(Match): """ The Match class that will succeed in case end of input is reached. """ - def __init__(self, rule=None): - super(EndOfFile, self).__init__(rule) + def __init__(self): + super(EndOfFile, self).__init__("EOF") @property def name(self): @@ -601,7 +599,7 @@ class EndOfFile(Match): def _parse(self, parser): c_pos = parser.position if len(parser.input) == c_pos: - return Terminal('EOF', c_pos, '', suppress=True) + return Terminal(EOF(), c_pos, '', suppress=True) else: if parser.debug: print("!! EOF not matched.") @@ -623,8 +621,9 @@ class ParseTreeNode(object): The node can be terminal(the leaf of the parse tree) or non-terminal. Attributes: - rule (str): The name of the rule that created this node or empty - string in case this node is created by a non-root pexpression. + rule (ParsingExpression): The rule that created this node. + rule_name (str): The name of the rule that created this node if root rule + or empty string otherwise. position (int): A position in the input stream where the match occurred. error (bool): Is this a false parse tree node created during error @@ -632,14 +631,17 @@ class ParseTreeNode(object): comments : A parse tree of comment(s) attached to this node. """ def __init__(self, rule, position, error): + assert rule + assert rule.rule_name is not None self.rule = rule + self.rule_name = rule.rule_name self.position = position self.error = error self.comments = None @property def name(self): - return "%s [%s]" % (self.rule, self.position) + return "%s [%s]" % (self.rule_name, self.position) class Terminal(ParseTreeNode): @@ -647,7 +649,7 @@ class Terminal(ParseTreeNode): Leaf node of the Parse Tree. Represents matched string. Attributes: - rule (str): The name of the rule that created this terminal. + rule (ParsingExpression): The rule that created this terminal. position (int): A position in the input stream where match occurred. value (str): Matched string at the given position or missing token name in the case of an error node. @@ -662,9 +664,9 @@ class Terminal(ParseTreeNode): @property def desc(self): if self.value: - return "%s '%s' [%s]" % (self.rule, self.value, self.position) + return "%s '%s' [%s]" % (self.rule_name, self.value, self.position) else: - return "%s [%s]" % (self.rule, self.position) + return "%s [%s]" % (self.rule_name, self.position) def __str__(self): return self.value @@ -720,7 +722,8 @@ class NonTerminal(ParseTreeNode, list): this node rule. """ # Prevent infinite recursion - if rule_name == '_expr_cache': + if rule_name in ['_expr_cache', '_filtered', 'rule', 'rule_name', + 'position', 'append', 'extend']: raise AttributeError # First check the cache @@ -731,19 +734,22 @@ class NonTerminal(ParseTreeNode, list): # with the given rule name and create new NonTerminal # and cache it for later access. nodes = [] + rule = None for n in self: if self._filtered: # For filtered NT rule_name is a rule on # each of its children for m in n: - if m.rule == rule_name: + if m.rule_name == rule_name: nodes.append(m) + rule = m.rule else: - if n.rule == rule_name: + if n.rule_name == rule_name: nodes.append(n) + rule = n.rule # For expression NonTerminals instances position does not have any sense. - result = NonTerminal(rule=rule_name, position=None, nodes=nodes, _filtered=True) + result = NonTerminal(rule=rule, position=None, nodes=nodes, _filtered=True) self._expr_cache[rule_name] = result return result @@ -894,9 +900,9 @@ class Parser(object): # visualization if self.debug: from arpeggio.export import PTDOTExporter - root_rule = self.parse_tree.rule + root_rule_name = self.parse_tree.rule_name PTDOTExporter().exportFile(self.parse_tree, - "{}_parse_tree.dot".format(root_rule)) + "{}_parse_tree.dot".format(root_rule_name)) return self.parse_tree def getASG(self, sem_actions=None, defaults=True): @@ -940,7 +946,7 @@ class Parser(object): for n in node: child = tree_walk(n) if child is not None: - children.append_result(n.rule, child) + children.append_result(n.rule_name, child) if self.debug: print("Processing ", node.name, "= '", str(node), @@ -949,15 +955,15 @@ class Parser(object): for i, a in enumerate(children): print ("\t%d:" % (i + 1), unicode(a), "type:", type(a).__name__) - if node.rule in sem_actions: - sem_action = sem_actions[node.rule] + if node.rule_name in sem_actions: + sem_action = sem_actions[node.rule_name] if type(sem_action) is types.FunctionType: retval = sem_action(self, node, children) else: retval = sem_action.first_pass(self, node, children) if hasattr(sem_action, "second_pass"): - for_second_pass.append((node.rule, retval)) + for_second_pass.append((node.rule_name, retval)) if self.debug: print("\tApplying semantic action ", type(sem_action)) @@ -1071,9 +1077,9 @@ class Parser(object): if self.nm is None or args[0].position > self.nm.position: self.nm = args[0] else: - rule, position, parser = args + rule_name, position, parser = args if self.nm is None or position > self.nm.position: - self.nm = NoMatch(rule, position, parser) + self.nm = NoMatch(rule_name, position, parser) raise self.nm @@ -1106,7 +1112,7 @@ class ParserPython(Parser): # Comments should be optional and there can be more of them if self.comments_model: # and not isinstance(self.comments_model, ZeroOrMore): self.comments_model.root = True - self.comments_model.rule = comment_def.__name__ + self.comments_model.rule_name = comment_def.__name__ def _parse(self): return self.parser_model.parse(self) @@ -1127,11 +1133,11 @@ class ParserPython(Parser): def inner_from_python(expression): retval = None if type(expression) == types.FunctionType: # Is this expression a parser rule? - rule = expression.__name__ - if rule in __rule_cache: - c_rule = __rule_cache.get(rule) + rule_name = expression.__name__ + if rule_name in __rule_cache: + c_rule = __rule_cache.get(rule_name) if self.debug: - print("Rule {} founded in cache.".format(rule)) + print("Rule {} founded in cache.".format(rule_name)) if isinstance(c_rule, CrossRef): self.__cross_refs += 1 if self.debug: @@ -1141,10 +1147,10 @@ class ParserPython(Parser): # Semantic action for the rule if hasattr(expression, "sem"): - self.sem_actions[rule] = expression.sem + self.sem_actions[rule_name] = expression.sem # Register rule cross-ref to support recursion - __rule_cache[rule] = CrossRef(rule) + __rule_cache[rule_name] = CrossRef(rule_name) curr_expr = expression while type(curr_expr) is types.FunctionType: @@ -1152,14 +1158,14 @@ class ParserPython(Parser): # go into until non-function is returned. curr_expr = curr_expr() retval = inner_from_python(curr_expr) - retval.rule = rule + retval.rule_name = rule_name retval.root = True # Update cache - __rule_cache[rule] = retval + __rule_cache[rule_name] = retval if self.debug: print("New rule: {} -> {}" - .format(rule, retval.__class__.__name__)) + .format(rule_name, retval.__class__.__name__)) elif isinstance(expression, StrMatch): if expression.ignore_case is None: diff --git a/arpeggio/peg.py b/arpeggio/peg.py index add46d7..e5aa5fe 100644 --- a/arpeggio/peg.py +++ b/arpeggio/peg.py @@ -81,14 +81,14 @@ class PEGSemanticAction(SemanticAction): # If resolved rule hasn't got the same name it # should be cloned and preserved in the peg_rules cache - if resolved_rule.rule != n.rule_name: + if resolved_rule.rule_name != n.rule_name: resolved_rule = copy.copy(resolved_rule) - resolved_rule.rule = n.rule_name - parser.peg_rules[resolved_rule.rule] = resolved_rule + resolved_rule.rule_name = n.rule_name + parser.peg_rules[resolved_rule.rule_name] = resolved_rule if parser.debug: print("Resolving: cloned to {} = > {}"\ - .format(resolved_rule.rule, resolved_rule.name)) + .format(resolved_rule.rule_name, resolved_rule.name)) node.nodes[i] = resolved_rule @@ -111,7 +111,7 @@ class SemRule(PEGSemanticAction): retval = Sequence(nodes=children[1:]) else: retval = children[1] - retval.rule = rule_name + retval.rule_name = rule_name retval.root = True if not hasattr(parser, "peg_rules"): @@ -230,14 +230,14 @@ class ParserPEG(Parser): # visualization if self.debug: from arpeggio.export import PMDOTExporter - root_rule = self.parser_model.rule + root_rule = self.parser_model.rule_name PMDOTExporter().exportFile(self.parser_model, "{}_peg_parser_model.dot".format(root_rule)) # Comments should be optional and there can be more of them if self.comments_model: # and not isinstance(self.comments_model, ZeroOrMore): self.comments_model.root = True - self.comments_model.rule = comment_rule_name + self.comments_model.rule_name = comment_rule_name def _parse(self): return self.parser_model.parse(self) diff --git a/tests/unit/test_peg_parser.py b/tests/unit/test_peg_parser.py index a89a3db..953dfcd 100644 --- a/tests/unit/test_peg_parser.py +++ b/tests/unit/test_peg_parser.py @@ -24,7 +24,7 @@ def test_construct_parser(): parser = ParserPEG(grammar, 'calc') - assert parser.parser_model.rule == 'calc' + assert parser.parser_model.rule_name == 'calc' assert isinstance(parser.parser_model, Sequence) assert parser.parser_model.nodes[0].name == 'OneOrMore' diff --git a/tests/unit/test_ptnode_navigation_expressions.py b/tests/unit/test_ptnode_navigation_expressions.py index a5aadcd..756060b 100644 --- a/tests/unit/test_ptnode_navigation_expressions.py +++ b/tests/unit/test_ptnode_navigation_expressions.py @@ -34,9 +34,9 @@ def test_lookup_single(): assert isinstance(result, ParseTreeNode) assert isinstance(result.bar, NonTerminal) # dot access - assert result.bar.rule == 'bar' + assert result.bar.rule_name == 'bar' # Index access - assert result[1].rule == 'bar' + assert result[1].rule_name == 'bar' # There are six children from result assert len(result) == 6 @@ -52,7 +52,7 @@ def test_lookup_single(): # For example this returns all bum from all bar in result assert len(result.bar.bum) == 2 # Verify that proper bum are returned - assert result.bar.bum[0].rule == 'bum' + assert result.bar.bum[0].rule_name == 'bum' assert result.bar.bum[1].position == 18 # Access to terminal diff --git a/tests/unit/test_python_parser.py b/tests/unit/test_python_parser.py index f16dd80..ce28009 100644 --- a/tests/unit/test_python_parser.py +++ b/tests/unit/test_python_parser.py @@ -29,7 +29,7 @@ def test_pp_construction(): ''' parser = ParserPython(calc) - assert parser.parser_model.rule == 'calc' + assert parser.parser_model.rule_name == 'calc' assert isinstance(parser.parser_model, Sequence) assert parser.parser_model.nodes[0].desc == 'OneOrMore' diff --git a/tests/unit/test_reduce_tree.py b/tests/unit/test_reduce_tree.py index 624707f..5006a3a 100644 --- a/tests/unit/test_reduce_tree.py +++ b/tests/unit/test_reduce_tree.py @@ -31,21 +31,21 @@ def test_reduce_tree(): # PTDOTExporter().exportFile(result, 'test_reduce_tree_pt.dot') - assert result[0].rule == 'first' + assert result[0].rule_name == 'first' assert isinstance(result[0], NonTerminal) - assert result[3].rule == 'first' - assert result[0][0].rule == 'fourth' + assert result[3].rule_name == 'first' + assert result[0][0].rule_name == 'fourth' # Check reduction for direct OrderedChoice - assert result[2][0].rule == 'third' + assert result[2][0].rule_name == 'third' parser = ParserPython(grammar, reduce_tree=True) result = parser.parse(input) # PTDOTExporter().exportFile(result, 'test_reduce_tree_pt.dot') - assert result[0].rule == 'fourth' + assert result[0].rule_name == 'fourth' assert isinstance(result[0], Terminal) - assert result[3].rule == 'fourth' + assert result[3].rule_name == 'fourth' # Check reduction for direct OrderedChoice - assert result[2][0].rule == 'third_str' + assert result[2][0].rule_name == 'third_str' -- 2.18.0