diff --git a/arpeggio/__init__.py b/arpeggio/__init__.py index 5e21c55e295e6064b943440805d368f56ed57af3..68d68c126d3d854e5aafbdd6be8991f0710fd03f 100644 --- a/arpeggio/__init__.py +++ b/arpeggio/__init__.py @@ -746,6 +746,41 @@ class ParseTreeNode(object): def name(self): return "%s [%s]" % (self.rule_name, self.position) + def visit(self, visitor): + """ + Visitor pattern implementation. + + Args: + visitor(PTNodeVisitor): The visitor object. + """ + if visitor.debug: + print("Visiting ", self.name, " type:", + type(self).__name__, "str:", text(self)) + + children = SemanticActionResults() + if isinstance(self, NonTerminal): + for node in self: + child = node.visit(visitor) + # If visit returns None suppress that child node + if child is not None: + children.append(child) + + visit_name = "visit_%s" % self.rule_name + if hasattr(visitor, visit_name): + # Call visit method. + result = getattr(visitor, visit_name)(self, children) + + # If there is a method with 'second' prefix save + # the result of visit for post-processing + if hasattr(visitor, "second_%s" % self.rule_name): + visitor.for_second_pass.append((self.rule_name, result)) + + return result + + elif visitor.defaults: + # If default actions are enabled + return visitor.visit__default__(self, children) + class Terminal(ParseTreeNode): """ @@ -869,6 +904,87 @@ class NonTerminal(ParseTreeNode, list): # ---------------------------------------------------- # Semantic Actions # +class PTNodeVisitor(object): + """ + Base class for all parse tree visitors. + """ + def __init__(self, defaults=True, debug=False): + """ + Args: + defaults(bool): If the default visit method should be applied in + case no method is defined. + debug(bool): Print debug messages? + """ + self.for_second_pass = [] + self.debug = debug + self.defaults = defaults + + def visit__default__(self, node, children): + """ + Called if no visit method is defined for the node. 
+ + Args: + node(ParseTreeNode): + children(processed children ParseTreeNode-s): + """ + if isinstance(node, Terminal): + # Default for Terminal is to convert to string unless suppress flag + # is set in which case it is suppressed by setting to None. + retval = text(node) if not node.suppress else None + else: + retval = node + # Special case. If only one child exists return it. + if len(children) == 1: + retval = children[0] + else: + # If there is only one non-string child return + # that by default. This will support e.g. bracket + # removals. + last_non_str = None + for c in children: + if not isstr(c): + if last_non_str is None: + last_non_str = c + else: + # If there are multiple non-string objects + # by default convert non-terminal to string + if self.debug: + print("*** Warning: Multiple non-string objects found in default visit. Converting non-terminal to a string.") + retval = text(node) + break + else: + # Return the only non-string child + retval = last_non_str + + return retval + + +def visit_parse_tree(parse_tree, visitor): + """ + Applies visitor to parse_tree and runs the second pass + afterwards. + + Args: + parse_tree(ParseTreeNode): + visitor(PTNodeVisitor): + """ + if not parse_tree: + raise Exception( + "Parse tree is empty. You did call parse(), didn't you?") + + if visitor.debug: + print("ASG: First pass") + # Visit tree. 
+ result = parse_tree.visit(visitor) + + # Second pass + if visitor.debug: + print("ASG: Second pass") + for sa_name, asg_node in visitor.for_second_pass: + getattr(visitor, "second_%s" % sa_name)(asg_node) + + return result + class SemanticAction(object): """ diff --git a/examples/calc_peg_visitor.py b/examples/calc_peg_visitor.py new file mode 100644 index 0000000000000000000000000000000000000000..f944cb3be57dafa7b564edf7ed353b028f11dc20 --- /dev/null +++ b/examples/calc_peg_visitor.py @@ -0,0 +1,123 @@ +####################################################################### +# Name: calc_peg_visitor.py +# Purpose: Simple expression evaluator example using PEG language and +# visitor pattern for semantic analysis. +# Author: Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com> +# Copyright: (c) 2009-2014 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com> +# License: MIT License +# +# This example is functionally equivalent to calc_peg.py. +# It is a demonstration of the visitor pattern approach for semantic analysis. +# Parser model as well as parse tree exported to dot files should be +# the same as parser model and parse tree generated in calc.py example. +####################################################################### +from __future__ import absolute_import, unicode_literals, print_function +try: + text = unicode +except: + text = str + +from arpeggio.cleanpeg import ParserPEG +from arpeggio import PTNodeVisitor, visit_parse_tree + + +# Grammar is defined using textual specification based on PEG language. +calc_grammar = """ + number = r'\d*\.\d*|\d+' + factor = ("+" / "-")? + (number / "(" expression ")") + term = factor (( "*" / "/") factor)* + expression = term (("+" / "-") term)* + calc = expression+ EOF +""" + + +class CalcVisitor(PTNodeVisitor): + + def visit_number(self, node, children): + """ + Converts node value to float. 
+ """ + if self.debug: + print("Converting {}.".format(node.value)) + return float(node.value) + + def visit_factor(self, node, children): + """ + Removes parentheses if they exist and returns what was contained inside. + """ + if self.debug: + print("Factor {}".format(children)) + if len(children) == 1: + return children[0] + sign = -1 if children[0] == '-' else 1 + return sign * children[-1] + + def visit_term(self, node, children): + """ + Divides or multiplies factors. + Factor nodes will be already evaluated. + """ + if self.debug: + print("Term {}".format(children)) + term = children[0] + for i in range(2, len(children), 2): + if children[i-1] == "*": + term *= children[i] + else: + term /= children[i] + if self.debug: + print("Term = {}".format(term)) + return term + + def visit_expression(self, node, children): + """ + Adds or subtracts terms. + Term nodes will be already evaluated. + """ + if self.debug: + print("Expression {}".format(children)) + expr = 0 + start = 0 + # Check for unary + or - operator + if text(children[0]) in "+-": + start = 1 + + for i in range(start, len(children), 2): + if i and children[i - 1] == "-": + expr -= children[i] + else: + expr += children[i] + + if self.debug: + print("Expression = {}".format(expr)) + + return expr + + +def main(debug=False): + + # First we will make a parser - an instance of the calc parser model. + # Parser model is given in the form of PEG notation therefore we + # are using ParserPEG class. Root rule name (parsing expression) is "calc". + parser = ParserPEG(calc_grammar, "calc", debug=debug) + + # An expression we want to evaluate + input_expr = "-(4-1)*5+(2+4.67)+5.89/(.2+7)" + + # Then parse tree is created out of the input_expr expression. + parse_tree = parser.parse(input_expr) + + result = visit_parse_tree(parse_tree, CalcVisitor(debug=debug)) + + # visit_parse_tree will start semantic analysis. 
+ # In this case semantic analysis will evaluate the expression and + # the returned value will be the evaluated result of the input_expr expression. + print("{} = {}".format(input_expr, result)) + +if __name__ == "__main__": + # In debug mode dot (graphviz) files for parser model + # and parse tree will be created for visualization. + # Check out the current folder for .dot files. + main(debug=False) +