Commit a57110c4 authored by Igor Dejanovic's avatar Igor Dejanovic

Merge branch 'visitor'

parents ade85d0f f349b0fc
...@@ -746,6 +746,41 @@ class ParseTreeNode(object): ...@@ -746,6 +746,41 @@ class ParseTreeNode(object):
def name(self): def name(self):
return "%s [%s]" % (self.rule_name, self.position) return "%s [%s]" % (self.rule_name, self.position)
def visit(self, visitor):
    """
    Visitor pattern entry point for a parse tree node.

    Children of a non-terminal are visited first (depth-first); their
    non-None results are collected and handed to the visitor method that
    matches this node's rule name.

    Args:
        visitor(PTNodeVisitor): The visitor object.

    Returns:
        The value produced by ``visitor.visit_<rule_name>`` if the visitor
        defines it; otherwise the value of ``visitor.visit__default__``
        when ``visitor.defaults`` is truthy; otherwise None.
    """
    if visitor.debug:
        print("Visiting ", self.name, " type:",
              type(self).__name__, "str:", text(self))

    collected = SemanticActionResults()
    if isinstance(self, NonTerminal):
        for child_node in self:
            child_result = child_node.visit(visitor)
            # A None result means the child is suppressed.
            if child_result is None:
                continue
            collected.append_result(child_node.rule_name, child_result)

    method_name = "visit_%s" % self.rule_name
    if not hasattr(visitor, method_name):
        # No dedicated method: use the default action only if enabled.
        if visitor.defaults:
            return visitor.visit__default__(self, collected)
        return None

    outcome = getattr(visitor, method_name)(self, collected)

    # A matching 'second_<rule_name>' method requests post-processing,
    # so remember the first-pass result for the second pass.
    if hasattr(visitor, "second_%s" % self.rule_name):
        visitor.for_second_pass.append((self.rule_name, outcome))

    return outcome
class Terminal(ParseTreeNode): class Terminal(ParseTreeNode):
""" """
...@@ -869,6 +904,88 @@ class NonTerminal(ParseTreeNode, list): ...@@ -869,6 +904,88 @@ class NonTerminal(ParseTreeNode, list):
# ---------------------------------------------------- # ----------------------------------------------------
# Semantic Actions # Semantic Actions
# #
class PTNodeVisitor(object):
    """
    Base class for all parse tree visitors.

    Subclasses provide ``visit_<rule_name>(node, children)`` methods and,
    optionally, ``second_<rule_name>(result)`` methods for a second pass.
    """

    def __init__(self, defaults=True, debug=False):
        """
        Args:
            defaults(bool): If the default visit method should be applied in
                case no method is defined.
            debug(bool): Print debug messages?
        """
        self.defaults = defaults
        self.debug = debug
        # (rule_name, first-pass result) pairs queued for the second pass.
        self.for_second_pass = []

    def visit__default__(self, node, children):
        """
        Called if no visit method is defined for the node.

        Args:
            node(ParseTreeNode):
            children(processed children ParseTreeNode-s):

        Returns:
            For a terminal: its text, or None when its suppress flag is set.
            For a non-terminal: the single child if there is exactly one;
            otherwise the only non-string child (None if there is none);
            otherwise, with several non-string children, the node's text.
        """
        if isinstance(node, Terminal):
            # Suppressed terminals are dropped by returning None.
            return None if node.suppress else text(node)

        # Special case. A lone child is passed through unchanged.
        if len(children) == 1:
            return children[0]

        # Look for a single non-string child; this supports e.g. bracket
        # removal where the brackets are plain strings.
        only_non_str = None
        for child in children:
            if isstr(child):
                continue
            if only_non_str is not None:
                # More than one non-string object: fall back to the
                # textual form of the whole non-terminal.
                if self.debug:
                    print("*** Warning: Multiple non-string objects found in default visit. Converting non-terminal to a string.")
                return text(node)
            only_non_str = child

        return only_non_str
def visit_parse_tree(parse_tree, visitor):
    """
    Runs the visitor over the parse tree, then performs the second pass.

    Args:
        parse_tree(ParseTreeNode): Root of the tree to visit.
        visitor(PTNodeVisitor): Visitor to apply.

    Returns:
        The result of visiting the root node (first pass).

    Raises:
        Exception: If parse_tree is falsy (e.g. None or empty).
    """
    if not parse_tree:
        message = "Parse tree is empty. You did call parse(), didn't you?"
        raise Exception(message)

    # First pass: walk the tree.
    if visitor.debug:
        print("ASG: First pass")
    first_pass_result = parse_tree.visit(visitor)

    # Second pass: post-process every result queued during the first pass.
    if visitor.debug:
        print("ASG: Second pass")
    for rule_name, queued_result in visitor.for_second_pass:
        second_pass_method = getattr(visitor, "second_%s" % rule_name)
        second_pass_method(queued_result)

    return first_pass_result
class SemanticAction(object): class SemanticAction(object):
""" """
......
...@@ -13,7 +13,7 @@ from __future__ import print_function, unicode_literals ...@@ -13,7 +13,7 @@ from __future__ import print_function, unicode_literals
from arpeggio import * from arpeggio import *
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
from .peg import sem_actions from .peg import PEGVisitor
from .peg import ParserPEG as ParserPEGOrig from .peg import ParserPEG as ParserPEGOrig
__all__ = ['ParserPEG'] __all__ = ['ParserPEG']
...@@ -49,13 +49,14 @@ def comment(): return _("#.*\n") ...@@ -49,13 +49,14 @@ def comment(): return _("#.*\n")
class ParserPEG(ParserPEGOrig): class ParserPEG(ParserPEGOrig):
def _from_peg(self, language_def): def _from_peg(self, language_def):
parser = ParserPython(peggrammar, comment, reduce_tree=False, parser = ParserPython(peggrammar, comment, reduce_tree=False,
debug=self.debug) debug=self.debug)
parser.root_rule_name = self.root_rule_name parser.root_rule_name = self.root_rule_name
parser.parse(language_def) parse_tree = parser.parse(language_def)
# Initialise cross-ref counter
parser._crossref_cnt = 0
return parser.getASG(sem_actions=sem_actions) # return parser.getASG(sem_actions=sem_actions)
return visit_parse_tree(parse_tree, PEGVisitor(self.root_rule_name,
self.ignore_case,
debug=self.debug))
This diff is collapsed.
...@@ -11,7 +11,8 @@ ...@@ -11,7 +11,8 @@
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
import pprint import pprint
import sys, os import os
import sys
from arpeggio import * from arpeggio import *
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
...@@ -38,26 +39,25 @@ def fieldvalue_part(): return _(r'((\\")|[^{}])+') ...@@ -38,26 +39,25 @@ def fieldvalue_part(): return _(r'((\\")|[^{}])+')
def fieldvalue_inner(): return "{", fieldvalue_braced_content, "}" def fieldvalue_inner(): return "{", fieldvalue_braced_content, "}"
# Semantic actions # Semantic actions visitor
class BibFileSem(SemanticAction): class BibtexVisitor(PTNodeVisitor):
"""
Just returns list of child nodes (bibentries). def visit_bibfile(self, node, children):
""" """
def first_pass(self, parser, node, children): Just returns list of child nodes (bibentries).
if parser.debug: """
if self.debug:
print("Processing Bibfile") print("Processing Bibfile")
# Return only dict nodes # Return only dict nodes
return [x for x in children if type(x) is dict] return [x for x in children if type(x) is dict]
def visit_bibentry(self, node, children):
class BibEntrySem(SemanticAction): """
""" Constructs a map where key is bibentry field name.
Constructs a map where key is bibentry field name. Key is returned under 'bibkey' key. Type is returned under 'bibtype'.
Key is returned under 'bibkey' key. Type is returned under 'bibtype'. """
""" if self.debug:
def first_pass(self, parser, node, children):
if parser.debug:
print(" Processing bibentry %s" % children[1]) print(" Processing bibentry %s" % children[1])
bib_entry_map = { bib_entry_map = {
'bibtype': children[0], 'bibtype': children[0],
...@@ -67,42 +67,32 @@ class BibEntrySem(SemanticAction): ...@@ -67,42 +67,32 @@ class BibEntrySem(SemanticAction):
bib_entry_map[field[0]] = field[1] bib_entry_map[field[0]] = field[1]
return bib_entry_map return bib_entry_map
def visit_field(self, node, children):
class FieldSem(SemanticAction): """
""" Constructs a tuple (fieldname, fieldvalue).
Constructs a tuple (fieldname, fieldvalue). """
""" if self.debug:
def first_pass(self, parser, node, children):
if parser.debug:
print(" Processing field %s" % children[0]) print(" Processing field %s" % children[0])
field = (children[0], children[1]) field = (children[0], children[1])
return field return field
def visit_fieldvalue(self, node, children):
class FieldValueSem(SemanticAction): """
""" Serbian Serbian letters form latex encoding to Unicode.
Serbian Serbian letters form latex encoding to Unicode. Remove braces. Remove newlines.
Remove braces. Remove newlines. """
"""
def first_pass(self, parser, node, children):
value = children[0] value = children[0]
value = value.replace(r"\'{c}", u"ć")\ value = value.replace(r"\'{c}", u"ć")\
.replace(r"\'{C}", u"Ć")\ .replace(r"\'{C}", u"Ć")\
.replace(r"\v{c}", u"č")\ .replace(r"\v{c}", u"č")\
.replace(r"\v{C}", u"Č")\ .replace(r"\v{C}", u"Č")\
.replace(r"\v{z}", u"ž")\ .replace(r"\v{z}", u"ž")\
.replace(r"\v{Z}", u"Ž")\ .replace(r"\v{Z}", u"Ž")\
.replace(r"\v{s}", u"š")\ .replace(r"\v{s}", u"š")\
.replace(r"\v{S}", u"Š") .replace(r"\v{S}", u"Š")
value = re.sub("[\n{}]", '', value) value = re.sub("[\n{}]", '', value)
return value return value
# Connecting rules with semantic actions
bibfile.sem = BibFileSem()
bibentry.sem = BibEntrySem()
field.sem = FieldSem()
fieldvalue_braces.sem = FieldValueSem()
fieldvalue_quotes.sem = FieldValueSem()
def main(debug=False, file_name=None): def main(debug=False, file_name=None):
# First we will make a parser - an instance of the bib parser model. # First we will make a parser - an instance of the bib parser model.
...@@ -111,7 +101,8 @@ def main(debug=False, file_name=None): ...@@ -111,7 +101,8 @@ def main(debug=False, file_name=None):
parser = ParserPython(bibfile, reduce_tree=True, debug=debug) parser = ParserPython(bibfile, reduce_tree=True, debug=debug)
if not file_name: if not file_name:
file_name = os.path.join(os.path.dirname(__file__), 'bibtex_example.bib') file_name = os.path.join(os.path.dirname(__file__),
'bibtex_example.bib')
with codecs.open(file_name, "r", encoding="utf-8") as bibtexfile: with codecs.open(file_name, "r", encoding="utf-8") as bibtexfile:
bibtexfile_content = bibtexfile.read() bibtexfile_content = bibtexfile.read()
...@@ -120,9 +111,9 @@ def main(debug=False, file_name=None): ...@@ -120,9 +111,9 @@ def main(debug=False, file_name=None):
# textual input # textual input
parse_tree = parser.parse(bibtexfile_content) parse_tree = parser.parse(bibtexfile_content)
# getASG will start semantic analysis. # visit_parse_tree will start semantic analysis.
# In this case semantic analysis will list of bibentry maps. # In this case semantic analysis will return list of bibentry maps.
ast = parser.getASG() ast = visit_parse_tree(parse_tree, BibtexVisitor(debug=debug))
return ast return ast
......
...@@ -52,12 +52,11 @@ def main(debug=False): ...@@ -52,12 +52,11 @@ def main(debug=False):
result = parser.getASG(sem_actions) result = parser.getASG(sem_actions)
if debug: # getASG will start semantic analysis.
# getASG will start semantic analysis. # In this case semantic analysis will evaluate expression and
# In this case semantic analysis will evaluate expression and # returned value will be evaluated result of the input_expr expression.
# returned value will be evaluated result of the input_expr expression. # Semantic actions are supplied to the getASG function.
# Semantic actions are supplied to the getASG function. print("{} = {}".format(input_expr, result))
print("{} = {}".format(input_expr, result))
if __name__ == "__main__": if __name__ == "__main__":
# In debug mode dot (graphviz) files for parser model # In debug mode dot (graphviz) files for parser model
......
#######################################################################
# Name: calc_peg.py
# Purpose: Simple expression evaluator example using PEG language and
# visitor pattern for semantic analysis.
# Author: Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2009-2014 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#
# This example is functionally equivalent to calc_peg.py.
# It is a demonstration of visitor pattern approach for semantic analysis.
# Parser model as well as parse tree exported to dot files should be
# the same as parser model and parse tree generated in calc.py example.
#######################################################################
from __future__ import absolute_import, unicode_literals, print_function
# ``unicode`` only exists on Python 2. On Python 3 the lookup raises
# NameError and ``str`` is used instead. Only NameError is caught so that
# unrelated errors (e.g. KeyboardInterrupt) are not silently swallowed.
try:
    text = unicode
except NameError:
    text = str
from arpeggio.cleanpeg import ParserPEG
from arpeggio import PTNodeVisitor, visit_parse_tree
# Grammar is defined using textual specification based on PEG language.
# Raw string literal: the grammar embeds regex escapes (\d, \.) which are
# invalid escape sequences in a normal string literal. The resulting text
# is unchanged.
calc_grammar = r"""
number = r'\d*\.\d*|\d+'
factor = ("+" / "-")?
(number / "(" expression ")")
term = factor (( "*" / "/") factor)*
expression = term (("+" / "-") term)*
calc = expression+ EOF
"""
class CalcVisitor(PTNodeVisitor):
    """
    Visitor that evaluates the calc grammar's parse tree to a number.
    """

    def visit_number(self, node, children):
        """
        Converts the matched number text to float.
        """
        if self.debug:
            print("Converting {}.".format(node.value))
        return float(node.value)

    def visit_factor(self, node, children):
        """
        Applies an optional leading sign to the factor's value.

        With a single child that child is returned as is; otherwise the
        last child is multiplied by -1 when the first child is '-', and
        by 1 in every other case.
        """
        if self.debug:
            print("Factor {}".format(children))
        value = children[-1]
        if len(children) == 1:
            return value
        if children[0] == '-':
            return -1 * value
        return 1 * value

    def visit_term(self, node, children):
        """
        Multiplies or divides factors, left to right.
        Factor nodes will be already evaluated.
        """
        if self.debug:
            print("Term {}".format(children))
        product = children[0]
        # children alternate: operand, operator, operand, operator, ...
        for operator, operand in zip(children[1::2], children[2::2]):
            if operator == "*":
                product *= operand
            else:
                product /= operand
        if self.debug:
            print("Term = {}".format(product))
        return product

    def visit_expression(self, node, children):
        """
        Adds or subtracts terms, left to right.
        Term nodes will be already evaluated.
        """
        if self.debug:
            print("Expression {}".format(children))
        # Skip a leading unary + or - operator if one is present.
        first_operand = 1 if text(children[0]) in "+-" else 0
        total = 0
        for idx in range(first_operand, len(children), 2):
            operand = children[idx]
            preceded_by_minus = idx > 0 and children[idx - 1] == "-"
            if preceded_by_minus:
                total -= operand
            else:
                total += operand
        if self.debug:
            print("Expression = {}".format(total))
        return total
def main(debug=False):
    """
    Parses a fixed arithmetic expression and prints its evaluated value.

    Args:
        debug(bool): Passed on to both the parser and the visitor.
    """
    # The parser model is written in PEG notation, hence ParserPEG.
    # "calc" is the name of the root rule (parsing expression).
    calc_parser = ParserPEG(calc_grammar, "calc", debug=debug)

    # The expression to evaluate.
    input_expr = "-(4-1)*5+(2+4.67)+5.89/(.2+7)"

    # Build the parse tree for the expression.
    parse_tree = calc_parser.parse(input_expr)

    # visit_parse_tree starts semantic analysis; here that means evaluating
    # the expression, so the returned value is the numeric result.
    visitor = CalcVisitor(debug=debug)
    result = visit_parse_tree(parse_tree, visitor)

    print("{} = {}".format(input_expr, result))
if __name__ == "__main__":
# In debug mode dot (graphviz) files for parser model
# and parse tree will be created for visualization.
# Checkout current folder for .dot files.
main(debug=False)
...@@ -15,24 +15,7 @@ from __future__ import unicode_literals ...@@ -15,24 +15,7 @@ from __future__ import unicode_literals
from arpeggio import * from arpeggio import *
from arpeggio.export import PMDOTExporter from arpeggio.export import PMDOTExporter
from arpeggio.peg import ParserPEG from arpeggio.peg import ParserPEG, PEGVisitor
# Semantic actions
from arpeggio.peg import SemGrammar, sem_rule, sem_sequence, sem_ordered_choice,\
sem_sufix, sem_prefix, sem_strmatch, sem_regex, sem_rule_crossref
sem_actions = {
"peggrammar": SemGrammar(),
"rule": sem_rule,
"ordered_choice": sem_ordered_choice,
"sequence": sem_sequence,
"prefix": sem_prefix,
"sufix": sem_sufix,
"expression": SemanticActionSingleChild(),
"regex": sem_regex,
"str_match": sem_strmatch,
"rule_crossref": sem_rule_crossref
}
# PEG defined using PEG itself. # PEG defined using PEG itself.
...@@ -76,11 +59,13 @@ def main(debug=False): ...@@ -76,11 +59,13 @@ def main(debug=False):
# Now we will use created parser to parse the same peg_grammar used for # Now we will use created parser to parse the same peg_grammar used for
# parser initialization. We can parse peg_grammar because it is specified # parser initialization. We can parse peg_grammar because it is specified
# using PEG itself. # using PEG itself.
parser.parse(peg_grammar) parse_tree = parser.parse(peg_grammar)
# ASG should be the same as parser.parser_model because semantic # ASG should be the same as parser.parser_model because semantic
# actions will create PEG parser (tree of ParsingExpressions). # actions will create PEG parser (tree of ParsingExpressions).
asg = parser.getASG(sem_actions) asg = visit_parse_tree(parse_tree, PEGVisitor(root_rule_name='peggrammar',
ignore_case=False,
debug=debug))
if debug: if debug:
# This graph should be the same as peg_peg_parser_model.dot because # This graph should be the same as peg_peg_parser_model.dot because
......
...@@ -23,68 +23,52 @@ from __future__ import print_function, unicode_literals ...@@ -23,68 +23,52 @@ from __future__ import print_function, unicode_literals
from arpeggio import * from arpeggio import *
# Grammar rules # Grammar rules
def robot(): return Kwd('begin'), ZeroOrMore(command), Kwd('end'), EOF def robot(): return 'begin', ZeroOrMore(command), 'end', EOF
def command(): return [up, down, left, right] def command(): return [UP, DOWN, LEFT, RIGHT]
def up(): return 'up' def UP(): return 'up'
def down(): return 'down' def DOWN(): return 'down'
def left(): return 'left' def LEFT(): return 'left'
def right(): return 'right' def RIGHT(): return 'right'
# Semantic actions # Semantic actions visitor
class Up(SemanticAction): class RobotVisitor(PTNodeVisitor):
def first_pass(self, parser, node, children):
if parser.debug: def visit_robot(self, node, children):
if self.debug:
print("Evaluating position")
position = [0, 0]
for move in children:
position[0] += move[0]
position[1] += move[1]
return position
def visit_command(self, node, children):
if self.debug:
print("Command")
return children[0]
def visit_UP(self, node, children):
if self.debug:
print("Going up") print("Going up")
return (0, 1) return (0, 1)
def visit_DOWN(self, node, children):
class Down(SemanticAction): if self.debug:
def first_pass(self, parser, node, children):
if parser.debug:
print("Going down") print("Going down")
return (0, -1) return (0, -1)
def visit_LEFT(self, node, children):
class Left(SemanticAction): if self.debug:
def first_pass(self, parser, node, children):
if parser.debug:
print("Going left") print("Going left")
return (-1, 0) return (-1, 0)
def visit_RIGHT(self, node, children):
class Right(SemanticAction): if self.debug:
def first_pass(self, parser, node, children):
if parser.debug:
print("Going right") print("Going right")
return (1, 0) return (1, 0)
class Command(SemanticAction):
def first_pass(self, parser, node, children):
if parser.debug:
print("Command")
return children[0]
class Robot(SemanticAction):
def first_pass(self, parser, node, children):
if parser.debug:
print("Evaluating position")
position = [0, 0]
for move in children:
position[0] += move[0]
position[1] += move[1]
return position
# Connecting rules with semantic actions
robot.sem = Robot()
command.sem = Command()
up.sem = Up()
down.sem = Down()
left.sem = Left()
right.sem = Right()
def main(debug=False): def main(debug=False):
# Program code # Program code
input_program = ''' input_program = '''
...@@ -105,10 +89,10 @@ def main(debug=False): ...@@ -105,10 +89,10 @@ def main(debug=False):
# We create a parse tree out of textual input # We create a parse tree out of textual input
parse_tree = parser.parse(input_program) parse_tree = parser.parse(input_program)
# getASG will start semantic analysis. # visit_parse_tree will start semantic analysis.
# In this case semantic analysis will evaluate expression and # In this case semantic analysis will evaluate expression and
# returned value will be the final position of the robot. # returned value will be the final position of the robot.
result = parser.getASG() result = visit_parse_tree(parse_tree, RobotVisitor(debug=debug))
if debug: if debug:
print("position = ", result) print("position = ", result)
......
...@@ -33,16 +33,8 @@ LEFT <- 'left'; ...@@ -33,16 +33,8 @@ LEFT <- 'left';
RIGHT <- 'right'; RIGHT <- 'right';
''' '''
# Semantic actions # Semantic actions visitor
from robot import Up, Down, Left, Right, Command, Robot from robot import RobotVisitor
semantic_actions = {
'robot': Robot(),
'command': Command(),
'UP': Up(),
'DOWN': Down(),
'LEFT': Left(),
'RIGHT': Right()
}
def main(debug=False): def main(debug=False):
...@@ -57,7 +49,6 @@ def main(debug=False): ...@@ -57,7 +49,6 @@ def main(debug=False):
end end
''' '''
# First we will make a parser - an instance of the robot parser model. # First we will make a parser - an instance of the robot parser model.
# Parser model is given in the form of PEG specification therefore we # Parser model is given in the form of PEG specification therefore we
# are using ParserPEG class. # are using ParserPEG class.
...@@ -66,10 +57,10 @@ def main(debug=False): ...@@ -66,10 +57,10 @@ def main(debug=False):
# We create a parse tree out of textual input # We create a parse tree out of textual input
parse_tree = parser.parse(input) parse_tree = parser.parse(input)
# getASG will start semantic analysis. # visit_parse_tree will start semantic analysis.
# In this case semantic analysis will evaluate expression and # In this case semantic analysis will evaluate expression and
# returned value will be the final position of the robot. # returned value will be the final position of the robot.
return parser.getASG(sem_actions=semantic_actions) return visit_parse_tree(parse_tree, RobotVisitor(debug=debug))
if __name__ == "__main__": if __name__ == "__main__":
# In debug mode dot (graphviz) files for parser model # In debug mode dot (graphviz) files for parser model
......
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_semantic_action_results
# Purpose: Tests semantic actions based on visitor
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
# Grammar
from arpeggio import ZeroOrMore, OneOrMore, ParserPython,\
PTNodeVisitor, visit_parse_tree, SemanticActionResults
from arpeggio.export import PTDOTExporter
from arpeggio import RegExMatch as _
def grammar():
    """Root rule: first, the literal "a", then second."""
    return first, "a", second


def first():
    """Either fourth or third, followed by zero or more third."""
    return [fourth, third], ZeroOrMore(third)


def second():
    """One or more third, followed by the literal "b"."""
    return OneOrMore(third), "b"


def third():
    """Ordered choice between third_str and fourth."""
    return [third_str, fourth]


def third_str():
    """The literal "3"."""
    return "3"


def fourth():
    """Regex match on one or more digits."""
    return _(r'\d+')
first_sar = None
third_sar = None
class TestVisitor(PTNodeVisitor):
    """
    Helper visitor that records the ``children`` argument passed for the
    'first' and 'third' rules in module-level globals for later inspection.
    """

    # The class name starts with "Test", so pytest would try to collect it
    # as a test class. It is a helper, not a test case: opt out explicitly.
    __test__ = False

    def visit_first(self, node, children):
        """Stores children of 'first' in first_sar; returns None."""
        global first_sar
        first_sar = children

    def visit_third(self, node, children):
        """Stores children of 'third' in third_sar and returns 1."""
        global third_sar
        third_sar = children
        return 1
def test_semantic_action_results():
    """
    Checks that visit methods receive SemanticActionResults and that child
    results can be reached by rule name.
    """
    # first_sar / third_sar are only read here, so no ``global`` is needed.
    source_text = "4 3 3 3 a 3 3 b"

    parse_tree = ParserPython(grammar, reduce_tree=False).parse(source_text)

    # Export the parse tree to a dot file.
    PTDOTExporter().exportFile(parse_tree,
                               'test_semantic_action_results_pt.dot')

    visitor = TestVisitor(defaults=True)
    visit_parse_tree(parse_tree, visitor)

    assert isinstance(first_sar, SemanticActionResults)
    assert len(first_sar.third) == 3
    assert third_sar.third_str[0] == '3'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment