Commit a57110c4 authored by Igor Dejanovic's avatar Igor Dejanovic

Merge branch 'visitor'

parents ade85d0f f349b0fc
......@@ -746,6 +746,41 @@ class ParseTreeNode(object):
def name(self):
return "%s [%s]" % (self.rule_name, self.position)
def visit(self, visitor):
    """
    Visitor pattern implementation.

    Children of a non-terminal are visited first (depth-first); every
    child result that is not None is collected under the child's rule
    name. The node itself is then dispatched to the visitor method named
    ``visit_<rule_name>`` if the visitor has one.

    Args:
        visitor(PTNodeVisitor): The visitor object.

    Returns:
        The result of ``visit_<rule_name>`` when the visitor defines it,
        otherwise the result of ``visitor.visit__default__`` when
        ``visitor.defaults`` is truthy, otherwise None.
    """
    if visitor.debug:
        print("Visiting ", self.name, " type:",
              type(self).__name__, "str:", text(self))

    collected = SemanticActionResults()
    if isinstance(self, NonTerminal):
        for child_node in self:
            child_result = child_node.visit(visitor)
            if child_result is None:
                # A None result suppresses that child node.
                continue
            collected.append_result(child_node.rule_name, child_result)

    method_name = "visit_%s" % self.rule_name
    if not hasattr(visitor, method_name):
        # No dedicated method: fall back to the default action, but only
        # if default actions are enabled.
        if visitor.defaults:
            return visitor.visit__default__(self, collected)
        return None

    outcome = getattr(visitor, method_name)(self, collected)

    # A matching 'second_' method means the result must be kept for
    # post-processing once the whole tree has been visited.
    if hasattr(visitor, "second_%s" % self.rule_name):
        visitor.for_second_pass.append((self.rule_name, outcome))

    return outcome
class Terminal(ParseTreeNode):
"""
......@@ -869,6 +904,88 @@ class NonTerminal(ParseTreeNode, list):
# ----------------------------------------------------
# Semantic Actions
#
class PTNodeVisitor(object):
    """
    Base class for all parse tree visitors.

    Subclasses add ``visit_<rule_name>(node, children)`` methods. Nodes
    without a dedicated method are handled by ``visit__default__`` when
    default actions are enabled.
    """

    def __init__(self, defaults=True, debug=False):
        """
        Args:
            defaults(bool): If the default visit method should be applied in
                case no method is defined.
            debug(bool): Print debug messages?
        """
        self.defaults = defaults
        self.debug = debug
        # (rule_name, result) pairs queued for the second pass.
        self.for_second_pass = []

    def visit__default__(self, node, children):
        """
        Called if no visit method is defined for the node.

        Args:
            node(ParseTreeNode): The node being visited.
            children(processed children ParseTreeNode-s): Results of the
                already visited child nodes.

        Returns:
            For a Terminal: its text, or None if its suppress flag is set.
            For any other node: the only child if there is exactly one;
            otherwise the single non-string child (None if there is no
            non-string child); if several non-string children exist, the
            text of the node.
        """
        if isinstance(node, Terminal):
            # A suppressed terminal is dropped by returning None.
            if node.suppress:
                return None
            return text(node)

        # Special case. If only one child exists return it.
        if len(children) == 1:
            return children[0]

        # Look for a single non-string child. Returning it by default
        # supports e.g. bracket removals.
        sole_non_str = None
        for item in children:
            if isstr(item):
                continue
            if sole_non_str is not None:
                # More than one non-string object: fall back to the
                # textual form of the non-terminal.
                if self.debug:
                    print("*** Warning: Multiple non-string objects found in "
                          "default visit. Converting non-terminal to a string.")
                return text(node)
            sole_non_str = item

        return sole_non_str
def visit_parse_tree(parse_tree, visitor):
    """
    Applies visitor to parse_tree and runs the second pass
    afterwards.

    Args:
        parse_tree(ParseTreeNode): Root of the parse tree to visit.
        visitor(PTNodeVisitor): The visitor to apply.

    Returns:
        Whatever visiting the root node returned.

    Raises:
        Exception: If parse_tree is falsy (e.g. None or an empty
            non-terminal).
    """
    if not parse_tree:
        raise Exception(
            "Parse tree is empty. You did call parse(), didn't you?")

    try:
        if visitor.debug:
            print("ASG: First pass")

        # Visit tree.
        result = parse_tree.visit(visitor)

        # Second pass
        if visitor.debug:
            print("ASG: Second pass")
        for sa_name, asg_node in visitor.for_second_pass:
            getattr(visitor, "second_%s" % sa_name)(asg_node)
    finally:
        # Forget the queued entries so that a visitor instance reused for
        # another tree does not replay second-pass actions on stale results.
        del visitor.for_second_pass[:]

    return result
class SemanticAction(object):
"""
......
......@@ -13,7 +13,7 @@ from __future__ import print_function, unicode_literals
from arpeggio import *
from arpeggio import RegExMatch as _
from .peg import sem_actions
from .peg import PEGVisitor
from .peg import ParserPEG as ParserPEGOrig
__all__ = ['ParserPEG']
......@@ -49,13 +49,14 @@ def comment(): return _("#.*\n")
class ParserPEG(ParserPEGOrig):
def _from_peg(self, language_def):
parser = ParserPython(peggrammar, comment, reduce_tree=False,
debug=self.debug)
parser.root_rule_name = self.root_rule_name
parser.parse(language_def)
# Initialise cross-ref counter
parser._crossref_cnt = 0
parse_tree = parser.parse(language_def)
return parser.getASG(sem_actions=sem_actions)
# return parser.getASG(sem_actions=sem_actions)
return visit_parse_tree(parse_tree, PEGVisitor(self.root_rule_name,
self.ignore_case,
debug=self.debug))
This diff is collapsed.
......@@ -11,7 +11,8 @@
from __future__ import print_function, unicode_literals
import pprint
import sys, os
import os
import sys
from arpeggio import *
from arpeggio import RegExMatch as _
......@@ -38,26 +39,25 @@ def fieldvalue_part(): return _(r'((\\")|[^{}])+')
def fieldvalue_inner(): return "{", fieldvalue_braced_content, "}"
# Semantic actions
class BibFileSem(SemanticAction):
"""
Just returns list of child nodes (bibentries).
"""
def first_pass(self, parser, node, children):
if parser.debug:
# Semantic actions visitor
class BibtexVisitor(PTNodeVisitor):
def visit_bibfile(self, node, children):
    """
    Just returns list of child nodes (bibentries).

    Args:
        node: The bibfile parse tree node (unused).
        children: Results of the already visited child nodes.

    Returns:
        list: The children that are dicts, in their original order.
    """
    if self.debug:
        print("Processing Bibfile")

    # Return only dict nodes. isinstance (rather than an exact type
    # comparison) also accepts dict subclasses.
    return [x for x in children if isinstance(x, dict)]
class BibEntrySem(SemanticAction):
"""
Constructs a map where key is bibentry field name.
Key is returned under 'bibkey' key. Type is returned under 'bibtype'.
"""
def first_pass(self, parser, node, children):
if parser.debug:
def visit_bibentry(self, node, children):
"""
Constructs a map where key is bibentry field name.
Key is returned under 'bibkey' key. Type is returned under 'bibtype'.
"""
if self.debug:
print(" Processing bibentry %s" % children[1])
bib_entry_map = {
'bibtype': children[0],
......@@ -67,42 +67,32 @@ class BibEntrySem(SemanticAction):
bib_entry_map[field[0]] = field[1]
return bib_entry_map
class FieldSem(SemanticAction):
"""
Constructs a tuple (fieldname, fieldvalue).
"""
def first_pass(self, parser, node, children):
if parser.debug:
def visit_field(self, node, children):
    """
    Pairs a field name with its value.

    Returns:
        tuple: (fieldname, fieldvalue) taken from the first two children.
    """
    field_name = children[0]
    if self.debug:
        print(" Processing field %s" % field_name)
    return (field_name, children[1])
class FieldValueSem(SemanticAction):
"""
Serbian Serbian letters form latex encoding to Unicode.
Remove braces. Remove newlines.
"""
def first_pass(self, parser, node, children):
def visit_fieldvalue(self, node, children):
    """
    Converts Serbian letters from LaTeX encoding to Unicode.
    Removes braces. Removes newlines.

    Args:
        node: The field value parse tree node (unused).
        children: Results of the visited child nodes; only the first
            one is used.

    Returns:
        The converted field value.
    """
    value = children[0]

    # The LaTeX sequences contain braces, so they must be replaced
    # before the braces are stripped below.
    for latex, letter in (
            (r"\'{c}", u"ć"),
            (r"\'{C}", u"Ć"),
            (r"\v{c}", u"č"),
            (r"\v{C}", u"Č"),
            (r"\v{z}", u"ž"),
            (r"\v{Z}", u"Ž"),
            (r"\v{s}", u"š"),
            (r"\v{S}", u"Š"),
    ):
        value = value.replace(latex, letter)

    value = re.sub("[\n{}]", '', value)
    return value
# Connecting rules with semantic actions
bibfile.sem = BibFileSem()
bibentry.sem = BibEntrySem()
field.sem = FieldSem()
fieldvalue_braces.sem = FieldValueSem()
fieldvalue_quotes.sem = FieldValueSem()
def main(debug=False, file_name=None):
# First we will make a parser - an instance of the bib parser model.
......@@ -111,7 +101,8 @@ def main(debug=False, file_name=None):
parser = ParserPython(bibfile, reduce_tree=True, debug=debug)
if not file_name:
file_name = os.path.join(os.path.dirname(__file__), 'bibtex_example.bib')
file_name = os.path.join(os.path.dirname(__file__),
'bibtex_example.bib')
with codecs.open(file_name, "r", encoding="utf-8") as bibtexfile:
bibtexfile_content = bibtexfile.read()
......@@ -120,9 +111,9 @@ def main(debug=False, file_name=None):
# textual input
parse_tree = parser.parse(bibtexfile_content)
# getASG will start semantic analysis.
# In this case semantic analysis will list of bibentry maps.
ast = parser.getASG()
# visit_parse_tree will start semantic analysis.
# In this case semantic analysis will return list of bibentry maps.
ast = visit_parse_tree(parse_tree, BibtexVisitor(debug=debug))
return ast
......
......@@ -52,12 +52,11 @@ def main(debug=False):
result = parser.getASG(sem_actions)
if debug:
# getASG will start semantic analysis.
# In this case semantic analysis will evaluate expression and
# returned value will be evaluated result of the input_expr expression.
# Semantic actions are supplied to the getASG function.
print("{} = {}".format(input_expr, result))
# getASG will start semantic analysis.
# In this case semantic analysis will evaluate expression and
# returned value will be evaluated result of the input_expr expression.
# Semantic actions are supplied to the getASG function.
print("{} = {}".format(input_expr, result))
if __name__ == "__main__":
# In debug mode dot (graphviz) files for parser model
......
#######################################################################
# Name: calc_peg.py
# Purpose: Simple expression evaluator example using PEG language and
# visitor pattern for semantic analysis.
# Author: Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2009-2014 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#
# This example is functionally equivalent to calc_peg.py.
# It is a demonstration of visitor pattern approach for semantic analysis.
# Parser model as well as parse tree exported to dot files should be
# the same as parser model and parse tree generated in calc.py example.
#######################################################################
from __future__ import absolute_import, unicode_literals, print_function
try:
    # Python 2: use the unicode type for text.
    text = unicode
except NameError:
    # Python 3: the name `unicode` does not exist; str is the text type.
    # Only NameError is expected here, so nothing else is swallowed.
    text = str
from arpeggio.cleanpeg import ParserPEG
from arpeggio import PTNodeVisitor, visit_parse_tree
# Grammar is defined using textual specification based on PEG language.
# Raw string literal: the regex escapes in the `number` rule (\d, \.) are
# not valid Python string escapes and must reach the PEG parser verbatim.
calc_grammar = r"""
number = r'\d*\.\d*|\d+'
factor = ("+" / "-")?
(number / "(" expression ")")
term = factor (( "*" / "/") factor)*
expression = term (("+" / "-") term)*
calc = expression+ EOF
"""
class CalcVisitor(PTNodeVisitor):
    """
    Visitor that evaluates the calc parse tree bottom-up.
    """

    def visit_number(self, node, children):
        """
        Turns the matched number into a float.
        """
        raw = node.value
        if self.debug:
            print("Converting {}.".format(raw))
        return float(raw)

    def visit_factor(self, node, children):
        """
        Returns the only child as is. With more than one child, the last
        child is multiplied by -1 if the first child is '-', else by 1.
        """
        if self.debug:
            print("Factor {}".format(children))
        if len(children) != 1:
            sign = 1
            if children[0] == '-':
                sign = -1
            return sign * children[-1]
        return children[0]

    def visit_term(self, node, children):
        """
        Multiplies or divides factors, left to right.
        Children alternate between operand and operator; operands are
        used as already evaluated values.
        """
        if self.debug:
            print("Term {}".format(children))
        value = children[0]
        # Operators sit at odd positions, each followed by its operand.
        for op_pos in range(1, len(children) - 1, 2):
            operand = children[op_pos + 1]
            if children[op_pos] == "*":
                value *= operand
            else:
                value /= operand
        if self.debug:
            print("Term = {}".format(value))
        return value

    def visit_expression(self, node, children):
        """
        Adds or subtracts terms, left to right.
        Children alternate between operand and operator; operands are
        used as already evaluated values.
        """
        if self.debug:
            print("Expression {}".format(children))

        # Skip a leading unary + or - operator, if there is one.
        first_operand = 1 if text(children[0]) in "+-" else 0

        total = 0
        for pos in range(first_operand, len(children), 2):
            subtract = pos > 0 and children[pos - 1] == "-"
            if subtract:
                total -= children[pos]
            else:
                total += children[pos]

        if self.debug:
            print("Expression = {}".format(total))
        return total
def main(debug=False):
    """
    Parses a sample arithmetic expression, evaluates it with CalcVisitor
    and prints the result.

    Args:
        debug(bool): Passed on to both the parser and the visitor.
    """
    # The expression to evaluate.
    input_expr = "-(4-1)*5+(2+4.67)+5.89/(.2+7)"

    # The parser model is given in PEG notation, hence the ParserPEG class.
    # The root rule name (parsing expression) is "calc".
    calc_parser = ParserPEG(calc_grammar, "calc", debug=debug)

    # Build the parse tree from the input expression.
    tree = calc_parser.parse(input_expr)

    # visit_parse_tree starts semantic analysis. Here it evaluates the
    # expression, so the returned value is the result of input_expr.
    result = visit_parse_tree(tree, CalcVisitor(debug=debug))

    print("{} = {}".format(input_expr, result))


if __name__ == "__main__":
    # In debug mode dot (graphviz) files for the parser model and the
    # parse tree are created for visualization.
    # Check the current folder for .dot files.
    main(debug=False)
......@@ -15,24 +15,7 @@ from __future__ import unicode_literals
from arpeggio import *
from arpeggio.export import PMDOTExporter
from arpeggio.peg import ParserPEG
# Semantic actions
from arpeggio.peg import SemGrammar, sem_rule, sem_sequence, sem_ordered_choice,\
sem_sufix, sem_prefix, sem_strmatch, sem_regex, sem_rule_crossref
sem_actions = {
"peggrammar": SemGrammar(),
"rule": sem_rule,
"ordered_choice": sem_ordered_choice,
"sequence": sem_sequence,
"prefix": sem_prefix,
"sufix": sem_sufix,
"expression": SemanticActionSingleChild(),
"regex": sem_regex,
"str_match": sem_strmatch,
"rule_crossref": sem_rule_crossref
}
from arpeggio.peg import ParserPEG, PEGVisitor
# PEG defined using PEG itself.
......@@ -76,11 +59,13 @@ def main(debug=False):
# Now we will use created parser to parse the same peg_grammar used for
# parser initialization. We can parse peg_grammar because it is specified
# using PEG itself.
parser.parse(peg_grammar)
parse_tree = parser.parse(peg_grammar)
# ASG should be the same as parser.parser_model because semantic
# actions will create PEG parser (tree of ParsingExpressions).
asg = parser.getASG(sem_actions)
asg = visit_parse_tree(parse_tree, PEGVisitor(root_rule_name='peggrammar',
ignore_case=False,
debug=debug))
if debug:
# This graph should be the same as peg_peg_parser_model.dot because
......
......@@ -23,68 +23,52 @@ from __future__ import print_function, unicode_literals
from arpeggio import *
# Grammar rules
def robot(): return Kwd('begin'), ZeroOrMore(command), Kwd('end'), EOF
def command(): return [up, down, left, right]
def up(): return 'up'
def down(): return 'down'
def left(): return 'left'
def right(): return 'right'
# Semantic actions
class Up(SemanticAction):
def first_pass(self, parser, node, children):
if parser.debug:
def robot(): return 'begin', ZeroOrMore(command), 'end', EOF
def command(): return [UP, DOWN, LEFT, RIGHT]
def UP(): return 'up'
def DOWN(): return 'down'
def LEFT(): return 'left'
def RIGHT(): return 'right'
# Semantic actions visitor
class RobotVisitor(PTNodeVisitor):
def visit_robot(self, node, children):
    """
    Adds up all moves, starting from [0, 0].

    Returns:
        list: The final [x, y] position.
    """
    if self.debug:
        print("Evaluating position")
    x = y = 0
    for step in children:
        x += step[0]
        y += step[1]
    return [x, y]
def visit_command(self, node, children):
    """
    Passes through the result of the first child.
    """
    if self.debug:
        print("Command")
    selected = children[0]
    return selected
def visit_UP(self, node, children):
    """
    Returns the move for one step up: (0, 1).
    """
    step = (0, 1)
    if self.debug:
        print("Going up")
    return step
class Down(SemanticAction):
def first_pass(self, parser, node, children):
if parser.debug:
def visit_DOWN(self, node, children):
    """
    Returns the move for one step down: (0, -1).
    """
    step = (0, -1)
    if self.debug:
        print("Going down")
    return step
class Left(SemanticAction):
def first_pass(self, parser, node, children):
if parser.debug:
def visit_LEFT(self, node, children):
    """
    Returns the move for one step left: (-1, 0).
    """
    step = (-1, 0)
    if self.debug:
        print("Going left")
    return step
class Right(SemanticAction):
def first_pass(self, parser, node, children):
if parser.debug:
def visit_RIGHT(self, node, children):
    """
    Returns the move for one step right: (1, 0).
    """
    step = (1, 0)
    if self.debug:
        print("Going right")
    return step
class Command(SemanticAction):
def first_pass(self, parser, node, children):
if parser.debug:
print("Command")
return children[0]
class Robot(SemanticAction):
def first_pass(self, parser, node, children):
if parser.debug:
print("Evaluating position")
position = [0, 0]
for move in children:
position[0] += move[0]
position[1] += move[1]
return position
# Connecting rules with semantic actions
robot.sem = Robot()
command.sem = Command()
up.sem = Up()
down.sem = Down()
left.sem = Left()
right.sem = Right()
def main(debug=False):
# Program code
input_program = '''
......@@ -105,10 +89,10 @@ def main(debug=False):
# We create a parse tree out of textual input
parse_tree = parser.parse(input_program)
# getASG will start semantic analysis.
# visit_parse_tree will start semantic analysis.
# In this case semantic analysis will evaluate expression and
# returned value will be the final position of the robot.
result = parser.getASG()
result = visit_parse_tree(parse_tree, RobotVisitor(debug=debug))
if debug:
print("position = ", result)
......
......@@ -33,16 +33,8 @@ LEFT <- 'left';
RIGHT <- 'right';
'''
# Semantic actions
from robot import Up, Down, Left, Right, Command, Robot
semantic_actions = {
'robot': Robot(),
'command': Command(),
'UP': Up(),
'DOWN': Down(),
'LEFT': Left(),
'RIGHT': Right()
}
# Semantic actions visitor
from robot import RobotVisitor
def main(debug=False):
......@@ -57,7 +49,6 @@ def main(debug=False):
end
'''
# First we will make a parser - an instance of the robot parser model.
# Parser model is given in the form of PEG specification therefore we
# are using ParserPEG class.
......@@ -66,10 +57,10 @@ def main(debug=False):
# We create a parse tree out of textual input
parse_tree = parser.parse(input)
# getASG will start semantic analysis.
# visit_parse_tree will start semantic analysis.
# In this case semantic analysis will evaluate expression and
# returned value will be the final position of the robot.
return parser.getASG(sem_actions=semantic_actions)
return visit_parse_tree(parse_tree, RobotVisitor(debug=debug))
if __name__ == "__main__":
# In debug mode dot (graphviz) files for parser model
......
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_semantic_action_results
# Purpose: Tests semantic actions based on visitor
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
# Grammar
from arpeggio import ZeroOrMore, OneOrMore, ParserPython,\
PTNodeVisitor, visit_parse_tree, SemanticActionResults
from arpeggio.export import PTDOTExporter
from arpeggio import RegExMatch as _
def grammar():
    """Root rule: `first`, the literal "a", then `second`."""
    return (first, "a", second)


def first():
    """Either `fourth` or `third`, followed by zero or more `third`."""
    head = [fourth, third]
    return head, ZeroOrMore(third)


def second():
    """One or more `third`, followed by the literal "b"."""
    return (OneOrMore(third), "b")


def third():
    """Ordered choice between `third_str` and `fourth`."""
    return [third_str, fourth]


def third_str():
    """The literal "3"."""
    return "3"


def fourth():
    """A run of one or more digits."""
    return _(r'\d+')
first_sar = None
third_sar = None
class TestVisitor(PTNodeVisitor):
    """
    Visitor that records the children passed to the `first` and `third`
    visit methods in module-level globals so the test can inspect them.
    """

    # This is a helper, not a test case. The "Test" name prefix would
    # otherwise make pytest try to collect the class.
    __test__ = False

    def visit_first(self, node, children):
        """
        Stores the children of `first` in the global `first_sar`.
        Implicitly returns None.
        """
        global first_sar
        first_sar = children

    def visit_third(self, node, children):
        """
        Stores the children of `third` in the global `third_sar`.

        Returns:
            int: Always 1.
        """
        global third_sar
        third_sar = children
        return 1
def test_semantic_action_results():
    """
    Checks that visit methods receive their children as
    SemanticActionResults with per-rule attribute access.
    """
    source_text = "4 3 3 3 a 3 3 b"

    parser = ParserPython(grammar, reduce_tree=False)
    parse_tree = parser.parse(source_text)

    # Export the parse tree to a dot file for visual inspection.
    exporter = PTDOTExporter()
    exporter.exportFile(parse_tree, 'test_semantic_action_results_pt.dot')

    # Visiting fills the module-level first_sar / third_sar globals.
    visit_parse_tree(parse_tree, TestVisitor(defaults=True))

    assert isinstance(first_sar, SemanticActionResults)
    # Results are grouped by rule name: `first` saw three `third` results.
    assert len(first_sar.third) == 3
    assert third_sar.third_str[0] == '3'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment