Commit 71b2e4c6 authored by Igor Dejanovic's avatar Igor Dejanovic

cleanpeg example - Issue 11

parent c7424294
# -*- coding: utf-8 -*-
#######################################################################
# Name: peg.py
# Purpose: Implementing PEG language
# Author: Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2009 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import print_function, unicode_literals
import sys
if sys.version < '3':
text = unicode
else:
text = str
import copy
from arpeggio import *
from arpeggio import RegExMatch as _
#from arpeggio.export import PMDOTExporter, PTDOTExporter
__all__ = ['ParserPEG']
# PEG syntax rules
def peggrammar(): return OneOrMore(rule), EOF
def rule(): return rule_name, ASSIGNMENT, ordered_choice
def ordered_choice(): return sequence, ZeroOrMore(SLASH, sequence)
def sequence(): return OneOrMore(prefix)
def prefix(): return Optional([AND,NOT]), sufix
def sufix(): return expression, Optional([QUESTION, STAR, PLUS])
def expression(): return [regex, rule_crossref,
(OPEN, ordered_choice, CLOSE),
str_match], Not(ASSIGNMENT)
# PEG Lexical rules
def ASSIGNMENT(): return "="
def SLASH(): return "/"
def STAR(): return "*"
def QUESTION(): return "?"
def PLUS(): return "+"
def AND(): return "&"
def NOT(): return "!"
def OPEN(): return "("
def CLOSE(): return ")"
def regex(): return "r'", _(r"(\\\'|[^\'])*"),"'"
def rule_name(): return _(r"[a-zA-Z_]([a-zA-Z_]|[0-9])*")
def rule_crossref(): return rule_name
#def literal(): return [_(r"\'(\\\'|[^\'])*\'"),_(r'"[^"]*"')]
def str_match(): return _(r'(\'(\\\'|[^\'])*\')|("[^"]*")')
def comment(): return "//", _(".*\n")
# ------------------------------------------------------------------
# PEG Semantic Actions
class SemGrammar(SemanticAction):
def first_pass(self, parser, node, children):
return parser.peg_rules[parser.root_rule_name]
def _resolve(self, parser, node):
def get_rule_by_name(rule_name):
if rule_name in parser.peg_rules:
return parser.peg_rules[rule_name]
else:
raise SemanticError("Rule \"{}\" does not exists."
.format(rule_name))
for i, n in enumerate(node.nodes):
if isinstance(n, CrossRef):
rule_name = n.rule_name
if parser.debug:
print("Resolving crossref {}".format(rule_name))
resolved_rule = get_rule_by_name(rule_name)
while type(resolved_rule) is CrossRef:
target_rule = resolved_rule.rule_name
resolved_rule = get_rule_by_name(target_rule)
# If resolved rule hasn't got the same name it
# should be cloned and preserved in the peg_rules cache
if resolved_rule.rule_name != rule_name:
resolved_rule = copy.copy(resolved_rule)
resolved_rule.rule_name = rule_name
parser.peg_rules[rule_name] = resolved_rule
if parser.debug:
print("Resolving: cloned to {} = > {}"\
.format(resolved_rule.rule_name, resolved_rule.name))
node.nodes[i] = resolved_rule
else:
resolved_rule = n
if not resolved_rule in self.resolved:
self.resolved.add(resolved_rule)
self._resolve(parser, resolved_rule)
def second_pass(self, parser, node):
'''
Resolving cross-references in second pass.
'''
if parser.debug:
print("Second pass:", type(node), text(node))
self.resolved = set()
self._resolve(parser, node)
return node
peggrammar.sem = SemGrammar()
def sem_rule(parser, node, children):
rule_name = children[0]
if len(children) > 2:
retval = Sequence(nodes=children[1:])
else:
retval = children[1]
# CrossRef already has rule_name set
# that attrib is a target rule name
if type(retval) is not CrossRef:
retval.rule_name = rule_name
retval.root = True
if not hasattr(parser, "peg_rules"):
parser.peg_rules = {} # Used for linking phase
parser.peg_rules["EOF"] = EndOfFile()
# Keep a map of parser rules for cross reference
# resolving.
parser.peg_rules[rule_name] = retval
return retval
rule.sem = sem_rule
def sem_sequence(parser, node, children):
if len(children) > 1:
return Sequence(nodes=children[:])
else:
# If only one child rule exists reduce.
return children[0]
sequence.sem = sem_sequence
def sem_ordered_choice(parser, node, children):
if len(children) > 1:
retval = OrderedChoice(nodes=children[:])
else:
# If only one child rule exists reduce.
retval = children[0]
return retval
ordered_choice.sem = sem_ordered_choice
def sem_prefix(parser, node, children):
if len(children) == 2:
if children[0] == NOT():
retval = Not()
else:
retval = And()
if type(children[1]) is list:
retval.nodes = children[1]
else:
retval.nodes = [children[1]]
else:
# If there is no optional prefix reduce.
retval = children[0]
return retval
prefix.sem = sem_prefix
def sem_sufix(parser, node, children):
if len(children) == 2:
if children[1] == STAR():
retval = ZeroOrMore(children[0])
elif children[1] == QUESTION():
retval = Optional(children[0])
else:
retval = OneOrMore(children[0])
if type(children[0]) is list:
retval.nodes = children[0]
else:
retval.nodes = [children[0]]
else:
retval = children[0]
return retval
sufix.sem = sem_sufix
def sem_rule_crossref(parser, node, children):
return CrossRef(node.value)
rule_crossref.sem = sem_rule_crossref
def sem_regex(parser, node, children):
match = RegExMatch(children[0],
ignore_case=parser.ignore_case)
match.compile()
return match
regex.sem = sem_regex
def sem_strmatch(parser, node, children):
match_str = node.value[1:-1]
match_str = match_str.replace("\\'", "'")
match_str = match_str.replace("\\\\", "\\")
return StrMatch(match_str, ignore_case=parser.ignore_case)
str_match.sem = sem_strmatch
expression.sem = SemanticActionSingleChild()
class ParserPEG(Parser):
def __init__(self, language_def, root_rule_name, comment_rule_name=None,
*args, **kwargs):
super(ParserPEG, self).__init__(*args, **kwargs)
self.root_rule_name = root_rule_name
# PEG Abstract Syntax Graph
self.parser_model = self._from_peg(language_def)
# In debug mode export parser model to dot for
# visualization
if self.debug:
from arpeggio.export import PMDOTExporter
root_rule = self.parser_model.rule_name
PMDOTExporter().exportFile(self.parser_model,
"{}_peg_parser_model.dot".format(root_rule))
# Comments should be optional and there can be more of them
if self.comments_model: # and not isinstance(self.comments_model, ZeroOrMore):
self.comments_model.root = True
self.comments_model.rule_name = comment_rule_name
def _parse(self):
return self.parser_model.parse(self)
def _from_peg(self, language_def):
parser = ParserPython(peggrammar, comment, reduce_tree=False, debug=self.debug)
parser.root_rule_name = self.root_rule_name
parser.parse(language_def)
return parser.getASG()
......@@ -14,19 +14,19 @@
#######################################################################
from __future__ import absolute_import, unicode_literals, print_function
from arpeggio.peg import ParserPEG
from arpeggio.cleanpeg import ParserPEG
# Semantic actions
from calc import to_floatSA, factorSA, termSA, exprSA
# Grammar is defined using textual specification based on PEG language.
calc_grammar = """
number <- r'\d*\.\d*|\d+';
factor <- ("+" / "-")?
(number / "(" expression ")");
term <- factor (( "*" / "/") factor)*;
expression <- term (("+" / "-") term)*;
calc <- expression+ EOF;
number = r'\d*\.\d*|\d+'
factor = ("+" / "-")?
(number / "(" expression ")")
term = factor (( "*" / "/") factor)*
expression = term (("+" / "-") term)*
calc = expression+ EOF
"""
# Rules are mapped to semantic actions
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment