Commit 0a990e05 authored by Igor Dejanovic's avatar Igor Dejanovic

Fixing PEG language reference resolving and simplifying cleanpeg module.

cleanpeg module now uses semantic action defined in peg module.
parent effa6a1e
......@@ -1266,8 +1266,8 @@ class CrossRef(object):
'''
Used for rule reference resolving.
'''
def __init__(self, rule_name, position=-1):
self.rule_name = rule_name
def __init__(self, target_rule_name, position=-1):
self.target_rule_name = target_rule_name
self.position = position
......@@ -1322,7 +1322,7 @@ class ParserPython(Parser):
self.__cross_refs += 1
if self.debug:
print("CrossRef usage: {}"
.format(c_rule.rule_name))
.format(c_rule.target_rule_name))
return c_rule
# Semantic action for the rule
......@@ -1398,7 +1398,7 @@ class ParserPython(Parser):
for i, node in enumerate(e.nodes):
if isinstance(node, CrossRef):
self.__cross_refs -= 1
e.nodes[i] = __rule_cache[node.rule_name]
e.nodes[i] = __rule_cache[node.target_rule_name]
parser_model = inner_from_python(expression)
resolve()
......
......@@ -3,23 +3,18 @@
# Name: cleanpeg.py
# Purpose: This module is a variation of the original peg.py.
# The syntax is slightly changed to be more readable and familiar to
# python users. It is based on the Yash's suggestion - issue #11
# python users. It is based on the Yash's suggestion - issue 11
# Author: Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import print_function, unicode_literals
import sys
if sys.version < '3':
text = unicode
else:
text = str
import copy
from arpeggio import *
from arpeggio import RegExMatch as _
#from arpeggio.export import PMDOTExporter, PTDOTExporter
from .peg import sem_actions
from .peg import ParserPEG as ParserPEGOrig
__all__ = ['ParserPEG']
......@@ -52,187 +47,15 @@ def rule_crossref(): return rule_name
def str_match(): return _(r'(\'(\\\'|[^\'])*\')|("[^"]*")')
def comment(): return _("#.*\n")
# ------------------------------------------------------------------
# PEG Semantic Actions
class SemGrammar(SemanticAction):
def first_pass(self, parser, node, children):
return parser.peg_rules[parser.root_rule_name]
def _resolve(self, parser, node):
def get_rule_by_name(rule_name):
if rule_name in parser.peg_rules:
return parser.peg_rules[rule_name]
else:
raise SemanticError("Rule \"{}\" does not exists."
.format(rule_name))
for i, n in enumerate(node.nodes):
if isinstance(n, CrossRef):
rule_name = n.rule_name
if parser.debug:
print("Resolving crossref {}".format(rule_name))
resolved_rule = get_rule_by_name(rule_name)
while type(resolved_rule) is CrossRef:
target_rule = resolved_rule.rule_name
resolved_rule = get_rule_by_name(target_rule)
# If resolved rule hasn't got the same name it
# should be cloned and preserved in the peg_rules cache
if resolved_rule.rule_name != rule_name:
resolved_rule = copy.copy(resolved_rule)
resolved_rule.rule_name = rule_name
parser.peg_rules[rule_name] = resolved_rule
if parser.debug:
print("Resolving: cloned to {} = > {}"\
.format(resolved_rule.rule_name, resolved_rule.name))
node.nodes[i] = resolved_rule
else:
resolved_rule = n
if not resolved_rule in self.resolved:
self.resolved.add(resolved_rule)
self._resolve(parser, resolved_rule)
def second_pass(self, parser, node):
'''
Resolving cross-references in second pass.
'''
if parser.debug:
print("Second pass:", type(node), text(node))
self.resolved = set()
self._resolve(parser, node)
return node
peggrammar.sem = SemGrammar()
def sem_rule(parser, node, children):
rule_name = children[0]
if len(children) > 2:
retval = Sequence(nodes=children[1:])
else:
retval = children[1]
# CrossRef already has rule_name set
# that attrib is a target rule name
if type(retval) is not CrossRef:
retval.rule_name = rule_name
retval.root = True
if not hasattr(parser, "peg_rules"):
parser.peg_rules = {} # Used for linking phase
parser.peg_rules["EOF"] = EndOfFile()
# Keep a map of parser rules for cross reference
# resolving.
parser.peg_rules[rule_name] = retval
return retval
rule.sem = sem_rule
def sem_sequence(parser, node, children):
if len(children) > 1:
return Sequence(nodes=children[:])
else:
# If only one child rule exists reduce.
return children[0]
sequence.sem = sem_sequence
def sem_ordered_choice(parser, node, children):
if len(children) > 1:
retval = OrderedChoice(nodes=children[:])
else:
# If only one child rule exists reduce.
retval = children[0]
return retval
ordered_choice.sem = sem_ordered_choice
def sem_prefix(parser, node, children):
if len(children) == 2:
if children[0] == NOT():
retval = Not()
else:
retval = And()
if type(children[1]) is list:
retval.nodes = children[1]
else:
retval.nodes = [children[1]]
else:
# If there is no optional prefix reduce.
retval = children[0]
return retval
prefix.sem = sem_prefix
def sem_sufix(parser, node, children):
if len(children) == 2:
if children[1] == STAR():
retval = ZeroOrMore(children[0])
elif children[1] == QUESTION():
retval = Optional(children[0])
else:
retval = OneOrMore(children[0])
if type(children[0]) is list:
retval.nodes = children[0]
else:
retval.nodes = [children[0]]
else:
retval = children[0]
return retval
sufix.sem = sem_sufix
def sem_rule_crossref(parser, node, children):
return CrossRef(node.value)
rule_crossref.sem = sem_rule_crossref
def sem_regex(parser, node, children):
match = RegExMatch(children[0],
ignore_case=parser.ignore_case)
match.compile()
return match
regex.sem = sem_regex
def sem_strmatch(parser, node, children):
match_str = node.value[1:-1]
match_str = match_str.replace("\\'", "'")
match_str = match_str.replace("\\\\", "\\")
return StrMatch(match_str, ignore_case=parser.ignore_case)
str_match.sem = sem_strmatch
expression.sem = SemanticActionSingleChild()
class ParserPEG(Parser):
def __init__(self, language_def, root_rule_name, comment_rule_name=None,
*args, **kwargs):
super(ParserPEG, self).__init__(*args, **kwargs)
self.root_rule_name = root_rule_name
# PEG Abstract Syntax Graph
self.parser_model = self._from_peg(language_def)
# In debug mode export parser model to dot for
# visualization
if self.debug:
from arpeggio.export import PMDOTExporter
root_rule = self.parser_model.rule_name
PMDOTExporter().exportFile(self.parser_model,
"{}_peg_parser_model.dot".format(root_rule))
# Comments should be optional and there can be more of them
if self.comments_model: # and not isinstance(self.comments_model, ZeroOrMore):
self.comments_model.root = True
self.comments_model.rule_name = comment_rule_name
def _parse(self):
return self.parser_model.parse(self)
class ParserPEG(ParserPEGOrig):
def _from_peg(self, language_def):
parser = ParserPython(peggrammar, comment, reduce_tree=False, debug=self.debug)
parser = ParserPython(peggrammar, comment, reduce_tree=False,
debug=self.debug)
parser.root_rule_name = self.root_rule_name
parser.parse(language_def)
return parser.getASG()
# Initialise cross-ref counter
parser._crossref_cnt = 0
return parser.getASG(sem_actions=sem_actions)
......@@ -17,7 +17,6 @@ else:
import copy
from arpeggio import *
from arpeggio import RegExMatch as _
#from arpeggio.export import PMDOTExporter, PTDOTExporter
__all__ = ['ParserPEG']
......@@ -52,11 +51,19 @@ def comment(): return "//", _(".*\n")
# ------------------------------------------------------------------
# PEG Semantic Actions
class SemGrammar(SemanticAction):
def first_pass(self, parser, node, children):
return parser.peg_rules[parser.root_rule_name]
# Find root rule
for rule in children:
print("RULE", rule.rule_name)
if rule.rule_name == parser.root_rule_name:
self.resolved = set()
resolved_rule = self._resolve(parser, rule)
return resolved_rule
assert False, "Root rule not found!"
def _resolve(self, parser, node):
......@@ -67,14 +74,12 @@ class SemGrammar(SemanticAction):
raise SemanticError("Rule \"{}\" does not exists."
.format(rule_name))
for i, n in enumerate(node.nodes):
if isinstance(n, CrossRef):
rule_name = n.rule_name
def resolve_rule_by_name(rule_name):
if parser.debug:
print("Resolving crossref {}".format(rule_name))
resolved_rule = get_rule_by_name(rule_name)
while type(resolved_rule) is CrossRef:
target_rule = resolved_rule.rule_name
target_rule = resolved_rule.target_rule_name
resolved_rule = get_rule_by_name(target_rule)
# If resolved rule hasn't got the same name it
# should be cloned and preserved in the peg_rules cache
......@@ -82,29 +87,25 @@ class SemGrammar(SemanticAction):
resolved_rule = copy.copy(resolved_rule)
resolved_rule.rule_name = rule_name
parser.peg_rules[rule_name] = resolved_rule
if parser.debug:
print("Resolving: cloned to {} = > {}"\
.format(resolved_rule.rule_name, resolved_rule.name))
node.nodes[i] = resolved_rule
else:
resolved_rule = n
if not resolved_rule in self.resolved:
print("Resolving: cloned to {} = > {}"
.format(resolved_rule.rule_name,
resolved_rule.name))
return resolved_rule
if isinstance(node, CrossRef):
# The root rule is a cross-ref
resolved_rule = resolve_rule_by_name(node.target_rule_name)
if resolved_rule not in self.resolved:
self.resolved.add(resolved_rule)
self._resolve(parser, resolved_rule)
def second_pass(self, parser, node):
'''
Resolving cross-references in second pass.
'''
if parser.debug:
print("Second pass:", type(node), text(node))
self.resolved = set()
self._resolve(parser, node)
return node
peggrammar.sem = SemGrammar()
return resolved_rule
else:
# Resolve children nodes
for i, n in enumerate(node.nodes):
node.nodes[i] = self._resolve(parser, n)
self.resolved.add(node)
return node
def sem_rule(parser, node, children):
......@@ -114,11 +115,8 @@ def sem_rule(parser, node, children):
else:
retval = children[1]
# CrossRef already has rule_name set
# that attrib is a target rule name
if type(retval) is not CrossRef:
retval.rule_name = rule_name
retval.root = True
retval.rule_name = rule_name
retval.root = True
if not hasattr(parser, "peg_rules"):
parser.peg_rules = {} # Used for linking phase
......@@ -128,7 +126,7 @@ def sem_rule(parser, node, children):
# resolving.
parser.peg_rules[rule_name] = retval
return retval
rule.sem = sem_rule
def sem_sequence(parser, node, children):
if len(children) > 1:
......@@ -136,7 +134,7 @@ def sem_sequence(parser, node, children):
else:
# If only one child rule exists reduce.
return children[0]
sequence.sem = sem_sequence
def sem_ordered_choice(parser, node, children):
if len(children) > 1:
......@@ -145,7 +143,7 @@ def sem_ordered_choice(parser, node, children):
# If only one child rule exists reduce.
retval = children[0]
return retval
ordered_choice.sem = sem_ordered_choice
def sem_prefix(parser, node, children):
if len(children) == 2:
......@@ -162,7 +160,7 @@ def sem_prefix(parser, node, children):
retval = children[0]
return retval
prefix.sem = sem_prefix
def sem_sufix(parser, node, children):
if len(children) == 2:
......@@ -180,27 +178,38 @@ def sem_sufix(parser, node, children):
retval = children[0]
return retval
sufix.sem = sem_sufix
def sem_rule_crossref(parser, node, children):
return CrossRef(node.value)
rule_crossref.sem = sem_rule_crossref
def sem_regex(parser, node, children):
match = RegExMatch(children[0],
ignore_case=parser.ignore_case)
ignore_case=parser.ignore_case)
match.compile()
return match
regex.sem = sem_regex
def sem_strmatch(parser, node, children):
match_str = node.value[1:-1]
match_str = match_str.replace("\\'", "'")
match_str = match_str.replace("\\\\", "\\")
return StrMatch(match_str, ignore_case=parser.ignore_case)
str_match.sem = sem_strmatch
expression.sem = SemanticActionSingleChild()
sem_actions = {
'peggrammar': SemGrammar(),
'rule': sem_rule,
'sequence': sem_sequence,
'ordered_choice': sem_ordered_choice,
'prefix': sem_prefix,
'sufix': sem_sufix,
'rule_crossref': sem_rule_crossref,
'regex': sem_regex,
'str_match': sem_strmatch,
'expression': SemanticActionSingleChild(),
}
class ParserPEG(Parser):
......@@ -217,11 +226,11 @@ class ParserPEG(Parser):
if self.debug:
from arpeggio.export import PMDOTExporter
root_rule = self.parser_model.rule_name
PMDOTExporter().exportFile(self.parser_model,
"{}_peg_parser_model.dot".format(root_rule))
PMDOTExporter().exportFile(
self.parser_model, "{}_peg_parser_model.dot".format(root_rule))
# Comments should be optional and there can be more of them
if self.comments_model: # and not isinstance(self.comments_model, ZeroOrMore):
if self.comments_model:
self.comments_model.root = True
self.comments_model.rule_name = comment_rule_name
......@@ -229,8 +238,9 @@ class ParserPEG(Parser):
return self.parser_model.parse(self)
def _from_peg(self, language_def):
parser = ParserPython(peggrammar, comment, reduce_tree=False, debug=self.debug)
parser = ParserPython(peggrammar, comment, reduce_tree=False,
debug=self.debug)
parser.root_rule_name = self.root_rule_name
parser.parse(language_def)
return parser.getASG()
return parser.getASG(sem_actions=sem_actions)
......@@ -63,5 +63,5 @@ if __name__ == "__main__":
# In debug mode dot (graphviz) files for parser model
# and parse tree will be created for visualization.
# Checkout current folder for .dot files.
main(debug=True)
main(debug=False)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals, print_function
import pytest
from arpeggio.cleanpeg import ParserPEG
input = """\
<?php
class Enum {
protected $self = array();
public function __construct( $fun ) {
$args = func_get_args();
for( $i=0, $n=count($args); $i<$n; $i++ )
$this->add($args[$i]);
}
public function __get( $name = null ) {
return $this->self[$name];
}
public function add( $name = null, $enum = null ) {
if( isset($enum) )
$this->self[$name] = $enum;
else
$this->self[$name] = end($this->self) + 1;
}
"""
grammar = """
calc = test
test = visibility ws* function_keyword ws* word ws* arguments* ws*
function = visibility "function" word arguments block
block = "{" ws* r'[^}]*' ws* "}"
arguments = "(" ws* argument* ws* ")"
#$types = array("cappuccino")
#arguments end with optional comma
argument = ( byvalue / byreference ) ("=" value )* ","*
byreference = "&" byvalue
byvalue = variable
#value may be variable or array or string or any php type
value = variable
visibility = "public" / "protected" / "private"
function_keyword = "function"
variable = "$" literal r'[a-zA-Z0-9_]*'
word = r'[a-zA-Z0-9_]+'
literal = r'[a-zA-Z]+'
comment = r'("//.*")|("/\*.*\*/")'
symbol = r'[\W]+'
anyword = r'[\w]*' ws*
ws = r'[\s]+'
"""
def argument(parser, node, children):
"""
Removes parenthesis if exists and returns what was contained inside.
"""
print(children)
if len(children) == 1:
print(children[0])
return children[0]
sign = -1 if children[0] == '-' else 1
return sign * children[-1]
# Rules are mapped to semantic actions
sem_actions = {
"argument": argument,
}
def test_issue_16():
parser = ParserPEG(grammar, "calc", skipws=False)
input_expr = """public function __construct( )"""
parse_tree = parser.parse(input_expr)
# Do semantic analysis. Do not use default actions.
asg = parser.getASG(sem_actions=sem_actions, defaults=False)
assert asg
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment