Commit 0a990e05 authored by Igor Dejanovic

Fix PEG language reference resolution and simplify the cleanpeg module.

The cleanpeg module now uses the semantic actions defined in the peg module.
parent effa6a1e
...@@ -1266,8 +1266,8 @@ class CrossRef(object): ...@@ -1266,8 +1266,8 @@ class CrossRef(object):
''' '''
Used for rule reference resolving. Used for rule reference resolving.
''' '''
def __init__(self, rule_name, position=-1): def __init__(self, target_rule_name, position=-1):
self.rule_name = rule_name self.target_rule_name = target_rule_name
self.position = position self.position = position
...@@ -1322,7 +1322,7 @@ class ParserPython(Parser): ...@@ -1322,7 +1322,7 @@ class ParserPython(Parser):
self.__cross_refs += 1 self.__cross_refs += 1
if self.debug: if self.debug:
print("CrossRef usage: {}" print("CrossRef usage: {}"
.format(c_rule.rule_name)) .format(c_rule.target_rule_name))
return c_rule return c_rule
# Semantic action for the rule # Semantic action for the rule
...@@ -1398,7 +1398,7 @@ class ParserPython(Parser): ...@@ -1398,7 +1398,7 @@ class ParserPython(Parser):
for i, node in enumerate(e.nodes): for i, node in enumerate(e.nodes):
if isinstance(node, CrossRef): if isinstance(node, CrossRef):
self.__cross_refs -= 1 self.__cross_refs -= 1
e.nodes[i] = __rule_cache[node.rule_name] e.nodes[i] = __rule_cache[node.target_rule_name]
parser_model = inner_from_python(expression) parser_model = inner_from_python(expression)
resolve() resolve()
......
...@@ -3,23 +3,18 @@ ...@@ -3,23 +3,18 @@
# Name: cleanpeg.py # Name: cleanpeg.py
# Purpose: This module is a variation of the original peg.py. # Purpose: This module is a variation of the original peg.py.
# The syntax is slightly changed to be more readable and familiar to # The syntax is slightly changed to be more readable and familiar to
# python users. It is based on the Yash's suggestion - issue #11 # python users. It is based on the Yash's suggestion - issue 11
# Author: Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com> # Author: Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com> # Copyright: (c) 2014 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
import sys
if sys.version < '3':
text = unicode
else:
text = str
import copy
from arpeggio import * from arpeggio import *
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
#from arpeggio.export import PMDOTExporter, PTDOTExporter from .peg import sem_actions
from .peg import ParserPEG as ParserPEGOrig
__all__ = ['ParserPEG'] __all__ = ['ParserPEG']
...@@ -52,187 +47,15 @@ def rule_crossref(): return rule_name ...@@ -52,187 +47,15 @@ def rule_crossref(): return rule_name
def str_match(): return _(r'(\'(\\\'|[^\'])*\')|("[^"]*")') def str_match(): return _(r'(\'(\\\'|[^\'])*\')|("[^"]*")')
def comment(): return _("#.*\n") def comment(): return _("#.*\n")
# ------------------------------------------------------------------
# PEG Semantic Actions
class SemGrammar(SemanticAction):
    """Semantic action attached to the top-level ``peggrammar`` rule.

    ``first_pass`` hands back the model of the root rule and
    ``second_pass`` replaces the ``CrossRef`` placeholders reachable from
    it with the rules recorded in ``parser.peg_rules``.
    """

    def first_pass(self, parser, node, children):
        """Return the entry of ``parser.peg_rules`` for the root rule name."""
        return parser.peg_rules[parser.root_rule_name]

    def _resolve(self, parser, node):
        """Resolve every ``CrossRef`` found in ``node.nodes``, recursively.

        Each child is recorded in ``self.resolved`` before it is descended
        into, so a rule that is reachable along several paths (or through
        a cycle) is only visited once.

        Raises:
            SemanticError: if a referenced name is missing from
                ``parser.peg_rules``.
        """

        def get_rule_by_name(rule_name):
            if rule_name in parser.peg_rules:
                return parser.peg_rules[rule_name]
            raise SemanticError("Rule \"{}\" does not exists."
                                .format(rule_name))

        for i, n in enumerate(node.nodes):
            if isinstance(n, CrossRef):
                rule_name = n.rule_name
                if parser.debug:
                    print("Resolving crossref {}".format(rule_name))
                resolved_rule = get_rule_by_name(rule_name)
                # The looked-up entry may itself be a CrossRef; keep
                # following the chain until a concrete rule is reached.
                while type(resolved_rule) is CrossRef:
                    target_rule = resolved_rule.rule_name
                    resolved_rule = get_rule_by_name(target_rule)
                # If resolved rule hasn't got the same name it
                # should be cloned and preserved in the peg_rules cache
                if resolved_rule.rule_name != rule_name:
                    resolved_rule = copy.copy(resolved_rule)
                    resolved_rule.rule_name = rule_name
                    parser.peg_rules[rule_name] = resolved_rule
                    if parser.debug:
                        print("Resolving: cloned to {} = > {}"
                              .format(resolved_rule.rule_name,
                                      resolved_rule.name))
                node.nodes[i] = resolved_rule
            else:
                resolved_rule = n
            if resolved_rule not in self.resolved:
                self.resolved.add(resolved_rule)
                self._resolve(parser, resolved_rule)

    def second_pass(self, parser, node):
        '''
        Resolving cross-references in second pass.

        Resets ``self.resolved``, resolves everything reachable from
        ``node`` in place and returns ``node``.
        '''
        if parser.debug:
            print("Second pass:", type(node), text(node))
        self.resolved = set()
        self._resolve(parser, node)
        return node
peggrammar.sem = SemGrammar()
def sem_rule(parser, node, children):
    """Build the parser-model expression for a single grammar rule.

    ``children[0]`` is the rule name. When more than one further child is
    present they are wrapped in a ``Sequence``; otherwise the single body
    expression is used directly. The result is registered in
    ``parser.peg_rules`` under the rule name and returned.
    """
    rule_name = children[0]
    if len(children) > 2:
        retval = Sequence(nodes=children[1:])
    else:
        retval = children[1]

    # CrossRef already has rule_name set
    # that attrib is a target rule name
    if type(retval) is not CrossRef:
        retval.rule_name = rule_name
        retval.root = True

    if not hasattr(parser, "peg_rules"):
        parser.peg_rules = {}   # Used for linking phase
        parser.peg_rules["EOF"] = EndOfFile()

    # Keep a map of parser rules for cross reference
    # resolving.
    parser.peg_rules[rule_name] = retval
    return retval
rule.sem = sem_rule
def sem_sequence(parser, node, children):
    """Return a ``Sequence`` of ``children``, or the lone child unwrapped."""
    if len(children) > 1:
        return Sequence(nodes=children[:])
    else:
        # If only one child rule exists reduce.
        return children[0]
sequence.sem = sem_sequence
def sem_ordered_choice(parser, node, children):
    """Return an ``OrderedChoice`` of ``children``, or the lone child."""
    if len(children) > 1:
        retval = OrderedChoice(nodes=children[:])
    else:
        # If only one child rule exists reduce.
        retval = children[0]
    return retval
ordered_choice.sem = sem_ordered_choice
def sem_prefix(parser, node, children):
    """Wrap the operand in ``Not``/``And`` when a prefix operator is present.

    With two children the first is the operator and the second the
    operand: an operator equal to ``NOT()`` yields ``Not``, anything else
    yields ``And``. With a single child that child is returned unchanged.
    """
    if len(children) == 2:
        if children[0] == NOT():
            retval = Not()
        else:
            retval = And()
        # The wrapper's ``nodes`` must be a list; wrap a lone operand.
        if type(children[1]) is list:
            retval.nodes = children[1]
        else:
            retval.nodes = [children[1]]
    else:
        # If there is no optional prefix reduce.
        retval = children[0]
    return retval
prefix.sem = sem_prefix
def sem_sufix(parser, node, children):
    """Wrap the operand in a repetition when a suffix operator is present.

    With two children the second is the operator: ``STAR()`` yields
    ``ZeroOrMore``, ``QUESTION()`` yields ``Optional`` and anything else
    yields ``OneOrMore``. With a single child that child is returned
    unchanged.
    """
    if len(children) == 2:
        if children[1] == STAR():
            retval = ZeroOrMore(children[0])
        elif children[1] == QUESTION():
            retval = Optional(children[0])
        else:
            retval = OneOrMore(children[0])
        # The wrapper's ``nodes`` must be a list; wrap a lone operand.
        if type(children[0]) is list:
            retval.nodes = children[0]
        else:
            retval.nodes = [children[0]]
    else:
        retval = children[0]
    return retval
sufix.sem = sem_sufix
def sem_rule_crossref(parser, node, children):
    """Return a ``CrossRef`` placeholder built from the matched ``node.value``."""
    return CrossRef(node.value)
rule_crossref.sem = sem_rule_crossref
def sem_regex(parser, node, children):
    """Build and compile a ``RegExMatch`` from ``children[0]``.

    The parser's ``ignore_case`` setting is passed on to the match.
    """
    match = RegExMatch(children[0],
                       ignore_case=parser.ignore_case)
    match.compile()
    return match
regex.sem = sem_regex
def sem_strmatch(parser, node, children):
    """Build a ``StrMatch`` from a quoted string literal in the grammar.

    The first and last characters of ``node.value`` (the quotes) are
    dropped, then ``\\'`` is unescaped to ``'`` and ``\\\\`` to ``\\``.
    """
    match_str = node.value[1:-1]
    match_str = match_str.replace("\\'", "'")
    match_str = match_str.replace("\\\\", "\\")
    return StrMatch(match_str, ignore_case=parser.ignore_case)
str_match.sem = sem_strmatch
expression.sem = SemanticActionSingleChild()
class ParserPEG(Parser):
    """Parser whose model is built from a textual PEG grammar definition."""

    def __init__(self, language_def, root_rule_name, comment_rule_name=None,
                 *args, **kwargs):
        """Build the parser model from ``language_def``.

        Args:
            language_def: grammar text handed to ``self._from_peg``.
            root_rule_name: name of the grammar rule parsing starts from.
            comment_rule_name: name given to the comments model, if one
                is set.
            *args, **kwargs: forwarded to ``Parser.__init__``.
        """
        super(ParserPEG, self).__init__(*args, **kwargs)
        self.root_rule_name = root_rule_name

        # PEG Abstract Syntax Graph
        # NOTE(review): _from_peg is not defined in this part of the class;
        # it must be provided elsewhere on the class.
        self.parser_model = self._from_peg(language_def)

        # In debug mode export parser model to dot for
        # visualization
        if self.debug:
            from arpeggio.export import PMDOTExporter
            root_rule = self.parser_model.rule_name
            PMDOTExporter().exportFile(
                self.parser_model,
                "{}_peg_parser_model.dot".format(root_rule))

        # Comments should be optional and there can be more of them
        # NOTE(review): comments_model is not assigned here; presumably it
        # is set by Parser.__init__ or _from_peg -- confirm.
        if self.comments_model:
            self.comments_model.root = True
            self.comments_model.rule_name = comment_rule_name

    def _parse(self):
        """Run the parser model against this parser and return its result."""
        return self.parser_model.parse(self)
class ParserPEG(ParserPEGOrig):
def _from_peg(self, language_def): def _from_peg(self, language_def):
parser = ParserPython(peggrammar, comment, reduce_tree=False, debug=self.debug) parser = ParserPython(peggrammar, comment, reduce_tree=False,
debug=self.debug)
parser.root_rule_name = self.root_rule_name parser.root_rule_name = self.root_rule_name
parser.parse(language_def) parser.parse(language_def)
return parser.getASG() # Initialise cross-ref counter
parser._crossref_cnt = 0
return parser.getASG(sem_actions=sem_actions)
...@@ -17,7 +17,6 @@ else: ...@@ -17,7 +17,6 @@ else:
import copy import copy
from arpeggio import * from arpeggio import *
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
#from arpeggio.export import PMDOTExporter, PTDOTExporter
__all__ = ['ParserPEG'] __all__ = ['ParserPEG']
...@@ -52,11 +51,19 @@ def comment(): return "//", _(".*\n") ...@@ -52,11 +51,19 @@ def comment(): return "//", _(".*\n")
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# PEG Semantic Actions # PEG Semantic Actions
class SemGrammar(SemanticAction): class SemGrammar(SemanticAction):
def first_pass(self, parser, node, children): def first_pass(self, parser, node, children):
return parser.peg_rules[parser.root_rule_name] # Find root rule
for rule in children:
print("RULE", rule.rule_name)
if rule.rule_name == parser.root_rule_name:
self.resolved = set()
resolved_rule = self._resolve(parser, rule)
return resolved_rule
assert False, "Root rule not found!"
def _resolve(self, parser, node): def _resolve(self, parser, node):
...@@ -67,14 +74,12 @@ class SemGrammar(SemanticAction): ...@@ -67,14 +74,12 @@ class SemGrammar(SemanticAction):
raise SemanticError("Rule \"{}\" does not exists." raise SemanticError("Rule \"{}\" does not exists."
.format(rule_name)) .format(rule_name))
for i, n in enumerate(node.nodes): def resolve_rule_by_name(rule_name):
if isinstance(n, CrossRef):
rule_name = n.rule_name
if parser.debug: if parser.debug:
print("Resolving crossref {}".format(rule_name)) print("Resolving crossref {}".format(rule_name))
resolved_rule = get_rule_by_name(rule_name) resolved_rule = get_rule_by_name(rule_name)
while type(resolved_rule) is CrossRef: while type(resolved_rule) is CrossRef:
target_rule = resolved_rule.rule_name target_rule = resolved_rule.target_rule_name
resolved_rule = get_rule_by_name(target_rule) resolved_rule = get_rule_by_name(target_rule)
# If resolved rule hasn't got the same name it # If resolved rule hasn't got the same name it
# should be cloned and preserved in the peg_rules cache # should be cloned and preserved in the peg_rules cache
...@@ -82,29 +87,25 @@ class SemGrammar(SemanticAction): ...@@ -82,29 +87,25 @@ class SemGrammar(SemanticAction):
resolved_rule = copy.copy(resolved_rule) resolved_rule = copy.copy(resolved_rule)
resolved_rule.rule_name = rule_name resolved_rule.rule_name = rule_name
parser.peg_rules[rule_name] = resolved_rule parser.peg_rules[rule_name] = resolved_rule
if parser.debug: if parser.debug:
print("Resolving: cloned to {} = > {}"\ print("Resolving: cloned to {} = > {}"
.format(resolved_rule.rule_name, resolved_rule.name)) .format(resolved_rule.rule_name,
node.nodes[i] = resolved_rule resolved_rule.name))
else: return resolved_rule
resolved_rule = n
if isinstance(node, CrossRef):
if not resolved_rule in self.resolved: # The root rule is a cross-ref
resolved_rule = resolve_rule_by_name(node.target_rule_name)
if resolved_rule not in self.resolved:
self.resolved.add(resolved_rule) self.resolved.add(resolved_rule)
self._resolve(parser, resolved_rule) self._resolve(parser, resolved_rule)
return resolved_rule
def second_pass(self, parser, node): else:
''' # Resolve children nodes
Resolving cross-references in second pass. for i, n in enumerate(node.nodes):
''' node.nodes[i] = self._resolve(parser, n)
if parser.debug: self.resolved.add(node)
print("Second pass:", type(node), text(node)) return node
self.resolved = set()
self._resolve(parser, node)
return node
peggrammar.sem = SemGrammar()
def sem_rule(parser, node, children): def sem_rule(parser, node, children):
...@@ -114,11 +115,8 @@ def sem_rule(parser, node, children): ...@@ -114,11 +115,8 @@ def sem_rule(parser, node, children):
else: else:
retval = children[1] retval = children[1]
# CrossRef already has rule_name set retval.rule_name = rule_name
# that attrib is a target rule name retval.root = True
if type(retval) is not CrossRef:
retval.rule_name = rule_name
retval.root = True
if not hasattr(parser, "peg_rules"): if not hasattr(parser, "peg_rules"):
parser.peg_rules = {} # Used for linking phase parser.peg_rules = {} # Used for linking phase
...@@ -128,7 +126,7 @@ def sem_rule(parser, node, children): ...@@ -128,7 +126,7 @@ def sem_rule(parser, node, children):
# resolving. # resolving.
parser.peg_rules[rule_name] = retval parser.peg_rules[rule_name] = retval
return retval return retval
rule.sem = sem_rule
def sem_sequence(parser, node, children): def sem_sequence(parser, node, children):
if len(children) > 1: if len(children) > 1:
...@@ -136,7 +134,7 @@ def sem_sequence(parser, node, children): ...@@ -136,7 +134,7 @@ def sem_sequence(parser, node, children):
else: else:
# If only one child rule exists reduce. # If only one child rule exists reduce.
return children[0] return children[0]
sequence.sem = sem_sequence
def sem_ordered_choice(parser, node, children): def sem_ordered_choice(parser, node, children):
if len(children) > 1: if len(children) > 1:
...@@ -145,7 +143,7 @@ def sem_ordered_choice(parser, node, children): ...@@ -145,7 +143,7 @@ def sem_ordered_choice(parser, node, children):
# If only one child rule exists reduce. # If only one child rule exists reduce.
retval = children[0] retval = children[0]
return retval return retval
ordered_choice.sem = sem_ordered_choice
def sem_prefix(parser, node, children): def sem_prefix(parser, node, children):
if len(children) == 2: if len(children) == 2:
...@@ -162,7 +160,7 @@ def sem_prefix(parser, node, children): ...@@ -162,7 +160,7 @@ def sem_prefix(parser, node, children):
retval = children[0] retval = children[0]
return retval return retval
prefix.sem = sem_prefix
def sem_sufix(parser, node, children): def sem_sufix(parser, node, children):
if len(children) == 2: if len(children) == 2:
...@@ -180,27 +178,38 @@ def sem_sufix(parser, node, children): ...@@ -180,27 +178,38 @@ def sem_sufix(parser, node, children):
retval = children[0] retval = children[0]
return retval return retval
sufix.sem = sem_sufix
def sem_rule_crossref(parser, node, children): def sem_rule_crossref(parser, node, children):
return CrossRef(node.value) return CrossRef(node.value)
rule_crossref.sem = sem_rule_crossref
def sem_regex(parser, node, children): def sem_regex(parser, node, children):
match = RegExMatch(children[0], match = RegExMatch(children[0],
ignore_case=parser.ignore_case) ignore_case=parser.ignore_case)
match.compile() match.compile()
return match return match
regex.sem = sem_regex
def sem_strmatch(parser, node, children): def sem_strmatch(parser, node, children):
match_str = node.value[1:-1] match_str = node.value[1:-1]
match_str = match_str.replace("\\'", "'") match_str = match_str.replace("\\'", "'")
match_str = match_str.replace("\\\\", "\\") match_str = match_str.replace("\\\\", "\\")
return StrMatch(match_str, ignore_case=parser.ignore_case) return StrMatch(match_str, ignore_case=parser.ignore_case)
str_match.sem = sem_strmatch
expression.sem = SemanticActionSingleChild()
sem_actions = {
'peggrammar': SemGrammar(),
'rule': sem_rule,
'sequence': sem_sequence,
'ordered_choice': sem_ordered_choice,
'prefix': sem_prefix,
'sufix': sem_sufix,
'rule_crossref': sem_rule_crossref,
'regex': sem_regex,
'str_match': sem_strmatch,
'expression': SemanticActionSingleChild(),
}
class ParserPEG(Parser): class ParserPEG(Parser):
...@@ -217,11 +226,11 @@ class ParserPEG(Parser): ...@@ -217,11 +226,11 @@ class ParserPEG(Parser):
if self.debug: if self.debug:
from arpeggio.export import PMDOTExporter from arpeggio.export import PMDOTExporter
root_rule = self.parser_model.rule_name root_rule = self.parser_model.rule_name
PMDOTExporter().exportFile(self.parser_model, PMDOTExporter().exportFile(
"{}_peg_parser_model.dot".format(root_rule)) self.parser_model, "{}_peg_parser_model.dot".format(root_rule))
# Comments should be optional and there can be more of them # Comments should be optional and there can be more of them
if self.comments_model: # and not isinstance(self.comments_model, ZeroOrMore): if self.comments_model:
self.comments_model.root = True self.comments_model.root = True
self.comments_model.rule_name = comment_rule_name self.comments_model.rule_name = comment_rule_name
...@@ -229,8 +238,9 @@ class ParserPEG(Parser): ...@@ -229,8 +238,9 @@ class ParserPEG(Parser):
return self.parser_model.parse(self) return self.parser_model.parse(self)
def _from_peg(self, language_def): def _from_peg(self, language_def):
parser = ParserPython(peggrammar, comment, reduce_tree=False, debug=self.debug) parser = ParserPython(peggrammar, comment, reduce_tree=False,
debug=self.debug)
parser.root_rule_name = self.root_rule_name parser.root_rule_name = self.root_rule_name
parser.parse(language_def) parser.parse(language_def)
return parser.getASG() return parser.getASG(sem_actions=sem_actions)
...@@ -63,5 +63,5 @@ if __name__ == "__main__": ...@@ -63,5 +63,5 @@ if __name__ == "__main__":
# In debug mode dot (graphviz) files for parser model # In debug mode dot (graphviz) files for parser model
# and parse tree will be created for visualization. # and parse tree will be created for visualization.
# Checkout current folder for .dot files. # Checkout current folder for .dot files.
main(debug=True) main(debug=False)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals, print_function
import pytest
from arpeggio.cleanpeg import ParserPEG
# Sample PHP source kept alongside the test. Nothing in the visible part of
# this module reads it: test_issue_16 parses its own one-line `input_expr`.
# NOTE: the name shadows the built-in input().
input = """\
<?php
class Enum {
protected $self = array();
public function __construct( $fun ) {
$args = func_get_args();
for( $i=0, $n=count($args); $i<$n; $i++ )
$this->add($args[$i]);
}
public function __get( $name = null ) {
return $this->self[$name];
}
public function add( $name = null, $enum = null ) {
if( isset($enum) )
$this->self[$name] = $enum;
else
$this->self[$name] = end($this->self) + 1;
}
"""
grammar = """
calc = test
test = visibility ws* function_keyword ws* word ws* arguments* ws*
function = visibility "function" word arguments block
block = "{" ws* r'[^}]*' ws* "}"
arguments = "(" ws* argument* ws* ")"
#$types = array("cappuccino")
#arguments end with optional comma
argument = ( byvalue / byreference ) ("=" value )* ","*
byreference = "&" byvalue
byvalue = variable
#value may be variable or array or string or any php type
value = variable
visibility = "public" / "protected" / "private"
function_keyword = "function"
variable = "$" literal r'[a-zA-Z0-9_]*'
word = r'[a-zA-Z0-9_]+'
literal = r'[a-zA-Z]+'
comment = r'("//.*")|("/\*.*\*/")'
symbol = r'[\W]+'
anyword = r'[\w]*' ws*
ws = r'[\s]+'
"""
def argument(parser, node, children):
    """Semantic action for the ``argument`` rule.

    A single child is returned unchanged. Otherwise the last child is
    returned, negated when the first child is the string ``'-'``.
    The children are printed as a debugging aid.
    """
    print(children)
    if len(children) == 1:
        print(children[0])
        return children[0]
    sign = -1 if children[0] == '-' else 1
    return sign * children[-1]
# Rules are mapped to semantic actions, keyed by rule name.
# Only the `argument` rule gets an action in this test.
sem_actions = {
"argument": argument,
}
def test_issue_16():
    """Parse a PHP-like function header and check that an ASG is produced."""
    parser = ParserPEG(grammar, "calc", skipws=False)
    input_expr = """public function __construct( )"""
    # getASG below is called without a tree argument, so it presumably works
    # on the result of this parse -- the return value is not needed here.
    parser.parse(input_expr)

    # Do semantic analysis. Do not use default actions.
    asg = parser.getASG(sem_actions=sem_actions, defaults=False)
    assert asg
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment