Commit 2951f542 authored by Igor Dejanovic

refs #4, #6 Support for lexical rules and NonTerminal/Terminal str/repr

Introduced grammar rule decorators. The Combine decorator is used to
mark a part of the grammar as a lexical rule. Such a rule always
returns a Terminal; whitespaces are preserved, and comments are
not matched.
parent 713b5f2f
......@@ -151,9 +151,12 @@ class ParsingExpression(object):
def _parse_intro(self, parser):
if parser.debug:
print "Parsing %s" % self.name
# Skip whitespaces if we are not in the lexical rule
if not parser._in_lex_rule:
parser._skip_ws()
# Set the beginning position in input stream of
# Set the beginning position in the input stream of
# this parsing expression
self.c_pos = parser.position
......@@ -165,7 +168,7 @@ class ParsingExpression(object):
c_pos = self.c_pos
# Memoization.
# If this position is already parsed by this parser expression than use
# If this position is already parsed by this parser expression use
# the result
if c_pos in self.result_cache:
if parser.debug:
......@@ -181,7 +184,9 @@ class ParsingExpression(object):
result = self._parse(parser)
if result:
# Create terminal or non-terminal if result is not
# already a Terminal.
if result and not isinstance(result, Terminal):
if parser.reduce_tree:
if isinstance(result, list):
if self.root:
......@@ -360,6 +365,44 @@ class Empty(SyntaxPredicate):
pass
class Decorator(ParsingExpression):
"""
Decorators are a special kind of parsing expression used to mark
the pexpression they contain and give it some special semantics.
For example, decorators are used to mark a pexpression as a lexical
rule (see :class:`Combine`).
"""
class Combine(Decorator):
    """
    Decorator that turns the contained pexpressions into a lexeme rule.

    Such a rule always returns a single Terminal parse tree node whose value
    is the concatenated text matched by the contained pexpressions.
    While the rule is being parsed ``parser._in_lex_rule`` is set, so
    whitespaces are preserved (not skipped) and comments are not matched.
    """

    def _parse(self, parser):
        """
        Match all sub-expressions in sequence and merge them into a Terminal.

        Args:
            parser: The running parser. Its ``position`` and ``_in_lex_rule``
                attributes are read and updated here.

        Returns:
            Terminal: Named ``self.rule`` if this is a root expression and
            '' otherwise, positioned where this rule started matching.

        Raises:
            NoMatch: Propagated from a sub-expression, after the parser
                position has been rewound to where this rule started.
        """
        # Remember the enclosing lexical state so that a Combine nested in
        # another Combine does not switch lexical mode off for the outer one.
        was_in_lex_rule = parser._in_lex_rule
        parser._in_lex_rule = True

        # Keep the start offset in a local too: self.c_pos is also assigned
        # by _parse_intro, so it may be overwritten if this same expression
        # is entered again while its sub-expressions are being parsed.
        start_pos = parser.position
        self.c_pos = start_pos
        try:
            matched = flatten([node.parse(parser) for node in self.nodes])

            # Create terminal from result
            return Terminal(self.rule if self.root else '', start_pos,
                            "".join([str(m) for m in matched]))
        except NoMatch:
            parser.position = start_pos  # Backtracking
            raise
        finally:
            parser._in_lex_rule = was_in_lex_rule
class Match(ParsingExpression):
"""
Base class for all classes that will try to match something from the input.
......@@ -375,14 +418,15 @@ class Match(ParsingExpression):
self._parse_intro(parser)
if parser._in_parse_comment:
return self._parse(parser)
comments = []
try:
match = self._parse(parser)
except NoMatch, nm:
# If not matched try to match comment
# If not matched and not in lexical rule try to match comment
#TODO: Comment handling refactoring. Should think of better way to
# handle comments.
if parser.comments_model:
if not parser._in_lex_rule and parser.comments_model:
try:
parser._in_parse_comment = True
while True:
......@@ -492,7 +536,7 @@ class EndOfFile(Match):
def _parse(self, parser):
if len(parser.input) == parser.position:
return Terminal(self.rule if self.root else '', self.c_pos, 'EOF')
return Terminal('EOF', self.c_pos, '')
else:
if parser.debug:
print "EOF not matched."
......@@ -535,6 +579,8 @@ class Terminal(ParseTreeNode):
Leaf node of the Parse Tree. Represents matched string.
Attributes:
type (str): The name of the rule that created this terminal.
position (int): A position in the input stream where match occurred.
value (str): Matched string at the given position or missing token
name in the case of an error node.
"""
......@@ -544,11 +590,17 @@ class Terminal(ParseTreeNode):
@property
def desc(self):
    """Short description: rule type, quoted value (if any) and position."""
    # Nodes without a value are shown by type and position only.
    if not self.value:
        return "%s [%s]" % (self.type, self.position)
    return "%s '%s' [%s]" % (self.type, self.value, self.position)
def __str__(self):
# The string form of a terminal is just its value attribute.
return self.value
def __repr__(self):
# The repr is the description built by the desc property
# (type, quoted value if any, and position).
return self.desc
def __eq__(self, other):
return str(self) == str(other)
......@@ -573,7 +625,10 @@ class NonTerminal(ParseTreeNode):
return iter(self.nodes)
def __str__(self):
return "[ %s ]" % ", ".join([str(x) for x in self.nodes])
return "".join([str(x) for x in self.nodes])
def __repr__(self):
return "[ %s ]" % ", ".join([repr(x) for x in self.nodes])
# ----------------------------------------------------
......@@ -625,6 +680,7 @@ class Parser(object):
self.parse_tree = None
self._in_parse_comment = False
self._in_lex_rule = False
def parse(self, _input):
self.position = 0 # Input position
......@@ -823,7 +879,8 @@ class ParserPython(Parser):
retval = expression
elif isinstance(expression, Repetition) or \
isinstance(expression, SyntaxPredicate):
isinstance(expression, SyntaxPredicate) or \
isinstance(expression, Decorator):
retval = expression
retval.nodes.append(inner_from_python(retval.elements))
if any((isinstance(x, CrossRef) for x in retval.nodes)):
......
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_decorator_combine
# Purpose: Test for Combine decorator. Combine decorator
# results in Terminal parse tree node. Whitespaces are
# preserved (they are not skipped) and comments are not matched.
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from unittest import TestCase
from arpeggio import ParserPython, ZeroOrMore, OneOrMore, NonTerminal, Terminal, NoMatch, Combine
from arpeggio.peg import ParserPEG
class TestDecoratorCombine(TestCase):
    """Tests for the Combine decorator used from a Python-defined grammar."""

    def test_combine_python(self):
        # Lexical rule: expected to produce a Terminal node.
        def my_rule(): return Combine(ZeroOrMore("a"), OneOrMore("b"))

        # Plain sequence rule: expected to produce a NonTerminal node.
        def root(): return my_rule(), "."

        parser = ParserPython(root, debug=True)

        tree = parser.parse("abbb.")

        # Whitespaces are preserved in lexical rules, so an input with
        # spaces inside the lexeme should not be recognized.
        self.assertRaises(NoMatch, parser.parse, "a b bb.")

        self.assertIsInstance(tree, NonTerminal)
        lexeme = tree.nodes[0]
        self.assertIsInstance(lexeme, Terminal)
        self.assertEqual(lexeme.value, "abbb")
......@@ -19,8 +19,10 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar)
parsed = str(parser.parse("a b c"))
self.assertEqual(parsed, "[ a, b, c ]")
parsed = parser.parse("a b c")
self.assertEqual(str(parsed), "abc")
self.assertEqual(repr(parsed), "[ 'a' [0], 'b' [2], 'c' [4] ]")
def test_ordered_choice(self):
......@@ -28,11 +30,14 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar)
parsed = str(parser.parse("b"))
self.assertEqual(parsed, "[ b, EOF ]")
parsed = parser.parse("b")
self.assertEqual(str(parsed), "b")
self.assertEqual(repr(parsed), "[ 'b' [0], EOF [1] ]")
parsed = str(parser.parse("c"))
self.assertEqual(parsed, "[ c, EOF ]")
parsed = parser.parse("c")
self.assertEqual(str(parsed), "c")
self.assertEqual(repr(parsed), "[ 'c' [0], EOF [1] ]")
self.assertRaises(NoMatch, lambda: parser.parse("ab"))
self.assertRaises(NoMatch, lambda: parser.parse("bb"))
......@@ -43,11 +48,15 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar)
parsed = str(parser.parse("aaaaaaa"))
self.assertEqual(parsed, "[ a, a, a, a, a, a, a, EOF ]")
parsed = parser.parse("aaaaaaa")
self.assertEqual(str(parsed), "aaaaaaa")
self.assertEqual(repr(parsed), "[ 'a' [0], 'a' [1], 'a' [2], 'a' [3], 'a' [4], 'a' [5], 'a' [6], EOF [7] ]")
parsed = parser.parse("")
parsed = str(parser.parse(""))
self.assertEqual(parsed, "[ EOF ]")
self.assertEqual(str(parsed), "")
self.assertEqual(repr(parsed), "[ EOF [0] ]")
self.assertRaises(NoMatch, lambda: parser.parse("bbb"))
......@@ -57,8 +66,10 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar)
parsed = str(parser.parse("aaaaaaa"))
self.assertEqual(parsed, "[ a, a, a, a, a, a, a ]")
parsed = parser.parse("aaaaaaa")
self.assertEqual(str(parsed), "aaaaaaa")
self.assertEqual(repr(parsed), "[ 'a' [0], 'a' [1], 'a' [2], 'a' [3], 'a' [4], 'a' [5], 'a' [6] ]")
self.assertRaises(NoMatch, lambda: parser.parse(""))
self.assertRaises(NoMatch, lambda: parser.parse("bbb"))
......@@ -69,11 +80,15 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar)
parsed = str(parser.parse("ab"))
self.assertEqual(parsed, "[ a, b, EOF ]")
parsed = parser.parse("ab")
self.assertEqual(str(parsed), "ab")
self.assertEqual(repr(parsed), "[ 'a' [0], 'b' [1], EOF [2] ]")
parsed = str(parser.parse("b"))
self.assertEqual(parsed, "[ b, EOF ]")
parsed = parser.parse("b")
self.assertEqual(str(parsed), "b")
self.assertEqual(repr(parsed), "[ 'b' [0], EOF [1] ]")
self.assertRaises(NoMatch, lambda: parser.parse("aab"))
self.assertRaises(NoMatch, lambda: parser.parse(""))
......@@ -87,8 +102,9 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar)
parsed = str(parser.parse("ab"))
self.assertEqual(parsed, "[ a, b, EOF ]")
parsed = parser.parse("ab")
self.assertEqual(str(parsed), "ab")
self.assertEqual(repr(parsed), "[ 'a' [0], 'b' [1], EOF [2] ]")
# 'And' will try to match 'b' and fail so 'c' will never get matched
self.assertRaises(NoMatch, lambda: parser.parse("ac"))
......@@ -101,8 +117,10 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar)
parsed = str(parser.parse("ac"))
self.assertEqual(parsed, "[ a, c, EOF ]")
parsed = parser.parse("ac")
self.assertEqual(str(parsed), "ac")
self.assertEqual(repr(parsed), "[ 'a' [0], 'c' [1], EOF [2] ]")
# Not will fail on 'b'
self.assertRaises(NoMatch, lambda: parser.parse("ab"))
......
......@@ -37,7 +37,8 @@ class TestPEGParser(TestCase):
result = parser.parse(input)
self.assertTrue(isinstance(result, NonTerminal))
self.assertEqual(str(result), "[ [ [ [ 4 ] ], +, [ [ 5 ], *, [ 7 ], /, [ 3.45 ], *, [ -, 45 ], *, [ (, [ [ [ 2.56 ] ], +, [ [ 32 ] ] ], ) ], /, [ -, 56 ], *, [ (, [ [ [ 2 ] ], -, [ [ 1.34 ] ] ], ) ] ] ], EOF ]")
self.assertEqual(str(result), "4+5*7/3.45*-45*(2.56+32)/-56*(2-1.34)")
self.assertEqual(repr(result), "[ [ [ [ number '4' [0] ] ], '+' [1], [ [ number '5' [2] ], '*' [3], [ number '7' [4] ], '/' [5], [ number '3.45' [6] ], '*' [10], [ '-' [11], number '45' [12] ], '*' [14], [ '(' [15], [ [ [ number '2.56' [16] ] ], '+' [20], [ [ number '32' [21] ] ] ], ')' [23] ], '/' [24], [ '-' [25], number '56' [26] ], '*' [28], [ '(' [29], [ [ [ number '2' [30] ] ], '-' [31], [ [ number '1.34' [32] ] ] ], ')' [36] ] ] ], EOF [37] ]")
def test_reduce_tree(self):
......@@ -47,4 +48,5 @@ class TestPEGParser(TestCase):
self.assertTrue(isinstance(result, NonTerminal))
self.assertEqual(str(result), "[ [ 4, +, [ 5, *, 7, /, 3.45, *, [ -, 45 ], *, [ (, [ 2.56, +, 32 ], ) ], /, [ -, 56 ], *, [ (, [ 2, -, 1.34 ], ) ] ] ], EOF ]" )
self.assertEqual(str(result),"4+5*7/3.45*-45*(2.56+32)/-56*(2-1.34)")
self.assertEqual(repr(result), "[ [ number '4' [0], '+' [1], [ number '5' [2], '*' [3], number '7' [4], '/' [5], number '3.45' [6], '*' [10], [ '-' [11], number '45' [12] ], '*' [14], [ '(' [15], [ number '2.56' [16], '+' [20], number '32' [21] ], ')' [23] ], '/' [24], [ '-' [25], number '56' [26] ], '*' [28], [ '(' [29], [ number '2' [30], '-' [31], number '1.34' [32] ], ')' [36] ] ] ], EOF [37] ]")
......@@ -41,7 +41,8 @@ class TestPythonParser(TestCase):
result = parser.parse(input)
self.assertTrue(isinstance(result, NonTerminal))
self.assertEqual(str(result), "[ [ [ [ 4 ] ], +, [ [ 5 ], *, [ 7 ], /, [ 3.45 ], *, [ -, 45 ], *, [ (, [ [ [ 2.56 ] ], +, [ [ 32 ] ] ], ) ], /, [ -, 56 ], *, [ (, [ [ [ 2 ] ], -, [ [ 1.34 ] ] ], ) ] ] ], EOF ]")
self.assertEqual(str(result), "4+5*7/3.45*-45*(2.56+32)/-56*(2-1.34)")
self.assertEqual(repr(result), "[ [ [ [ number '4' [0] ] ], '+' [1], [ [ number '5' [2] ], '*' [3], [ number '7' [4] ], '/' [5], [ number '3.45' [6] ], '*' [10], [ '-' [11], number '45' [12] ], '*' [14], [ '(' [15], [ [ [ number '2.56' [16] ] ], '+' [20], [ [ number '32' [21] ] ] ], ')' [23] ], '/' [24], [ '-' [25], number '56' [26] ], '*' [28], [ '(' [29], [ [ [ number '2' [30] ] ], '-' [31], [ [ number '1.34' [32] ] ] ], ')' [36] ] ] ], EOF [37] ]")
def test_reduce_tree(self):
......@@ -51,5 +52,6 @@ class TestPythonParser(TestCase):
self.assertTrue(isinstance(result, NonTerminal))
self.assertEqual(str(result), "[ [ 4, +, [ 5, *, 7, /, 3.45, *, [ -, 45 ], *, [ (, [ 2.56, +, 32 ], ) ], /, [ -, 56 ], *, [ (, [ 2, -, 1.34 ], ) ] ] ], EOF ]" )
self.assertEqual(str(result),"4+5*7/3.45*-45*(2.56+32)/-56*(2-1.34)")
self.assertEqual(repr(result), "[ [ number '4' [0], '+' [1], [ number '5' [2], '*' [3], number '7' [4], '/' [5], number '3.45' [6], '*' [10], [ '-' [11], number '45' [12] ], '*' [14], [ '(' [15], [ number '2.56' [16], '+' [20], number '32' [21] ], ')' [23] ], '/' [24], [ '-' [25], number '56' [26] ], '*' [28], [ '(' [29], [ number '2' [30], '-' [31], number '1.34' [32] ], ')' [36] ] ] ], EOF [37] ]")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment