Commit 2951f542 authored by Igor Dejanovic's avatar Igor Dejanovic

refs #4, #6 Support for lexical rules and NonTerminal/Terminal str/repr

Introduced grammar rule decorators. The Combine decorator is used to
mark a part of the grammar as a lexical rule. Such a rule will always
return a Terminal, whitespace will be preserved, and comments will
not be matched.
parent 713b5f2f
...@@ -151,9 +151,12 @@ class ParsingExpression(object): ...@@ -151,9 +151,12 @@ class ParsingExpression(object):
def _parse_intro(self, parser): def _parse_intro(self, parser):
if parser.debug: if parser.debug:
print "Parsing %s" % self.name print "Parsing %s" % self.name
parser._skip_ws()
# Set the begining position in input stream of # Skip whitespaces if we are not in the lexical rule
if not parser._in_lex_rule:
parser._skip_ws()
# Set the beginning position in the input stream of
# this parsing expression # this parsing expression
self.c_pos = parser.position self.c_pos = parser.position
...@@ -165,7 +168,7 @@ class ParsingExpression(object): ...@@ -165,7 +168,7 @@ class ParsingExpression(object):
c_pos = self.c_pos c_pos = self.c_pos
# Memoization. # Memoization.
# If this position is already parsed by this parser expression than use # If this position is already parsed by this parser expression use
# the result # the result
if c_pos in self.result_cache: if c_pos in self.result_cache:
if parser.debug: if parser.debug:
...@@ -181,7 +184,9 @@ class ParsingExpression(object): ...@@ -181,7 +184,9 @@ class ParsingExpression(object):
result = self._parse(parser) result = self._parse(parser)
if result: # Create terminal or non-terminal if result is not
# already a Terminal.
if result and not isinstance(result, Terminal):
if parser.reduce_tree: if parser.reduce_tree:
if isinstance(result, list): if isinstance(result, list):
if self.root: if self.root:
...@@ -360,6 +365,44 @@ class Empty(SyntaxPredicate): ...@@ -360,6 +365,44 @@ class Empty(SyntaxPredicate):
pass pass
class Decorator(ParsingExpression):
    """
    Decorators are a special kind of parsing expression used to mark
    a contained pexpression and give it some special semantics.
    For example, decorators are used to mark a pexpression as a lexical
    rule (see :class:`Combine`).
    """
class Combine(Decorator):
    """
    Decorator that marks the contained pexpression as a lexical (lexeme)
    rule.

    A lexical rule always returns a single Terminal parse tree node whose
    value is the concatenation of everything its sub-expressions matched.
    While the rule is being parsed ``parser._in_lex_rule`` is set, so
    whitespace is preserved and comments are not matched.
    """

    def _parse(self, parser):
        """
        Match all sub-expressions in order and merge them into one Terminal.

        Args:
            parser: The parser whose ``position`` and ``_in_lex_rule``
                state is read and updated during the match.

        Returns:
            Terminal: A node positioned at the start of the match whose
            value is the joined string form of all matched results.

        Raises:
            NoMatch: Re-raised from a sub-expression after the parser
                position has been reset to where this rule started.
        """
        # Remember the enclosing state so that a Combine nested inside
        # another Combine does not switch lexical mode off for the rest
        # of the outer rule.
        was_in_lex_rule = parser._in_lex_rule
        parser._in_lex_rule = True

        # Keep the start position in a local as well: self.c_pos may be
        # overwritten if this expression is re-entered while parsing its
        # own sub-expressions.
        start = self.c_pos = parser.position
        try:
            results = flatten(
                [node.parse(parser) for node in self.nodes])
            # Create terminal from result
            return Terminal(self.rule if self.root else '', start,
                            "".join([str(result) for result in results]))
        except NoMatch:
            parser.position = start  # Backtracking
            raise
        finally:
            parser._in_lex_rule = was_in_lex_rule
class Match(ParsingExpression): class Match(ParsingExpression):
""" """
Base class for all classes that will try to match something from the input. Base class for all classes that will try to match something from the input.
...@@ -375,14 +418,15 @@ class Match(ParsingExpression): ...@@ -375,14 +418,15 @@ class Match(ParsingExpression):
self._parse_intro(parser) self._parse_intro(parser)
if parser._in_parse_comment: if parser._in_parse_comment:
return self._parse(parser) return self._parse(parser)
comments = [] comments = []
try: try:
match = self._parse(parser) match = self._parse(parser)
except NoMatch, nm: except NoMatch, nm:
# If not matched try to match comment # If not matched and not in lexical rule try to match comment
#TODO: Comment handling refactoring. Should think of better way to #TODO: Comment handling refactoring. Should think of better way to
# handle comments. # handle comments.
if parser.comments_model: if not parser._in_lex_rule and parser.comments_model:
try: try:
parser._in_parse_comment = True parser._in_parse_comment = True
while True: while True:
...@@ -492,7 +536,7 @@ class EndOfFile(Match): ...@@ -492,7 +536,7 @@ class EndOfFile(Match):
def _parse(self, parser): def _parse(self, parser):
if len(parser.input) == parser.position: if len(parser.input) == parser.position:
return Terminal(self.rule if self.root else '', self.c_pos, 'EOF') return Terminal('EOF', self.c_pos, '')
else: else:
if parser.debug: if parser.debug:
print "EOF not matched." print "EOF not matched."
...@@ -535,6 +579,8 @@ class Terminal(ParseTreeNode): ...@@ -535,6 +579,8 @@ class Terminal(ParseTreeNode):
Leaf node of the Parse Tree. Represents matched string. Leaf node of the Parse Tree. Represents matched string.
Attributes: Attributes:
type (str): The name of the rule that created this terminal.
position (int): A position in the input stream where match occurred.
value (str): Matched string at the given position or missing token value (str): Matched string at the given position or missing token
name in the case of an error node. name in the case of an error node.
""" """
...@@ -544,11 +590,17 @@ class Terminal(ParseTreeNode): ...@@ -544,11 +590,17 @@ class Terminal(ParseTreeNode):
@property @property
def desc(self): def desc(self):
return "%s '%s' [%s]" % (self.type, self.value, self.position) if self.value:
return "%s '%s' [%s]" % (self.type, self.value, self.position)
else:
return "%s [%s]" % (self.type, self.position)
def __str__(self): def __str__(self):
return self.value return self.value
def __repr__(self):
return self.desc
def __eq__(self, other): def __eq__(self, other):
return str(self) == str(other) return str(self) == str(other)
...@@ -573,7 +625,10 @@ class NonTerminal(ParseTreeNode): ...@@ -573,7 +625,10 @@ class NonTerminal(ParseTreeNode):
return iter(self.nodes) return iter(self.nodes)
def __str__(self): def __str__(self):
return "[ %s ]" % ", ".join([str(x) for x in self.nodes]) return "".join([str(x) for x in self.nodes])
def __repr__(self):
return "[ %s ]" % ", ".join([repr(x) for x in self.nodes])
# ---------------------------------------------------- # ----------------------------------------------------
...@@ -625,6 +680,7 @@ class Parser(object): ...@@ -625,6 +680,7 @@ class Parser(object):
self.parse_tree = None self.parse_tree = None
self._in_parse_comment = False self._in_parse_comment = False
self._in_lex_rule = False
def parse(self, _input): def parse(self, _input):
self.position = 0 # Input position self.position = 0 # Input position
...@@ -823,7 +879,8 @@ class ParserPython(Parser): ...@@ -823,7 +879,8 @@ class ParserPython(Parser):
retval = expression retval = expression
elif isinstance(expression, Repetition) or \ elif isinstance(expression, Repetition) or \
isinstance(expression, SyntaxPredicate): isinstance(expression, SyntaxPredicate) or \
isinstance(expression, Decorator):
retval = expression retval = expression
retval.nodes.append(inner_from_python(retval.elements)) retval.nodes.append(inner_from_python(retval.elements))
if any((isinstance(x, CrossRef) for x in retval.nodes)): if any((isinstance(x, CrossRef) for x in retval.nodes)):
......
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_decorator_combine
# Purpose: Test for Combine decorator. Combine decorator
# results in Terminal parse tree node. Whitespaces are
# preserved (they are not skipped) and comments are not matched.
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from unittest import TestCase
from arpeggio import ParserPython, ZeroOrMore, OneOrMore, NonTerminal, Terminal, NoMatch, Combine
from arpeggio.peg import ParserPEG
class TestDecoratorCombine(TestCase):
    """Checks that a Combine-decorated rule behaves as a lexical rule."""

    def test_combine_python(self):
        """Combine yields a single Terminal and does not skip whitespace."""

        # Lexical rule: the whole match is expected to be one Terminal.
        def my_rule(): return Combine(ZeroOrMore("a"), OneOrMore("b"))

        # Ordinary sequence rule: expected to be a NonTerminal.
        def root(): return my_rule(), "."

        parser = ParserPython(root, debug=True)

        tree = parser.parse("abbb.")

        # Whitespace is preserved inside a lexical rule, so input that
        # contains spaces between the letters must not be recognized.
        with self.assertRaises(NoMatch):
            parser.parse("a b bb.")

        self.assertIsInstance(tree, NonTerminal)
        first_node = tree.nodes[0]
        self.assertIsInstance(first_node, Terminal)
        self.assertEqual(first_node.value, "abbb")
...@@ -19,8 +19,10 @@ class TestParsingExpression(TestCase): ...@@ -19,8 +19,10 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar) parser = ParserPython(grammar)
parsed = str(parser.parse("a b c")) parsed = parser.parse("a b c")
self.assertEqual(parsed, "[ a, b, c ]")
self.assertEqual(str(parsed), "abc")
self.assertEqual(repr(parsed), "[ 'a' [0], 'b' [2], 'c' [4] ]")
def test_ordered_choice(self): def test_ordered_choice(self):
...@@ -28,11 +30,14 @@ class TestParsingExpression(TestCase): ...@@ -28,11 +30,14 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar) parser = ParserPython(grammar)
parsed = str(parser.parse("b")) parsed = parser.parse("b")
self.assertEqual(parsed, "[ b, EOF ]")
self.assertEqual(str(parsed), "b")
self.assertEqual(repr(parsed), "[ 'b' [0], EOF [1] ]")
parsed = str(parser.parse("c")) parsed = parser.parse("c")
self.assertEqual(parsed, "[ c, EOF ]") self.assertEqual(str(parsed), "c")
self.assertEqual(repr(parsed), "[ 'c' [0], EOF [1] ]")
self.assertRaises(NoMatch, lambda: parser.parse("ab")) self.assertRaises(NoMatch, lambda: parser.parse("ab"))
self.assertRaises(NoMatch, lambda: parser.parse("bb")) self.assertRaises(NoMatch, lambda: parser.parse("bb"))
...@@ -43,11 +48,15 @@ class TestParsingExpression(TestCase): ...@@ -43,11 +48,15 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar) parser = ParserPython(grammar)
parsed = str(parser.parse("aaaaaaa")) parsed = parser.parse("aaaaaaa")
self.assertEqual(parsed, "[ a, a, a, a, a, a, a, EOF ]")
self.assertEqual(str(parsed), "aaaaaaa")
self.assertEqual(repr(parsed), "[ 'a' [0], 'a' [1], 'a' [2], 'a' [3], 'a' [4], 'a' [5], 'a' [6], EOF [7] ]")
parsed = parser.parse("")
parsed = str(parser.parse("")) self.assertEqual(str(parsed), "")
self.assertEqual(parsed, "[ EOF ]") self.assertEqual(repr(parsed), "[ EOF [0] ]")
self.assertRaises(NoMatch, lambda: parser.parse("bbb")) self.assertRaises(NoMatch, lambda: parser.parse("bbb"))
...@@ -57,8 +66,10 @@ class TestParsingExpression(TestCase): ...@@ -57,8 +66,10 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar) parser = ParserPython(grammar)
parsed = str(parser.parse("aaaaaaa")) parsed = parser.parse("aaaaaaa")
self.assertEqual(parsed, "[ a, a, a, a, a, a, a ]")
self.assertEqual(str(parsed), "aaaaaaa")
self.assertEqual(repr(parsed), "[ 'a' [0], 'a' [1], 'a' [2], 'a' [3], 'a' [4], 'a' [5], 'a' [6] ]")
self.assertRaises(NoMatch, lambda: parser.parse("")) self.assertRaises(NoMatch, lambda: parser.parse(""))
self.assertRaises(NoMatch, lambda: parser.parse("bbb")) self.assertRaises(NoMatch, lambda: parser.parse("bbb"))
...@@ -69,11 +80,15 @@ class TestParsingExpression(TestCase): ...@@ -69,11 +80,15 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar) parser = ParserPython(grammar)
parsed = str(parser.parse("ab")) parsed = parser.parse("ab")
self.assertEqual(parsed, "[ a, b, EOF ]")
self.assertEqual(str(parsed), "ab")
self.assertEqual(repr(parsed), "[ 'a' [0], 'b' [1], EOF [2] ]")
parsed = str(parser.parse("b")) parsed = parser.parse("b")
self.assertEqual(parsed, "[ b, EOF ]")
self.assertEqual(str(parsed), "b")
self.assertEqual(repr(parsed), "[ 'b' [0], EOF [1] ]")
self.assertRaises(NoMatch, lambda: parser.parse("aab")) self.assertRaises(NoMatch, lambda: parser.parse("aab"))
self.assertRaises(NoMatch, lambda: parser.parse("")) self.assertRaises(NoMatch, lambda: parser.parse(""))
...@@ -87,8 +102,9 @@ class TestParsingExpression(TestCase): ...@@ -87,8 +102,9 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar) parser = ParserPython(grammar)
parsed = str(parser.parse("ab")) parsed = parser.parse("ab")
self.assertEqual(parsed, "[ a, b, EOF ]") self.assertEqual(str(parsed), "ab")
self.assertEqual(repr(parsed), "[ 'a' [0], 'b' [1], EOF [2] ]")
# 'And' will try to match 'b' and fail so 'c' will never get matched # 'And' will try to match 'b' and fail so 'c' will never get matched
self.assertRaises(NoMatch, lambda: parser.parse("ac")) self.assertRaises(NoMatch, lambda: parser.parse("ac"))
...@@ -101,8 +117,10 @@ class TestParsingExpression(TestCase): ...@@ -101,8 +117,10 @@ class TestParsingExpression(TestCase):
parser = ParserPython(grammar) parser = ParserPython(grammar)
parsed = str(parser.parse("ac")) parsed = parser.parse("ac")
self.assertEqual(parsed, "[ a, c, EOF ]")
self.assertEqual(str(parsed), "ac")
self.assertEqual(repr(parsed), "[ 'a' [0], 'c' [1], EOF [2] ]")
# 'Not' will fail on 'b' # 'Not' will fail on 'b'
self.assertRaises(NoMatch, lambda: parser.parse("ab")) self.assertRaises(NoMatch, lambda: parser.parse("ab"))
......
...@@ -37,7 +37,8 @@ class TestPEGParser(TestCase): ...@@ -37,7 +37,8 @@ class TestPEGParser(TestCase):
result = parser.parse(input) result = parser.parse(input)
self.assertTrue(isinstance(result, NonTerminal)) self.assertTrue(isinstance(result, NonTerminal))
self.assertEqual(str(result), "[ [ [ [ 4 ] ], +, [ [ 5 ], *, [ 7 ], /, [ 3.45 ], *, [ -, 45 ], *, [ (, [ [ [ 2.56 ] ], +, [ [ 32 ] ] ], ) ], /, [ -, 56 ], *, [ (, [ [ [ 2 ] ], -, [ [ 1.34 ] ] ], ) ] ] ], EOF ]") self.assertEqual(str(result), "4+5*7/3.45*-45*(2.56+32)/-56*(2-1.34)")
self.assertEqual(repr(result), "[ [ [ [ number '4' [0] ] ], '+' [1], [ [ number '5' [2] ], '*' [3], [ number '7' [4] ], '/' [5], [ number '3.45' [6] ], '*' [10], [ '-' [11], number '45' [12] ], '*' [14], [ '(' [15], [ [ [ number '2.56' [16] ] ], '+' [20], [ [ number '32' [21] ] ] ], ')' [23] ], '/' [24], [ '-' [25], number '56' [26] ], '*' [28], [ '(' [29], [ [ [ number '2' [30] ] ], '-' [31], [ [ number '1.34' [32] ] ] ], ')' [36] ] ] ], EOF [37] ]")
def test_reduce_tree(self): def test_reduce_tree(self):
...@@ -47,4 +48,5 @@ class TestPEGParser(TestCase): ...@@ -47,4 +48,5 @@ class TestPEGParser(TestCase):
self.assertTrue(isinstance(result, NonTerminal)) self.assertTrue(isinstance(result, NonTerminal))
self.assertEqual(str(result), "[ [ 4, +, [ 5, *, 7, /, 3.45, *, [ -, 45 ], *, [ (, [ 2.56, +, 32 ], ) ], /, [ -, 56 ], *, [ (, [ 2, -, 1.34 ], ) ] ] ], EOF ]" ) self.assertEqual(str(result),"4+5*7/3.45*-45*(2.56+32)/-56*(2-1.34)")
self.assertEqual(repr(result), "[ [ number '4' [0], '+' [1], [ number '5' [2], '*' [3], number '7' [4], '/' [5], number '3.45' [6], '*' [10], [ '-' [11], number '45' [12] ], '*' [14], [ '(' [15], [ number '2.56' [16], '+' [20], number '32' [21] ], ')' [23] ], '/' [24], [ '-' [25], number '56' [26] ], '*' [28], [ '(' [29], [ number '2' [30], '-' [31], number '1.34' [32] ], ')' [36] ] ] ], EOF [37] ]")
...@@ -41,7 +41,8 @@ class TestPythonParser(TestCase): ...@@ -41,7 +41,8 @@ class TestPythonParser(TestCase):
result = parser.parse(input) result = parser.parse(input)
self.assertTrue(isinstance(result, NonTerminal)) self.assertTrue(isinstance(result, NonTerminal))
self.assertEqual(str(result), "[ [ [ [ 4 ] ], +, [ [ 5 ], *, [ 7 ], /, [ 3.45 ], *, [ -, 45 ], *, [ (, [ [ [ 2.56 ] ], +, [ [ 32 ] ] ], ) ], /, [ -, 56 ], *, [ (, [ [ [ 2 ] ], -, [ [ 1.34 ] ] ], ) ] ] ], EOF ]") self.assertEqual(str(result), "4+5*7/3.45*-45*(2.56+32)/-56*(2-1.34)")
self.assertEqual(repr(result), "[ [ [ [ number '4' [0] ] ], '+' [1], [ [ number '5' [2] ], '*' [3], [ number '7' [4] ], '/' [5], [ number '3.45' [6] ], '*' [10], [ '-' [11], number '45' [12] ], '*' [14], [ '(' [15], [ [ [ number '2.56' [16] ] ], '+' [20], [ [ number '32' [21] ] ] ], ')' [23] ], '/' [24], [ '-' [25], number '56' [26] ], '*' [28], [ '(' [29], [ [ [ number '2' [30] ] ], '-' [31], [ [ number '1.34' [32] ] ] ], ')' [36] ] ] ], EOF [37] ]")
def test_reduce_tree(self): def test_reduce_tree(self):
...@@ -51,5 +52,6 @@ class TestPythonParser(TestCase): ...@@ -51,5 +52,6 @@ class TestPythonParser(TestCase):
self.assertTrue(isinstance(result, NonTerminal)) self.assertTrue(isinstance(result, NonTerminal))
self.assertEqual(str(result), "[ [ 4, +, [ 5, *, 7, /, 3.45, *, [ -, 45 ], *, [ (, [ 2.56, +, 32 ], ) ], /, [ -, 56 ], *, [ (, [ 2, -, 1.34 ], ) ] ] ], EOF ]" ) self.assertEqual(str(result),"4+5*7/3.45*-45*(2.56+32)/-56*(2-1.34)")
self.assertEqual(repr(result), "[ [ number '4' [0], '+' [1], [ number '5' [2], '*' [3], number '7' [4], '/' [5], number '3.45' [6], '*' [10], [ '-' [11], number '45' [12] ], '*' [14], [ '(' [15], [ number '2.56' [16], '+' [20], number '32' [21] ], ')' [23] ], '/' [24], [ '-' [25], number '56' [26] ], '*' [28], [ '(' [29], [ number '2' [30], '-' [31], number '1.34' [32] ], ')' [36] ] ] ], EOF [37] ]")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment