1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_parsing_expressions
# Purpose: Test for parsing expressions.
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
from arpeggio import ParserPython, ZeroOrMore, OneOrMore, NoMatch, EOF, Optional, And, Not
from arpeggio import RegExMatch as _
def test_sequence():
    """Check that a tuple grammar matches its elements one after another."""
    def grammar():
        return ("a", "b", "c")

    tree = ParserPython(grammar).parse("a b c")

    # str() joins the matched terminals; repr() also shows their positions,
    # which here skip over the single spaces in the input.
    expected_str = "a | b | c"
    expected_repr = "[ 'a' [0], 'b' [2], 'c' [4] ]"
    assert str(tree) == expected_str
    assert repr(tree) == expected_repr
def test_ordered_choice():
    """Check that a list grammar accepts exactly one of its alternatives."""
    def grammar():
        return ["a", "b", "c"], EOF

    parser = ParserPython(grammar)

    # (input, expected str(), expected repr()) for inputs that must parse.
    accepted = (
        ("b", "b | ", "[ 'b' [0], EOF [1] ]"),
        ("c", "c | ", "[ 'c' [0], EOF [1] ]"),
    )
    for text, as_str, as_repr in accepted:
        tree = parser.parse(text)
        assert str(tree) == as_str
        assert repr(tree) == as_repr

    # The grammar is one alternative followed by EOF, so a second letter
    # in the input must be rejected.
    for text in ("ab", "bb"):
        with pytest.raises(NoMatch):
            parser.parse(text)
def test_zero_or_more():
    """Check that ZeroOrMore accepts any number of repetitions, including none."""
    def grammar():
        return ZeroOrMore("a"), EOF

    parser = ParserPython(grammar)

    # (input, expected str(), expected repr()) for inputs that must parse.
    accepted = (
        (
            "aaaaaaa",
            "a | a | a | a | a | a | a | ",
            "[ 'a' [0], 'a' [1], 'a' [2], 'a' [3], 'a' [4], 'a' [5], 'a' [6], EOF [7] ]",
        ),
        # Empty input: only EOF is matched.
        ("", "", "[ EOF [0] ]"),
    )
    for text, as_str, as_repr in accepted:
        tree = parser.parse(text)
        assert str(tree) == as_str
        assert repr(tree) == as_repr

    # Input that is neither 'a' repetitions nor empty must be rejected.
    with pytest.raises(NoMatch):
        parser.parse("bbb")
def test_one_or_more():
    """Check that OneOrMore requires at least one repetition."""
    def grammar():
        return OneOrMore("a")

    parser = ParserPython(grammar)

    tree = parser.parse("aaaaaaa")
    expected_str = "a | a | a | a | a | a | a"
    expected_repr = "[ 'a' [0], 'a' [1], 'a' [2], 'a' [3], 'a' [4], 'a' [5], 'a' [6] ]"
    assert str(tree) == expected_str
    assert repr(tree) == expected_repr

    # Neither empty input nor input without a leading 'a' may parse.
    for text in ("", "bbb"):
        with pytest.raises(NoMatch):
            parser.parse(text)
def test_optional():
    """Check that Optional matches its expression at most once."""
    def grammar():
        return Optional("a"), "b", EOF

    parser = ParserPython(grammar)

    # (input, expected str(), expected repr()): with and without the
    # optional 'a' in front of the mandatory 'b'.
    accepted = (
        ("ab", "a | b | ", "[ 'a' [0], 'b' [1], EOF [2] ]"),
        ("b", "b | ", "[ 'b' [0], EOF [1] ]"),
    )
    for text, as_str, as_repr in accepted:
        tree = parser.parse(text)
        assert str(tree) == as_str
        assert repr(tree) == as_repr

    # A doubled 'a' and a missing mandatory 'b' must both be rejected.
    for text in ("aab", ""):
        with pytest.raises(NoMatch):
            parser.parse(text)
# Syntax predicates
def test_and():
    """Check the And predicate: it must succeed without consuming input."""
    def grammar():
        return "a", And("b"), ["c", "b"], EOF

    parser = ParserPython(grammar)

    tree = parser.parse("ab")
    expected_str = "a | b | "
    expected_repr = "[ 'a' [0], 'b' [1], EOF [2] ]"
    assert str(tree) == expected_str
    assert repr(tree) == expected_repr

    rejected = (
        # And("b") fails on 'c', so the 'c' alternative is never reached.
        "ac",
        # And("b") leaves 'b' in the input; the choice then takes that 'b',
        # so the second 'b' is left over where EOF is required.
        "abb",
    )
    for text in rejected:
        with pytest.raises(NoMatch):
            parser.parse(text)
def test_not():
    """Check the Not predicate: it must fail when its expression matches."""
    def grammar():
        return "a", Not("b"), ["b", "c"], EOF

    parser = ParserPython(grammar)

    tree = parser.parse("ac")
    expected_str = "a | c | "
    expected_repr = "[ 'a' [0], 'c' [1], EOF [2] ]"
    assert str(tree) == expected_str
    assert repr(tree) == expected_repr

    rejected = (
        # Not("b") fails because the next input character is 'b'.
        "ab",
        # Not("b") does not consume 'c'; the choice then takes that 'c',
        # so the trailing 'b' is left over where EOF is required.
        "acb",
    )
    for text in rejected:
        with pytest.raises(NoMatch):
            parser.parse(text)