Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
A
arpeggio-gm
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
backend
arpeggio-gm
Commits
6c12636c
Commit
6c12636c
authored
Jun 11, 2014
by
Igor Dejanovic
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bugfix in backtracking
parent
913ce9a6
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
82 additions
and
39 deletions
+82
-39
__init__.py
arpeggio/__init__.py
+82
-39
No files found.
arpeggio/__init__.py
View file @
6c12636c
...
@@ -151,38 +151,47 @@ class ParsingExpression(object):
...
@@ -151,38 +151,47 @@ class ParsingExpression(object):
def
_parse_intro
(
self
,
parser
):
def
_parse_intro
(
self
,
parser
):
if
parser
.
debug
:
if
parser
.
debug
:
print
(
"
Parsing
{}"
.
format
(
self
.
name
))
print
(
"
>> Entering rule
{}"
.
format
(
self
.
name
))
# Skip whitespaces if we are not in the lexical rule
# Skip whitespaces if we are not in the lexical rule
if
not
parser
.
_in_lex_rule
:
if
not
parser
.
_in_lex_rule
:
parser
.
_skip_ws
()
parser
.
_skip_ws
()
# Set the begi
ning position in the input stream of
# # Set the begin
ning position in the input stream of
# this parsing expression
#
# this parsing expression
self
.
c_pos
=
parser
.
position
#
self.c_pos = parser.position
def
parse
(
self
,
parser
):
def
parse
(
self
,
parser
):
self
.
_parse_intro
(
parser
)
self
.
_parse_intro
(
parser
)
# Current position could change in recursive calls
# Current position could change in recursive calls
# so save it.
# so save it.
c_pos
=
self
.
c_pos
c_pos
=
parser
.
position
# Memoization.
# Memoization.
# If this position is already parsed by this parser expression use
# If this position is already parsed by this parser expression use
# the result
# the result
if
c_pos
in
self
.
result_cache
:
if
c_pos
in
self
.
result_cache
:
if
parser
.
debug
:
print
(
"Result for [{}, {}] founded in result_cache."
.
format
(
self
,
self
.
c_pos
))
result
,
new_pos
=
self
.
result_cache
[
c_pos
]
result
,
new_pos
=
self
.
result_cache
[
c_pos
]
parser
.
position
=
new_pos
parser
.
position
=
new_pos
if
parser
.
debug
:
print
(
"** Cache hit for [{}, {}] = '{}'"
.
format
(
self
.
name
,
c_pos
,
unicode
(
result
)))
if
parser
.
debug
:
print
(
"<< Leaving rule {}"
.
format
(
self
.
name
))
return
result
return
result
# We are descending down
# We are descending down
if
parser
.
nm
:
if
parser
.
nm
:
parser
.
nm
.
_up
=
False
parser
.
nm
.
_up
=
False
try
:
result
=
self
.
_parse
(
parser
)
result
=
self
.
_parse
(
parser
)
except
NoMatch
:
parser
.
position
=
c_pos
# Backtracking
raise
finally
:
if
parser
.
debug
:
print
(
"<< Leaving rule {}"
.
format
(
self
.
name
))
# Create terminal or non-terminal if result is not
# Create terminal or non-terminal if result is not
# already a Terminal.
# already a Terminal.
...
@@ -211,7 +220,7 @@ class ParsingExpression(object):
...
@@ -211,7 +220,7 @@ class ParsingExpression(object):
Used to report most generic language element expected at the
Used to report most generic language element expected at the
place of the NoMatch exception.
place of the NoMatch exception.
"""
"""
if
self
.
root
and
self
.
c_pos
==
nm
.
position
and
nm
.
_up
:
if
self
.
root
and
parser
.
position
==
nm
.
position
and
nm
.
_up
:
nm
.
rule
=
self
.
rule
nm
.
rule
=
self
.
rule
...
@@ -227,6 +236,7 @@ class Sequence(ParsingExpression):
...
@@ -227,6 +236,7 @@ class Sequence(ParsingExpression):
if
result
:
if
result
:
results
.
append
(
result
)
results
.
append
(
result
)
except
NoMatch
as
m
:
except
NoMatch
as
m
:
# parser.position = self.c_pos # Backtracking
self
.
_nm_change_rule
(
m
,
parser
)
self
.
_nm_change_rule
(
m
,
parser
)
raise
raise
...
@@ -241,12 +251,13 @@ class OrderedChoice(Sequence):
...
@@ -241,12 +251,13 @@ class OrderedChoice(Sequence):
def
_parse
(
self
,
parser
):
def
_parse
(
self
,
parser
):
result
=
None
result
=
None
match
=
False
match
=
False
c_pos
=
parser
.
position
for
e
in
self
.
nodes
:
for
e
in
self
.
nodes
:
try
:
try
:
result
=
e
.
parse
(
parser
)
result
=
e
.
parse
(
parser
)
match
=
True
match
=
True
except
NoMatch
as
m
:
except
NoMatch
as
m
:
parser
.
position
=
self
.
c_pos
# Backtracking
parser
.
position
=
c_pos
# Backtracking
self
.
_nm_change_rule
(
m
,
parser
)
self
.
_nm_change_rule
(
m
,
parser
)
else
:
else
:
break
break
...
@@ -269,10 +280,11 @@ class Optional(Repetition):
...
@@ -269,10 +280,11 @@ class Optional(Repetition):
"""
"""
def
_parse
(
self
,
parser
):
def
_parse
(
self
,
parser
):
result
=
None
result
=
None
c_pos
=
parser
.
position
try
:
try
:
result
=
self
.
nodes
[
0
]
.
parse
(
parser
)
result
=
self
.
nodes
[
0
]
.
parse
(
parser
)
except
NoMatch
:
except
NoMatch
:
parser
.
position
=
self
.
c_pos
# Backtracking
parser
.
position
=
c_pos
# Backtracking
return
result
return
result
...
@@ -286,10 +298,10 @@ class ZeroOrMore(Repetition):
...
@@ -286,10 +298,10 @@ class ZeroOrMore(Repetition):
results
=
[]
results
=
[]
while
True
:
while
True
:
try
:
try
:
self
.
c_pos
=
parser
.
position
c_pos
=
parser
.
position
results
.
append
(
self
.
nodes
[
0
]
.
parse
(
parser
))
results
.
append
(
self
.
nodes
[
0
]
.
parse
(
parser
))
except
NoMatch
:
except
NoMatch
:
parser
.
position
=
self
.
c_pos
# Backtracking
parser
.
position
=
c_pos
# Backtracking
break
break
return
results
return
results
...
@@ -304,11 +316,11 @@ class OneOrMore(Repetition):
...
@@ -304,11 +316,11 @@ class OneOrMore(Repetition):
first
=
False
first
=
False
while
True
:
while
True
:
try
:
try
:
self
.
c_pos
=
parser
.
position
c_pos
=
parser
.
position
results
.
append
(
self
.
nodes
[
0
]
.
parse
(
parser
))
results
.
append
(
self
.
nodes
[
0
]
.
parse
(
parser
))
first
=
True
first
=
True
except
NoMatch
:
except
NoMatch
:
parser
.
position
=
self
.
c_pos
# Backtracking
parser
.
position
=
c_pos
# Backtracking
if
not
first
:
if
not
first
:
raise
raise
break
break
...
@@ -323,19 +335,21 @@ class SyntaxPredicate(ParsingExpression):
...
@@ -323,19 +335,21 @@ class SyntaxPredicate(ParsingExpression):
consume any input.
consume any input.
"""
"""
class
And
(
SyntaxPredicate
):
class
And
(
SyntaxPredicate
):
"""
"""
This predicate will succeed if the specified expression matches current
This predicate will succeed if the specified expression matches current
input.
input.
"""
"""
def
_parse
(
self
,
parser
):
def
_parse
(
self
,
parser
):
c_pos
=
parser
.
position
for
e
in
self
.
nodes
:
for
e
in
self
.
nodes
:
try
:
try
:
e
.
parse
(
parser
)
e
.
parse
(
parser
)
except
NoMatch
:
except
NoMatch
:
parser
.
position
=
self
.
c_pos
parser
.
position
=
c_pos
raise
raise
parser
.
position
=
self
.
c_pos
parser
.
position
=
c_pos
class
Not
(
SyntaxPredicate
):
class
Not
(
SyntaxPredicate
):
...
@@ -344,14 +358,15 @@ class Not(SyntaxPredicate):
...
@@ -344,14 +358,15 @@ class Not(SyntaxPredicate):
current input.
current input.
"""
"""
def
_parse
(
self
,
parser
):
def
_parse
(
self
,
parser
):
c_pos
=
parser
.
position
for
e
in
self
.
nodes
:
for
e
in
self
.
nodes
:
try
:
try
:
e
.
parse
(
parser
)
e
.
parse
(
parser
)
except
NoMatch
:
except
NoMatch
:
parser
.
position
=
self
.
c_pos
parser
.
position
=
c_pos
return
return
parser
.
position
=
self
.
c_pos
parser
.
position
=
c_pos
parser
.
_nm_raise
(
self
.
name
,
self
.
c_pos
,
parser
)
parser
.
_nm_raise
(
self
.
name
,
c_pos
,
parser
)
class
Empty
(
SyntaxPredicate
):
class
Empty
(
SyntaxPredicate
):
...
@@ -382,7 +397,7 @@ class Combine(Decorator):
...
@@ -382,7 +397,7 @@ class Combine(Decorator):
old_in_lex_rule
=
parser
.
_in_lex_rule
old_in_lex_rule
=
parser
.
_in_lex_rule
parser
.
_in_lex_rule
=
True
parser
.
_in_lex_rule
=
True
self
.
c_pos
=
parser
.
position
c_pos
=
parser
.
position
try
:
try
:
for
parser_model_node
in
self
.
nodes
:
for
parser_model_node
in
self
.
nodes
:
results
.
append
(
parser_model_node
.
parse
(
parser
))
results
.
append
(
parser_model_node
.
parse
(
parser
))
...
@@ -390,10 +405,10 @@ class Combine(Decorator):
...
@@ -390,10 +405,10 @@ class Combine(Decorator):
results
=
flatten
(
results
)
results
=
flatten
(
results
)
# Create terminal from result
# Create terminal from result
return
Terminal
(
self
.
rule
if
self
.
root
else
''
,
self
.
c_pos
,
\
return
Terminal
(
self
.
rule
if
self
.
root
else
''
,
c_pos
,
\
""
.
join
([
str
(
result
)
for
result
in
results
]))
""
.
join
([
str
(
result
)
for
result
in
results
]))
except
NoMatch
:
except
NoMatch
:
parser
.
position
=
self
.
c_pos
# Backtracking
parser
.
position
=
c_pos
# Backtracking
raise
raise
finally
:
finally
:
parser
.
_in_lex_rule
=
old_in_lex_rule
parser
.
_in_lex_rule
=
old_in_lex_rule
...
@@ -417,6 +432,8 @@ class Match(ParsingExpression):
...
@@ -417,6 +432,8 @@ class Match(ParsingExpression):
if
parser
.
_in_parse_comment
:
if
parser
.
_in_parse_comment
:
return
self
.
_parse
(
parser
)
return
self
.
_parse
(
parser
)
c_pos
=
parser
.
position
comments
=
[]
comments
=
[]
try
:
try
:
match
=
self
.
_parse
(
parser
)
match
=
self
.
_parse
(
parser
)
...
@@ -434,7 +451,7 @@ class Match(ParsingExpression):
...
@@ -434,7 +451,7 @@ class Match(ParsingExpression):
# If comment match successfull try terminal match again
# If comment match successfull try terminal match again
if
comments
:
if
comments
:
match
=
self
.
_parse
(
parser
)
match
=
self
.
_parse
(
parser
)
match
.
comments
=
NonTerminal
(
'comment'
,
self
.
c_pos
,
match
.
comments
=
NonTerminal
(
'comment'
,
c_pos
,
comments
)
comments
)
else
:
else
:
parser
.
_nm_raise
(
nm
)
parser
.
_nm_raise
(
nm
)
...
@@ -464,17 +481,19 @@ class RegExMatch(Match):
...
@@ -464,17 +481,19 @@ class RegExMatch(Match):
self
.
regex
=
re
.
compile
(
to_match
)
self
.
regex
=
re
.
compile
(
to_match
)
def
_parse
(
self
,
parser
):
def
_parse
(
self
,
parser
):
m
=
self
.
regex
.
match
(
parser
.
input
[
parser
.
position
:])
c_pos
=
parser
.
position
m
=
self
.
regex
.
match
(
parser
.
input
[
c_pos
:])
if
m
:
if
m
:
parser
.
position
+=
len
(
m
.
group
())
if
parser
.
debug
:
if
parser
.
debug
:
print
(
"Match {} at {}"
.
format
(
m
.
group
(),
self
.
c_pos
))
print
(
"++ Match '
%
s' at
%
d => '
%
s'"
%
(
m
.
group
(),
\
return
Terminal
(
self
.
rule
if
self
.
root
else
''
,
self
.
c_pos
,
c_pos
,
parser
.
context
(
len
(
m
.
group
()))))
parser
.
position
+=
len
(
m
.
group
())
return
Terminal
(
self
.
rule
if
self
.
root
else
''
,
c_pos
,
m
.
group
())
m
.
group
())
else
:
else
:
if
parser
.
debug
:
if
parser
.
debug
:
print
(
"
NoMatch at {}"
.
format
(
self
.
c_pos
))
print
(
"
-- NoMatch at {}"
.
format
(
c_pos
))
parser
.
_nm_raise
(
self
.
name
,
self
.
c_pos
,
parser
)
parser
.
_nm_raise
(
self
.
name
,
c_pos
,
parser
)
class
StrMatch
(
Match
):
class
StrMatch
(
Match
):
...
@@ -489,16 +508,18 @@ class StrMatch(Match):
...
@@ -489,16 +508,18 @@ class StrMatch(Match):
self
.
to_match
=
to_match
self
.
to_match
=
to_match
def
_parse
(
self
,
parser
):
def
_parse
(
self
,
parser
):
if
parser
.
input
[
parser
.
position
:]
.
startswith
(
self
.
to_match
):
c_pos
=
parser
.
position
parser
.
position
+=
len
(
self
.
to_match
)
if
parser
.
input
[
c_pos
:]
.
startswith
(
self
.
to_match
):
if
parser
.
debug
:
if
parser
.
debug
:
print
(
"Match {} at {}"
.
format
(
self
.
to_match
,
self
.
c_pos
))
print
(
"++ Match '{}' at {} => '{}'"
.
format
(
self
.
to_match
,
\
return
Terminal
(
self
.
rule
if
self
.
root
else
''
,
self
.
c_pos
,
c_pos
,
parser
.
context
(
len
(
self
.
to_match
))))
parser
.
position
+=
len
(
self
.
to_match
)
return
Terminal
(
self
.
rule
if
self
.
root
else
''
,
c_pos
,
self
.
to_match
)
self
.
to_match
)
else
:
else
:
if
parser
.
debug
:
if
parser
.
debug
:
print
(
"
NoMatch at {}"
.
format
(
self
.
c_pos
))
print
(
"
-- NoMatch at {}"
.
format
(
c_pos
))
parser
.
_nm_raise
(
self
.
to_match
,
self
.
c_pos
,
parser
)
parser
.
_nm_raise
(
self
.
to_match
,
c_pos
,
parser
)
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
to_match
return
self
.
to_match
...
@@ -535,12 +556,13 @@ class EndOfFile(Match):
...
@@ -535,12 +556,13 @@ class EndOfFile(Match):
return
"EOF"
return
"EOF"
def
_parse
(
self
,
parser
):
def
_parse
(
self
,
parser
):
if
len
(
parser
.
input
)
==
parser
.
position
:
c_pos
=
parser
.
position
return
Terminal
(
'EOF'
,
self
.
c_pos
,
''
)
if
len
(
parser
.
input
)
==
c_pos
:
return
Terminal
(
'** EOF'
,
c_pos
,
''
)
else
:
else
:
if
parser
.
debug
:
if
parser
.
debug
:
print
(
"EOF not matched."
)
print
(
"
!!
EOF not matched."
)
parser
.
_nm_raise
(
self
.
name
,
self
.
c_pos
,
parser
)
parser
.
_nm_raise
(
self
.
name
,
c_pos
,
parser
)
def
EOF
():
return
EndOfFile
()
def
EOF
():
return
EndOfFile
()
...
@@ -797,6 +819,27 @@ class Parser(object):
...
@@ -797,6 +819,27 @@ class Parser(object):
col
-=
1
col
-=
1
return
line
+
1
,
col
+
1
return
line
+
1
,
col
+
1
def context(self, length=None, position=None):
    """
    Returns current context substring, i.e. the substring around current
    position.

    The result consists of up to 10 characters preceding the position,
    an asterisk marking the position itself, and the characters that
    follow it up to offset ``position + 10``.

    Args:
        length(int): If given (and non-zero), the `length` characters
            starting at the position are enclosed in asterisks, i.e.
            the result has the form ``before*marked*after``.
        position(int): The position in the input stream. If None, the
            parser's current position (``self.position``) is used.

    Returns:
        The formatted context string: ``before*after`` when no length is
        given, ``before*marked*after`` otherwise.
    """
    # Compare with None explicitly: 0 is a valid position (start of the
    # input) and must not silently fall back to the current position.
    if position is None:
        position = self.position

    # Up to 10 chars of leading context; clamp so the slice start never
    # goes negative (which would index from the end of the input).
    before = str(self.input[max(position - 10, 0):position])

    if length:
        return "{}*{}*{}".format(
            before,
            str(self.input[position:position + length]),
            str(self.input[position + length:position + 10]))

    return "{}*{}".format(
        before,
        str(self.input[position:position + 10]))
def
_skip_ws
(
self
):
def
_skip_ws
(
self
):
"""
"""
Skiping whitespace characters.
Skiping whitespace characters.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment