Commit 6c12636c authored by Igor Dejanovic

bugfix in backtracking

parent 913ce9a6
...@@ -151,38 +151,47 @@ class ParsingExpression(object): ...@@ -151,38 +151,47 @@ class ParsingExpression(object):
def _parse_intro(self, parser): def _parse_intro(self, parser):
if parser.debug: if parser.debug:
print("Parsing {}".format(self.name)) print(">> Entering rule {}".format(self.name))
# Skip whitespaces if we are not in the lexical rule # Skip whitespaces if we are not in the lexical rule
if not parser._in_lex_rule: if not parser._in_lex_rule:
parser._skip_ws() parser._skip_ws()
# Set the begining position in the input stream of # # Set the beginning position in the input stream of
# this parsing expression # # this parsing expression
self.c_pos = parser.position # self.c_pos = parser.position
def parse(self, parser): def parse(self, parser):
self._parse_intro(parser) self._parse_intro(parser)
# Current position could change in recursive calls # Current position could change in recursive calls
# so save it. # so save it.
c_pos = self.c_pos c_pos = parser.position
# Memoization. # Memoization.
# If this position is already parsed by this parser expression use # If this position is already parsed by this parser expression use
# the result # the result
if c_pos in self.result_cache: if c_pos in self.result_cache:
if parser.debug:
print("Result for [{}, {}] founded in result_cache.".format(self, self.c_pos))
result, new_pos = self.result_cache[c_pos] result, new_pos = self.result_cache[c_pos]
parser.position = new_pos parser.position = new_pos
if parser.debug:
print("** Cache hit for [{}, {}] = '{}'".format(self.name, c_pos, unicode(result)))
if parser.debug:
print("<< Leaving rule {}".format(self.name))
return result return result
# We are descending down # We are descending down
if parser.nm: if parser.nm:
parser.nm._up = False parser.nm._up = False
try:
result = self._parse(parser) result = self._parse(parser)
except NoMatch:
parser.position = c_pos # Backtracking
raise
finally:
if parser.debug:
print("<< Leaving rule {}".format(self.name))
# Create terminal or non-terminal if result is not # Create terminal or non-terminal if result is not
# already a Terminal. # already a Terminal.
...@@ -211,7 +220,7 @@ class ParsingExpression(object): ...@@ -211,7 +220,7 @@ class ParsingExpression(object):
Used to report most generic language element expected at the Used to report most generic language element expected at the
place of the NoMatch exception. place of the NoMatch exception.
""" """
if self.root and self.c_pos == nm.position and nm._up: if self.root and parser.position == nm.position and nm._up:
nm.rule = self.rule nm.rule = self.rule
...@@ -227,6 +236,7 @@ class Sequence(ParsingExpression): ...@@ -227,6 +236,7 @@ class Sequence(ParsingExpression):
if result: if result:
results.append(result) results.append(result)
except NoMatch as m: except NoMatch as m:
# parser.position = self.c_pos # Backtracking
self._nm_change_rule(m, parser) self._nm_change_rule(m, parser)
raise raise
...@@ -241,12 +251,13 @@ class OrderedChoice(Sequence): ...@@ -241,12 +251,13 @@ class OrderedChoice(Sequence):
def _parse(self, parser): def _parse(self, parser):
result = None result = None
match = False match = False
c_pos = parser.position
for e in self.nodes: for e in self.nodes:
try: try:
result = e.parse(parser) result = e.parse(parser)
match = True match = True
except NoMatch as m: except NoMatch as m:
parser.position = self.c_pos # Backtracking parser.position = c_pos # Backtracking
self._nm_change_rule(m, parser) self._nm_change_rule(m, parser)
else: else:
break break
...@@ -269,10 +280,11 @@ class Optional(Repetition): ...@@ -269,10 +280,11 @@ class Optional(Repetition):
""" """
def _parse(self, parser): def _parse(self, parser):
result = None result = None
c_pos = parser.position
try: try:
result = self.nodes[0].parse(parser) result = self.nodes[0].parse(parser)
except NoMatch: except NoMatch:
parser.position = self.c_pos # Backtracking parser.position = c_pos # Backtracking
return result return result
...@@ -286,10 +298,10 @@ class ZeroOrMore(Repetition): ...@@ -286,10 +298,10 @@ class ZeroOrMore(Repetition):
results = [] results = []
while True: while True:
try: try:
self.c_pos = parser.position c_pos = parser.position
results.append(self.nodes[0].parse(parser)) results.append(self.nodes[0].parse(parser))
except NoMatch: except NoMatch:
parser.position = self.c_pos # Backtracking parser.position = c_pos # Backtracking
break break
return results return results
...@@ -304,11 +316,11 @@ class OneOrMore(Repetition): ...@@ -304,11 +316,11 @@ class OneOrMore(Repetition):
first = False first = False
while True: while True:
try: try:
self.c_pos = parser.position c_pos = parser.position
results.append(self.nodes[0].parse(parser)) results.append(self.nodes[0].parse(parser))
first = True first = True
except NoMatch: except NoMatch:
parser.position = self.c_pos # Backtracking parser.position = c_pos # Backtracking
if not first: if not first:
raise raise
break break
...@@ -323,19 +335,21 @@ class SyntaxPredicate(ParsingExpression): ...@@ -323,19 +335,21 @@ class SyntaxPredicate(ParsingExpression):
consume any input. consume any input.
""" """
class And(SyntaxPredicate): class And(SyntaxPredicate):
""" """
This predicate will succeed if the specified expression matches current This predicate will succeed if the specified expression matches current
input. input.
""" """
def _parse(self, parser): def _parse(self, parser):
c_pos = parser.position
for e in self.nodes: for e in self.nodes:
try: try:
e.parse(parser) e.parse(parser)
except NoMatch: except NoMatch:
parser.position = self.c_pos parser.position = c_pos
raise raise
parser.position = self.c_pos parser.position = c_pos
class Not(SyntaxPredicate): class Not(SyntaxPredicate):
...@@ -344,14 +358,15 @@ class Not(SyntaxPredicate): ...@@ -344,14 +358,15 @@ class Not(SyntaxPredicate):
current input. current input.
""" """
def _parse(self, parser): def _parse(self, parser):
c_pos = parser.position
for e in self.nodes: for e in self.nodes:
try: try:
e.parse(parser) e.parse(parser)
except NoMatch: except NoMatch:
parser.position = self.c_pos parser.position = c_pos
return return
parser.position = self.c_pos parser.position = c_pos
parser._nm_raise(self.name, self.c_pos, parser) parser._nm_raise(self.name, c_pos, parser)
class Empty(SyntaxPredicate): class Empty(SyntaxPredicate):
...@@ -382,7 +397,7 @@ class Combine(Decorator): ...@@ -382,7 +397,7 @@ class Combine(Decorator):
old_in_lex_rule = parser._in_lex_rule old_in_lex_rule = parser._in_lex_rule
parser._in_lex_rule = True parser._in_lex_rule = True
self.c_pos = parser.position c_pos = parser.position
try: try:
for parser_model_node in self.nodes: for parser_model_node in self.nodes:
results.append(parser_model_node.parse(parser)) results.append(parser_model_node.parse(parser))
...@@ -390,10 +405,10 @@ class Combine(Decorator): ...@@ -390,10 +405,10 @@ class Combine(Decorator):
results = flatten(results) results = flatten(results)
# Create terminal from result # Create terminal from result
return Terminal(self.rule if self.root else '', self.c_pos, \ return Terminal(self.rule if self.root else '', c_pos, \
"".join([str(result) for result in results])) "".join([str(result) for result in results]))
except NoMatch: except NoMatch:
parser.position = self.c_pos # Backtracking parser.position = c_pos # Backtracking
raise raise
finally: finally:
parser._in_lex_rule = old_in_lex_rule parser._in_lex_rule = old_in_lex_rule
...@@ -417,6 +432,8 @@ class Match(ParsingExpression): ...@@ -417,6 +432,8 @@ class Match(ParsingExpression):
if parser._in_parse_comment: if parser._in_parse_comment:
return self._parse(parser) return self._parse(parser)
c_pos = parser.position
comments = [] comments = []
try: try:
match = self._parse(parser) match = self._parse(parser)
...@@ -434,7 +451,7 @@ class Match(ParsingExpression): ...@@ -434,7 +451,7 @@ class Match(ParsingExpression):
# If comment match successfull try terminal match again # If comment match successfull try terminal match again
if comments: if comments:
match = self._parse(parser) match = self._parse(parser)
match.comments = NonTerminal('comment', self.c_pos, match.comments = NonTerminal('comment', c_pos,
comments) comments)
else: else:
parser._nm_raise(nm) parser._nm_raise(nm)
...@@ -464,17 +481,19 @@ class RegExMatch(Match): ...@@ -464,17 +481,19 @@ class RegExMatch(Match):
self.regex = re.compile(to_match) self.regex = re.compile(to_match)
def _parse(self, parser): def _parse(self, parser):
m = self.regex.match(parser.input[parser.position:]) c_pos = parser.position
m = self.regex.match(parser.input[c_pos:])
if m: if m:
parser.position += len(m.group())
if parser.debug: if parser.debug:
print("Match {} at {}".format(m.group(), self.c_pos)) print("++ Match '%s' at %d => '%s'" % (m.group(), \
return Terminal(self.rule if self.root else '', self.c_pos, c_pos, parser.context(len(m.group()))))
parser.position += len(m.group())
return Terminal(self.rule if self.root else '', c_pos,
m.group()) m.group())
else: else:
if parser.debug: if parser.debug:
print("NoMatch at {}".format(self.c_pos)) print("-- NoMatch at {}".format(c_pos))
parser._nm_raise(self.name, self.c_pos, parser) parser._nm_raise(self.name, c_pos, parser)
class StrMatch(Match): class StrMatch(Match):
...@@ -489,16 +508,18 @@ class StrMatch(Match): ...@@ -489,16 +508,18 @@ class StrMatch(Match):
self.to_match = to_match self.to_match = to_match
def _parse(self, parser): def _parse(self, parser):
if parser.input[parser.position:].startswith(self.to_match): c_pos = parser.position
parser.position += len(self.to_match) if parser.input[c_pos:].startswith(self.to_match):
if parser.debug: if parser.debug:
print("Match {} at {}".format(self.to_match, self.c_pos)) print("++ Match '{}' at {} => '{}'".format(self.to_match,\
return Terminal(self.rule if self.root else '', self.c_pos, c_pos, parser.context(len(self.to_match))))
parser.position += len(self.to_match)
return Terminal(self.rule if self.root else '', c_pos,
self.to_match) self.to_match)
else: else:
if parser.debug: if parser.debug:
print("NoMatch at {}".format(self.c_pos)) print("-- NoMatch at {}".format(c_pos))
parser._nm_raise(self.to_match, self.c_pos, parser) parser._nm_raise(self.to_match, c_pos, parser)
def __str__(self): def __str__(self):
return self.to_match return self.to_match
...@@ -535,12 +556,13 @@ class EndOfFile(Match): ...@@ -535,12 +556,13 @@ class EndOfFile(Match):
return "EOF" return "EOF"
def _parse(self, parser): def _parse(self, parser):
if len(parser.input) == parser.position: c_pos = parser.position
return Terminal('EOF', self.c_pos, '') if len(parser.input) == c_pos:
return Terminal('** EOF', c_pos, '')
else: else:
if parser.debug: if parser.debug:
print("EOF not matched.") print("!! EOF not matched.")
parser._nm_raise(self.name, self.c_pos, parser) parser._nm_raise(self.name, c_pos, parser)
def EOF(): return EndOfFile() def EOF(): return EndOfFile()
...@@ -797,6 +819,27 @@ class Parser(object): ...@@ -797,6 +819,27 @@ class Parser(object):
col -= 1 col -= 1
return line + 1, col + 1 return line + 1, col + 1
def context(self, length=None, position=None):
    """
    Returns current context substring, i.e. the substring around the
    given position, with an asterisk marking the position itself.

    Up to 10 characters before the position and up to 10 characters
    from the position onward are included.

    Args:
        length(int): If given (and non-zero), the `length` chars starting
            at the position are enclosed in asterisks.
        position(int): The position in the input stream. If None, the
            parser's current position (self.position) is used.

    Returns:
        str: "<before>*<after>" or, when length is given,
            "<before>*<marked>*<rest>".
    """
    # Compare against None explicitly: position 0 (start of input) is a
    # valid position and must not fall back to self.position.
    if position is None:
        position = self.position

    before = str(self.input[max(position - 10, 0):position])

    if length:
        # The tail slice is empty when length >= 10, because the window
        # after the position is capped at 10 characters.
        return "{}*{}*{}".format(
            before,
            str(self.input[position:position + length]),
            str(self.input[position + length:position + 10]))

    return "{}*{}".format(before, str(self.input[position:position + 10]))
def _skip_ws(self): def _skip_ws(self):
""" """
Skiping whitespace characters. Skiping whitespace characters.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment