Commit 40c1b0a9 authored by Igor Dejanovic

Fixing PEP8 violations.

parent 577f2986
......@@ -16,27 +16,32 @@ import logging
logger = logging.getLogger('arpeggio')
DEFAULT_WS='\t\n\r '
DEFAULT_WS = '\t\n\r '
class ArpeggioError(Exception):
'''Base class for arpeggio errors.'''
def __init__(self, message):
self.message = message
def __str__(self):
return repr(self.message)
class GrammarError(ArpeggioError):
'''
Error raised during parser building phase used to indicate error in the grammar
definition.
Error raised during parser building phase used to indicate error in the
grammar definition.
'''
class SemanticError(ArpeggioError):
'''
Error raised during the phase of semantic analisys used to indicate semantic
error.
Error raised during the phase of semantic analysis used to indicate
semantic error.
'''
class NoMatch(Exception):
'''
Exception raised by the Match classes during parsing to indicate that the
......@@ -44,9 +49,14 @@ class NoMatch(Exception):
'''
def __init__(self, value, position, parser):
self.value = value
self.position = position # Position in the input stream where error occured
# Position in the input stream where error occurred
self.position = position
self.parser = parser
self._up = True # By default when NoMatch is thrown we will go up the Parse Model Tree.
# By default when NoMatch is thrown we will go up the Parse Model Tree.
self._up = True
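For orientation, a hedged sketch of how a caller could surface this error information; `parser` stands for an already-built parser instance and the `parse()` entry point is an assumption, while `value`, `position` and `pos_to_linecol` come from this module.

# Illustration only: report the most specific expectation carried by NoMatch.
try:
    parse_tree = parser.parse("2 +* 3")   # parse() entry point is assumed
except NoMatch as e:
    line, col = parser.pos_to_linecol(e.position)
    print("Expected '%s' at line %d, column %d" % (e.value, line, col))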
def flatten(_iterable):
......@@ -59,24 +69,29 @@ def flatten(_iterable):
result.append(e)
return result
# ---------------------------------------------------------
# Parser Model (PEG Abstract Semantic Graph) elements
class ParsingExpression(object):
"""
Represents a node of the Parser Model.
Root parser expression node will create non-terminal parser tree node while non-root
node will create list of terminals and non-terminals.
Root parser expression node will create non-terminal parser tree node while
non-root node will create list of terminals and non-terminals.
"""
def __init__(self, rule=None, root=False, nodes=None):
'''
@param rule - the name of the parser rule if this is the root of the parser rule.
@param root - Does this parser expression represents the root of the parser rule?
The root parser rule will create non-terminal node of the
parse tree during parsing.
@param rule - the name of the parser rule if this is the root of the
parser rule.
@param root - Does this parser expression represent the
root of the parser rule?
The root parser rule will create non-terminal node of
the parse tree during parsing.
@param nodes - list of child parser expression nodes.
'''
# Memoization. Every node cache the parsing results for the given input positions.
# Memoization. Every node caches the parsing results for the given input
# positions.
self.result_cache = {} # position -> parse tree
self.nodes = nodes
if nodes is None:
......@@ -121,7 +136,6 @@ class ParsingExpression(object):
def _parse_intro(self, parser):
logger.debug("Parsing %s" % self.name)
results = []
parser._skip_ws()
self.c_pos = parser.position
......@@ -131,8 +145,9 @@ class ParsingExpression(object):
#Memoization.
#If this position is already parsed by this parser expression then use
#the result
if self.result_cache.has_key(self.c_pos):
logger.debug("Result for [%s, %s] founded in result_cache." % (self, self.c_pos))
if self.c_pos in self.result_cache:
logger.debug("Result for [%s, %s] founded in result_cache." %
(self, self.c_pos))
result, new_pos = self.result_cache[self.c_pos]
parser.position = new_pos
return result
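The block above is packrat-style memoization. Below is a minimal standalone sketch of the same idea, not Arpeggio's API; `_do_parse` is a made-up stand-in for the real parsing work.

# Each expression keeps {position: (result, new_position)}, so re-parsing the
# same expression at the same input position costs only a dictionary lookup.
class MemoizedExpr(object):
    def __init__(self):
        self.result_cache = {}

    def parse_at(self, parser, pos):
        if pos in self.result_cache:            # cache hit: reuse old result
            result, new_pos = self.result_cache[pos]
            parser.position = new_pos
            return result
        result = self._do_parse(parser)         # hypothetical real work
        self.result_cache[pos] = (result, parser.position)
        return result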
......@@ -145,10 +160,10 @@ class ParsingExpression(object):
if result:
if parser.reduce_tree:
if isinstance(result,list):
if isinstance(result, list):
if self.root:
result = flatten(result)
if len(result)>1:
if len(result) > 1:
result = NonTerminal(self.rule, self.c_pos, result)
else:
result = result[0]
......@@ -166,21 +181,23 @@ class ParsingExpression(object):
def _nm_change_rule(self, nm, parser):
'''
Change rule for the given NoMatch object to a more generic if
we did not consume any input and we are moving up the parser model tree.
Used to report most generic language element expected at the place of
the NoMatch exception.
we did not consume any input and we are moving up the parser model
tree. Used to report most generic language element expected at the
place of the NoMatch exception.
'''
if self.root and self.c_pos == nm.position and nm._up:
nm.value = self.rule
class Sequence(ParsingExpression):
'''
Will match sequence of parser expressions in exact order they are defined.
'''
def __init__(self, elements=None, rule=None, root=False, nodes=None):
'''
@param elements - list used as a stageing structure for python based grammar definition.
Used in _from_python for building nodes list of child parser expressions.
@param elements - list used as a staging structure for python based
grammar definition. Used in _from_python for building nodes list of
child parser expressions.
'''
super(Sequence, self).__init__(rule, root, nodes)
self.elements = elements
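For reference, this is the kind of Python rule that `_from_python` turns into a `Sequence`; a sketch assuming the convention documented for later Arpeggio releases, where a rule returning a tuple builds a sequence and plain strings become `StrMatch` terminals. The rule names are invented.

# Hypothetical grammar fragment: each function is a parser rule; the tuple
# returned by addition() is assumed to become Sequence(number, '+', number).
def number():   return RegExMatch(r'\d+')
def addition(): return number, "+", number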
......@@ -230,7 +247,7 @@ class Repetition(ParsingExpression):
'''
def __init__(self, *elements, **kwargs):
super(Repetition, self).__init__(None)
if len(elements)==1:
if len(elements) == 1:
elements = elements[0]
self.elements = elements
......@@ -255,10 +272,11 @@ class Optional(Repetition):
return result
class ZeroOrMore(Repetition):
'''
ZeroOrMore will try to match parser expression specified zero or more times.
It will never fail.
ZeroOrMore will try to match parser expression specified zero or more
times. It will never fail.
'''
def _parse(self, parser):
results = []
......@@ -272,6 +290,7 @@ class ZeroOrMore(Repetition):
return results
class OneOrMore(Repetition):
'''
OneOrMore will try to match parser expression specified one or more times.
......@@ -292,14 +311,15 @@ class OneOrMore(Repetition):
return results
class SyntaxPredicate(ParsingExpression):
'''
Base class for all syntax predicates (and, not).
Predicates are parser expressions that will do the match but will not consume
any input.
Predicates are parser expressions that will do the match but will not
consume any input.
'''
def __init__(self, *elements, **kwargs):
if len(elements)==1:
if len(elements) == 1:
elements = elements[0]
self.elements = elements
......@@ -310,9 +330,11 @@ class SyntaxPredicate(ParsingExpression):
super(SyntaxPredicate, self).__init__(None)
class And(SyntaxPredicate):
'''
This predicate will succeed if the specified expression matches current input.
This predicate will succeed if the specified expression matches current
input.
'''
def _parse(self, parser):
for e in self.nodes:
......@@ -326,7 +348,8 @@ class And(SyntaxPredicate):
class Not(SyntaxPredicate):
'''
This predicate will succeed if the specified expression doesn't match current input.
This predicate will succeed if the specified expression doesn't match
current input.
'''
def _parse(self, parser):
for e in self.nodes:
......@@ -338,12 +361,13 @@ class Not(SyntaxPredicate):
parser.position = self.c_pos
parser._nm_raise(self.name, self.c_pos, parser)
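A hedged usage sketch: because predicates restore the position and consume nothing, `Not` can guard a more general match. The rule names below are invented.

# Hypothetical: match any word except the literal "end". Not(...) fails the
# rule if "end" is ahead, but consumes no input, so word() still starts at
# the same position when the check succeeds.
def word():      return RegExMatch(r'\w+')
def body_word(): return Not(StrMatch("end")), word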
class Match(ParsingExpression):
'''
Base class for all classes that will try to match something from the input.
'''
def __init__(self, rule, root=False):
super(Match,self).__init__(rule, root)
super(Match, self).__init__(rule, root)
@property
def name(self):
......@@ -370,7 +394,8 @@ class Match(ParsingExpression):
# If comment match is successful try terminal match again
if comments:
match = self._parse(parser)
match.comments = NonTerminal('comment', self.c_pos, comments)
match.comments = NonTerminal('comment', self.c_pos,
comments)
else:
parser._nm_raise(nm)
finally:
......@@ -402,10 +427,13 @@ class RegExMatch(Match):
if m:
parser.position += len(m.group())
logger.debug("Match %s at %d" % (m.group(), self.c_pos))
return Terminal(self.rule if self.root else '', self.c_pos, m.group())
return Terminal(self.rule if self.root else '', self.c_pos,
m.group())
else:
logger.debug("NoMatch at %d" % self.c_pos)
parser._nm_raise(self.root if self.root else self.name, self.c_pos, parser)
parser._nm_raise(self.root if self.root else self.name, self.c_pos,
parser)
class StrMatch(Match):
'''
......@@ -422,7 +450,8 @@ class StrMatch(Match):
if parser.input[parser.position:].startswith(self.to_match):
parser.position += len(self.to_match)
logger.debug("Match %s at %d" % (self.to_match, self.c_pos))
return Terminal(self.rule if self.root else '', self.c_pos, self.to_match)
return Terminal(self.rule if self.root else '', self.c_pos,
self.to_match)
else:
logger.debug("NoMatch at %d" % self.c_pos)
parser._nm_raise(self.to_match, self.c_pos, parser)
......@@ -446,6 +475,7 @@ class Kwd(StrMatch):
self.root = True
self.rule = 'keyword'
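In other words, `Kwd` matches like a plain `StrMatch` but is flagged as a root expression under the rule name 'keyword', so keywords show up as named terminals in the parse tree and in error reports. A hedged sketch with invented rule names:

# Hypothetical rule built from keywords and ordinary sub-rules.
def condition(): return RegExMatch(r'\w+')
def statement(): return RegExMatch(r'\w+')
def if_stmt():   return Kwd("if"), condition, Kwd("then"), statement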
class EndOfFile(Match):
'''
Match class that will succeed in case end of input is reached.
......@@ -480,10 +510,12 @@ class ParseTreeNode(object):
'''
def __init__(self, type, position, error):
'''
@param type - the name of the rule that created this node or empty string in case
this node is created by a non-root parser model node.
@param position - position in the input stream where match occured.
@param error - is this a false parse tree node created during error recovery?
@param type - the name of the rule that created this node or empty
string in case this node is created by a non-root
parser model node.
@param position - position in the input stream where match occurred.
@param error - is this a false parse tree node created during error
recovery?
'''
self.type = type
self.position = position
......@@ -494,13 +526,15 @@ class ParseTreeNode(object):
def name(self):
return "%s [%s]" % (self.type, self.position)
class Terminal(ParseTreeNode):
'''
Leaf node of the Parse Tree. Represents matched string.
'''
def __init__(self, type, position, value, error=False):
'''
@param value - matched string or missing token name in case of an error node.
@param value - matched string or missing token name in case of an error
node.
'''
super(Terminal, self).__init__(type, position, error)
self.value = value
......@@ -513,7 +547,7 @@ class Terminal(ParseTreeNode):
return self.value
def __eq__(self, other):
return str(self)==str(other)
return str(self) == str(other)
class NonTerminal(ParseTreeNode):
......@@ -540,12 +574,13 @@ class SemanticAction(object):
'''
Semantic actions are executed during semantic analysis. They are in charge
of producing Abstract Semantic Graph (ASG) out of the parse tree.
Every non-terminal and terminal can have semantic action defined which will be
triggered during semantic analisys.
Semantic action triggering is separated in two passes. first_pass method is required
and the method called second_pass is optional and will be called if exists after
the first pass. Second pass can be used for forward referencing,
e.g. linking to the declaration registered in the first pass stage.
Every non-terminal and terminal can have a semantic action defined which
will be triggered during semantic analysis.
Semantic action triggering is separated into two passes. The first_pass
method is required; second_pass is optional and, if it exists, will be
called after the first pass. The second pass can be used for forward
referencing, e.g. linking to a declaration registered in the first pass
stage.
'''
def first_pass(self, parser, node, nodes):
'''
......@@ -553,15 +588,18 @@ class SemanticAction(object):
'''
raise NotImplementedError()
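A hedged sketch of a concrete action and how it is registered; `ToFloat` and the 'number' rule name are invented, and it assumes a Terminal's string form is its matched text, while the `first_pass` signature and the rule-name-keyed dictionary follow the code around it.

# Hypothetical action: turn a matched 'number' terminal into a float.
class ToFloat(SemanticAction):
    def first_pass(self, parser, node, nodes):
        return float(str(node))

# Rule names are keys, action instances are values (see getASG below);
# `parser` is assumed to have been built and to have parsed some input.
asg = parser.getASG({"number": ToFloat()})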
# ----------------------------------------------------
# Parsers
class Parser(object):
def __init__(self, skipws=True, ws=DEFAULT_WS, reduce_tree=False):
'''
@skipws - if True, whitespace will not be part of the parse tree.
@ws - rule for matching ws
@reduce_tree - if true nonterminals with single child will be eliminated.
@reduce_tree - if true nonterminals with single child will be
eliminated.
'''
self.skipws = skipws
self.ws = ws
......@@ -585,8 +623,9 @@ class Parser(object):
def getASG(self, sem_actions=None):
'''
Creates Abstract Semantic Graph (ASG) from the parse tree.
@param sem_actions - semantic actions dictionary to use for semantic analysis.
Rule names are the keys and semantic action objects are values.
@param sem_actions - semantic actions dictionary to use for semantic
analysis. Rule names are the keys and semantic
action objects are values.
'''
if not self.parse_tree:
raise Exception("Parse tree is empty. You did call parse(), didn't you?")
......@@ -604,18 +643,19 @@ class Parser(object):
def tree_walk(node):
'''
Walking the parse tree and calling first_pass for every registered semantic
actions and creating list of object that needs to be called in the second pass.
Walking the parse tree and calling first_pass for every registered
semantic action and creating a list of objects that need to be
called in the second pass.
'''
nodes = []
if isinstance(node, NonTerminal):
for n in node.nodes:
nodes.append(tree_walk(n))
if sem_actions.has_key(node.type):
if node.type in sem_actions:
retval = sem_actions[node.type].first_pass(self, node, nodes)
if hasattr(sem_actions[node.type], "second_pass"):
for_second_pass.append((node.type,retval))
for_second_pass.append((node.type, retval))
else:
if isinstance(node, NonTerminal):
retval = NonTerminal(node.type, node.position, nodes)
......@@ -624,7 +664,6 @@ class Parser(object):
return retval
logger.debug("ASG: First pass")
asg = tree_walk(self.parse_tree)
......@@ -635,7 +674,6 @@ class Parser(object):
return asg
def pos_to_linecol(self, pos):
'''
Calculate (line, column) tuple for the given position in the stream.
......@@ -646,7 +684,8 @@ class Parser(object):
self.line_ends.append(self.input.index("\n"))
while True:
try:
self.line_ends.append(self.input.index("\n", self.line_ends[-1]+1))
self.line_ends.append(
self.input.index("\n", self.line_ends[-1] + 1))
except ValueError:
break
except ValueError:
......@@ -655,17 +694,18 @@ class Parser(object):
line = bisect.bisect_left(self.line_ends, pos)
col = pos
if line > 0:
col -= self.line_ends[line-1]
if self.input[self.line_ends[line-1]] in '\n\r':
col -= self.line_ends[line - 1]
if self.input[self.line_ends[line - 1]] in '\n\r':
col -= 1
return line+1, col+1
return line + 1, col + 1
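The bisect trick above in isolation; a standalone sketch, not the method itself, showing how a sorted list of newline offsets yields a 1-based (line, column) pair.

import bisect

def linecol(text, pos):
    # Offsets of every '\n'; bisect_left counts how many newlines lie
    # strictly before `pos`, which is the 0-based line number.
    line_ends = [i for i, ch in enumerate(text) if ch == "\n"]
    line = bisect.bisect_left(line_ends, pos)
    col = pos - (line_ends[line - 1] + 1) if line > 0 else pos
    return line + 1, col + 1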
def _skip_ws(self):
'''
Skipping whitespace characters.
'''
if self.skipws:
while self.position<len(self.input) and self.input[self.position] in self.ws:
while self.position < len(self.input) and \
self.input[self.position] in self.ws:
self.position += 1
def _skip_comments(self):
......@@ -683,7 +723,7 @@ class Parser(object):
@param args - NoMatch instance or value, position, parser
'''
if not self._in_parse_comment:
if len(args)==1 and isinstance(args[0], NoMatch):
if len(args) == 1 and isinstance(args[0], NoMatch):
if self.nm is None or args[0].position > self.nm.position:
self.nm = args[0]
else:
......@@ -694,13 +734,14 @@ class Parser(object):
class ParserPython(Parser):
def __init__(self, language_def, comment_def=None, skipws=True, ws=DEFAULT_WS, \
reduce_tree=False):
def __init__(self, language_def, comment_def=None, skipws=True,
ws=DEFAULT_WS, reduce_tree=False):
super(ParserPython, self).__init__(skipws, ws, reduce_tree)
# PEG Abstract Semantic Graph
self.parser_model = self._from_python(language_def)
self.comments_model = self._from_python(comment_def) if comment_def else None
self.comments_model = self._from_python(comment_def) \
if comment_def else None
# Comments should be optional and there can be more of them
if self.comments_model: # and not isinstance(self.comments_model, ZeroOrMore):
......@@ -712,8 +753,9 @@ class ParserPython(Parser):
def _from_python(self, expression):
"""
Create parser model from the definition given in the form of python functions returning
lists, tuples, callables, strings and ParsingExpression objects.
Create parser model from the definition given in the form of python
functions returning lists, tuples, callables, strings and
ParsingExpression objects.
@returns - Parser Model (PEG Abstract Semantic Graph)
"""
__rule_cache = {"EndOfFile": EndOfFile()}
......@@ -728,17 +770,19 @@ class ParserPython(Parser):
retval = None
if callable(expression): # Is this expression a parser rule?
rule = expression.__name__
if __rule_cache.has_key(rule):
if rule in __rule_cache:
logger.debug("Rule %s founded in cache." % rule)
if isinstance(__rule_cache.get(rule), CrossRef):
self.__cross_refs += 1
logger.debug("CrossRef usage: %s" % __rule_cache.get(rule).rule_name)
logger.debug("CrossRef usage: %s" %
__rule_cache.get(rule).rule_name)
return __rule_cache.get(rule)
expression_expression = expression()
if callable(expression_expression):
raise GrammarError(
"Rule element can't be just another rule in '%s'." % rule)
"Rule element can't be just another rule in '%s'." %
rule)
# Semantic action for the rule
if hasattr(expression, "sem"):
......@@ -753,12 +797,14 @@ class ParserPython(Parser):
# Update cache
__rule_cache[rule] = retval
logger.debug("New rule: %s -> %s" % (rule, retval.__class__.__name__))
logger.debug("New rule: %s -> %s" %
(rule, retval.__class__.__name__))
elif isinstance(expression, Match):
retval = expression
elif isinstance(expression, Repetition) or isinstance(expression, SyntaxPredicate):
elif isinstance(expression, Repetition) or \
isinstance(expression, SyntaxPredicate):
retval = expression
retval.nodes.append(inner_from_python(retval.elements))
if any((isinstance(x, CrossRef) for x in retval.nodes)):
......@@ -778,7 +824,8 @@ class ParserPython(Parser):
retval = StrMatch(expression)
else:
raise GrammarError("Unrecognized grammar element '%s' in rule %s." % (str(expression), rule))
raise GrammarError("Unrecognized grammar element '%s'." %
str(expression))
return retval
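Putting it together, roughly how this class is meant to be driven; a hedged end-to-end sketch in which the grammar and comment rules are invented and the `parse()` call is assumed, while the constructor arguments and `reduce_tree` come from the code above.

# Hypothetical grammar: functions return the kinds of objects _from_python
# accepts (tuples, strings, Match instances, other rule functions).
def greeting(): return Kwd("hello"), RegExMatch(r"\w+"), EndOfFile
def comment():  return RegExMatch(r"//.*")

parser = ParserPython(greeting, comment_def=comment, reduce_tree=True)
parse_tree = parser.parse("hello world  // trailing comment")  # parse() assumed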
......@@ -797,4 +844,3 @@ class ParserPython(Parser):
def errors(self):
pass