Commit 5df7664c authored by Igor Dejanovic

Support for unicode and python 2/3 compatibility

parent c19a0e25
...@@ -12,6 +12,12 @@ ...@@ -12,6 +12,12 @@
############################################################################### ###############################################################################
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
import sys

# `text` is the native unicode text type: `unicode` on Python 2, `str` on
# Python 3.  The numeric major version is compared instead of the version
# string because string comparison is lexicographic and would mis-order
# multi-digit major versions (e.g. '10...' < '3' is True).
if sys.version_info[0] < 3:
    text = unicode  # noqa: F821 -- builtin that exists only on Python 2
else:
    text = str
import codecs import codecs
import re import re
import bisect import bisect
...@@ -85,15 +91,18 @@ class NoMatch(Exception): ...@@ -85,15 +91,18 @@ class NoMatch(Exception):
def __str__(self): def __str__(self):
return "Expected '{}' at position {} => '{}'."\ return "Expected '{}' at position {} => '{}'."\
.format(self.exp_str, .format(self.exp_str,
str(self.parser.pos_to_linecol(self.position)), text(self.parser.pos_to_linecol(self.position)),
self.parser.context(position=self.position)) self.parser.context(position=self.position))
def __unicode__(self):
return self.__str__()
def flatten(_iterable): def flatten(_iterable):
'''Flattening of python iterables.''' '''Flattening of python iterables.'''
result = [] result = []
for e in _iterable: for e in _iterable:
if hasattr(e, "__iter__") and not type(e) in [str, NonTerminal]: if hasattr(e, "__iter__") and not type(e) in [text, NonTerminal]:
result.extend(flatten(e)) result.extend(flatten(e))
else: else:
result.append(e) result.append(e)
...@@ -217,7 +226,7 @@ class ParsingExpression(object): ...@@ -217,7 +226,7 @@ class ParsingExpression(object):
parser.position = new_pos parser.position = new_pos
if parser.debug: if parser.debug:
print("** Cache hit for [{}, {}] = '{}' : new_pos={}" print("** Cache hit for [{}, {}] = '{}' : new_pos={}"
.format(self.name, c_pos, str(result), str(new_pos))) .format(self.name, c_pos, text(result), text(new_pos)))
# print("<< Leaving rule {}".format(self.name)) # print("<< Leaving rule {}".format(self.name))
# If NoMatch is recorded at this position raise. # If NoMatch is recorded at this position raise.
...@@ -534,7 +543,7 @@ class Combine(Decorator): ...@@ -534,7 +543,7 @@ class Combine(Decorator):
# Create terminal from result # Create terminal from result
return Terminal(self, c_pos, return Terminal(self, c_pos,
"".join([str(result) for result in results])) "".join([text(result) for result in results]))
except NoMatch: except NoMatch:
parser.position = c_pos # Backtracking parser.position = c_pos # Backtracking
raise raise
...@@ -597,6 +606,9 @@ class RegExMatch(Match): ...@@ -597,6 +606,9 @@ class RegExMatch(Match):
def __str__(self): def __str__(self):
return self.to_match return self.to_match
def __unicode__(self):
return self.__str__()
def _parse(self, parser): def _parse(self, parser):
c_pos = parser.position c_pos = parser.position
m = self.regex.match(parser.input[c_pos:]) m = self.regex.match(parser.input[c_pos:])
...@@ -653,8 +665,11 @@ class StrMatch(Match): ...@@ -653,8 +665,11 @@ class StrMatch(Match):
def __str__(self): def __str__(self):
return self.to_match return self.to_match
def __unicode__(self):
return self.__str__()
def __eq__(self, other): def __eq__(self, other):
return self.to_match == str(other) return self.to_match == text(other)
def __hash__(self): def __hash__(self):
return hash(self.to_match) return hash(self.to_match)
...@@ -759,11 +774,14 @@ class Terminal(ParseTreeNode): ...@@ -759,11 +774,14 @@ class Terminal(ParseTreeNode):
def __str__(self): def __str__(self):
return self.value return self.value
def __unicode__(self):
return self.__str__()
def __repr__(self): def __repr__(self):
return self.desc return self.desc
def __eq__(self, other): def __eq__(self, other):
return str(self) == str(other) return text(self) == text(other)
class NonTerminal(ParseTreeNode, list): class NonTerminal(ParseTreeNode, list):
...@@ -790,7 +808,7 @@ class NonTerminal(ParseTreeNode, list): ...@@ -790,7 +808,7 @@ class NonTerminal(ParseTreeNode, list):
@property @property
def value(self): def value(self):
"""Terminal protocol.""" """Terminal protocol."""
return str(self) return text(self)
@property @property
def desc(self): def desc(self):
...@@ -799,6 +817,9 @@ class NonTerminal(ParseTreeNode, list): ...@@ -799,6 +817,9 @@ class NonTerminal(ParseTreeNode, list):
def __str__(self):
    """Return the text of all child nodes joined with ' | '.

    Children are converted with ``text`` (the module-level unicode text
    type) rather than ``str``, in line with the other conversions in this
    module.  On Python 2 ``str(x)`` would push a child's unicode value
    through the ASCII-only byte-string conversion and fail on non-ASCII
    input; on Python 3 ``text`` is ``str`` so the result is unchanged.
    """
    return " | ".join([text(x) for x in self])
def __unicode__(self):
return self.__str__()
def __repr__(self): def __repr__(self):
return "[ %s ]" % ", ".join([repr(x) for x in self]) return "[ %s ]" % ", ".join([repr(x) for x in self])
...@@ -870,7 +891,7 @@ class SemanticAction(object): ...@@ -870,7 +891,7 @@ class SemanticAction(object):
if isinstance(node, Terminal): if isinstance(node, Terminal):
# Default for Terminal is to convert to string unless suppress flag # Default for Terminal is to convert to string unless suppress flag
# is set in which case it is suppressed by setting to None. # is set in which case it is suppressed by setting to None.
retval = str(node) if not node.suppress else None retval = text(node) if not node.suppress else None
else: else:
retval = node retval = node
# Special case. If only one child exist return it. # Special case. If only one child exist return it.
...@@ -890,7 +911,7 @@ class SemanticAction(object): ...@@ -890,7 +911,7 @@ class SemanticAction(object):
# by default convert non-terminal to string # by default convert non-terminal to string
if parser.debug: if parser.debug:
print("*** Warning: Multiple non-string objects found in applying default semantic action. Converting non-terminal to string.") print("*** Warning: Multiple non-string objects found in applying default semantic action. Converting non-terminal to string.")
retval = str(node) retval = text(node)
break break
else: else:
# Return the only non-string child # Return the only non-string child
...@@ -938,7 +959,7 @@ class SemanticActionBodyWithBraces(SemanticAction): ...@@ -938,7 +959,7 @@ class SemanticActionBodyWithBraces(SemanticAction):
class SemanticActionToString(SemanticAction): class SemanticActionToString(SemanticAction):
def first_pass(self, parser, node, children): def first_pass(self, parser, node, children):
return str(node) return text(node)
# ---------------------------------------------------- # ----------------------------------------------------
# Parsers # Parsers
...@@ -1082,7 +1103,7 @@ class Parser(object): ...@@ -1082,7 +1103,7 @@ class Parser(object):
if self.debug: if self.debug:
print("Walking down ", node.name, " type:", print("Walking down ", node.name, " type:",
type(node).__name__, "str:", str(node)) type(node).__name__, "str:", text(node))
children = SemanticActionResults() children = SemanticActionResults()
if isinstance(node, NonTerminal): if isinstance(node, NonTerminal):
...@@ -1092,11 +1113,11 @@ class Parser(object): ...@@ -1092,11 +1113,11 @@ class Parser(object):
children.append_result(n.rule_name, child) children.append_result(n.rule_name, child)
if self.debug: if self.debug:
print("Processing ", node.name, "= '", str(node), print("Processing ", node.name, "= '", text(node),
"' type:", type(node).__name__, "' type:", type(node).__name__,
"len:", len(node) if isinstance(node, list) else "") "len:", len(node) if isinstance(node, list) else "")
for i, a in enumerate(children): for i, a in enumerate(children):
print("\t%d:" % (i + 1), str(a), "type:", type(a).__name__) print("\t%d:" % (i + 1), text(a), "type:", type(a).__name__)
if node.rule_name in sem_actions: if node.rule_name in sem_actions:
sem_action = sem_actions[node.rule_name] sem_action = sem_actions[node.rule_name]
...@@ -1129,7 +1150,7 @@ class Parser(object): ...@@ -1129,7 +1150,7 @@ class Parser(object):
if retval is None: if retval is None:
print("\tSuppressed.") print("\tSuppressed.")
else: else:
print("\tResolved to = ", str(retval), print("\tResolved to = ", text(retval),
" type:", type(retval).__name__) " type:", type(retval).__name__)
return retval return retval
...@@ -1183,13 +1204,13 @@ class Parser(object): ...@@ -1183,13 +1204,13 @@ class Parser(object):
position = self.position position = self.position
if length: if length:
retval = "{}*{}*{}".format( retval = "{}*{}*{}".format(
str(self.input[max(position - 10, 0):position]), text(self.input[max(position - 10, 0):position]),
str(self.input[position:position + length]), text(self.input[position:position + length]),
str(self.input[position + length:position + 10])) text(self.input[position + length:position + 10]))
else: else:
retval = "{}*{}".format( retval = "{}*{}".format(
str(self.input[max(position - 10, 0):position]), text(self.input[max(position - 10, 0):position]),
str(self.input[position:position + 10])) text(self.input[position:position + 10]))
return retval.replace('\n', ' ').replace('\r', '') return retval.replace('\n', ' ').replace('\r', '')
...@@ -1362,12 +1383,12 @@ class ParserPython(Parser): ...@@ -1362,12 +1383,12 @@ class ParserPython(Parser):
if any((isinstance(x, CrossRef) for x in retval.nodes)): if any((isinstance(x, CrossRef) for x in retval.nodes)):
__for_resolving.append(retval) __for_resolving.append(retval)
elif type(expression) is str: elif type(expression) is text:
retval = StrMatch(expression, ignore_case=self.ignore_case) retval = StrMatch(expression, ignore_case=self.ignore_case)
else: else:
raise GrammarError("Unrecognized grammar element '%s'." % raise GrammarError("Unrecognized grammar element '%s'." %
str(expression)) text(expression))
return retval return retval
......
...@@ -7,11 +7,17 @@ ...@@ -7,11 +7,17 @@
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import print_function from __future__ import print_function, unicode_literals
import sys

# `text` is the native unicode text type: `unicode` on Python 2, `str` on
# Python 3.  The numeric major version is compared instead of the version
# string because string comparison is lexicographic and would mis-order
# multi-digit major versions (e.g. '10...' < '3' is True).
if sys.version_info[0] < 3:
    text = unicode  # noqa: F821 -- builtin that exists only on Python 2
else:
    text = str
import copy import copy
from arpeggio import * from arpeggio import *
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
from arpeggio.export import PMDOTExporter, PTDOTExporter #from arpeggio.export import PMDOTExporter, PTDOTExporter
__all__ = ['ParserPEG'] __all__ = ['ParserPEG']
...@@ -93,7 +99,7 @@ class SemGrammar(SemanticAction): ...@@ -93,7 +99,7 @@ class SemGrammar(SemanticAction):
Resolving cross-references in second pass. Resolving cross-references in second pass.
''' '''
if parser.debug: if parser.debug:
print("Second pass:", type(node), str(node)) print("Second pass:", type(node), text(node))
self.resolved = set() self.resolved = set()
self._resolve(parser, node) self._resolve(parser, node)
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
# #
# This example demonstrates grammar and parser for bibtex files. # This example demonstrates grammar and parser for bibtex files.
####################################################################### #######################################################################
from __future__ import print_function from __future__ import print_function, unicode_literals
import pprint import pprint
import sys, os import sys, os
...@@ -113,7 +113,7 @@ def main(debug=False, file_name=None): ...@@ -113,7 +113,7 @@ def main(debug=False, file_name=None):
if not file_name: if not file_name:
file_name = os.path.join(os.path.dirname(__file__), 'bibtex_example.bib') file_name = os.path.join(os.path.dirname(__file__), 'bibtex_example.bib')
with open(file_name, "r") as bibtexfile: with codecs.open(file_name, "r", encoding="utf-8") as bibtexfile:
bibtexfile_content = bibtexfile.read() bibtexfile_content = bibtexfile.read()
# We create a parse tree or abstract syntax tree out of # We create a parse tree or abstract syntax tree out of
......
...@@ -10,6 +10,12 @@ ...@@ -10,6 +10,12 @@
# notation. # notation.
####################################################################### #######################################################################
from __future__ import unicode_literals, print_function
# `text` is the unicode text type on both Python 2 and Python 3.
try:
    # Python 2: the text type is the `unicode` builtin.
    text = unicode  # noqa: F821
except NameError:
    # Python 3: `unicode` no longer exists and `str` is already unicode.
    # Only NameError is caught so that unrelated errors (and
    # KeyboardInterrupt/SystemExit) are not silently swallowed.
    text = str
from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF, SemanticAction,\ from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF, SemanticAction,\
ParserPython ParserPython
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
...@@ -70,7 +76,7 @@ def exprSA(parser, node, children): ...@@ -70,7 +76,7 @@ def exprSA(parser, node, children):
expr = 0 expr = 0
start = 0 start = 0
# Check for unary + or - operator # Check for unary + or - operator
if str(children[0]) in "+-": if text(children[0]) in "+-":
start = 1 start = 1
for i in range(start, len(children), 2): for i in range(start, len(children), 2):
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# Parser model as well as parse tree exported to dot files should be # Parser model as well as parse tree exported to dot files should be
# the same as parser model and parse tree generated in calc.py example. # the same as parser model and parse tree generated in calc.py example.
####################################################################### #######################################################################
from __future__ import absolute_import from __future__ import absolute_import, unicode_literals, print_function
from arpeggio.peg import ParserPEG from arpeggio.peg import ParserPEG
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
# Copyright: (c) 2014 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com> # Copyright: (c) 2014 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# License: MIT License # License: MIT License
############################################################################## ##############################################################################
from __future__ import unicode_literals
from arpeggio import * from arpeggio import *
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
# (see http://pyparsing.wikispaces.com/). # (see http://pyparsing.wikispaces.com/).
############################################################################## ##############################################################################
from __future__ import unicode_literals
json_bnf = """ json_bnf = """
object object
......
...@@ -10,13 +10,16 @@ ...@@ -10,13 +10,16 @@
# This example demonstrates building PEG parser using PEG based grammar of PEG # This example demonstrates building PEG parser using PEG based grammar of PEG
# grammar definition language. # grammar definition language.
############################################################################## ##############################################################################
from __future__ import unicode_literals
from arpeggio import * from arpeggio import *
from arpeggio.export import PMDOTExporter, PTDOTExporter from arpeggio.export import PMDOTExporter
from arpeggio.peg import ParserPEG from arpeggio.peg import ParserPEG
# Semantic actions # Semantic actions
from arpeggio.peg import SemGrammar, sem_rule, sem_sequence, sem_ordered_choice,\ from arpeggio.peg import SemGrammar, sem_rule, sem_sequence, sem_ordered_choice,\
sem_sufix, sem_prefix, sem_strmatch, sem_regex, sem_rule_crossref sem_sufix, sem_prefix, sem_strmatch, sem_regex, sem_rule_crossref
sem_actions = { sem_actions = {
"peggrammar": SemGrammar(), "peggrammar": SemGrammar(),
...@@ -60,6 +63,7 @@ peg_grammar = r""" ...@@ -60,6 +63,7 @@ peg_grammar = r"""
comment <- '//' r'.*\n'; comment <- '//' r'.*\n';
""" """
def main(debug=False): def main(debug=False):
# ParserPEG will use ParserPython to parse peg_grammar definition and # ParserPEG will use ParserPython to parse peg_grammar definition and
...@@ -82,7 +86,7 @@ def main(debug=False): ...@@ -82,7 +86,7 @@ def main(debug=False):
# This graph should be the same as peg_peg_parser_model.dot because # This graph should be the same as peg_peg_parser_model.dot because
# they define the same parser. # they define the same parser.
PMDOTExporter().exportFile(asg, PMDOTExporter().exportFile(asg,
"peg_peg_asg.dot") "peg_peg_asg.dot")
# If we replace parser_mode with ASG constructed parser it will still # If we replace parser_mode with ASG constructed parser it will still
# parse PEG grammars # parse PEG grammars
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
# right # right
# end # end
####################################################################### #######################################################################
from __future__ import print_function from __future__ import print_function, unicode_literals
from arpeggio import * from arpeggio import *
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
# right # right
# end # end
####################################################################### #######################################################################
from __future__ import print_function from __future__ import print_function, unicode_literals
from arpeggio import * from arpeggio import *
from arpeggio.peg import ParserPEG from arpeggio.peg import ParserPEG
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
# It is taken and adapted from pyPEG project (see http://www.fdik.org/pyPEG/). # It is taken and adapted from pyPEG project (see http://www.fdik.org/pyPEG/).
####################################################################### #######################################################################
from __future__ import unicode_literals
from arpeggio import * from arpeggio import *
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
......
from __future__ import unicode_literals
import pytest
from arpeggio import SemanticAction, ParserPython from arpeggio import SemanticAction, ParserPython
def test_direct_rule_call(): def test_direct_rule_call():
......
from StringIO import StringIO from __future__ import unicode_literals
import pytest
import sys import sys
from arpeggio import ParserPython from arpeggio import ParserPython
......
...@@ -8,6 +8,8 @@ ...@@ -8,6 +8,8 @@
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com> # Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import unicode_literals
import pytest import pytest
from arpeggio import ParserPython, ZeroOrMore, OneOrMore, NonTerminal, Terminal, NoMatch, Combine from arpeggio import ParserPython, ZeroOrMore, OneOrMore, NonTerminal, Terminal, NoMatch, Combine
from arpeggio.peg import ParserPEG from arpeggio.peg import ParserPEG
......
...@@ -8,10 +8,19 @@ ...@@ -8,10 +8,19 @@
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com> # Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import unicode_literals
import pytest import pytest
from arpeggio import ParserPython, SemanticAction, ParseTreeNode from arpeggio import ParserPython, SemanticAction, ParseTreeNode
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
# `text` is the unicode text type on both Python 2 and Python 3.
try:
    # For python 2.x
    text = unicode  # noqa: F821
except NameError:
    # For python 3.x -- `unicode` is undefined there.  Only NameError is
    # caught so that unrelated errors are not silently swallowed.
    text = str
def grammar(): return parentheses, 'strmatch' def grammar(): return parentheses, 'strmatch'
def parentheses(): return '(', rulea, ')' def parentheses(): return '(', rulea, ')'
def rulea(): return ['+', '-'], number def rulea(): return ['+', '-'], number
...@@ -24,7 +33,7 @@ parse_tree_node = False ...@@ -24,7 +33,7 @@ parse_tree_node = False
class ParenthesesSA(SemanticAction): class ParenthesesSA(SemanticAction):
def first_pass(self, parser, node, children): def first_pass(self, parser, node, children):
global p_removed, parse_tree_node global p_removed, parse_tree_node
p_removed = str(children[0]) != '(' p_removed = text(children[0]) != '('
parse_tree_node = isinstance(children[0], ParseTreeNode) parse_tree_node = isinstance(children[0], ParseTreeNode)
return children[0] if len(children)==1 else children[1] return children[0] if len(children)==1 else children[1]
...@@ -32,7 +41,7 @@ class ParenthesesSA(SemanticAction): ...@@ -32,7 +41,7 @@ class ParenthesesSA(SemanticAction):
class RuleSA(SemanticAction): class RuleSA(SemanticAction):
def first_pass(self, parser, node, children): def first_pass(self, parser, node, children):
global number_str global number_str
number_str = type(children[1]) == str number_str = type(children[1]) == text
return children[1] return children[1]
......
from __future__ import unicode_literals
import pytest import pytest
# Grammar # Grammar
......
...@@ -7,12 +7,13 @@ ...@@ -7,12 +7,13 @@
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import unicode_literals
import pytest import pytest
import os import os
from arpeggio.export import PMDOTExporter, PTDOTExporter from arpeggio.export import PMDOTExporter, PTDOTExporter
# Grammar # Grammar
from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF , ParserPython, Sequence, NonTerminal from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF, ParserPython
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import unicode_literals
import pytest import pytest
# Grammar # Grammar
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import unicode_literals
import pytest import pytest
from arpeggio import ParserPython, ZeroOrMore, OneOrMore, NoMatch, EOF, Optional, And, Not from arpeggio import ParserPython, ZeroOrMore, OneOrMore, NoMatch, EOF, Optional, And, Not
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
......
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_pathologic_models
# Purpose: Test that grammar models that could lead to infinite loops are
# handled properly.
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest import pytest
from arpeggio import ZeroOrMore, Optional, ParserPython, NoMatch from arpeggio import ZeroOrMore, Optional, ParserPython, NoMatch
def test_optional_inside_zeroormore(): def test_optional_inside_zeroormore():
"""
Test optional match inside a zero or more.
Optional should always succeed thus inducing ZeroOrMore
to try the match again.
Arpeggio handles this using soft failures.
"""
def grammar(): return ZeroOrMore(Optional('a')) def grammar(): return ZeroOrMore(Optional('a'))
parser = ParserPython(grammar) parser = ParserPython(grammar)
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import unicode_literals
import pytest import pytest
# Grammar # Grammar
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import unicode_literals
import pytest import pytest
# Grammar # Grammar
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import unicode_literals
import pytest import pytest
# Grammar # Grammar
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
# License: MIT License # License: MIT License
####################################################################### #######################################################################
from __future__ import unicode_literals
import pytest import pytest
# Grammar # Grammar
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment