Commit 5df7664c authored by Igor Dejanovic

Support for unicode and python 2/3 compatibility

parent c19a0e25
......@@ -12,6 +12,12 @@
###############################################################################
from __future__ import print_function, unicode_literals
import sys
if sys.version < '3':
text = unicode
else:
text = str
import codecs
import re
import bisect
......@@ -85,15 +91,18 @@ class NoMatch(Exception):
def __str__(self):
return "Expected '{}' at position {} => '{}'."\
.format(self.exp_str,
str(self.parser.pos_to_linecol(self.position)),
text(self.parser.pos_to_linecol(self.position)),
self.parser.context(position=self.position))
def __unicode__(self):
return self.__str__()
def flatten(_iterable):
'''Flattening of python iterables.'''
result = []
for e in _iterable:
if hasattr(e, "__iter__") and not type(e) in [str, NonTerminal]:
if hasattr(e, "__iter__") and not type(e) in [text, NonTerminal]:
result.extend(flatten(e))
else:
result.append(e)
......@@ -217,7 +226,7 @@ class ParsingExpression(object):
parser.position = new_pos
if parser.debug:
print("** Cache hit for [{}, {}] = '{}' : new_pos={}"
.format(self.name, c_pos, str(result), str(new_pos)))
.format(self.name, c_pos, text(result), text(new_pos)))
# print("<< Leaving rule {}".format(self.name))
# If NoMatch is recorded at this position raise.
......@@ -534,7 +543,7 @@ class Combine(Decorator):
# Create terminal from result
return Terminal(self, c_pos,
"".join([str(result) for result in results]))
"".join([text(result) for result in results]))
except NoMatch:
parser.position = c_pos # Backtracking
raise
......@@ -597,6 +606,9 @@ class RegExMatch(Match):
def __str__(self):
return self.to_match
def __unicode__(self):
return self.__str__()
def _parse(self, parser):
c_pos = parser.position
m = self.regex.match(parser.input[c_pos:])
......@@ -653,8 +665,11 @@ class StrMatch(Match):
def __str__(self):
return self.to_match
def __unicode__(self):
return self.__str__()
def __eq__(self, other):
return self.to_match == str(other)
return self.to_match == text(other)
def __hash__(self):
return hash(self.to_match)
......@@ -759,11 +774,14 @@ class Terminal(ParseTreeNode):
def __str__(self):
return self.value
def __unicode__(self):
return self.__str__()
def __repr__(self):
return self.desc
def __eq__(self, other):
return str(self) == str(other)
return text(self) == text(other)
class NonTerminal(ParseTreeNode, list):
......@@ -790,7 +808,7 @@ class NonTerminal(ParseTreeNode, list):
@property
def value(self):
"""Terminal protocol."""
return str(self)
return text(self)
@property
def desc(self):
......@@ -799,6 +817,9 @@ class NonTerminal(ParseTreeNode, list):
def __str__(self):
return " | ".join([str(x) for x in self])
def __unicode__(self):
return self.__str__()
def __repr__(self):
return "[ %s ]" % ", ".join([repr(x) for x in self])
......@@ -870,7 +891,7 @@ class SemanticAction(object):
if isinstance(node, Terminal):
# Default for Terminal is to convert to string unless suppress flag
# is set in which case it is suppressed by setting to None.
retval = str(node) if not node.suppress else None
retval = text(node) if not node.suppress else None
else:
retval = node
# Special case. If only one child exist return it.
......@@ -890,7 +911,7 @@ class SemanticAction(object):
# by default convert non-terminal to string
if parser.debug:
print("*** Warning: Multiple non-string objects found in applying default semantic action. Converting non-terminal to string.")
retval = str(node)
retval = text(node)
break
else:
# Return the only non-string child
......@@ -938,7 +959,7 @@ class SemanticActionBodyWithBraces(SemanticAction):
class SemanticActionToString(SemanticAction):
def first_pass(self, parser, node, children):
return str(node)
return text(node)
# ----------------------------------------------------
# Parsers
......@@ -1082,7 +1103,7 @@ class Parser(object):
if self.debug:
print("Walking down ", node.name, " type:",
type(node).__name__, "str:", str(node))
type(node).__name__, "str:", text(node))
children = SemanticActionResults()
if isinstance(node, NonTerminal):
......@@ -1092,11 +1113,11 @@ class Parser(object):
children.append_result(n.rule_name, child)
if self.debug:
print("Processing ", node.name, "= '", str(node),
print("Processing ", node.name, "= '", text(node),
"' type:", type(node).__name__,
"len:", len(node) if isinstance(node, list) else "")
for i, a in enumerate(children):
print("\t%d:" % (i + 1), str(a), "type:", type(a).__name__)
print("\t%d:" % (i + 1), text(a), "type:", type(a).__name__)
if node.rule_name in sem_actions:
sem_action = sem_actions[node.rule_name]
......@@ -1129,7 +1150,7 @@ class Parser(object):
if retval is None:
print("\tSuppressed.")
else:
print("\tResolved to = ", str(retval),
print("\tResolved to = ", text(retval),
" type:", type(retval).__name__)
return retval
......@@ -1183,13 +1204,13 @@ class Parser(object):
position = self.position
if length:
retval = "{}*{}*{}".format(
str(self.input[max(position - 10, 0):position]),
str(self.input[position:position + length]),
str(self.input[position + length:position + 10]))
text(self.input[max(position - 10, 0):position]),
text(self.input[position:position + length]),
text(self.input[position + length:position + 10]))
else:
retval = "{}*{}".format(
str(self.input[max(position - 10, 0):position]),
str(self.input[position:position + 10]))
text(self.input[max(position - 10, 0):position]),
text(self.input[position:position + 10]))
return retval.replace('\n', ' ').replace('\r', '')
......@@ -1362,12 +1383,12 @@ class ParserPython(Parser):
if any((isinstance(x, CrossRef) for x in retval.nodes)):
__for_resolving.append(retval)
elif type(expression) is str:
elif type(expression) is text:
retval = StrMatch(expression, ignore_case=self.ignore_case)
else:
raise GrammarError("Unrecognized grammar element '%s'." %
str(expression))
text(expression))
return retval
......
......@@ -7,11 +7,17 @@
# License: MIT License
#######################################################################
from __future__ import print_function
from __future__ import print_function, unicode_literals
import sys
if sys.version < '3':
text = unicode
else:
text = str
import copy
from arpeggio import *
from arpeggio import RegExMatch as _
from arpeggio.export import PMDOTExporter, PTDOTExporter
#from arpeggio.export import PMDOTExporter, PTDOTExporter
__all__ = ['ParserPEG']
......@@ -93,7 +99,7 @@ class SemGrammar(SemanticAction):
Resolving cross-references in second pass.
'''
if parser.debug:
print("Second pass:", type(node), str(node))
print("Second pass:", type(node), text(node))
self.resolved = set()
self._resolve(parser, node)
......
......@@ -8,7 +8,7 @@
#
# This example demonstrates grammar and parser for bibtex files.
#######################################################################
from __future__ import print_function
from __future__ import print_function, unicode_literals
import pprint
import sys, os
......@@ -113,7 +113,7 @@ def main(debug=False, file_name=None):
if not file_name:
file_name = os.path.join(os.path.dirname(__file__), 'bibtex_example.bib')
with open(file_name, "r") as bibtexfile:
with codecs.open(file_name, "r", encoding="utf-8") as bibtexfile:
bibtexfile_content = bibtexfile.read()
# We create a parse tree or abstract syntax tree out of
......
......@@ -10,6 +10,12 @@
# notation.
#######################################################################
from __future__ import unicode_literals, print_function
try:
text=unicode
except:
text=str
from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF, SemanticAction,\
ParserPython
from arpeggio import RegExMatch as _
......@@ -70,7 +76,7 @@ def exprSA(parser, node, children):
expr = 0
start = 0
# Check for unary + or - operator
if str(children[0]) in "+-":
if text(children[0]) in "+-":
start = 1
for i in range(start, len(children), 2):
......
......@@ -12,7 +12,7 @@
# Parser model as well as parse tree exported to dot files should be
# the same as parser model and parse tree generated in calc.py example.
#######################################################################
from __future__ import absolute_import
from __future__ import absolute_import, unicode_literals, print_function
from arpeggio.peg import ParserPEG
......
......@@ -5,6 +5,8 @@
# Copyright: (c) 2014 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
##############################################################################
from __future__ import unicode_literals
from arpeggio import *
from arpeggio import RegExMatch as _
......
......@@ -9,6 +9,7 @@
# (see http://pyparsing.wikispaces.com/).
##############################################################################
from __future__ import unicode_literals
json_bnf = """
object
......
......@@ -10,13 +10,16 @@
# This example demonstrates building PEG parser using PEG based grammar of PEG
# grammar definition language.
##############################################################################
from __future__ import unicode_literals
from arpeggio import *
from arpeggio.export import PMDOTExporter, PTDOTExporter
from arpeggio.export import PMDOTExporter
from arpeggio.peg import ParserPEG
# Semantic actions
from arpeggio.peg import SemGrammar, sem_rule, sem_sequence, sem_ordered_choice,\
sem_sufix, sem_prefix, sem_strmatch, sem_regex, sem_rule_crossref
sem_sufix, sem_prefix, sem_strmatch, sem_regex, sem_rule_crossref
sem_actions = {
"peggrammar": SemGrammar(),
......@@ -60,6 +63,7 @@ peg_grammar = r"""
comment <- '//' r'.*\n';
"""
def main(debug=False):
# ParserPEG will use ParserPython to parse peg_grammar definition and
......@@ -82,7 +86,7 @@ def main(debug=False):
# This graph should be the same as peg_peg_parser_model.dot because
# they define the same parser.
PMDOTExporter().exportFile(asg,
"peg_peg_asg.dot")
"peg_peg_asg.dot")
# If we replace parser_mode with ASG constructed parser it will still
# parse PEG grammars
......
......@@ -18,7 +18,7 @@
# right
# end
#######################################################################
from __future__ import print_function
from __future__ import print_function, unicode_literals
from arpeggio import *
......
......@@ -18,7 +18,7 @@
# right
# end
#######################################################################
from __future__ import print_function
from __future__ import print_function, unicode_literals
from arpeggio import *
from arpeggio.peg import ParserPEG
......
......@@ -9,6 +9,7 @@
# It is taken and adapted from pyPEG project (see http://www.fdik.org/pyPEG/).
#######################################################################
from __future__ import unicode_literals
from arpeggio import *
from arpeggio import RegExMatch as _
......
from __future__ import unicode_literals
import pytest
from arpeggio import SemanticAction, ParserPython
def test_direct_rule_call():
......
from StringIO import StringIO
from __future__ import unicode_literals
import pytest
import sys
from arpeggio import ParserPython
......
......@@ -8,6 +8,8 @@
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
from arpeggio import ParserPython, ZeroOrMore, OneOrMore, NonTerminal, Terminal, NoMatch, Combine
from arpeggio.peg import ParserPEG
......
......@@ -8,10 +8,19 @@
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
from arpeggio import ParserPython, SemanticAction, ParseTreeNode
from arpeggio import RegExMatch as _
try:
# For python 2.x
text=unicode
except:
# For python 3.x
text=str
def grammar(): return parentheses, 'strmatch'
def parentheses(): return '(', rulea, ')'
def rulea(): return ['+', '-'], number
......@@ -24,7 +33,7 @@ parse_tree_node = False
class ParenthesesSA(SemanticAction):
def first_pass(self, parser, node, children):
global p_removed, parse_tree_node
p_removed = str(children[0]) != '('
p_removed = text(children[0]) != '('
parse_tree_node = isinstance(children[0], ParseTreeNode)
return children[0] if len(children)==1 else children[1]
......@@ -32,7 +41,7 @@ class ParenthesesSA(SemanticAction):
class RuleSA(SemanticAction):
def first_pass(self, parser, node, children):
global number_str
number_str = type(children[1]) == str
number_str = type(children[1]) == text
return children[1]
......
from __future__ import unicode_literals
import pytest
# Grammar
......
......@@ -7,12 +7,13 @@
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
import os
from arpeggio.export import PMDOTExporter, PTDOTExporter
# Grammar
from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF , ParserPython, Sequence, NonTerminal
from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF, ParserPython
from arpeggio import RegExMatch as _
......
......@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
# Grammar
......
......@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
from arpeggio import ParserPython, ZeroOrMore, OneOrMore, NoMatch, EOF, Optional, And, Not
from arpeggio import RegExMatch as _
......
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_pathologic_models
# Purpose: Test for grammar models that could lead to infinite loops are
# handled properly.
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
from arpeggio import ZeroOrMore, Optional, ParserPython, NoMatch
def test_optional_inside_zeroormore():
"""
Test optional match inside a zero or more.
Optional should always succeed thus inducing ZeroOrMore
to try the match again.
Arpeggio handle this using soft failures.
"""
def grammar(): return ZeroOrMore(Optional('a'))
parser = ParserPython(grammar)
......
......@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
# Grammar
......
......@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
# Grammar
......
......@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
# Grammar
......
......@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from __future__ import unicode_literals
import pytest
# Grammar
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment