Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
A
arpeggio-gm
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
backend
arpeggio-gm
Commits
5df7664c
Commit
5df7664c
authored
Sep 30, 2014
by
Igor Dejanovic
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Support for unicode and python 2/3 compatibility
parent
c19a0e25
Hide whitespace changes
Inline
Side-by-side
Showing
24 changed files
with
116 additions
and
37 deletions
+116
-37
__init__.py
arpeggio/__init__.py
+42
-21
peg.py
arpeggio/peg.py
+9
-3
bibtex.py
examples/bibtex.py
+2
-2
calc.py
examples/calc.py
+7
-1
calc_peg.py
examples/calc_peg.py
+1
-1
csv.py
examples/csv.py
+2
-0
json.py
examples/json.py
+1
-0
peg_peg.py
examples/peg_peg.py
+7
-3
robot.py
examples/robot.py
+1
-1
robot_peg.py
examples/robot_peg.py
+1
-1
simple.py
examples/simple.py
+1
-0
test_direct_rule_call.py
tests/unit/regressions/test_direct_rule_call.py
+2
-0
test_memoization.py
tests/unit/regressions/test_memoization.py
+2
-1
test_decorator_combine.py
tests/unit/test_decorator_combine.py
+2
-0
test_default_semantic_action.py
tests/unit/test_default_semantic_action.py
+11
-2
test_eolterm.py
tests/unit/test_eolterm.py
+1
-0
test_exporter.py
tests/unit/test_exporter.py
+2
-1
test_flags.py
tests/unit/test_flags.py
+1
-0
test_parsing_expressions.py
tests/unit/test_parsing_expressions.py
+1
-0
test_pathologic_models.py
tests/unit/test_pathologic_models.py
+16
-0
test_ptnode_navigation_expressions.py
tests/unit/test_ptnode_navigation_expressions.py
+1
-0
test_python_parser.py
tests/unit/test_python_parser.py
+1
-0
test_reduce_tree.py
tests/unit/test_reduce_tree.py
+1
-0
test_semantic_action_results.py
tests/unit/test_semantic_action_results.py
+1
-0
No files found.
arpeggio/__init__.py
View file @
5df7664c
...
...
@@ -12,6 +12,12 @@
###############################################################################
from
__future__
import
print_function
,
unicode_literals
import
sys
if
sys
.
version
<
'3'
:
text
=
unicode
else
:
text
=
str
import
codecs
import
re
import
bisect
...
...
@@ -85,15 +91,18 @@ class NoMatch(Exception):
def
__str__
(
self
):
return
"Expected '{}' at position {} => '{}'."
\
.
format
(
self
.
exp_str
,
str
(
self
.
parser
.
pos_to_linecol
(
self
.
position
)),
text
(
self
.
parser
.
pos_to_linecol
(
self
.
position
)),
self
.
parser
.
context
(
position
=
self
.
position
))
def
__unicode__
(
self
):
return
self
.
__str__
()
def
flatten
(
_iterable
):
'''Flattening of python iterables.'''
result
=
[]
for
e
in
_iterable
:
if
hasattr
(
e
,
"__iter__"
)
and
not
type
(
e
)
in
[
str
,
NonTerminal
]:
if
hasattr
(
e
,
"__iter__"
)
and
not
type
(
e
)
in
[
text
,
NonTerminal
]:
result
.
extend
(
flatten
(
e
))
else
:
result
.
append
(
e
)
...
...
@@ -217,7 +226,7 @@ class ParsingExpression(object):
parser
.
position
=
new_pos
if
parser
.
debug
:
print
(
"** Cache hit for [{}, {}] = '{}' : new_pos={}"
.
format
(
self
.
name
,
c_pos
,
str
(
result
),
str
(
new_pos
)))
.
format
(
self
.
name
,
c_pos
,
text
(
result
),
text
(
new_pos
)))
# print("<< Leaving rule {}".format(self.name))
# If NoMatch is recorded at this position raise.
...
...
@@ -534,7 +543,7 @@ class Combine(Decorator):
# Create terminal from result
return
Terminal
(
self
,
c_pos
,
""
.
join
([
str
(
result
)
for
result
in
results
]))
""
.
join
([
text
(
result
)
for
result
in
results
]))
except
NoMatch
:
parser
.
position
=
c_pos
# Backtracking
raise
...
...
@@ -597,6 +606,9 @@ class RegExMatch(Match):
def
__str__
(
self
):
return
self
.
to_match
def
__unicode__
(
self
):
return
self
.
__str__
()
def
_parse
(
self
,
parser
):
c_pos
=
parser
.
position
m
=
self
.
regex
.
match
(
parser
.
input
[
c_pos
:])
...
...
@@ -653,8 +665,11 @@ class StrMatch(Match):
def
__str__
(
self
):
return
self
.
to_match
def
__unicode__
(
self
):
return
self
.
__str__
()
def
__eq__
(
self
,
other
):
return
self
.
to_match
==
str
(
other
)
return
self
.
to_match
==
text
(
other
)
def
__hash__
(
self
):
return
hash
(
self
.
to_match
)
...
...
@@ -759,11 +774,14 @@ class Terminal(ParseTreeNode):
def
__str__
(
self
):
return
self
.
value
def
__unicode__
(
self
):
return
self
.
__str__
()
def
__repr__
(
self
):
return
self
.
desc
def
__eq__
(
self
,
other
):
return
str
(
self
)
==
str
(
other
)
return
text
(
self
)
==
text
(
other
)
class
NonTerminal
(
ParseTreeNode
,
list
):
...
...
@@ -790,7 +808,7 @@ class NonTerminal(ParseTreeNode, list):
@property
def
value
(
self
):
"""Terminal protocol."""
return
str
(
self
)
return
text
(
self
)
@property
def
desc
(
self
):
...
...
@@ -799,6 +817,9 @@ class NonTerminal(ParseTreeNode, list):
def
__str__
(
self
):
return
" | "
.
join
([
str
(
x
)
for
x
in
self
])
def
__unicode__
(
self
):
return
self
.
__str__
()
def
__repr__
(
self
):
return
"[
%
s ]"
%
", "
.
join
([
repr
(
x
)
for
x
in
self
])
...
...
@@ -870,7 +891,7 @@ class SemanticAction(object):
if
isinstance
(
node
,
Terminal
):
# Default for Terminal is to convert to string unless suppress flag
# is set in which case it is suppressed by setting to None.
retval
=
str
(
node
)
if
not
node
.
suppress
else
None
retval
=
text
(
node
)
if
not
node
.
suppress
else
None
else
:
retval
=
node
# Special case. If only one child exist return it.
...
...
@@ -890,7 +911,7 @@ class SemanticAction(object):
# by default convert non-terminal to string
if
parser
.
debug
:
print
(
"*** Warning: Multiple non-string objects found in applying default semantic action. Converting non-terminal to string."
)
retval
=
str
(
node
)
retval
=
text
(
node
)
break
else
:
# Return the only non-string child
...
...
@@ -938,7 +959,7 @@ class SemanticActionBodyWithBraces(SemanticAction):
class
SemanticActionToString
(
SemanticAction
):
def
first_pass
(
self
,
parser
,
node
,
children
):
return
str
(
node
)
return
text
(
node
)
# ----------------------------------------------------
# Parsers
...
...
@@ -1082,7 +1103,7 @@ class Parser(object):
if
self
.
debug
:
print
(
"Walking down "
,
node
.
name
,
" type:"
,
type
(
node
)
.
__name__
,
"str:"
,
str
(
node
))
type
(
node
)
.
__name__
,
"str:"
,
text
(
node
))
children
=
SemanticActionResults
()
if
isinstance
(
node
,
NonTerminal
):
...
...
@@ -1092,11 +1113,11 @@ class Parser(object):
children
.
append_result
(
n
.
rule_name
,
child
)
if
self
.
debug
:
print
(
"Processing "
,
node
.
name
,
"= '"
,
str
(
node
),
print
(
"Processing "
,
node
.
name
,
"= '"
,
text
(
node
),
"' type:"
,
type
(
node
)
.
__name__
,
"len:"
,
len
(
node
)
if
isinstance
(
node
,
list
)
else
""
)
for
i
,
a
in
enumerate
(
children
):
print
(
"
\t
%
d:"
%
(
i
+
1
),
str
(
a
),
"type:"
,
type
(
a
)
.
__name__
)
print
(
"
\t
%
d:"
%
(
i
+
1
),
text
(
a
),
"type:"
,
type
(
a
)
.
__name__
)
if
node
.
rule_name
in
sem_actions
:
sem_action
=
sem_actions
[
node
.
rule_name
]
...
...
@@ -1129,7 +1150,7 @@ class Parser(object):
if
retval
is
None
:
print
(
"
\t
Suppressed."
)
else
:
print
(
"
\t
Resolved to = "
,
str
(
retval
),
print
(
"
\t
Resolved to = "
,
text
(
retval
),
" type:"
,
type
(
retval
)
.
__name__
)
return
retval
...
...
@@ -1183,13 +1204,13 @@ class Parser(object):
position
=
self
.
position
if
length
:
retval
=
"{}*{}*{}"
.
format
(
str
(
self
.
input
[
max
(
position
-
10
,
0
):
position
]),
str
(
self
.
input
[
position
:
position
+
length
]),
str
(
self
.
input
[
position
+
length
:
position
+
10
]))
text
(
self
.
input
[
max
(
position
-
10
,
0
):
position
]),
text
(
self
.
input
[
position
:
position
+
length
]),
text
(
self
.
input
[
position
+
length
:
position
+
10
]))
else
:
retval
=
"{}*{}"
.
format
(
str
(
self
.
input
[
max
(
position
-
10
,
0
):
position
]),
str
(
self
.
input
[
position
:
position
+
10
]))
text
(
self
.
input
[
max
(
position
-
10
,
0
):
position
]),
text
(
self
.
input
[
position
:
position
+
10
]))
return
retval
.
replace
(
'
\n
'
,
' '
)
.
replace
(
'
\r
'
,
''
)
...
...
@@ -1362,12 +1383,12 @@ class ParserPython(Parser):
if
any
((
isinstance
(
x
,
CrossRef
)
for
x
in
retval
.
nodes
)):
__for_resolving
.
append
(
retval
)
elif
type
(
expression
)
is
str
:
elif
type
(
expression
)
is
text
:
retval
=
StrMatch
(
expression
,
ignore_case
=
self
.
ignore_case
)
else
:
raise
GrammarError
(
"Unrecognized grammar element '
%
s'."
%
str
(
expression
))
text
(
expression
))
return
retval
...
...
arpeggio/peg.py
View file @
5df7664c
...
...
@@ -7,11 +7,17 @@
# License: MIT License
#######################################################################
from
__future__
import
print_function
from
__future__
import
print_function
,
unicode_literals
import
sys
if
sys
.
version
<
'3'
:
text
=
unicode
else
:
text
=
str
import
copy
from
arpeggio
import
*
from
arpeggio
import
RegExMatch
as
_
from
arpeggio.export
import
PMDOTExporter
,
PTDOTExporter
#
from arpeggio.export import PMDOTExporter, PTDOTExporter
__all__
=
[
'ParserPEG'
]
...
...
@@ -93,7 +99,7 @@ class SemGrammar(SemanticAction):
Resolving cross-references in second pass.
'''
if
parser
.
debug
:
print
(
"Second pass:"
,
type
(
node
),
str
(
node
))
print
(
"Second pass:"
,
type
(
node
),
text
(
node
))
self
.
resolved
=
set
()
self
.
_resolve
(
parser
,
node
)
...
...
examples/bibtex.py
View file @
5df7664c
...
...
@@ -8,7 +8,7 @@
#
# This example demonstrates grammar and parser for bibtex files.
#######################################################################
from
__future__
import
print_function
from
__future__
import
print_function
,
unicode_literals
import
pprint
import
sys
,
os
...
...
@@ -113,7 +113,7 @@ def main(debug=False, file_name=None):
if
not
file_name
:
file_name
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'bibtex_example.bib'
)
with
open
(
file_name
,
"r
"
)
as
bibtexfile
:
with
codecs
.
open
(
file_name
,
"r"
,
encoding
=
"utf-8
"
)
as
bibtexfile
:
bibtexfile_content
=
bibtexfile
.
read
()
# We create a parse tree or abstract syntax tree out of
...
...
examples/calc.py
View file @
5df7664c
...
...
@@ -10,6 +10,12 @@
# notation.
#######################################################################
from
__future__
import
unicode_literals
,
print_function
try
:
text
=
unicode
except
:
text
=
str
from
arpeggio
import
Optional
,
ZeroOrMore
,
OneOrMore
,
EOF
,
SemanticAction
,
\
ParserPython
from
arpeggio
import
RegExMatch
as
_
...
...
@@ -70,7 +76,7 @@ def exprSA(parser, node, children):
expr
=
0
start
=
0
# Check for unary + or - operator
if
str
(
children
[
0
])
in
"+-"
:
if
text
(
children
[
0
])
in
"+-"
:
start
=
1
for
i
in
range
(
start
,
len
(
children
),
2
):
...
...
examples/calc_peg.py
View file @
5df7664c
...
...
@@ -12,7 +12,7 @@
# Parser model as well as parse tree exported to dot files should be
# the same as parser model and parse tree generated in calc.py example.
#######################################################################
from
__future__
import
absolute_import
from
__future__
import
absolute_import
,
unicode_literals
,
print_function
from
arpeggio.peg
import
ParserPEG
...
...
examples/csv.py
View file @
5df7664c
...
...
@@ -5,6 +5,8 @@
# Copyright: (c) 2014 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
##############################################################################
from
__future__
import
unicode_literals
from
arpeggio
import
*
from
arpeggio
import
RegExMatch
as
_
...
...
examples/json.py
View file @
5df7664c
...
...
@@ -9,6 +9,7 @@
# (see http://pyparsing.wikispaces.com/).
##############################################################################
from
__future__
import
unicode_literals
json_bnf
=
"""
object
...
...
examples/peg_peg.py
View file @
5df7664c
...
...
@@ -10,13 +10,16 @@
# This example demonstrates building PEG parser using PEG based grammar of PEG
# grammar definition language.
##############################################################################
from
__future__
import
unicode_literals
from
arpeggio
import
*
from
arpeggio.export
import
PMDOTExporter
,
PTDOTExporter
from
arpeggio.export
import
PMDOTExporter
from
arpeggio.peg
import
ParserPEG
# Semantic actions
from
arpeggio.peg
import
SemGrammar
,
sem_rule
,
sem_sequence
,
sem_ordered_choice
,
\
sem_sufix
,
sem_prefix
,
sem_strmatch
,
sem_regex
,
sem_rule_crossref
sem_sufix
,
sem_prefix
,
sem_strmatch
,
sem_regex
,
sem_rule_crossref
sem_actions
=
{
"peggrammar"
:
SemGrammar
(),
...
...
@@ -60,6 +63,7 @@ peg_grammar = r"""
comment <- '//' r'.*\n';
"""
def
main
(
debug
=
False
):
# ParserPEG will use ParserPython to parse peg_grammar definition and
...
...
@@ -82,7 +86,7 @@ def main(debug=False):
# This graph should be the same as peg_peg_parser_model.dot because
# they define the same parser.
PMDOTExporter
()
.
exportFile
(
asg
,
"peg_peg_asg.dot"
)
"peg_peg_asg.dot"
)
# If we replace parser_mode with ASG constructed parser it will still
# parse PEG grammars
...
...
examples/robot.py
View file @
5df7664c
...
...
@@ -18,7 +18,7 @@
# right
# end
#######################################################################
from
__future__
import
print_function
from
__future__
import
print_function
,
unicode_literals
from
arpeggio
import
*
...
...
examples/robot_peg.py
View file @
5df7664c
...
...
@@ -18,7 +18,7 @@
# right
# end
#######################################################################
from
__future__
import
print_function
from
__future__
import
print_function
,
unicode_literals
from
arpeggio
import
*
from
arpeggio.peg
import
ParserPEG
...
...
examples/simple.py
View file @
5df7664c
...
...
@@ -9,6 +9,7 @@
# It is taken and adapted from pyPEG project (see http://www.fdik.org/pyPEG/).
#######################################################################
from
__future__
import
unicode_literals
from
arpeggio
import
*
from
arpeggio
import
RegExMatch
as
_
...
...
tests/unit/regressions/test_direct_rule_call.py
View file @
5df7664c
from
__future__
import
unicode_literals
import
pytest
from
arpeggio
import
SemanticAction
,
ParserPython
def
test_direct_rule_call
():
...
...
tests/unit/regressions/test_memoization.py
View file @
5df7664c
from
StringIO
import
StringIO
from
__future__
import
unicode_literals
import
pytest
import
sys
from
arpeggio
import
ParserPython
...
...
tests/unit/test_decorator_combine.py
View file @
5df7664c
...
...
@@ -8,6 +8,8 @@
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from
__future__
import
unicode_literals
import
pytest
from
arpeggio
import
ParserPython
,
ZeroOrMore
,
OneOrMore
,
NonTerminal
,
Terminal
,
NoMatch
,
Combine
from
arpeggio.peg
import
ParserPEG
...
...
tests/unit/test_default_semantic_action.py
View file @
5df7664c
...
...
@@ -8,10 +8,19 @@
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from
__future__
import
unicode_literals
import
pytest
from
arpeggio
import
ParserPython
,
SemanticAction
,
ParseTreeNode
from
arpeggio
import
RegExMatch
as
_
try
:
# For python 2.x
text
=
unicode
except
:
# For python 3.x
text
=
str
def
grammar
():
return
parentheses
,
'strmatch'
def
parentheses
():
return
'('
,
rulea
,
')'
def
rulea
():
return
[
'+'
,
'-'
],
number
...
...
@@ -24,7 +33,7 @@ parse_tree_node = False
class
ParenthesesSA
(
SemanticAction
):
def
first_pass
(
self
,
parser
,
node
,
children
):
global
p_removed
,
parse_tree_node
p_removed
=
str
(
children
[
0
])
!=
'('
p_removed
=
text
(
children
[
0
])
!=
'('
parse_tree_node
=
isinstance
(
children
[
0
],
ParseTreeNode
)
return
children
[
0
]
if
len
(
children
)
==
1
else
children
[
1
]
...
...
@@ -32,7 +41,7 @@ class ParenthesesSA(SemanticAction):
class
RuleSA
(
SemanticAction
):
def
first_pass
(
self
,
parser
,
node
,
children
):
global
number_str
number_str
=
type
(
children
[
1
])
==
str
number_str
=
type
(
children
[
1
])
==
text
return
children
[
1
]
...
...
tests/unit/test_eolterm.py
View file @
5df7664c
from
__future__
import
unicode_literals
import
pytest
# Grammar
...
...
tests/unit/test_exporter.py
View file @
5df7664c
...
...
@@ -7,12 +7,13 @@
# License: MIT License
#######################################################################
from
__future__
import
unicode_literals
import
pytest
import
os
from
arpeggio.export
import
PMDOTExporter
,
PTDOTExporter
# Grammar
from
arpeggio
import
Optional
,
ZeroOrMore
,
OneOrMore
,
EOF
,
ParserPython
,
Sequence
,
NonTerminal
from
arpeggio
import
Optional
,
ZeroOrMore
,
OneOrMore
,
EOF
,
ParserPython
from
arpeggio
import
RegExMatch
as
_
...
...
tests/unit/test_flags.py
View file @
5df7664c
...
...
@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from
__future__
import
unicode_literals
import
pytest
# Grammar
...
...
tests/unit/test_parsing_expressions.py
View file @
5df7664c
...
...
@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from
__future__
import
unicode_literals
import
pytest
from
arpeggio
import
ParserPython
,
ZeroOrMore
,
OneOrMore
,
NoMatch
,
EOF
,
Optional
,
And
,
Not
from
arpeggio
import
RegExMatch
as
_
...
...
tests/unit/test_pathologic_models.py
View file @
5df7664c
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_pathologic_models
# Purpose: Test for grammar models that could lead to infinite loops are
# handled properly.
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from
__future__
import
unicode_literals
import
pytest
from
arpeggio
import
ZeroOrMore
,
Optional
,
ParserPython
,
NoMatch
def
test_optional_inside_zeroormore
():
"""
Test optional match inside a zero or more.
Optional should always succeed thus inducing ZeroOrMore
to try the match again.
Arpeggio handle this using soft failures.
"""
def
grammar
():
return
ZeroOrMore
(
Optional
(
'a'
))
parser
=
ParserPython
(
grammar
)
...
...
tests/unit/test_ptnode_navigation_expressions.py
View file @
5df7664c
...
...
@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from
__future__
import
unicode_literals
import
pytest
# Grammar
...
...
tests/unit/test_python_parser.py
View file @
5df7664c
...
...
@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from
__future__
import
unicode_literals
import
pytest
# Grammar
...
...
tests/unit/test_reduce_tree.py
View file @
5df7664c
...
...
@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from
__future__
import
unicode_literals
import
pytest
# Grammar
...
...
tests/unit/test_semantic_action_results.py
View file @
5df7664c
...
...
@@ -7,6 +7,7 @@
# License: MIT License
#######################################################################
from
__future__
import
unicode_literals
import
pytest
# Grammar
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment