Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
A
arpeggio-gm
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
backend
arpeggio-gm
Commits
0a96eea5
Commit
0a96eea5
authored
Aug 03, 2014
by
Igor Dejanovic
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Support for parser and str/regex match flags (ignore_case, multiline)
parent
94685021
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
111 additions
and
12 deletions
+111
-12
__init__.py
arpeggio/__init__.py
+51
-9
peg.py
arpeggio/peg.py
+6
-3
test_flags.py
tests/unit/test_flags.py
+54
-0
No files found.
arpeggio/__init__.py
View file @
0a96eea5
...
...
@@ -488,14 +488,25 @@ class RegExMatch(Match):
Args:
to_match (regex string): A regular expression string to match.
It will be used to create regular expression using re.compile.
ignore_case(bool): If case insensitive match is needed.
Default is None to support propagation from global parser setting.
multiline(bool): If regex matching is in multiline mode.
Default is None to support propagation from global parser setting.
'''
def __init__(self, to_match, rule=None, ignore_case=None, multiline=None):
    """Store the pattern text and its match flags without compiling it.

    Args:
        to_match (regex string): Regular expression source text.
        rule: Passed unchanged to the base-class initializer.
        ignore_case (bool): If case insensitive match is needed.
            ``None`` leaves the decision to the global parser setting.
        multiline (bool): If regex matching is in multiline mode.
            ``None`` leaves the decision to the global parser setting.
    """
    super(RegExMatch, self).__init__(rule)
    self.to_match = to_match
    # The pattern is intentionally not compiled here: flags left as None
    # are filled in from the parser first, and compile() is called after.
    self.ignore_case = ignore_case
    self.multiline = multiline
def compile(self):
    """Compile ``self.to_match`` into ``self.regex`` using the stored flags.

    ``re.IGNORECASE`` is added when ``self.ignore_case`` is truthy and
    ``re.MULTILINE`` when ``self.multiline`` is truthy, so a flag that is
    still ``None`` behaves as switched off.
    """
    flags = 0
    if self.ignore_case:
        flags |= re.IGNORECASE
    if self.multiline:
        flags |= re.MULTILINE
    self.regex = re.compile(self.to_match, flags)
def __str__(self):
    """Return the regular expression source text of this match."""
    return self.to_match
...
...
@@ -522,14 +533,22 @@ class StrMatch(Match):
Args:
to_match (str): A string to match.
ignore_case(bool): If case insensitive match is needed.
Default is None to support propagation from global parser setting.
"""
def __init__(self, to_match, rule=None, root=False, ignore_case=None):
    """Store the literal string to match and its case-sensitivity flag.

    Args:
        to_match (str): A string to match.
        rule: Passed unchanged to the base-class initializer.
        root: Passed unchanged to the base-class initializer.
        ignore_case (bool): If case insensitive match is needed.
            ``None`` leaves the decision to the global parser setting.
    """
    super(StrMatch, self).__init__(rule, root)
    self.to_match = to_match
    self.ignore_case = ignore_case
def
_parse
(
self
,
parser
):
c_pos
=
parser
.
position
if
parser
.
input
[
c_pos
:]
.
startswith
(
self
.
to_match
):
input_frag
=
parser
.
input
[
c_pos
:
c_pos
+
len
(
self
.
to_match
)]
if
self
.
ignore_case
:
match
=
input_frag
.
lower
()
==
self
.
to_match
.
lower
()
else
:
match
=
input_frag
==
self
.
to_match
if
match
:
if
parser
.
debug
:
print
(
"++ Match '{}' at {} => '{}'"
.
format
(
self
.
to_match
,
\
c_pos
,
parser
.
context
(
len
(
self
.
to_match
))))
...
...
@@ -771,14 +790,20 @@ class Parser(object):
ws (str): A string consisting of whitespace characters.
reduce_tree (bool): If true non-terminals with single child will be
eliminated from the parse tree. Default is False.
multiline(bool): If RegExMatch is going to match in multiline mode
(default=False).
ignore_case(bool): If case is ignored (default=False)
debug (bool): If true debugging messages will be printed.
comments_model: parser model for comments.
"""
def
__init__
(
self
,
skipws
=
True
,
ws
=
DEFAULT_WS
,
reduce_tree
=
False
,
debug
=
False
):
debug
=
False
,
multiline
=
False
,
ignore_case
=
False
):
self
.
skipws
=
skipws
self
.
ws
=
ws
self
.
reduce_tree
=
reduce_tree
self
.
ignore_case
=
ignore_case
self
.
multiline
=
multiline
self
.
debug
=
debug
self
.
comments_model
=
None
self
.
sem_actions
=
{}
...
...
@@ -1045,6 +1070,23 @@ class ParserPython(Parser):
print
(
"New rule: {} -> {}"
.
format
(
rule
,
retval
.
__class__
.
__name__
))
elif
isinstance
(
expression
,
StrMatch
):
if
expression
.
ignore_case
is
None
:
expression
.
ignore_case
=
self
.
ignore_case
retval
=
expression
elif
isinstance
(
expression
,
RegExMatch
):
# Regular expressions are not compiled yet
# to support global settings propagation from
# the parser.
if
expression
.
ignore_case
is
None
:
expression
.
ignore_case
=
self
.
ignore_case
if
expression
.
multiline
is
None
:
expression
.
multiline
=
self
.
multiline
expression
.
compile
()
retval
=
expression
elif
isinstance
(
expression
,
Match
):
retval
=
expression
...
...
@@ -1067,7 +1109,7 @@ class ParserPython(Parser):
__for_resolving
.
append
(
retval
)
elif
type
(
expression
)
is
str
:
retval
=
StrMatch
(
expression
)
retval
=
StrMatch
(
expression
,
ignore_case
=
self
.
ignore_case
)
else
:
raise
GrammarError
(
"Unrecognized grammar element '
%
s'."
%
...
...
arpeggio/peg.py
View file @
0a96eea5
...
...
@@ -192,15 +192,18 @@ class SemRuleCrossRef(SemanticAction):
class SemRegEx(SemanticAction):
    """Semantic action that turns a regex grammar node into a RegExMatch."""

    def first_pass(self, parser, node, children):
        """Build and compile a RegExMatch from the first child.

        The parser's ``ignore_case`` and ``multiline`` settings are passed
        to the match, which is compiled before being returned.
        """
        match = RegExMatch(children[0],
                           ignore_case=parser.ignore_case,
                           multiline=parser.multiline)
        match.compile()
        return match
class SemStrMatch(SemanticAction):
    """Semantic action that turns a string grammar node into a StrMatch."""

    def first_pass(self, parser, node, children):
        """Build a StrMatch from the node text, honouring parser.ignore_case.

        The first and last characters of ``node.value`` are dropped
        (presumably the surrounding quotes -- confirm against the grammar),
        then escaped quotes and escaped backslashes are unescaped.
        """
        match_str = node.value[1:-1]
        match_str = match_str.replace("\\'", "'")
        match_str = match_str.replace("\\\\", "\\")
        return StrMatch(match_str, ignore_case=parser.ignore_case)
grammar
.
sem
=
SemGrammar
()
...
...
tests/unit/test_flags.py
0 → 100644
View file @
0a96eea5
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_flags
# Purpose: Test for parser flags
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
import pytest

# Grammar
from arpeggio import ParserPython, Optional, EOF
from arpeggio import RegExMatch as _
from arpeggio import NoMatch
def foo():
    """Root rule: 'r', bar, baz, optional buz, optional ml, then EOF."""
    return 'r', bar, baz, Optional(buz), Optional(ml), EOF
def bar():
    """Rule matching the literal string 'BAR'."""
    return 'BAR'
def baz():
    """Regex rule with no explicit flags, so parser-level settings apply."""
    return _(r'1\w+')
def buz():
    """Regex rule that explicitly requests case-insensitive matching."""
    return _(r'Aba*', ignore_case=True)
def ml():
    """Regex rule for a '//' comment, matched in multiline mode."""
    return _(r'//.*$', multiline=True)
@pytest.fixture
def parser_ci():
    """Parser for the ``foo`` grammar built with ignore_case=True."""
    return ParserPython(foo, ignore_case=True)
@pytest.fixture
def parser_nonci():
    """Parser for the ``foo`` grammar built with ignore_case=False."""
    return ParserPython(foo, ignore_case=False)
def test_parse_tree_ci(parser_ci):
    """Input whose letter case differs from the grammar parses when case is ignored."""
    input_str = "R bar 1baz"
    parse_tree = parser_ci.parse(input_str)
    assert parse_tree is not None
def test_parse_tree_nonci(parser_nonci):
    """The same mixed-case input raises NoMatch when the parser is case sensitive."""
    input_str = "R bar 1baz"
    with pytest.raises(NoMatch):
        parser_nonci.parse(input_str)
def test_parse_multiline(parser_ci):
    """Input ending in a '//' comment is accepted via the multiline ``ml`` rule."""
    input_str = """r bar 1baz //adfadsfadf asdfadsfadsf adfadf"""
    parse_tree = parser_ci.parse(input_str)
    assert parse_tree is not None
def test_flags_override(parser_nonci):
    """A flag set on a single match overrides the parser-wide setting."""
    # Parser is not case insensitive
    # But the buz match is.
    input_str = "r BAR 1baz abaaaaAAaaa"
    parse_tree = parser_nonci.parse(input_str)
    assert parse_tree is not None
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment