Commit 0a96eea5 authored by Igor Dejanovic

Support for parser and str/regex match flags (ignore_case, multiline)

parent 94685021
...@@ -488,14 +488,25 @@ class RegExMatch(Match): ...@@ -488,14 +488,25 @@ class RegExMatch(Match):
Args: Args:
to_match (regex string): A regular expression string to match. to_match (regex string): A regular expression string to match.
It will be used to create regular expression using re.compile. It will be used to create regular expression using re.compile.
ignore_case(bool): If case insensitive match is needed.
Default is None to support propagation from global parser setting.
multiline(bool): If regex matching is in multiline mode.
Default is None to support propagation from global parser setting.
''' '''
def __init__(self, to_match, rule=None, flags=None): def __init__(self, to_match, rule=None, ignore_case=None, multiline=None):
super(RegExMatch, self).__init__(rule) super(RegExMatch, self).__init__(rule)
self.to_match = to_match self.to_match = to_match
if flags is not None: self.ignore_case = ignore_case
self.regex = re.compile(to_match, flags) self.multiline = multiline
else:
self.regex = re.compile(to_match) def compile(self):
flags = 0
if self.ignore_case:
flags |= re.IGNORECASE
if self.multiline:
flags |= re.MULTILINE
self.regex = re.compile(self.to_match, flags)
def __str__(self): def __str__(self):
return self.to_match return self.to_match
...@@ -522,14 +533,22 @@ class StrMatch(Match): ...@@ -522,14 +533,22 @@ class StrMatch(Match):
Args: Args:
to_match (str): A string to match. to_match (str): A string to match.
ignore_case(bool): If case insensitive match is needed.
Default is None to support propagation from global parser setting.
""" """
def __init__(self, to_match, rule=None, root=False): def __init__(self, to_match, rule=None, root=False, ignore_case=None):
super(StrMatch, self).__init__(rule, root) super(StrMatch, self).__init__(rule, root)
self.to_match = to_match self.to_match = to_match
self.ignore_case = ignore_case
def _parse(self, parser): def _parse(self, parser):
c_pos = parser.position c_pos = parser.position
if parser.input[c_pos:].startswith(self.to_match): input_frag = parser.input[c_pos:c_pos+len(self.to_match)]
if self.ignore_case:
match = input_frag.lower()==self.to_match.lower()
else:
match = input_frag == self.to_match
if match:
if parser.debug: if parser.debug:
print("++ Match '{}' at {} => '{}'".format(self.to_match,\ print("++ Match '{}' at {} => '{}'".format(self.to_match,\
c_pos, parser.context(len(self.to_match)))) c_pos, parser.context(len(self.to_match))))
...@@ -771,14 +790,20 @@ class Parser(object): ...@@ -771,14 +790,20 @@ class Parser(object):
ws (str): A string consisting of whitespace characters. ws (str): A string consisting of whitespace characters.
reduce_tree (bool): If true non-terminals with single child will be reduce_tree (bool): If true non-terminals with single child will be
eliminated from the parse tree. Default is True. eliminated from the parse tree. Default is True.
multiline(bool): If RegExMatch is going to match in multiline mode
(default=False).
ignore_case(bool): If case is ignored (default=False)
debug (bool): If true debugging messages will be printed. debug (bool): If true debugging messages will be printed.
comments_model: parser model for comments. comments_model: parser model for comments.
""" """
def __init__(self, skipws=True, ws=DEFAULT_WS, reduce_tree=False, def __init__(self, skipws=True, ws=DEFAULT_WS, reduce_tree=False,
debug=False): debug=False, multiline=False, ignore_case=False):
self.skipws = skipws self.skipws = skipws
self.ws = ws self.ws = ws
self.reduce_tree = reduce_tree self.reduce_tree = reduce_tree
self.ignore_case = ignore_case
self.multiline = multiline
self.debug = debug self.debug = debug
self.comments_model = None self.comments_model = None
self.sem_actions = {} self.sem_actions = {}
...@@ -1045,6 +1070,23 @@ class ParserPython(Parser): ...@@ -1045,6 +1070,23 @@ class ParserPython(Parser):
print("New rule: {} -> {}" print("New rule: {} -> {}"
.format(rule, retval.__class__.__name__)) .format(rule, retval.__class__.__name__))
elif isinstance(expression, StrMatch):
if expression.ignore_case is None:
expression.ignore_case = self.ignore_case
retval = expression
elif isinstance(expression, RegExMatch):
# Regular expressions are not compiled yet,
# so that global settings can still be propagated
# from the parser.
if expression.ignore_case is None:
expression.ignore_case = self.ignore_case
if expression.multiline is None:
expression.multiline = self.multiline
expression.compile()
retval = expression
elif isinstance(expression, Match): elif isinstance(expression, Match):
retval = expression retval = expression
...@@ -1067,7 +1109,7 @@ class ParserPython(Parser): ...@@ -1067,7 +1109,7 @@ class ParserPython(Parser):
__for_resolving.append(retval) __for_resolving.append(retval)
elif type(expression) is str: elif type(expression) is str:
retval = StrMatch(expression) retval = StrMatch(expression, ignore_case=self.ignore_case)
else: else:
raise GrammarError("Unrecognized grammar element '%s'." % raise GrammarError("Unrecognized grammar element '%s'." %
......
...@@ -192,15 +192,18 @@ class SemRuleCrossRef(SemanticAction): ...@@ -192,15 +192,18 @@ class SemRuleCrossRef(SemanticAction):
class SemRegEx(SemanticAction): class SemRegEx(SemanticAction):
def first_pass(self, parser, node, children): def first_pass(self, parser, node, children):
return RegExMatch(children[0]) match = RegExMatch(children[0],
ignore_case=parser.ignore_case,
multiline=parser.multiline)
match.compile()
return match
class SemStrMatch(SemanticAction): class SemStrMatch(SemanticAction):
def first_pass(self, parser, node, children): def first_pass(self, parser, node, children):
match_str = node.value[1:-1] match_str = node.value[1:-1]
match_str = match_str.replace("\\'", "'") match_str = match_str.replace("\\'", "'")
match_str = match_str.replace("\\\\", "\\") match_str = match_str.replace("\\\\", "\\")
return StrMatch(match_str) return StrMatch(match_str, ignore_case=parser.ignore_case)
grammar.sem = SemGrammar() grammar.sem = SemGrammar()
......
# -*- coding: utf-8 -*-
#######################################################################
# Name: test_flags
# Purpose: Test for parser flags
# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
import pytest
# Grammar
from arpeggio import ParserPython, Optional, EOF
from arpeggio import RegExMatch as _
from arpeggio import NoMatch
def foo():
    """Root grammar rule.

    A sequence of the literal 'r', the ``bar`` and ``baz`` rules, an
    optional ``buz``, an optional ``ml`` and finally end of input.
    """
    sequence = ('r', bar, baz, Optional(buz), Optional(ml), EOF)
    return sequence
def bar():
    """Grammar rule given as the plain string literal 'BAR'."""
    literal = 'BAR'
    return literal
def baz():
    """Grammar rule: regex match for '1' followed by one or more word chars.

    No flags are given here, so ignore_case/multiline are left unset on
    the match itself.
    """
    pattern = r'1\w+'
    return _(pattern)
def buz():
    """Grammar rule: regex match for 'Aba*' with ignore_case set explicitly."""
    pattern = r'Aba*'
    return _(pattern, ignore_case=True)
def ml():
    """Grammar rule: regex match for '//' up to '$', with multiline set explicitly."""
    pattern = r'//.*$'
    return _(pattern, multiline=True)
@pytest.fixture
def parser_ci():
    """Fixture: a ParserPython built from ``foo`` with ignore_case=True."""
    case_insensitive_parser = ParserPython(foo, ignore_case=True)
    return case_insensitive_parser
@pytest.fixture
def parser_nonci():
    """Fixture: a ParserPython built from ``foo`` with ignore_case=False."""
    case_sensitive_parser = ParserPython(foo, ignore_case=False)
    return case_sensitive_parser
def test_parse_tree_ci(parser_ci):
    """The ignore_case parser must parse input whose letter case differs
    from the grammar literals ('R' vs 'r', 'bar' vs 'BAR')."""
    result = parser_ci.parse("R bar 1baz")
    assert result is not None
def test_parse_tree_nonci(parser_nonci):
    """The case-sensitive parser must raise NoMatch on input whose letter
    case differs from the grammar literals."""
    mismatched_case_input = "R bar 1baz"
    with pytest.raises(NoMatch):
        parser_nonci.parse(mismatched_case_input)
def test_parse_multiline(parser_ci):
    """Input ending in a '//' comment must parse; the comment is covered by
    the ``ml`` rule, which sets multiline=True."""
    # NOTE(review): the input literal is on a single line here; confirm
    # whether the intended test input contained line breaks.
    text_with_comment = """r bar 1baz //adfadsfadf asdfadsfadsf adfadf"""
    result = parser_ci.parse(text_with_comment)
    assert result is not None
def test_flags_override(parser_nonci):
    """A flag set on a single match must win over the parser-wide setting."""
    # The parser itself is case sensitive, but ``buz`` was declared with
    # ignore_case=True, so the mixed-case tail is still expected to parse.
    mixed_case_tail = "r BAR 1baz abaaaaAAaaa"
    result = parser_nonci.parse(mixed_case_tail)
    assert result is not None
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment