Support for parser and str/regex match flags (ignore_case, multiline)

0a96eea5 · Igor Dejanovic · 94685021 · 0a96eea5 · 0a96eea5 · 0a96eea5
Commit 0a96eea5 authored Aug 03, 2014 by Igor Dejanovic
Hide whitespace changes
Inline Side-by-side

Showing with 111 additions and 12 deletions

__init__.py arpeggio/__init__.py +51 -9

peg.py arpeggio/peg.py +6 -3

test_flags.py tests/unit/test_flags.py +54 -0

No files found.
--- a/arpeggio/__init__.py
+++ b/arpeggio/__init__.py
@@ -488,14 +488,25 @@ class RegExMatch(Match):
    Args:
        to_match (regex string): A regular expression string to match.
            It will be used to create regular expression using re.compile.
+        ignore_case(bool): If case insensitive match is needed.
+            Default is None to support propagation from global parser setting.
+        multiline(bool): If regex matching is in multiline mode.
+            Default is None to support propagation from global parser setting.
+
    '''
-    def __init__(self, to_match, rule=None, flags=None):
+    def __init__(self, to_match, rule=None, ignore_case=None, multiline=None):
        super(RegExMatch, self).__init__(rule)
        self.to_match = to_match
-        if flags is not None:
-            self.regex = re.compile(to_match, flags)
-        else:
-            self.regex = re.compile(to_match)
+        self.ignore_case = ignore_case
+        self.multiline = multiline
+
+    def compile(self):
+        flags = 0
+        if self.ignore_case:
+            flags |= re.IGNORECASE
+        if self.multiline:
+            flags |= re.MULTILINE
+        self.regex = re.compile(self.to_match, flags)

    def __str__(self):
        return self.to_match
@@ -522,14 +533,22 @@ class StrMatch(Match):

    Args:
        to_match (str): A string to match.
+        ignore_case(bool): If case insensitive match is needed.
+            Default is None to support propagation from global parser setting.
    """
-    def __init__(self, to_match, rule=None, root=False):
+    def __init__(self, to_match, rule=None, root=False, ignore_case=None):
        super(StrMatch, self).__init__(rule, root)
        self.to_match = to_match
+        self.ignore_case = ignore_case

    def _parse(self, parser):
        c_pos = parser.position
-        if parser.input[c_pos:].startswith(self.to_match):
+        input_frag = parser.input[c_pos:c_pos+len(self.to_match)]
+        if self.ignore_case:
+            match = input_frag.lower()==self.to_match.lower()
+        else:
+            match = input_frag == self.to_match
+        if match:
            if parser.debug:
                print("++ Match '{}' at {} => '{}'".format(self.to_match,\
                        c_pos, parser.context(len(self.to_match))))
@@ -771,14 +790,20 @@ class Parser(object):
        ws (str): A string consisting of whitespace characters.
        reduce_tree (bool): If true non-terminals with single child will be
            eliminated from the parse tree. Default is True.
+        multiline(bool): If RegExMatch is going to match in multiline mode
+            (default=False).
+        ignore_case(bool): If case is ignored (default=False)
        debug (bool): If true debugging messages will be printed.
        comments_model: parser model for comments.
+
    """
    def __init__(self, skipws=True, ws=DEFAULT_WS, reduce_tree=False,
-                 debug=False):
+                 debug=False, multiline=False, ignore_case=False):
        self.skipws = skipws
        self.ws = ws
        self.reduce_tree = reduce_tree
+        self.ignore_case = ignore_case
+        self.multiline = multiline
        self.debug = debug
        self.comments_model = None
        self.sem_actions = {}
@@ -1045,6 +1070,23 @@ class ParserPython(Parser):
                    print("New rule: {} -> {}"
                          .format(rule, retval.__class__.__name__))

+            elif isinstance(expression, StrMatch):
+                if expression.ignore_case is None:
+                    expression.ignore_case = self.ignore_case
+                retval = expression
+
+            elif isinstance(expression, RegExMatch):
+                # Regular expression are not compiled yet
+                # to support global settings propagation from
+                # parser.
+                if expression.ignore_case is None:
+                    expression.ignore_case = self.ignore_case
+                if expression.multiline is None:
+                    expression.multiline = self.multiline
+                expression.compile()
+
+                retval = expression
+
            elif isinstance(expression, Match):
                retval = expression

@@ -1067,7 +1109,7 @@ class ParserPython(Parser):
                    __for_resolving.append(retval)

            elif type(expression) is str:
-                retval = StrMatch(expression)
+                retval = StrMatch(expression, ignore_case=self.ignore_case)

            else:
                raise GrammarError("Unrecognized grammar element '%s'." %

--- a/arpeggio/peg.py
+++ b/arpeggio/peg.py
@@ -192,15 +192,18 @@ class SemRuleCrossRef(SemanticAction):

 class SemRegEx(SemanticAction):
    def first_pass(self, parser, node, children):
-        return RegExMatch(children[0])
-
+        match = RegExMatch(children[0],
+                ignore_case=parser.ignore_case,
+                multiline=parser.multiline)
+        match.compile()
+        return match

 class SemStrMatch(SemanticAction):
    def first_pass(self, parser, node, children):
        match_str = node.value[1:-1]
        match_str = match_str.replace("\\'", "'")
        match_str = match_str.replace("\\\\", "\\")
-        return StrMatch(match_str)
+        return StrMatch(match_str, ignore_case=parser.ignore_case)


 grammar.sem = SemGrammar()

--- a/tests/unit/test_flags.py
+++ b/tests/unit/test_flags.py
+# -*- coding: utf-8 -*-
+#######################################################################
+# Name: test_flags
+# Purpose: Test for parser flags
+# Author: Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
+# Copyright: (c) 2014 Igor R. Dejanović <igor DOT dejanovic AT gmail DOT com>
+# License: MIT License
+#######################################################################
+
+import pytest
+
+# Grammar
+from arpeggio import ParserPython, Optional, EOF
+from arpeggio import RegExMatch as _
+from arpeggio import NoMatch
+
+
+def foo():    return 'r', bar, baz, Optional(buz), Optional(ml), EOF
+def bar():      return 'BAR'
+def baz():      return _(r'1\w+')
+def buz():      return _(r'Aba*', ignore_case=True)
+def ml():       return _(r'//.*$', multiline=True)
+
+@pytest.fixture
+def parser_ci():
+    return ParserPython(foo, ignore_case=True)
+
+@pytest.fixture
+def parser_nonci():
+    return ParserPython(foo, ignore_case=False)
+
+def test_parse_tree_ci(parser_ci):
+    input_str = "R bar 1baz"
+    parse_tree = parser_ci.parse(input_str)
+    assert parse_tree is not None
+
+def test_parse_tree_nonci(parser_nonci):
+    input_str = "R bar 1baz"
+    with pytest.raises(NoMatch):
+        parser_nonci.parse(input_str)
+
+def test_parse_multiline(parser_ci):
+    input_str = """r bar 1baz //adfadsfadf asdfadsfadsf adfadf"""
+    parse_tree = parser_ci.parse(input_str)
+    assert parse_tree is not None
+
+def test_flags_override(parser_nonci):
+    # Parser is not case insensitive
+    # But the buz match is.
+    input_str = "r BAR 1baz abaaaaAAaaa"
+    parse_tree = parser_nonci.parse(input_str)
+    assert parse_tree is not None
+
+