Commit 4392c896 authored by Igor Dejanovic

Improved bibtex example. Using Combine to support nested braces.

parent db8d661a
...@@ -6,8 +6,9 @@ ...@@ -6,8 +6,9 @@
# Copyright: (c) 2013 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com> # Copyright: (c) 2013 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# License: MIT License # License: MIT License
# #
# This example demonstrates grammar for bibtex files. # This example demonstrates grammar and parser for bibtex files.
####################################################################### #######################################################################
import pprint
import sys import sys
from arpeggio import * from arpeggio import *
...@@ -15,14 +16,26 @@ from arpeggio.export import PMDOTExporter, PTDOTExporter ...@@ -15,14 +16,26 @@ from arpeggio.export import PMDOTExporter, PTDOTExporter
from arpeggio import RegExMatch as _ from arpeggio import RegExMatch as _
def bibfile(): return ZeroOrMore([bibentry, comment]), EndOfFile # Grammar
def bibentry(): return bibtype, "{", bibkey, ",", field, ZeroOrMore(",", field), "}" def bibfile(): return ZeroOrMore([comment_entry, bibentry, comment]), EndOfFile
def bibtype(): return _(r'@\w+') def comment_entry(): return "@comment", "{", _(r'[^}]*'), "}"
def bibkey(): return _(r'[^\s,]+'), def bibentry(): return bibtype, "{", bibkey, ",", field, ZeroOrMore(",", field), "}"
def field(): return fieldname, "=", '"', fieldvalue, '"' def field(): return fieldname, "=", fieldvalue
def fieldname(): return _(r'\w+') def fieldvalue(): return [fieldvalue_braces, fieldvalue_quotes]
def fieldvalue(): return _(r'[^"]*') def fieldvalue_braces(): return "{", fieldvalue_braced_content, "}"
def comment(): return _(r'[^@]+') def fieldvalue_quotes(): return '"', fieldvalue_quoted_content, '"'
# Lexical rules
def fieldname(): return _(r'[-\w]+')
def comment(): return _(r'[^@]+')
def bibtype(): return _(r'@\w+')
def bibkey(): return _(r'[^\s,]+')
def fieldvalue_quoted_content(): return _(r'((\\")|[^"])*')
def fieldvalue_braced_content(): return Combine(ZeroOrMore(Optional(And("{"), fieldvalue_inner),\
fieldvalue_part))
def fieldvalue_part(): return _(r'((\\")|[^{}])+')
def fieldvalue_inner(): return "{", fieldvalue_braced_content, "}"
# Semantic actions # Semantic actions
class BibFileSem(SemanticAction): class BibFileSem(SemanticAction):
...@@ -32,7 +45,9 @@ class BibFileSem(SemanticAction): ...@@ -32,7 +45,9 @@ class BibFileSem(SemanticAction):
def first_pass(self, parser, node, nodes): def first_pass(self, parser, node, nodes):
if parser.debug: if parser.debug:
print "Processing Bibfile" print "Processing Bibfile"
return nodes[:-1]
# Return only dict nodes
return [x for x in nodes if type(x) is dict]
class BibEntrySem(SemanticAction): class BibEntrySem(SemanticAction):
...@@ -60,16 +75,18 @@ class FieldSem(SemanticAction): ...@@ -60,16 +75,18 @@ class FieldSem(SemanticAction):
def first_pass(self, parser, node, nodes): def first_pass(self, parser, node, nodes):
if parser.debug: if parser.debug:
print " Processing field %s" % nodes[0] print " Processing field %s" % nodes[0]
field = (nodes[0].value, nodes[3]) field = (nodes[0].value, nodes[2])
return field return field
class FieldValueSem(SemanticAction): class FieldValueSem(SemanticAction):
""" """
Converts Serbian letters from latex encoding to Unicode. Converts Serbian letters from latex encoding to Unicode.
Remove braces. Remove newlines.
""" """
def first_pass(self, parser, node, nodes): def first_pass(self, parser, node, nodes):
return node.value.replace(r"\'{c}", u"ć")\ value = nodes[1].value
value = value.replace(r"\'{c}", u"ć")\
.replace(r"\'{C}", u"Ć")\ .replace(r"\'{C}", u"Ć")\
.replace(r"\v{c}", u"č")\ .replace(r"\v{c}", u"č")\
.replace(r"\v{C}", u"Č")\ .replace(r"\v{C}", u"Č")\
...@@ -77,18 +94,21 @@ class FieldValueSem(SemanticAction): ...@@ -77,18 +94,21 @@ class FieldValueSem(SemanticAction):
.replace(r"\v{Z}", u"Ž")\ .replace(r"\v{Z}", u"Ž")\
.replace(r"\v{s}", u"š")\ .replace(r"\v{s}", u"š")\
.replace(r"\v{S}", u"Š") .replace(r"\v{S}", u"Š")
value = re.sub("[\n{}]", '', value)
return value
# Connecting rules with semantic actions # Connecting rules with semantic actions
bibfile.sem = BibFileSem() bibfile.sem = BibFileSem()
bibentry.sem = BibEntrySem() bibentry.sem = BibEntrySem()
field.sem = FieldSem() field.sem = FieldSem()
fieldvalue.sem = FieldValueSem() fieldvalue_braces.sem = FieldValueSem()
fieldvalue_quotes.sem = FieldValueSem()
if __name__ == "__main__": if __name__ == "__main__":
# First we will make a parser - an instance of the bib parser model. # First we will make a parser - an instance of the bib parser model.
# Parser model is given in the form of python constructs therefore we # Parser model is given in the form of python constructs therefore we
# are using ParserPython class. # are using ParserPython class.
parser = ParserPython(bibfile, reduce_tree=True, debug=True) parser = ParserPython(bibfile, reduce_tree=True)
# Then we export it to a dot file in order to visualise it. This is # Then we export it to a dot file in order to visualise it. This is
# particularly handy for debugging purposes. # particularly handy for debugging purposes.
...@@ -112,7 +132,11 @@ if __name__ == "__main__": ...@@ -112,7 +132,11 @@ if __name__ == "__main__":
# getASG will start semantic analysis. # getASG will start semantic analysis.
# In this case semantic analysis will return a list of bibentry maps.
print parser.getASG() ast = parser.getASG()
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(ast)
else: else:
print "Usage: python bibtex.py file_to_parse" print "Usage: python bibtex.py file_to_parse"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment