Merge branch 'brupelo-master'

7 years ago · 34dae9d6aa
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,6 +6,6 @@ python:
  - "3.6"
  - "pypy"  # PyPy2 5.8.0
  - "pypy3" # Pypy3 5.8.0-beta0
 install: pip install tox-travis
 script:
  - pip install -r nearley-requirements.txt
  - python -m tests
  - tox
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ Most importantly, Lark will save you time and prevent you from getting parsing h
 Lark has no dependencies.
 [![Build Status](https://travis-ci.org/erezsh/lark.svg?branch=master)](https://travis-ci.org/erezsh/lark)
 [![Build Status](https://travis-ci.org/lark-parser/lark.svg?branch=master)](https://travis-ci.org/lark-parser/lark)
 ### Hello World
--- a/examples/README.md
+++ b/examples/README.md
@@ -7,6 +7,7 @@
 - [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language)
 - [fruitflies.py](fruitflies.py) - A demonstration of ambiguity
 - [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter.
 - [lark\_grammar.py](lark_grammar.py) + [lark.g](lark.g) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer)
 ### Advanced
--- a/examples/lark.g
+++ b/examples/lark.g
@@ -0,0 +1,49 @@
 // Reference grammar for Lark grammar files.
 // A grammar is any sequence of items (rules, tokens, statements) and newlines.
 start: (_item | _NL)*
 _item: rule
     | token
     | statement
 // Rule and token definitions share one shape:
 // name, optional priority, ":", expansions, terminating newline.
 rule: RULE priority? ":" expansions _NL
 token: TOKEN priority? ":" expansions _NL
 // A priority is written as "." followed by a NUMBER.
 priority: "." NUMBER
 // Directives: "%ignore" (aliased to `ignore`) and "%import" with an optional
 // "->" TOKEN rename (aliased to `import`).
 statement: "%ignore" expansions _NL                -> ignore
         | "%import" import_args ["->" TOKEN] _NL  -> import
 // Dotted path of names.
 import_args: name ("." name)*
 // Alternatives separated by _VBAR; each alternative may carry a "->" RULE alias.
 ?expansions: alias (_VBAR alias)*
 ?alias: expansion ["->" RULE]
 // An expansion is a (possibly empty) sequence of expressions.
 ?expansion: expr*
 // An atom optionally followed by an OP, or by "~" NUMBER with an optional ".." NUMBER.
 ?expr: atom [OP | "~" NUMBER [".." NUMBER]]
 // Atoms: parenthesised group, bracketed group (`maybe`), string range,
 // a rule/token name, or a regexp/string literal.
 ?atom: "(" expansions ")"
     | "[" expansions "]" -> maybe
     | STRING ".." STRING -> literal_range
     | name
     | (REGEXP | STRING) -> literal
 name: RULE
    | TOKEN
 // The optional leading _NL lets an alternative start on a following line.
 _VBAR: _NL? "|"
 // "+" or "*" optionally followed by "?", or a "?" that is not followed by a lowercase letter.
 OP: /[+*][?]?|[?](?![a-z])/
 // Rule names are lowercase, with optional "!" and "_"/"?" prefixes.
 RULE: /!?[_?]?[a-z][_a-z0-9]*/
 // Token names are uppercase, with an optional "_" prefix.
 TOKEN: /_?[A-Z][_A-Z0-9]*/
 // An escaped string with an optional "i" suffix.
 STRING: _STRING "i"?
 // A slash-delimited regexp followed by optional flag letters from "imslux".
 REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/
 // One or more newlines together with any whitespace that follows them.
 _NL: /(\r?\n)+\s*/
 %import common.ESCAPED_STRING -> _STRING
 %import common.INT -> NUMBER
 %import common.WS_INLINE
 // A comment runs from "//" up to (not including) the next newline.
 COMMENT: "//" /[^\n]/*
 // Inline whitespace and comments are dropped by the lexer.
 %ignore WS_INLINE
 %ignore COMMENT
--- a/examples/lark_grammar.py
+++ b/examples/lark_grammar.py
@@ -0,0 +1,18 @@
 from lark import Lark
 # Build an LALR(1) parser from the reference Lark grammar.  The open file
 # object is handed to Lark (which accepts a file-like grammar source) and is
 # closed as soon as the parser has been constructed, instead of leaking the
 # handle for the lifetime of the process.
 with open('examples/lark.g') as grammar_source:
    parser = Lark(grammar_source, parser="lalr")

 # Grammar files that `parser` is expected to accept (paths are relative to
 # the repository root, so run this script from there).
 grammar_files = [
    'examples/python2.g',
    'examples/python3.g',
    'examples/lark.g',
    'lark/grammars/common.g',
 ]
 def test():
    """Parse every file in ``grammar_files`` with the module-level ``parser``.

    Any exception raised by ``parser.parse`` (or by opening a file) propagates
    to the caller; the success message is printed only when every grammar
    parsed.
    """
    for grammar_file in grammar_files:
        # Close each grammar file promptly rather than leaking the handle.
        with open(grammar_file) as f:
            parser.parse(f.read())
    print("All grammars parsed successfully")
 if __name__ == '__main__':
    test()
--- a/examples/qscintilla_json.py
+++ b/examples/qscintilla_json.py
@@ -0,0 +1,201 @@
 #
 # This example shows how to write a syntax-highlighted editor with Qt and Lark
 #
 # Requirements:
 #
 #   PyQt5==5.10.1
 #   QScintilla==2.10.4
 import sys
 import textwrap
 from PyQt5.Qt import *  # noqa
 from PyQt5.Qsci import QsciScintilla
 from PyQt5.Qsci import QsciLexerCustom
 from lark import Lark
 class LexerJson(QsciLexerCustom):
    """Custom QScintilla lexer that styles JSON text using a Lark lexer.

    The Lark instance is used only for tokenizing (``self.lark.lex``); each
    token type is mapped to a numeric style through ``self.token_styles``.
    """

    def __init__(self, parent=None):
        """Create the lexer, build the Lark lexer, then register the styles.

        ``create_styles`` reads ``self.parent().font()``, so ``parent`` is
        expected to be a widget that provides ``font()``.
        """
        super().__init__(parent)
        self.create_parser()
        self.create_styles()

    def create_styles(self):
        """Register colour/paper/font for each style and map token types to styles."""
        deeppink = QColor(249, 38, 114)
        khaki = QColor(230, 219, 116)
        mediumpurple = QColor(174, 129, 255)
        mediumturquoise = QColor(81, 217, 205)
        yellowgreen = QColor(166, 226, 46)
        lightcyan = QColor(213, 248, 232)
        darkslategrey = QColor(39, 40, 34)
        # Style number -> foreground colour.
        styles = {
            0: mediumturquoise,
            1: mediumpurple,
            2: yellowgreen,
            3: deeppink,
            4: khaki,
            5: lightcyan
        }
        for style, color in styles.items():
            self.setColor(color, style)
            self.setPaper(darkslategrey, style)
            self.setFont(self.parent().font(), style)
        # Token type (as reported by the Lark lexer) -> style number.
        # Token types missing from this table fall back to style 0 in styleText.
        self.token_styles = {
            "__COLON": 5,
            "__COMMA": 5,
            "__LBRACE": 5,
            "__LSQB": 5,
            "__RBRACE": 5,
            "__RSQB": 5,
            "FALSE": 0,
            "NULL": 0,
            "TRUE": 0,
            "STRING": 4,
            "NUMBER": 1,
        }

    def create_parser(self):
        """Build ``self.lark`` with ``parser=None`` and ``lexer='standard'``.

        The ``anons`` rule only lists the punctuation literals; presumably this
        is what makes the lexer define the ``__COLON`` ... ``__RSQB`` token
        types styled in ``create_styles`` (verify against the Lark version in use).
        """
        # The JSON literal is lowercase ``null`` (like ``true``/``false``);
        # matching "NULL" would make every valid JSON null fail to lex.
        grammar = '''
            anons: ":" "{" "}" "," "[" "]"
            TRUE: "true"
            FALSE: "false"
            NULL: "null"
            %import common.ESCAPED_STRING -> STRING
            %import common.SIGNED_NUMBER  -> NUMBER
            %import common.WS
            %ignore WS
        '''
        self.lark = Lark(grammar, parser=None, lexer='standard')
        # All tokens: print([t.name for t in self.lark.parser.lexer.tokens])

    def defaultPaper(self, style):
        """Return the background colour, the same for every style."""
        return QColor(39, 40, 34)

    def language(self):
        """Return the language name reported for this lexer."""
        return "Json"

    def description(self, style):
        """Return a token-type name mapped to ``style``, or "" if there is none.

        Several token types share a style number; the inverted mapping keeps
        the last one encountered for each style.
        """
        return {v: k for k, v in self.token_styles.items()}.get(style, "")

    def styleText(self, start, end):
        """Style the text between ``start`` and ``end`` token by token.

        Lexing errors are printed rather than raised, so text that cannot be
        tokenized simply stops being styled from the failing position on.
        """
        self.startStyling(start)
        # NOTE(review): start/end are used here as str indices; if the editor
        # reports byte offsets and the text is not pure ASCII this slice can be
        # misaligned -- confirm against the editor's encoding settings.
        text = self.parent().text()[start:end]
        # Character index in `text` just past the previously styled token.
        last_pos = 0
        try:
            for token in self.lark.lex(text):
                # Characters skipped by the lexer since the previous token.
                ws_len = token.pos_in_stream - last_pos
                if ws_len:
                    self.setStyling(ws_len, 0)    # whitespace
                # Styling lengths are given as the UTF-8 byte length of the token.
                token_len = len(token.encode("utf-8"))
                self.setStyling(
                    token_len, self.token_styles.get(token.type, 0))
                # Advance by characters, not bytes: last_pos is compared with
                # token.pos_in_stream, which indexes characters of `text`.
                # Using the byte length here produced wrong (even negative)
                # gaps after any token containing non-ASCII characters.
                last_pos = token.pos_in_stream + len(token)
        except Exception as e:
            # Deliberate best-effort: partially typed input is routinely
            # un-lexable, and styling must not crash the editor.
            print(e)
 class EditorAll(QsciScintilla):
    """QsciScintilla editor with preset font, margins, indentation, caret,
    selection and multi-selection settings, using ``LexerJson`` for styling.
    """

    def __init__(self, parent=None):
        """Apply each group of defaults in turn, then install the JSON lexer."""
        super().__init__(parent)
        editor_font = self._apply_font_defaults()
        self._apply_margin_defaults(editor_font)
        self._apply_indentation_defaults()
        self._apply_caret_and_selection_defaults()
        self._apply_multiselection_defaults()
        self.setLexer(LexerJson(self))

    def _apply_font_defaults(self):
        """Set the editor font and return it for reuse by the margin setup."""
        editor_font = QFont()
        editor_font.setFamily('Consolas')
        editor_font.setFixedPitch(True)
        editor_font.setPointSize(8)
        editor_font.setBold(True)
        self.setFont(editor_font)
        return editor_font

    def _apply_margin_defaults(self, editor_font):
        """Configure margin 0 for line numbers and margin 1 as a symbol margin."""
        metrics = QFontMetrics(editor_font)
        self.setMarginsFont(editor_font)
        # Margin 0 is as wide as the text "000" plus 6.
        line_number_width = metrics.width("000") + 6
        self.setMarginWidth(0, line_number_width)
        self.setMarginLineNumbers(0, True)
        self.setMarginsForegroundColor(QColor(128, 128, 128))
        self.setMarginsBackgroundColor(QColor(39, 40, 34))
        self.setMarginType(1, self.SymbolMargin)
        self.setMarginWidth(1, 12)

    def _apply_indentation_defaults(self):
        """Indent with 4 spaces (no tabs), with guides and backspace-unindent."""
        self.setIndentationsUseTabs(False)
        self.setIndentationWidth(4)
        self.setBackspaceUnindents(True)
        self.setIndentationGuides(True)
        # Folding is left disabled:
        # self.setFolding(QsciScintilla.CircledFoldStyle)

    def _apply_caret_and_selection_defaults(self):
        """Set the caret colour/width and the selection colours."""
        self.setCaretForegroundColor(QColor(247, 247, 241))
        self.setCaretWidth(2)
        self.setSelectionBackgroundColor(QColor(61, 61, 52))
        self.resetSelectionForegroundColor()

    def _apply_multiselection_defaults(self):
        """Send the Scintilla messages for multiple selection, multi-paste
        and additional-selection typing.
        """
        send = self.SendScintilla
        send(QsciScintilla.SCI_SETMULTIPLESELECTION, True)
        send(QsciScintilla.SCI_SETMULTIPASTE, 1)
        send(QsciScintilla.SCI_SETADDITIONALSELECTIONTYPING, True)
 EXAMPLE_TEXT = textwrap.dedent("""\
        {
            "_id": "5b05ffcbcf8e597939b3f5ca",
            "about": "Excepteur consequat commodo esse voluptate aute aliquip ad sint deserunt commodo eiusmod irure. Sint aliquip sit magna duis eu est culpa aliqua excepteur ut tempor nulla. Aliqua ex pariatur id labore sit. Quis sit ex aliqua veniam exercitation laboris anim adipisicing. Lorem nisi reprehenderit ullamco labore qui sit ut aliqua tempor consequat pariatur proident.",
            "address": "665 Malbone Street, Thornport, Louisiana, 243",
            "age": 23,
            "balance": "$3,216.91",
            "company": "BULLJUICE",
            "email": "elisekelley@bulljuice.com",
            "eyeColor": "brown",
            "gender": "female",
            "guid": "d3a6d865-0f64-4042-8a78-4f53de9b0707",
            "index": 0,
            "isActive": false,
            "isActive2": true,
            "latitude": -18.660714,
            "longitude": -85.378048,
            "name": "Elise Kelley",
            "phone": "+1 (808) 543-3966",
            "picture": "http://placehold.it/32x32",
            "registered": "2017-09-30T03:47:40 -02:00",
            "tags": [
                "et",
                "nostrud",
                "in",
                "fugiat",
                "incididunt",
                "labore",
                "nostrud"
            ]
        }\
    """)
 def main():
    """Show an ``EditorAll`` window preloaded with ``EXAMPLE_TEXT``.

    Runs the Qt event loop and exits the process with its return code.
    """
    application = QApplication(sys.argv)
    editor = EditorAll()
    editor.setWindowTitle(__file__)
    editor.setText(EXAMPLE_TEXT)
    editor.resize(800, 600)
    editor.show()
    exit_code = application.exec_()
    sys.exit(exit_code)
 if __name__ == "__main__":
    main()
--- a/lark/grammars/common.g
+++ b/lark/grammars/common.g
@@ -20,6 +20,7 @@ SIGNED_NUMBER: ["+"|"-"] NUMBER
 //
 // Strings
 //
 //STRING: /"(\\\"|\\\\|[^"\n])*?"i?/
 STRING_INNER: ("\\\""|/[^"]/)
 ESCAPED_STRING: "\"" STRING_INNER* "\""
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -122,7 +122,7 @@ RULES = {
    'statement': ['ignore', 'import'],
    'ignore': ['_IGNORE expansions _NL'],
    'import': ['_IMPORT import_args _NL',
               '_IMPORT import_args _TO TOKEN'],
               '_IMPORT import_args _TO TOKEN _NL'],
    'import_args': ['_import_args'],
    '_import_args': ['name', '_import_args _DOT name'],
@@ -375,6 +375,7 @@ class TokenTreeToPattern(Transformer):
        return p
    def expansion(self, items):
        assert items
        if len(items) == 1:
            return items[0]
        if len({i.flags for i in items}) > 1:
@@ -486,6 +487,11 @@ class Grammar:
        # Convert token-trees to strings/regexps
        transformer = PrepareLiterals() * TokenTreeToPattern()
        for name, (token_tree, priority) in token_defs:
            for t in token_tree.find_data('expansion'):
                if not t.children:
                    raise GrammarError("Tokens cannot be empty (%s)" % name)
        tokens = [TokenDef(name, transformer.transform(token_tree), priority)
                  for name, (token_tree, priority) in token_defs]
--- a/tox.ini
+++ b/tox.ini
@@ -0,0 +1,24 @@
 [tox]
 envlist = py27, py34, py35, py36, pypy, pypy3
 skip_missing_interpreters=true
 [travis]
 2.7 = py27
 3.4 = py34
 3.5 = py35
 3.6 = py36
 pypy = pypy
 pypy3 = pypy3
 [testenv]
 whitelist_externals = git
 deps =
    -rnearley-requirements.txt
 # to always force recreation and avoid unexpected side effects
 recreate=True
 commands=
    git submodule sync -q
    git submodule update --init
    python -m tests