Browse Source

Added examples/lark.g - Reference implementation of the Lark grammar

(inspired by issue #116)
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
Erez Shinan 7 years ago
parent
commit
51644a6c58
5 changed files with 76 additions and 1 deletions
  1. +1
    -0
      examples/README.md
  2. +49
    -0
      examples/lark.g
  3. +18
    -0
      examples/lark_grammar.py
  4. +1
    -0
      lark/grammars/common.g
  5. +7
    -1
      lark/load_grammar.py

+ 1
- 0
examples/README.md View File

@@ -7,6 +7,7 @@
- [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language)
- [fruitflies.py](fruitflies.py) - A demonstration of ambiguity
- [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter.
- [lark\_grammar.py](lark_grammar.py) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer)


### Advanced




+ 49
- 0
examples/lark.g View File

@@ -0,0 +1,49 @@
// Reference grammar for the Lark grammar language itself.
// Designed to be parsable with parser="lalr" and the standard lexer
// (see examples/lark_grammar.py).

// A grammar file is a sequence of rule/token definitions and statements,
// separated by newlines.
start: (_item | _NL)*

_item: rule
| token
| statement

// Rule names are lower-case (RULE), token names upper-case (TOKEN).
rule: RULE priority? ":" expansions _NL
token: TOKEN priority? ":" expansions _NL

// Optional priority suffix, e.g.  NAME.2: ...
priority: "." NUMBER

// Directives: %ignore and %import (with optional rename via "->").
statement: "%ignore" expansions _NL -> ignore
| "%import" import_args ["->" TOKEN] _NL -> import

// Dotted path for %import, e.g.  common.ESCAPED_STRING
import_args: name ("." name)*

// Alternatives separated by "|"; _VBAR also swallows a preceding newline
// so alternatives may continue on the next line.
?expansions: alias (_VBAR alias)*

// An alternative may be given an alias with "->".
?alias: expansion ["->" RULE]

?expansion: expr*

// An atom with an optional repetition operator (* + ? ~n ~n..m).
?expr: atom [OP | "~" NUMBER [".." NUMBER]]

?atom: "(" expansions ")"
| "[" expansions "]" -> maybe
| STRING ".." STRING -> literal_range
| name
| (REGEXP | STRING) -> literal

name: RULE
| TOKEN

_VBAR: _NL? "|"
// "?" only counts as an operator when not followed by a rule name,
// so "?rule:" headers still lex correctly.
OP: /[+*][?]?|[?](?![a-z])/
RULE: /!?[_?]?[a-z][_a-z0-9]*/
TOKEN: /_?[A-Z][_A-Z0-9]*/
// Strings may carry a case-insensitivity suffix:  "abc"i
STRING: _STRING "i"?
// /.../ regexps with optional flags; (?!\/) rejects the empty "//" (comment).
REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/
// One or more newlines plus following indentation, folded into one token.
_NL: /(\r?\n)+\s*/

%import common.ESCAPED_STRING -> _STRING
%import common.INT -> NUMBER
%import common.WS_INLINE

COMMENT: "//" /[^\n]/*

%ignore WS_INLINE
%ignore COMMENT

+ 18
- 0
examples/lark_grammar.py View File

@@ -0,0 +1,18 @@
from lark import Lark

# Build the meta-parser once at import time from the reference Lark grammar.
# LALR(1) with the standard lexer is sufficient to parse Lark's own syntax.
# `with` ensures the grammar file handle is closed (original leaked it).
with open('examples/lark.g') as grammar_file:
    parser = Lark(grammar_file.read(), parser="lalr")

# Grammar files used as a smoke test: each must parse without raising.
grammar_files = [
    'examples/python2.g',
    'examples/python3.g',
    'examples/lark.g',
    'lark/grammars/common.g',
]


def test():
    """Parse every grammar in ``grammar_files``; raise on the first failure.

    Success is signalled by the absence of an exception; the parse trees
    themselves are discarded.
    """
    for path in grammar_files:
        with open(path) as f:  # close each file instead of leaking the handle
            parser.parse(f.read())
    print("All grammars parsed successfully")


if __name__ == '__main__':
    test()

+ 1
- 0
lark/grammars/common.g View File

@@ -20,6 +20,7 @@ SIGNED_NUMBER: ["+"|"-"] NUMBER
//
// Strings
//
//STRING: /"(\\\"|\\\\|[^"\n])*?"i?/
STRING_INNER: ("\\\""|/[^"]/)
ESCAPED_STRING: "\"" STRING_INNER* "\""




+ 7
- 1
lark/load_grammar.py View File

@@ -122,7 +122,7 @@ RULES = {
'statement': ['ignore', 'import'],
'ignore': ['_IGNORE expansions _NL'],
'import': ['_IMPORT import_args _NL',
'_IMPORT import_args _TO TOKEN'],
'_IMPORT import_args _TO TOKEN _NL'],
'import_args': ['_import_args'],
'_import_args': ['name', '_import_args _DOT name'],


@@ -375,6 +375,7 @@ class TokenTreeToPattern(Transformer):
return p


def expansion(self, items):
assert items
if len(items) == 1:
return items[0]
if len({i.flags for i in items}) > 1:
@@ -486,6 +487,11 @@ class Grammar:


# Convert token-trees to strings/regexps
transformer = PrepareLiterals() * TokenTreeToPattern()
for name, (token_tree, priority) in token_defs:
for t in token_tree.find_data('expansion'):
if not t.children:
raise GrammarError("Tokens cannot be empty (%s)" % name)

tokens = [TokenDef(name, transformer.transform(token_tree), priority)
for name, (token_tree, priority) in token_defs]




Loading…
Cancel
Save