@@ -6,6 +6,6 @@ python: | |||||
- "3.6" | - "3.6" | ||||
- "pypy" # PyPy2 5.8.0 | - "pypy" # PyPy2 5.8.0 | ||||
- "pypy3" # Pypy3 5.8.0-beta0 | - "pypy3" # Pypy3 5.8.0-beta0 | ||||
install: pip install tox-travis | |||||
script: | script: | ||||
- pip install -r nearley-requirements.txt | |||||
- python -m tests | |||||
- tox |
@@ -31,7 +31,7 @@ Most importantly, Lark will save you time and prevent you from getting parsing h | |||||
Lark has no dependencies. | Lark has no dependencies. | ||||
[](https://travis-ci.org/erezsh/lark) | |||||
[![Build Status](https://travis-ci.org/lark-parser/lark.svg?branch=master)](https://travis-ci.org/lark-parser/lark) | |||||
### Hello World | ### Hello World | ||||
@@ -7,6 +7,7 @@ | |||||
- [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) | - [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) | ||||
- [fruitflies.py](fruitflies.py) - A demonstration of ambiguity | - [fruitflies.py](fruitflies.py) - A demonstration of ambiguity | ||||
- [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. | - [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. | ||||
- [lark\_grammar.py](lark_grammar.py) + [lark.g](lark.g) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer) | |||||
### Advanced | ### Advanced | ||||
@@ -0,0 +1,49 @@ | |||||
// Grammar describing the syntax of Lark grammar files.
// examples/lark_grammar.py loads this file with parser="lalr" and uses it
// to parse several grammars, including this one.

// A grammar file is any sequence of items and newlines.
start: (_item | _NL)* | |||||
_item: rule | |||||
| token | |||||
| statement | |||||
// Rule and token definitions share one shape: name, optional priority,
// a colon, the expansions, and a terminating newline.
rule: RULE priority? ":" expansions _NL | |||||
token: TOKEN priority? ":" expansions _NL | |||||
// A priority is written as a dot followed by a number.
priority: "." NUMBER | |||||
// Directives: %ignore and %import (the latter with an optional "-> TOKEN" alias).
statement: "%ignore" expansions _NL -> ignore | |||||
| "%import" import_args ["->" TOKEN] _NL -> import | |||||
// Dotted path of names.
import_args: name ("." name)* | |||||
// Alternatives are separated by _VBAR; each alternative may be aliased with "-> RULE".
?expansions: alias (_VBAR alias)* | |||||
?alias: expansion ["->" RULE] | |||||
// An expansion is a (possibly empty) sequence of expressions.
?expansion: expr* | |||||
// An atom optionally followed by an operator (OP) or a "~ N" / "~ N..M" suffix.
?expr: atom [OP | "~" NUMBER [".." NUMBER]] | |||||
?atom: "(" expansions ")" | |||||
| "[" expansions "]" -> maybe | |||||
| STRING ".." STRING -> literal_range | |||||
| name | |||||
| (REGEXP | STRING) -> literal | |||||
name: RULE | |||||
| TOKEN | |||||
// The alternation bar may be preceded by a newline.
_VBAR: _NL? "|" | |||||
// Terminals
OP: /[+*][?]?|[?](?![a-z])/ | |||||
// Rule names are lowercase and may carry a leading "!" and/or "_" / "?" prefix.
RULE: /!?[_?]?[a-z][_a-z0-9]*/ | |||||
// Token names are uppercase, optionally prefixed with "_".
TOKEN: /_?[A-Z][_A-Z0-9]*/ | |||||
// A string literal with an optional trailing "i" flag.
STRING: _STRING "i"? | |||||
// A slash-delimited regexp followed by optional flag letters.
REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/ | |||||
// One or more line breaks, plus any whitespace that follows them.
_NL: /(\r?\n)+\s*/ | |||||
%import common.ESCAPED_STRING -> _STRING | |||||
%import common.INT -> NUMBER | |||||
%import common.WS_INLINE | |||||
// Line comments start with "//" and run to the end of the line.
COMMENT: "//" /[^\n]/* | |||||
// Inline whitespace and comments are dropped.
%ignore WS_INLINE | |||||
%ignore COMMENT |
@@ -0,0 +1,18 @@ | |||||
from lark import Lark | |||||
# Build the LALR parser for Lark's own grammar syntax once, at import time.
# The grammar file object is handed to Lark inside a ``with`` block so the
# handle is closed as soon as the grammar has been loaded (the original left
# it open). The path is relative to the current working directory.
with open('examples/lark.g') as _grammar_source:
    parser = Lark(_grammar_source, parser="lalr")

# Grammar files that test() feeds through the parser; paths are relative to
# the current working directory.
grammar_files = [
    'examples/python2.g',
    'examples/python3.g',
    'examples/lark.g',
    'lark/grammars/common.g',
]
def test():
    """Parse every file in ``grammar_files`` with the module-level ``parser``.

    Any exception raised by ``parser.parse`` propagates to the caller; the
    success message is printed only after every file has parsed.
    """
    for grammar_file in grammar_files:
        # Close each file as soon as it has been read instead of leaking
        # the handle; the resulting tree is not needed, only the absence
        # of a parse error.
        with open(grammar_file) as f:
            parser.parse(f.read())
    print("All grammars parsed successfully")


if __name__ == '__main__':
    test()
@@ -0,0 +1,201 @@ | |||||
# | |||||
# This example shows how to write a syntax-highlighted editor with Qt and Lark | |||||
# | |||||
# Requirements: | |||||
# | |||||
# PyQt5==5.10.1 | |||||
# QScintilla==2.10.4 | |||||
import sys | |||||
import textwrap | |||||
from PyQt5.Qt import * # noqa | |||||
from PyQt5.Qsci import QsciScintilla | |||||
from PyQt5.Qsci import QsciLexerCustom | |||||
from lark import Lark | |||||
class LexerJson(QsciLexerCustom):
    """Custom QScintilla lexer that colours JSON using Lark's standard lexer.

    Only Lark's tokenizer is used (``parser=None``); each token type is
    mapped to one of six style numbers configured in ``create_styles``.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.create_parser()
        self.create_styles()

    def create_styles(self):
        """Register the colour, paper and font for each style number and
        build the token-type -> style-number table."""
        deeppink = QColor(249, 38, 114)
        khaki = QColor(230, 219, 116)
        mediumpurple = QColor(174, 129, 255)
        mediumturquoise = QColor(81, 217, 205)
        yellowgreen = QColor(166, 226, 46)
        lightcyan = QColor(213, 248, 232)
        darkslategrey = QColor(39, 40, 34)

        styles = {
            0: mediumturquoise,
            1: mediumpurple,
            2: yellowgreen,
            3: deeppink,
            4: khaki,
            5: lightcyan
        }

        for style, color in styles.items():
            self.setColor(color, style)
            self.setPaper(darkslategrey, style)
            self.setFont(self.parent().font(), style)

        # Token type (as named by Lark) -> style number defined above.
        self.token_styles = {
            "__COLON": 5,
            "__COMMA": 5,
            "__LBRACE": 5,
            "__LSQB": 5,
            "__RBRACE": 5,
            "__RSQB": 5,
            "FALSE": 0,
            "NULL": 0,
            "TRUE": 0,
            "STRING": 4,
            "NUMBER": 1,
        }

    def create_parser(self):
        """Create the Lark instance used purely as a tokenizer."""
        # The `anons` rule exists only to declare the punctuation as
        # anonymous tokens. The JSON literal is lowercase `null`; the
        # original spelled it "NULL", so a `null` value failed to lex.
        grammar = '''
            anons: ":" "{" "}" "," "[" "]"
            TRUE: "true"
            FALSE: "false"
            NULL: "null"
            %import common.ESCAPED_STRING -> STRING
            %import common.SIGNED_NUMBER -> NUMBER
            %import common.WS
            %ignore WS
        '''

        self.lark = Lark(grammar, parser=None, lexer='standard')
        # All tokens: print([t.name for t in self.lark.parser.lexer.tokens])

    def defaultPaper(self, style):
        """Return the background colour, which is the same for every style."""
        return QColor(39, 40, 34)

    def language(self):
        """Return the name of the language handled by this lexer."""
        return "Json"

    def description(self, style):
        """Return a token type name mapped to ``style``, or "" if none is."""
        return {v: k for k, v in self.token_styles.items()}.get(style, "")

    def styleText(self, start, end):
        """Apply styles to the document range ``start``..``end``.

        Scintilla positions and styling lengths are counted in UTF-8 bytes,
        while Lark reports token positions in characters. The original mixed
        the two units, which shifted the colouring after any non-ASCII
        character; here every length passed to ``setStyling`` is a byte
        count and ``last_pos`` is kept in characters.
        """
        self.startStyling(start)

        try:
            # Slice the document by byte offsets, then lex the decoded text.
            raw = self.parent().text().encode("utf-8")[start:end]
            text = raw.decode("utf-8")

            last_pos = 0  # character offset just past the previous token
            for token in self.lark.lex(text):
                # Ignored text (whitespace) between the previous token and
                # this one still has to be styled so the positions line up.
                gap = text[last_pos:token.pos_in_stream]
                if gap:
                    self.setStyling(len(gap.encode("utf-8")), 0)  # whitespace

                token_len = len(token.encode("utf-8"))
                self.setStyling(
                    token_len, self.token_styles.get(token.type, 0))

                last_pos = token.pos_in_stream + len(token)
        except Exception as e:
            # Best effort: text that is mid-edit is often not lexable, and an
            # exception escaping this Qt callback must not take the editor
            # down. Report it and leave the remainder unstyled.
            print(e)
class EditorAll(QsciScintilla):
    """QScintilla editor widget preconfigured with a dark theme and the
    JSON lexer defined in this file."""

    def __init__(self, parent=None):
        super().__init__(parent)

        editor_font = self._build_font()
        self.setFont(editor_font)

        self._setup_margins(editor_font)
        self._setup_indentation()
        self._setup_caret_and_selection()
        self._setup_multiselection()

        json_lexer = LexerJson(self)
        self.setLexer(json_lexer)

    @staticmethod
    def _build_font():
        """Create the bold, fixed-pitch 8pt Consolas font used everywhere."""
        result = QFont()
        result.setFamily('Consolas')
        result.setFixedPitch(True)
        result.setPointSize(8)
        result.setBold(True)
        return result

    def _setup_margins(self, margin_font):
        """Configure the line-number margin (0) and the symbol margin (1)."""
        metrics = QFontMetrics(margin_font)
        self.setMarginsFont(margin_font)
        # Margin 0 is sized to fit three digits plus a little padding.
        self.setMarginWidth(0, metrics.width("000") + 6)
        self.setMarginLineNumbers(0, True)
        self.setMarginsForegroundColor(QColor(128, 128, 128))
        self.setMarginsBackgroundColor(QColor(39, 40, 34))
        self.setMarginType(1, self.SymbolMargin)
        self.setMarginWidth(1, 12)

    def _setup_indentation(self):
        """Indent with four spaces, show guides, let backspace unindent."""
        self.setIndentationsUseTabs(False)
        self.setIndentationWidth(4)
        self.setBackspaceUnindents(True)
        self.setIndentationGuides(True)
        # self.setFolding(QsciScintilla.CircledFoldStyle)

    def _setup_caret_and_selection(self):
        """Set the caret colour/width and the selection colours."""
        self.setCaretForegroundColor(QColor(247, 247, 241))
        self.setCaretWidth(2)
        self.setSelectionBackgroundColor(QColor(61, 61, 52))
        self.resetSelectionForegroundColor()

    def _setup_multiselection(self):
        """Turn on multiple selection, multi-paste and multi-caret typing."""
        self.SendScintilla(QsciScintilla.SCI_SETMULTIPLESELECTION, True)
        self.SendScintilla(QsciScintilla.SCI_SETMULTIPASTE, 1)
        self.SendScintilla(
            QsciScintilla.SCI_SETADDITIONALSELECTIONTYPING, True)
# Sample JSON document that main() loads into the editor via setText().
EXAMPLE_TEXT = textwrap.dedent("""\ | |||||
{ | |||||
"_id": "5b05ffcbcf8e597939b3f5ca", | |||||
"about": "Excepteur consequat commodo esse voluptate aute aliquip ad sint deserunt commodo eiusmod irure. Sint aliquip sit magna duis eu est culpa aliqua excepteur ut tempor nulla. Aliqua ex pariatur id labore sit. Quis sit ex aliqua veniam exercitation laboris anim adipisicing. Lorem nisi reprehenderit ullamco labore qui sit ut aliqua tempor consequat pariatur proident.", | |||||
"address": "665 Malbone Street, Thornport, Louisiana, 243", | |||||
"age": 23, | |||||
"balance": "$3,216.91", | |||||
"company": "BULLJUICE", | |||||
"email": "elisekelley@bulljuice.com", | |||||
"eyeColor": "brown", | |||||
"gender": "female", | |||||
"guid": "d3a6d865-0f64-4042-8a78-4f53de9b0707", | |||||
"index": 0, | |||||
"isActive": false, | |||||
"isActive2": true, | |||||
"latitude": -18.660714, | |||||
"longitude": -85.378048, | |||||
"name": "Elise Kelley", | |||||
"phone": "+1 (808) 543-3966", | |||||
"picture": "http://placehold.it/32x32", | |||||
"registered": "2017-09-30T03:47:40 -02:00", | |||||
"tags": [ | |||||
"et", | |||||
"nostrud", | |||||
"in", | |||||
"fugiat", | |||||
"incididunt", | |||||
"labore", | |||||
"nostrud" | |||||
] | |||||
}\ | |||||
""") | |||||
def main():
    """Show an EditorAll window filled with EXAMPLE_TEXT and run the Qt
    event loop, exiting the process with the loop's return code."""
    qt_app = QApplication(sys.argv)

    editor = EditorAll()
    editor.setWindowTitle(__file__)
    editor.setText(EXAMPLE_TEXT)
    editor.resize(800, 600)
    editor.show()

    exit_code = qt_app.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
@@ -20,6 +20,7 @@ SIGNED_NUMBER: ["+"|"-"] NUMBER | |||||
// | // | ||||
// Strings | // Strings | ||||
// | // | ||||
//STRING: /"(\\\"|\\\\|[^"\n])*?"i?/ | |||||
STRING_INNER: ("\\\""|/[^"]/) | STRING_INNER: ("\\\""|/[^"]/) | ||||
ESCAPED_STRING: "\"" STRING_INNER* "\"" | ESCAPED_STRING: "\"" STRING_INNER* "\"" | ||||
@@ -122,7 +122,7 @@ RULES = { | |||||
'statement': ['ignore', 'import'], | 'statement': ['ignore', 'import'], | ||||
'ignore': ['_IGNORE expansions _NL'], | 'ignore': ['_IGNORE expansions _NL'], | ||||
'import': ['_IMPORT import_args _NL', | 'import': ['_IMPORT import_args _NL', | ||||
'_IMPORT import_args _TO TOKEN'], | |||||
'_IMPORT import_args _TO TOKEN _NL'], | |||||
'import_args': ['_import_args'], | 'import_args': ['_import_args'], | ||||
'_import_args': ['name', '_import_args _DOT name'], | '_import_args': ['name', '_import_args _DOT name'], | ||||
@@ -375,6 +375,7 @@ class TokenTreeToPattern(Transformer): | |||||
return p | return p | ||||
def expansion(self, items): | def expansion(self, items): | ||||
assert items | |||||
if len(items) == 1: | if len(items) == 1: | ||||
return items[0] | return items[0] | ||||
if len({i.flags for i in items}) > 1: | if len({i.flags for i in items}) > 1: | ||||
@@ -486,6 +487,11 @@ class Grammar: | |||||
# Convert token-trees to strings/regexps | # Convert token-trees to strings/regexps | ||||
transformer = PrepareLiterals() * TokenTreeToPattern() | transformer = PrepareLiterals() * TokenTreeToPattern() | ||||
for name, (token_tree, priority) in token_defs: | |||||
for t in token_tree.find_data('expansion'): | |||||
if not t.children: | |||||
raise GrammarError("Tokens cannot be empty (%s)" % name) | |||||
tokens = [TokenDef(name, transformer.transform(token_tree), priority) | tokens = [TokenDef(name, transformer.transform(token_tree), priority) | ||||
for name, (token_tree, priority) in token_defs] | for name, (token_tree, priority) in token_defs] | ||||
@@ -0,0 +1,24 @@ | |||||
[tox] | |||||
envlist = py27, py34, py35, py36, pypy, pypy3 | |||||
skip_missing_interpreters=true | |||||
[travis] | |||||
2.7 = py27 | |||||
3.4 = py34 | |||||
3.5 = py35 | |||||
3.6 = py36 | |||||
pypy = pypy | |||||
pypy3 = pypy3 | |||||
[testenv] | |||||
whitelist_externals = git | |||||
deps = | |||||
-rnearley-requirements.txt | |||||
# to always force recreation and avoid unexpected side effects | |||||
recreate=True | |||||
commands= | |||||
git submodule sync -q | |||||
git submodule update --init | |||||
python -m tests |