@@ -1,3 +1,7 @@ | |||||
# | |||||
# This example shows how to write a basic calculator with variables. | |||||
# | |||||
from lark import Lark, InlineTransformer | from lark import Lark, InlineTransformer | ||||
calc_grammar = """ | calc_grammar = """ | ||||
@@ -0,0 +1,42 @@ | |||||
# | |||||
# This example demonstrates lex-less parsing using the earley_nolex frontend | |||||
# | |||||
# Using a lexer for configuration files is tricky, because values don't | |||||
# have to be surrounded by delimiters. | |||||
# In this example we skip lexing and let the Earley parser resolve the ambiguity. | |||||
# | |||||
# Future versions of lark will make it easier to write these kinds of grammars. | |||||
# | |||||
from lark import Lark, Transformer | |||||
# Grammar for an INI-style configuration file: an optional leading newline run,
# then one or more "[name]" sections, each followed by "name=value" items.
# Every terminal here (string or regexp) matches exactly one character, which
# is what lets the grammar be parsed without a separate lexing stage.
_CONF_GRAMMAR = r"""
start: _nl? section+
section: "[" name "]" _nl item+
item: name "=" value _nl
name: /[a-zA-Z_]/ /\w/*
value: /./+
_nl: (_CR? _LF)+
_CR : /\r/
_LF : /\n/
"""

parser = Lark(_CONF_GRAMMAR, parser="earley_nolex")
class RestoreTokens(Transformer):
    """Join the children of every `name` and `value` node into one string.

    Both callbacks are the bound ``''.join`` of the empty string, so each
    one concatenates whatever sequence of strings it is handed
    (presumably the single-character matches produced by the lex-less
    grammar -- confirm against the Transformer implementation).
    """

    # The two rules are restored the same way, so they share one callable.
    name = value = ''.join
def test():
    """Parse a small sample configuration and print the restored tree."""
    sample_conf = """
[bla]
a=Hello
this="that",4
"""
    tree = parser.parse(sample_conf)
    # Collapse the per-rule children back into strings before printing.
    restored = RestoreTokens().transform(tree)
    print(restored.pretty())


if __name__ == '__main__':
    test()
@@ -1,8 +1,12 @@ | |||||
"""This example demonstrates usage of the Indenter class. | |||||
Since indentation is context-sensitive, a postlex stage is introduced to manufacture INDENT/DEDENT tokens. | |||||
It is crucial for the indenter that the NL_type matches the spaces (and tabs) after the newline. | |||||
""" | |||||
# | |||||
# This example demonstrates usage of the Indenter class. | |||||
# | |||||
# Since indentation is context-sensitive, a postlex stage is introduced to | |||||
# manufacture INDENT/DEDENT tokens. | |||||
# | |||||
# It is crucial for the indenter that the NL_type matches | |||||
# the spaces (and tabs) after the newline. | |||||
# | |||||
from lark.lark import Lark | from lark.lark import Lark | ||||
from lark.indenter import Indenter | from lark.indenter import Indenter | ||||
@@ -1,3 +1,9 @@ | |||||
# | |||||
# This example shows how to write a basic JSON parser | |||||
# | |||||
# The code is short and clear, but has good performance. | |||||
# | |||||
import sys | import sys | ||||
from lark import Lark, inline_args, Transformer | from lark import Lark, inline_args, Transformer | ||||
@@ -1,9 +1,10 @@ | |||||
import re | import re | ||||
import sre_parse | |||||
from .lexer import Lexer | from .lexer import Lexer | ||||
from .parsers.lalr_analysis import GrammarAnalyzer | from .parsers.lalr_analysis import GrammarAnalyzer | ||||
from .common import is_terminal | |||||
from .common import is_terminal, GrammarError | |||||
from .parsers import lalr_parser, earley | from .parsers import lalr_parser, earley | ||||
class WithLexer: | class WithLexer: | ||||
@@ -54,7 +55,7 @@ class Earley(WithLexer): | |||||
assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | ||||
return res[0] | return res[0] | ||||
class Earley2: | |||||
class Earley_NoLex: | |||||
def __init__(self, lexer_conf, parser_conf): | def __init__(self, lexer_conf, parser_conf): | ||||
self.token_by_name = {t.name:t for t in lexer_conf.tokens} | self.token_by_name = {t.name:t for t in lexer_conf.tokens} | ||||
@@ -68,7 +69,11 @@ class Earley2: | |||||
def _prepare_expansion(self, expansion):
    """Yield the symbols of `expansion`, attaching a compiled regexp to terminals.

    Non-terminal symbols are yielded unchanged. For each terminal, the
    token's regexp is looked up in ``self.token_by_name`` and the symbol is
    yielded as a ``(symbol, compiled_regexp)`` pair.

    Raises:
        GrammarError: if the width reported by ``sre_parse`` for a
            terminal's regexp is anything other than ``(1, 1)``.
    """
    for symbol in expansion:
        if not is_terminal(symbol):
            yield symbol
            continue

        pattern = self.token_by_name[symbol].to_regexp()
        # Reject any terminal whose regexp is not exactly one character wide.
        span = sre_parse.parse(pattern).getwidth()
        if span != (1, 1):
            raise GrammarError('Dynamic lexing requires all tokens have the width 1 (%s is %s)' % (pattern, span))
        yield symbol, re.compile(pattern)
@@ -77,4 +82,4 @@ class Earley2: | |||||
assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | ||||
return res[0] | return res[0] | ||||
ENGINE_DICT = { 'lalr': LALR, 'earley': Earley } | |||||
ENGINE_DICT = { 'lalr': LALR, 'earley': Earley, 'earley_nolex': Earley_NoLex } |
@@ -43,9 +43,11 @@ class State(object): | |||||
# PORT: originally tests regexp | # PORT: originally tests regexp | ||||
if self.expect_symbol[1] is not None: | if self.expect_symbol[1] is not None: | ||||
match = self.expect_symbol[1].match(stream, pos) | |||||
match = self.expect_symbol[1].match(inp) | |||||
if match: | |||||
return self.next_state(inp) | |||||
if self.expect_symbol[0] == inp.type: | |||||
elif self.expect_symbol[0] == inp.type: | |||||
return self.next_state(inp) | return self.next_state(inp) | ||||
def consume_nonterminal(self, inp): | def consume_nonterminal(self, inp): | ||||