Selaa lähdekoodia

Added the earley_nolex frontend, and a conf_nolex example to use it

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 vuotta sitten
vanhempi
commit
da15f99edb
6 muutettua tiedostoa jossa 74 lisäystä ja 11 poistoa
  1. +4
    -0
      examples/calc.py
  2. +42
    -0
      examples/conf_nolex.py
  3. +9
    -5
      examples/indented_tree.py
  4. +6
    -0
      examples/json_parser.py
  5. +9
    -4
      lark/parser_frontends.py
  6. +4
    -2
      lark/parsers/earley.py

+ 4
- 0
examples/calc.py Näytä tiedosto

@@ -1,3 +1,7 @@
#
# This example shows how to write a basic calculator with variables.
#

from lark import Lark, InlineTransformer

calc_grammar = """


+ 42
- 0
examples/conf_nolex.py Näytä tiedosto

@@ -0,0 +1,42 @@
#
# This example demonstrates lex-less parsing using the earley_nolex frontend
#
# Using a lexer for configuration files is tricky, because values don't
# have to be surrounded by delimiters.
# In this example we skip lexing and let the Earley parser resolve the ambiguity.
#
# Future versions of lark will make it easier to write these kinds of grammars.
#

from lark import Lark, Transformer

# Lex-less grammar for a sectioned key=value configuration file.
# Every regexp terminal matches a single character; multi-character names and
# values are built by repetition (`/\w/*`, `/./+`) and left to the Earley
# parser to delimit.
#   start   - optional leading newlines, then one or more sections
#   section - "[" name "]", a newline run, then one or more items
#   item    - name "=" value, terminated by a newline run
#   name    - a letter or underscore followed by any number of word characters
#   value   - one or more characters matched by `.`
#   _nl     - one or more line endings, each an optional CR followed by LF
parser = Lark(r"""
start: _nl? section+
section: "[" name "]" _nl item+
item: name "=" value _nl
name: /[a-zA-Z_]/ /\w/*
value: /./+
_nl: (_CR? _LF)+

_CR : /\r/
_LF : /\n/
""", parser="earley_nolex")

class RestoreTokens(Transformer):
    """Rejoin the single-character pieces of `name` and `value` nodes.

    The grammar matches one character per terminal, so the children of these
    rules are sequences of one-character strings; joining them with the empty
    string yields the original text.
    """
    # Both rules use the same callback: concatenate the children.
    name = value = ''.join


def test():
    """Parse a small sample configuration and print the transformed tree."""
    config_text = """
[bla]

a=Hello
this="that",4
"""

    parse_tree = parser.parse(config_text)
    # Collapse the per-character children back into strings before printing.
    restored = RestoreTokens().transform(parse_tree)
    print(restored.pretty())

# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    test()

+ 9
- 5
examples/indented_tree.py Näytä tiedosto

@@ -1,8 +1,12 @@
"""This example demonstrates usage of the Indenter class.

Since indentation is context-sensitive, a postlex stage is introduced to manufacture INDENT/DEDENT tokens.
It is crucial for the indenter that the NL_type matches the spaces (and tabs) after the newline.
"""
#
# This example demonstrates usage of the Indenter class.
#
# Since indentation is context-sensitive, a postlex stage is introduced to
# manufacture INDENT/DEDENT tokens.
#
# It is crucial for the indenter that the NL_type matches
# the spaces (and tabs) after the newline.
#

from lark.lark import Lark
from lark.indenter import Indenter


+ 6
- 0
examples/json_parser.py Näytä tiedosto

@@ -1,3 +1,9 @@
#
# This example shows how to write a basic JSON parser
#
# The code is short and clear, but has good performance.
#

import sys

from lark import Lark, inline_args, Transformer


+ 9
- 4
lark/parser_frontends.py Näytä tiedosto

@@ -1,9 +1,10 @@
import re
import sre_parse

from .lexer import Lexer
from .parsers.lalr_analysis import GrammarAnalyzer

from .common import is_terminal
from .common import is_terminal, GrammarError
from .parsers import lalr_parser, earley

class WithLexer:
@@ -54,7 +55,7 @@ class Earley(WithLexer):
assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
return res[0]

class Earley2:
class Earley_NoLex:
def __init__(self, lexer_conf, parser_conf):
self.token_by_name = {t.name:t for t in lexer_conf.tokens}

@@ -68,7 +69,11 @@ class Earley2:
def _prepare_expansion(self, expansion):
for sym in expansion:
if is_terminal(sym):
yield sym, re.compile(self.token_by_name[sym].to_regexp())
regexp = self.token_by_name[sym].to_regexp()
width = sre_parse.parse(regexp).getwidth()
if not width == (1,1):
raise GrammarError('Dynamic lexing requires all tokens have the width 1 (%s is %s)' % (regexp, width))
yield sym, re.compile(regexp)
else:
yield sym

@@ -77,4 +82,4 @@ class Earley2:
assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
return res[0]

ENGINE_DICT = { 'lalr': LALR, 'earley': Earley }
ENGINE_DICT = { 'lalr': LALR, 'earley': Earley, 'earley_nolex': Earley_NoLex }

+ 4
- 2
lark/parsers/earley.py Näytä tiedosto

@@ -43,9 +43,11 @@ class State(object):
# PORT: originally tests regexp

if self.expect_symbol[1] is not None:
match = self.expect_symbol[1].match(stream, pos)
match = self.expect_symbol[1].match(inp)
if match:
return self.next_state(inp)

if self.expect_symbol[0] == inp.type:
elif self.expect_symbol[0] == inp.type:
return self.next_state(inp)

def consume_nonterminal(self, inp):


Ladataan…
Peruuta
Tallenna