Ver código fonte

Merge pull request #708 from MegaIng/always_accept_filter

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
Erez Shinan 4 anos atrás
committed by GitHub
pai
commit
e4001d5e11
Nenhuma chave conhecida encontrada para esta assinatura no banco de dados. ID da chave GPG: 4AEE18F83AFDEB23
6 arquivos alterados com 38 adições e 5 exclusões
  1. +2
    -0
      lark-stubs/__init__.pyi
  2. +3
    -1
      lark-stubs/lark.pyi
  3. +6
    -1
      lark/lark.py
  4. +2
    -2
      lark/load_grammar.py
  5. +2
    -1
      lark/tree_matcher.py
  6. +23
    -0
      tests/test_parser.py

+ 2
- 0
lark-stubs/__init__.pyi Ver arquivo

@@ -5,5 +5,7 @@ from .visitors import *
from .exceptions import *
from .lexer import *
from .lark import *
from logging import Logger as _Logger

logger: _Logger
__version__: str = ...

+ 3
- 1
lark-stubs/lark.pyi Ver arquivo

@@ -2,7 +2,7 @@

from typing import (
TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
Literal, Protocol,
Literal, Protocol, Iterable,
)
from .visitors import Transformer
from .lexer import Token, Lexer, TerminalDef
@@ -14,6 +14,8 @@ class PostLex(Protocol):

def process(self, stream: Iterator[Token]) -> Iterator[Token]:
...
always_accept: Iterable[str]


class LarkOptions:


+ 6
- 1
lark/lark.py Ver arquivo

@@ -269,8 +269,13 @@ class Lark(Serialize):
# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, self.source, re_module)

if self.options.postlex is not None:
terminals_to_keep = set(self.options.postlex.always_accept)
else:
terminals_to_keep = set()

# Compile the EBNF grammar into BNF
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep)

if self.options.edit_terminals:
for t in self.terminals:


+ 2
- 2
lark/load_grammar.py Ver arquivo

@@ -526,7 +526,7 @@ class Grammar:
self.rule_defs = rule_defs
self.ignore = ignore

def compile(self, start):
def compile(self, start, terminals_to_keep):
# We change the trees in-place (to support huge grammars)
# So deepcopy allows calling compile more than once.
term_defs = deepcopy(list(self.term_defs))
@@ -641,7 +641,7 @@ class Grammar:
used_terms = {t.name for r in compiled_rules
for t in r.expansion
if isinstance(t, Terminal)}
terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore)
terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
if unused:
logger.debug("Unused terminals: %s", [t.name for t in unused])



+ 2
- 1
lark/tree_matcher.py Ver arquivo

@@ -81,7 +81,8 @@ class TreeMatcher:
def __init__(self, parser):
# XXX TODO calling compile twice returns different results!
assert parser.options.maybe_placeholders == False
self.tokens, rules, _extra = parser.grammar.compile(parser.options.start)
# XXX TODO: we just ignore the potential existence of a postlexer
self.tokens, rules, _extra = parser.grammar.compile(parser.options.start, set())

self.rules_for_root = defaultdict(list)



+ 23
- 0
tests/test_parser.py Ver arquivo

@@ -1781,6 +1781,29 @@ def _make_parser_test(LEXER, PARSER):
%import bad_test.NUMBER
"""
self.assertRaises(IOError, _Lark, grammar)
@unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic")
def test_postlex_declare(self): # Note: this test does a lot. maybe split it up?
    # Postlexer that rewrites every 'A' token into a 'B' token.
    # `always_accept` tells the lexer to keep 'A' alive even though the
    # grammar itself never references it (it only uses the %declare'd B).
    class TestPostLexer:
        def process(self, stream):
            for token in stream:
                if token.type == 'A':
                    token.type = 'B'
                yield token

        always_accept = ('A',)

    # 'B' is only %declare'd: the postlexer is the sole producer of B tokens.
    parser = _Lark("""
start: B
A: "A"
%declare B
""", postlex=TestPostLexer())

    test_file = "A"
    tree = parser.parse(test_file)
    # The single 'A' in the input must arrive at the parser retyped as 'B'.
    self.assertEqual(tree.children, [Token('B', 'A')])

@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization(self):


Carregando…
Cancelar
Salvar