@@ -5,5 +5,7 @@ from .visitors import *
 from .exceptions import *
 from .lexer import *
 from .lark import *
+from logging import Logger as _Logger
 
+logger: _Logger
 __version__: str = ...
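
The `logger` annotated above is the package-level `logging.Logger` that lark exposes at runtime; the `logger.debug("Unused terminals: ...")` call later in this diff writes to it. A minimal sketch of enabling it, using only the standard `logging` API:

    import logging
    from lark import logger

    # Lower the threshold so DEBUG-level records, such as
    # "Unused terminals: [...]", actually get emitted and shown.
    logging.basicConfig(level=logging.DEBUG)
    logger.setLevel(logging.DEBUG)
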
@@ -2,7 +2,7 @@
 from typing import (
     TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
-    Literal, Protocol,
+    Literal, Protocol, Iterable,
 )
 from .visitors import Transformer
 from .lexer import Token, Lexer, TerminalDef
 
@@ -14,6 +14,8 @@ class PostLex(Protocol):
 
     def process(self, stream: Iterator[Token]) -> Iterator[Token]:
         ...
 
+    always_accept: Iterable[str]
+
 
 class LarkOptions:
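
With `always_accept` now part of the `PostLex` protocol, a postlexer can declare terminals it needs to receive even when no grammar rule references them. A minimal sketch of a conforming implementation (the class and the `_NEWLINE` terminal are hypothetical, not part of lark):

    from typing import Iterable, Iterator
    from lark.lexer import Token

    class NewlineFilter:
        # Terminals this postlexer must see even if the grammar never
        # uses them; the compiler keeps these instead of discarding
        # them as unused (see the load_grammar change below).
        always_accept: Iterable[str] = ('_NEWLINE',)

        def process(self, stream: Iterator[Token]) -> Iterator[Token]:
            # Swallow newline tokens before the parser sees them.
            for tok in stream:
                if tok.type != '_NEWLINE':
                    yield tok

This is the same shape as lark's bundled `Indenter`, which likewise advertises the newline terminal it consumes through `always_accept`.
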
@@ -269,8 +269,13 @@ class Lark(Serialize):
         # Parse the grammar file and compose the grammars (TODO)
         self.grammar = load_grammar(grammar, self.source, re_module)
 
+        if self.options.postlex is not None:
+            terminals_to_keep = set(self.options.postlex.always_accept)
+        else:
+            terminals_to_keep = set()
+
         # Compile the EBNF grammar into BNF
-        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
+        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep)
 
         if self.options.edit_terminals:
             for t in self.terminals:
@@ -526,7 +526,7 @@ class Grammar:
         self.rule_defs = rule_defs
         self.ignore = ignore
 
-    def compile(self, start):
+    def compile(self, start, terminals_to_keep):
        # We change the trees in-place (to support huge grammars)
        # So deepcopy allows calling compile more than once.
        term_defs = deepcopy(list(self.term_defs))
@@ -641,7 +641,7 @@ class Grammar:
        used_terms = {t.name for r in compiled_rules
                        for t in r.expansion
                        if isinstance(t, Terminal)}
-        terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore)
+        terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
 
        if unused:
            logger.debug("Unused terminals: %s", [t.name for t in unused])
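
For context, `classify_bool` (defined in `lark.utils`) partitions a sequence by a predicate, so the added `or t.name in terminals_to_keep` clause moves postlex-only terminals from the `unused` bucket into the kept one. A sketch of its behavior, re-implemented here purely for illustration:

    def classify_bool(seq, pred):
        # Split seq into (matching, non-matching), preserving order.
        true_elems, false_elems = [], []
        for elem in seq:
            (true_elems if pred(elem) else false_elems).append(elem)
        return true_elems, false_elems

    kept, unused = classify_bool([1, 2, 3, 4], lambda n: n % 2 == 0)
    assert kept == [2, 4] and unused == [1, 3]
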
@@ -81,7 +81,8 @@ class TreeMatcher:
     def __init__(self, parser):
         # XXX TODO calling compile twice returns different results!
         assert parser.options.maybe_placeholders == False
-        self.tokens, rules, _extra = parser.grammar.compile(parser.options.start)
+        # XXX TODO: we just ignore the potential existence of a postlexer
+        self.tokens, rules, _extra = parser.grammar.compile(parser.options.start, set())
 
         self.rules_for_root = defaultdict(list)
 
@@ -1781,6 +1781,29 @@ def _make_parser_test(LEXER, PARSER):
                 %import bad_test.NUMBER
                 """
             self.assertRaises(IOError, _Lark, grammar)
 
+        @unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic")
+        def test_postlex_declare(self):  # Note: this test does a lot. maybe split it up?
+            class TestPostLexer:
+                def process(self, stream):
+                    for t in stream:
+                        if t.type == 'A':
+                            t.type = 'B'
+                            yield t
+                        else:
+                            yield t
+
+                always_accept = ('A',)
+
+            parser = _Lark("""
+            start: B
+            A: "A"
+            %declare B
+            """, postlex=TestPostLexer())
+
+            test_file = "A"
+            tree = parser.parse(test_file)
+            self.assertEqual(tree.children, [Token('B', 'A')])
+
         @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
         def test_earley_prioritization(self):
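
Taken together, these changes let a grammar `%declare` a terminal that only the postlexer ever produces. Against the public API, the pattern the new test exercises looks roughly like this (a sketch assuming this patch is applied; `parser='lalr'` stands in for any combination with a non-dynamic lexer, and the class name is made up):

    from lark import Lark, Token

    class TypeRenamer:
        # 'A' is matched by the lexer but used by no rule, so it must be
        # listed here or grammar compilation would prune it as unused.
        always_accept = ('A',)

        def process(self, stream):
            for t in stream:
                if t.type == 'A':
                    t.type = 'B'  # rewrite into the %declare'd terminal
                yield t

    parser = Lark("""
        start: B
        A: "A"
        %declare B
    """, parser='lalr', postlex=TypeRenamer())

    assert parser.parse("A").children == [Token('B', 'A')]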