Conflicts: tests/test_parser.py
@@ -106,7 +106,7 @@ Lark is great at handling ambiguity. Here is the result of parsing the phrase "f
 - MyPy support using type stubs
 - And much more!
 
-See the full list of [features here](https://lark-parser.readthedocs.io/en/latest/features/)
+See the full list of [features here](https://lark-parser.readthedocs.io/en/latest/features.html)
 
 ### Comparison to other libraries
@@ -132,7 +132,7 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail
 |:--------|:----------|:----|:--------|:------------|:------------|:----------|:----------
 | **Lark** | Earley/LALR(1) | EBNF | Yes! | Yes! | Yes! | Yes! | Yes! (LALR only) |
 | [PLY](http://www.dabeaz.com/ply/) | LALR(1) | BNF | No | No | No | No | No |
-| [PyParsing](http://pyparsing.wikispaces.com/) | PEG | Combinators | No | No | No\* | No | No |
+| [PyParsing](https://github.com/pyparsing/pyparsing) | PEG | Combinators | No | No | No\* | No | No |
 | [Parsley](https://pypi.python.org/pypi/Parsley) | PEG | EBNF | No | No | No\* | No | No |
 | [Parsimonious](https://github.com/erikrose/parsimonious) | PEG | EBNF | Yes | No | No\* | No | No |
 | [ANTLR](https://github.com/antlr/antlr4) | LL(*) | EBNF | Yes | No | Yes? | Yes | No |
@@ -0,0 +1,79 @@
+"""
+Example-Driven Error Reporting
+==============================
+
+A demonstration of example-driven error reporting with the Earley parser
+(See also: error_reporting_lalr.py)
+"""
+from lark import Lark, UnexpectedInput
+
+from _json_parser import json_grammar   # Using the grammar from the json_parser example
+
+json_parser = Lark(json_grammar)
+
+
+class JsonSyntaxError(SyntaxError):
+    def __str__(self):
+        context, line, column = self.args
+        return '%s at line %s, column %s.\n\n%s' % (self.label, line, column, context)
+
+
+class JsonMissingValue(JsonSyntaxError):
+    label = 'Missing Value'
+
+
+class JsonMissingOpening(JsonSyntaxError):
+    label = 'Missing Opening'
+
+
+class JsonMissingClosing(JsonSyntaxError):
+    label = 'Missing Closing'
+
+
+class JsonMissingComma(JsonSyntaxError):
+    label = 'Missing Comma'
+
+
+class JsonTrailingComma(JsonSyntaxError):
+    label = 'Trailing Comma'
+
+
+def parse(json_text):
+    try:
+        j = json_parser.parse(json_text)
+    except UnexpectedInput as u:
+        exc_class = u.match_examples(json_parser.parse, {
+            JsonMissingOpening: ['{"foo": ]}',
+                                 '{"foor": }}',
+                                 '{"foo": }'],
+            JsonMissingClosing: ['{"foo": [}',
+                                 '{',
+                                 '{"a": 1',
+                                 '[1'],
+            JsonMissingComma: ['[1 2]',
+                               '[false 1]',
+                               '["b" 1]',
+                               '{"a":true 1:4}',
+                               '{"a":1 1:4}',
+                               '{"a":"b" 1:4}'],
+            JsonTrailingComma: ['[,]',
+                                '[1,]',
+                                '[1,2,]',
+                                '{"foo":1,}',
+                                '{"foo":false,"bar":true,}']
+        }, use_accepts=True)
+        if not exc_class:
+            raise
+        raise exc_class(u.get_context(json_text), u.line, u.column)
+
+
+def test():
+    try:
+        parse('{"example1": "value"')
+    except JsonMissingClosing as e:
+        print(e)
+
+    try:
+        parse('{"example2": ] ')
+    except JsonMissingOpening as e:
+        print(e)
+
+
+if __name__ == '__main__':
+    test()
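
A rough sketch of driving the example above from a separate script; the module path is an assumption, and the exact column in the printed context depends on where Earley detects the error:

```python
# Hypothetical usage of the example file added above; assumes it is run from
# the same directory, so that `_json_parser` and `error_reporting_earley`
# are both importable.
from error_reporting_earley import parse, JsonMissingComma

try:
    parse('[1 2]')      # one of the "Missing Comma" training examples
except JsonMissingComma as e:
    print(e)            # "Missing Comma at line 1, column ..." plus context
```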
@@ -3,7 +3,7 @@ Example-Driven Error Reporting
 ==============================
 
 A demonstration of example-driven error reporting with the LALR parser
+(See also: error_reporting_earley.py)
 """
 from lark import Lark, UnexpectedInput
@@ -63,7 +63,7 @@ class Lark:
         *,
         start: Union[None, str, List[str]] = "start",
         parser: Literal["earley", "lalr", "cyk"] = "auto",
-        lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Lexer] = "auto",
+        lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto",
         transformer: Optional[Transformer] = None,
         postlex: Optional[PostLex] = None,
         ambiguity: Literal["explicit", "resolve"] = "resolve",
@@ -85,6 +85,9 @@ class Token(str):
     end_column: int
     end_pos: int
 
+    def __init__(self, type_: str, value: Any, pos_in_stream: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None):
+        ...
+
     def update(self, type_: Optional[str] = None, value: Optional[str] = None) -> Token:
         ...
@@ -136,7 +139,7 @@ class TraditionalLexer(Lexer):
     def lex(self, stream: str) -> Iterator[Token]:
         ...
 
-    def next_token(self, lex_state: Any) -> Token:
+    def next_token(self, lex_state: Any, parser_state: Any = None) -> Token:
         ...
 
 class ContextualLexer(Lexer):
@@ -3,7 +3,7 @@ from .tree import Tree
 from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive
 from .visitors import InlineTransformer, inline_args  # XXX Deprecated
 from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
-                         UnexpectedInput, UnexpectedCharacters, LarkError)
+                         UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError)
 from .lexer import Token
 from .lark import Lark
@@ -19,14 +19,6 @@ class LexError(LarkError):
     pass
 
-class UnexpectedEOF(ParseError):
-    def __init__(self, expected):
-        self.expected = expected
-
-        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
-        super(UnexpectedEOF, self).__init__(message)
-
 class UnexpectedInput(LarkError):
     """UnexpectedInput Error.
@@ -47,6 +39,7 @@ class UnexpectedInput(LarkError):
         The parser doesn't hold a copy of the text it has to parse,
         so you have to provide it again
         """
+        assert self.pos_in_stream is not None, self
         pos = self.pos_in_stream
         start = max(pos - span, 0)
         end = pos + span
@@ -91,7 +84,7 @@ class UnexpectedInput(LarkError):
                     parse_fn(malformed)
                 except UnexpectedInput as ut:
                     if ut.state == self.state:
-                        if use_accepts and ut.accepts != self.accepts:
+                        if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts:
                             logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                          (self.state, self.accepts, ut.accepts, i, j))
                             continue
@@ -108,15 +101,29 @@ class UnexpectedInput(LarkError):
                         except AttributeError:
                             pass
-                        if not candidate[0]:
+                        if candidate[0] is None:
                             logger.debug("Same State match at example [%s][%s]" % (i, j))
                             candidate = label, False
 
         return candidate[0]
 
 
+class UnexpectedEOF(ParseError, UnexpectedInput):
+    def __init__(self, expected, state=None):
+        self.expected = expected
+        self.state = state
+        from .lexer import Token
+        self.token = Token("<EOF>", "")  #, line=-1, column=-1, pos_in_stream=-1)
+        self.pos_in_stream = -1
+        self.line = -1
+        self.column = -1
+
+        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
+        super(UnexpectedEOF, self).__init__(message)
+
+
 class UnexpectedCharacters(LexError, UnexpectedInput):
     def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
+        # TODO considered_tokens and allowed can be figured out using state
         self.line = line
         self.column = column
         self.pos_in_stream = lex_pos
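
Because `UnexpectedEOF` now derives from `UnexpectedInput` and carries placeholder position attributes, helpers like `get_context()` work on end-of-input errors as well. A minimal sketch, assuming a toy grammar (Earley is Lark's default parser):

```python
from lark import Lark
from lark.exceptions import UnexpectedEOF

p = Lark(r'start: "a" "b" "c"')   # assumed toy grammar

try:
    p.parse("ab")                 # input ends before the expected "c"
except UnexpectedEOF as e:
    print(e.expected)             # terminals the parser was still waiting for
    print(e.get_context("ab"))    # now available, since UnexpectedEOF is an UnexpectedInput
```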
@@ -147,7 +154,8 @@ class UnexpectedToken(ParseError, UnexpectedInput):
     see: :ref:`ParserPuppet`.
     """
-    def __init__(self, token, expected, considered_rules=None, state=None, puppet=None):
+    def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, token_history=None):
+        # TODO considered_rules and expected can be figured out using state
         self.line = getattr(token, 'line', '?')
         self.column = getattr(token, 'column', '?')
         self.pos_in_stream = getattr(token, 'pos_in_stream', None)
@@ -157,6 +165,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
         self.expected = expected     # XXX deprecate? `accepts` is better
         self.considered_rules = considered_rules
         self.puppet = puppet
+        self.token_history = token_history
 
         # TODO Only calculate `accepts()` when we need to display it to the user
         # This will improve performance when doing automatic error handling
@@ -166,6 +175,9 @@ class UnexpectedToken(ParseError, UnexpectedInput):
                    "Expected one of: \n\t* %s\n"
                    % (token, self.line, self.column, '\n\t* '.join(self.accepts or self.expected)))
+        if self.token_history:
+            message += "Previous tokens: %r\n" % token_history
+
         super(UnexpectedToken, self).__init__(message)
@@ -338,12 +338,12 @@ class TraditionalLexer(Lexer):
         if m:
             return m.group(0), type_from_index[m.lastindex]
 
-    def lex(self, state, _parser_state):
+    def lex(self, state, parser_state):
         with suppress(EOFError):
             while True:
-                yield self.next_token(state)
+                yield self.next_token(state, parser_state)
 
-    def next_token(self, lex_state):
+    def next_token(self, lex_state, parser_state=None):
         line_ctr = lex_state.line_ctr
         while line_ctr.char_pos < len(lex_state.text):
             res = self.match(lex_state.text, line_ctr.char_pos)
@@ -352,7 +352,8 @@ class TraditionalLexer(Lexer):
                 if not allowed:
                     allowed = {"<END-OF-FILE>"}
                 raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
-                                           allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token])
+                                           allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
+                                           state=parser_state)
 
             value, type_ = res
@@ -428,14 +429,14 @@ class ContextualLexer(Lexer):
         try:
             while True:
                 lexer = self.lexers[parser_state.position]
-                yield lexer.next_token(lexer_state)
+                yield lexer.next_token(lexer_state, parser_state)
         except EOFError:
             pass
         except UnexpectedCharacters as e:
             # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context.
             # This tests the input against the global context, to provide a nicer error.
-            token = self.root_lexer.next_token(lexer_state)
-            raise UnexpectedToken(token, e.allowed, state=parser_state.position)
+            token = self.root_lexer.next_token(lexer_state, parser_state)
+            raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[lexer_state.last_token])
 
 
 class LexerThread:
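
With the parser state threaded into the contextual lexer, a terminal that exists in the grammar but is illegal at the current position is reported as an `UnexpectedToken` that also records the preceding token. A small sketch; the grammar here is an assumption, not part of the patch:

```python
from lark import Lark
from lark.exceptions import UnexpectedToken

p = Lark(r'''
    start: "a" "b"
         | "c"
''', parser='lalr')       # LALR uses the contextual lexer by default

try:
    p.parse("ac")           # "c" is a known terminal, but not allowed after "a"
except UnexpectedToken as e:
    print(e.token)          # the offending token
    print(e.token_history)  # the newly recorded preceding token(s), e.g. the "a"
```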
@@ -179,9 +179,6 @@ class Earley_WithLexer(WithLexer):
         tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None
         self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class)
 
-    def make_lexer(self, text):
-        return WithLexer.make_lexer(self, text).lex(None)
-
     def match(self, term, token):
         return term.name == token.type
@@ -146,7 +146,7 @@ class Parser:
                         column.add(new_item)
                         items.append(new_item)
 
-    def _parse(self, stream, columns, to_scan, start_symbol=None):
+    def _parse(self, lexer, columns, to_scan, start_symbol=None):
         def is_quasi_complete(item):
             if item.is_complete:
                 return True
@@ -245,7 +245,7 @@ class Parser:
             if not next_set and not next_to_scan:
                 expect = {i.expect.name for i in to_scan}
-                raise UnexpectedToken(token, expect, considered_rules = set(to_scan))
+                raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan))
 
             return next_to_scan
@@ -261,20 +261,24 @@ class Parser:
         # Completions will be added to the SPPF tree, and predictions will be recursively
         # processed down to terminals/empty nodes to be added to the scanner for the next
         # step.
+        expects = {i.expect for i in to_scan}
         i = 0
-        for token in stream:
+        for token in lexer.lex(expects):
             self.predict_and_complete(i, to_scan, columns, transitives)
             to_scan = scan(i, token, to_scan)
             i += 1
+            expects.clear()
+            expects |= {i.expect for i in to_scan}
 
         self.predict_and_complete(i, to_scan, columns, transitives)
 
         ## Column is now the final column in the parse.
         assert i == len(columns)-1
         return to_scan
 
-    def parse(self, stream, start):
+    def parse(self, lexer, start):
         assert start, start
         start_symbol = NonTerminal(start)
@@ -291,7 +295,7 @@ class Parser:
             else:
                 columns[0].add(item)
 
-        to_scan = self._parse(stream, columns, to_scan, start_symbol)
+        to_scan = self._parse(lexer, columns, to_scan, start_symbol)
 
         # If the parse was successful, the start
         # symbol should have been completed in the last step of the Earley cycle, and will be in
@@ -299,7 +303,7 @@ class Parser:
         solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
         if not solutions:
             expected_terminals = [t.expect for t in to_scan]
-            raise UnexpectedEOF(expected_terminals)
+            raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))
 
         if self.debug:
             from .earley_forest import ForestToPyDotVisitor
@@ -3,7 +3,7 @@
 # Author: Erez Shinan (2017)
 # Email : erezshin@gmail.com
 from copy import deepcopy, copy
-from ..exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+from ..exceptions import UnexpectedInput, UnexpectedToken
 from ..lexer import Token
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
@@ -62,6 +62,12 @@ class ParserState:
     def position(self):
         return self.state_stack[-1]
 
+    # Necessary for match_examples() to work
+    def __eq__(self, other):
+        if not isinstance(other, ParserState):
+            return False
+        return self.position == other.position
+
     def __copy__(self):
         return type(self)(
             self.parse_conf,
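
The `__eq__` above exists because `match_examples()` compares the `state` attribute of two independently raised errors, and with this change LALR errors carry a `ParserState` object rather than a plain state id, so value equality is required. A minimal sketch of what that enables (grammar and labels are assumptions):

```python
from lark import Lark, UnexpectedInput

p = Lark(r'start: "a" "b" "c"', parser='lalr')   # assumed toy grammar

def diagnose(text):
    try:
        p.parse(text)
    except UnexpectedInput as u:
        # Each example is re-parsed, producing a *different* ParserState object
        # that must still compare equal to the original error's state.
        return u.match_examples(p.parse, {
            'input ended too early': ['ab'],
            'unexpected first character': ['xbc'],
        })

print(diagnose('ab'))   # expected: 'input ended too early'
```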
@@ -86,7 +92,7 @@ class ParserState:
                 action, arg = states[state][token.type]
             except KeyError:
                 expected = {s for s in states[state].keys() if s.isupper()}
-                raise UnexpectedToken(token, expected, state=state, puppet=None)
+                raise UnexpectedToken(token, expected, state=self, puppet=None)
 
             assert arg != end_state
@@ -113,7 +113,8 @@ class Parser(BaseParser):
             del delayed_matches[i+1]    # No longer needed, so unburden memory
 
             if not next_set and not delayed_matches and not next_to_scan:
-                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan))
+                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan},
+                                           set(to_scan), state=frozenset(i.s for i in to_scan))
 
             return next_to_scan
@@ -69,6 +69,14 @@ def parse_rulename(s):
     return name, args
 
 
+class ChildrenLexer:
+    def __init__(self, children):
+        self.children = children
+
+    def lex(self, parser_state):
+        return self.children
+
+
 class TreeMatcher:
     """Match the elements of a tree node, based on an ontology
     provided by a Lark grammar.
@@ -173,6 +181,6 @@ class TreeMatcher:
             self._parser_cache[rulename] = parser
 
         # find a full derivation
-        unreduced_tree = parser.parse(tree.children, rulename)
+        unreduced_tree = parser.parse(ChildrenLexer(tree.children), rulename)
         assert unreduced_tree.data == rulename
         return unreduced_tree
@@ -29,8 +29,8 @@ setup(
     description = "a modern parsing library",
     license = "MIT",
     keywords = "Earley LALR parser parsing ast",
-    url = "https://github.com/erezsh/lark",
-    download_url = "https://github.com/erezsh/lark/tarball/master",
+    url = "https://github.com/lark-parser/lark",
+    download_url = "https://github.com/lark-parser/lark/tarball/master",
 
     long_description='''
 Lark is a modern general-purpose parsing library for Python.
@@ -9,6 +9,7 @@ from .test_tools import TestStandalone
 from .test_cache import TestCache
 from .test_grammar import TestGrammar
 from .test_reconstructor import TestReconstructor
+from .test_tree_forest_transformer import TestTreeForestTransformer
 
 try:
     from .test_nearley.test_nearley import TestNearley
@@ -322,7 +322,7 @@ class TestParsers(unittest.TestCase):
     def test_alias(self):
         Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)
 
     def test_backwards_custom_lexer(self):
         class OldCustomLexer(Lexer):
             def __init__(self, lexer_conf):
@@ -330,12 +330,12 @@
             def lex(self, text):
                 yield Token('A', 'A')
 
         p = Lark("""
         start: A
         %declare A
         """, parser='lalr', lexer=OldCustomLexer)
 
         r = p.parse('')
         self.assertEqual(r, Tree('start', [Token('A', 'A')]))
@@ -2361,6 +2361,31 @@ def _make_parser_test(LEXER, PARSER):
             self.assertEqual(a.line, 1)
             self.assertEqual(b.line, 2)
 
+        @unittest.skipIf(PARSER=='cyk', "match_examples() not supported for CYK")
+        def test_match_examples(self):
+            p = _Lark(r"""
+                start: "a" "b" "c"
+                """)
+
+            def match_error(s):
+                try:
+                    _ = p.parse(s)
+                except UnexpectedInput as u:
+                    return u.match_examples(p.parse, {
+                        0: ['abe'],
+                        1: ['ab'],
+                        2: ['cbc', 'dbc'],
+                    })
+                assert False
+
+            assert match_error("abe") == 0
+            assert match_error("ab") == 1
+            assert match_error("bbc") == 2
+            assert match_error("cbc") == 2
+            self.assertEqual( match_error("dbc"), 2 )
+            self.assertEqual( match_error("ebc"), 2 )
+
         @unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
         def test_unicode_class(self):
             "Tests that character classes from the `regex` module work correctly."