
Merge branch 'master' of https://github.com/lark-parser/lark into earley_custom

 Conflicts:
	tests/test_parser.py
MegaIng1, 3 years ago
commit 26e03b9ff8
16 changed files with 179 additions and 42 deletions
  1. README.md (+2 / -2)
  2. examples/advanced/error_reporting_earley.py (+79 / -0)
  3. examples/advanced/error_reporting_lalr.py (+1 / -1)
  4. lark-stubs/lark.pyi (+1 / -1)
  5. lark-stubs/lexer.pyi (+4 / -1)
  6. lark/__init__.py (+1 / -1)
  7. lark/exceptions.py (+23 / -11)
  8. lark/lexer.py (+8 / -7)
  9. lark/parser_frontends.py (+0 / -3)
  10. lark/parsers/earley.py (+10 / -6)
  11. lark/parsers/lalr_parser.py (+8 / -2)
  12. lark/parsers/xearley.py (+2 / -1)
  13. lark/tree_matcher.py (+9 / -1)
  14. setup.py (+2 / -2)
  15. tests/__main__.py (+1 / -0)
  16. tests/test_parser.py (+28 / -3)

README.md (+2 / -2)

@@ -106,7 +106,7 @@ Lark is great at handling ambiguity. Here is the result of parsing the phrase "f
 - MyPy support using type stubs
 - And much more!
 
-See the full list of [features here](https://lark-parser.readthedocs.io/en/latest/features/)
+See the full list of [features here](https://lark-parser.readthedocs.io/en/latest/features.html)
 
 
 ### Comparison to other libraries
@@ -132,7 +132,7 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail
 |:--------|:----------|:----|:--------|:------------|:------------|:----------|:----------
 | **Lark** | Earley/LALR(1) | EBNF | Yes! | Yes! | Yes! | Yes! | Yes! (LALR only) |
 | [PLY](http://www.dabeaz.com/ply/) | LALR(1) | BNF | No | No | No | No | No |
-| [PyParsing](http://pyparsing.wikispaces.com/) | PEG | Combinators | No | No | No\* | No | No |
+| [PyParsing](https://github.com/pyparsing/pyparsing) | PEG | Combinators | No | No | No\* | No | No |
 | [Parsley](https://pypi.python.org/pypi/Parsley) | PEG | EBNF | No | No | No\* | No | No |
 | [Parsimonious](https://github.com/erikrose/parsimonious) | PEG | EBNF | Yes | No | No\* | No | No |
 | [ANTLR](https://github.com/antlr/antlr4) | LL(*) | EBNF | Yes | No | Yes? | Yes | No |


examples/advanced/error_reporting_earley.py (+79 / -0)

@@ -0,0 +1,79 @@
+"""
+Example-Driven Error Reporting
+==============================
+
+A demonstration of example-driven error reporting with the Earley parser
+(See also: error_reporting_lalr.py)
+"""
+from lark import Lark, UnexpectedInput
+
+from _json_parser import json_grammar   # Using the grammar from the json_parser example
+
+json_parser = Lark(json_grammar)
+
+class JsonSyntaxError(SyntaxError):
+    def __str__(self):
+        context, line, column = self.args
+        return '%s at line %s, column %s.\n\n%s' % (self.label, line, column, context)
+
+class JsonMissingValue(JsonSyntaxError):
+    label = 'Missing Value'
+
+class JsonMissingOpening(JsonSyntaxError):
+    label = 'Missing Opening'
+
+class JsonMissingClosing(JsonSyntaxError):
+    label = 'Missing Closing'
+
+class JsonMissingComma(JsonSyntaxError):
+    label = 'Missing Comma'
+
+class JsonTrailingComma(JsonSyntaxError):
+    label = 'Trailing Comma'
+
+
+def parse(json_text):
+    try:
+        j = json_parser.parse(json_text)
+    except UnexpectedInput as u:
+        exc_class = u.match_examples(json_parser.parse, {
+            JsonMissingOpening: ['{"foo": ]}',
+                                 '{"foor": }}',
+                                 '{"foo": }'],
+            JsonMissingClosing: ['{"foo": [}',
+                                 '{',
+                                 '{"a": 1',
+                                 '[1'],
+            JsonMissingComma: ['[1 2]',
+                               '[false 1]',
+                               '["b" 1]',
+                               '{"a":true 1:4}',
+                               '{"a":1 1:4}',
+                               '{"a":"b" 1:4}'],
+            JsonTrailingComma: ['[,]',
+                                '[1,]',
+                                '[1,2,]',
+                                '{"foo":1,}',
+                                '{"foo":false,"bar":true,}']
+        }, use_accepts=True)
+        if not exc_class:
+            raise
+        raise exc_class(u.get_context(json_text), u.line, u.column)
+
+
+def test():
+    try:
+        parse('{"example1": "value"')
+    except JsonMissingClosing as e:
+        print(e)
+
+    try:
+        parse('{"example2": ] ')
+    except JsonMissingOpening as e:
+        print(e)
+
+
+if __name__ == '__main__':
+    test()



examples/advanced/error_reporting_lalr.py (+1 / -1)

@@ -3,7 +3,7 @@ Example-Driven Error Reporting
 ==============================
 
 A demonstration of example-driven error reporting with the LALR parser
+(See also: error_reporting_earley.py)
 """
 from lark import Lark, UnexpectedInput




lark-stubs/lark.pyi (+1 / -1)

@@ -63,7 +63,7 @@ class Lark:
         *,
         start: Union[None, str, List[str]] = "start",
         parser: Literal["earley", "lalr", "cyk"] = "auto",
-        lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Lexer] = "auto",
+        lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto",
         transformer: Optional[Transformer] = None,
         postlex: Optional[PostLex] = None,
         ambiguity: Literal["explicit", "resolve"] = "resolve",
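
The stub change above reflects that `lexer=` takes a lexer *class* (which Lark instantiates with its own lexer configuration) rather than a ready-made instance. Below is a minimal sketch of that usage; the `MyLexer` name and the toy grammar are illustrative, not part of this commit:

```python
from lark import Lark, Token
from lark.lexer import Lexer

class MyLexer(Lexer):                  # hypothetical custom lexer, passed as a class
    def __init__(self, lexer_conf):    # Lark calls this with its lexer configuration
        pass

    def lex(self, data):               # yield Token objects for the parser to consume
        for ch in data:
            yield Token('A', ch)

parser = Lark("""
    start: A+
    %declare A
""", parser='lalr', lexer=MyLexer)

print(parser.parse("ab"))              # a 'start' tree holding two A tokens
```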


lark-stubs/lexer.pyi (+4 / -1)

@@ -85,6 +85,9 @@ class Token(str):
     end_column: int
     end_pos: int
 
+    def __init__(self, type_: str, value: Any, pos_in_stream: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None):
+        ...
+
     def update(self, type_: Optional[str] = None, value: Optional[str] = None) -> Token:
         ...
 
@@ -136,7 +139,7 @@ class TraditionalLexer(Lexer):
     def lex(self, stream: str) -> Iterator[Token]:
         ...
 
-    def next_token(self, lex_state: Any) -> Token:
+    def next_token(self, lex_state: Any, parser_state: Any = None) -> Token:
        ...
 
 class ContextualLexer(Lexer):
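
The added `__init__` stub mirrors `Token`'s real constructor, so type checkers accept tokens built by hand (for example inside a custom lexer) with optional position data. A tiny sketch, not taken from the commit:

```python
from lark import Token

tok = Token('WORD', 'hello', pos_in_stream=0, line=1, column=1)
print(tok, tok.type, tok.line)   # hello WORD 1
```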


lark/__init__.py (+1 / -1)

@@ -3,7 +3,7 @@ from .tree import Tree
 from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive
 from .visitors import InlineTransformer, inline_args  # XXX Deprecated
 from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
-                         UnexpectedInput, UnexpectedCharacters, LarkError)
+                         UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError)
 from .lexer import Token
 from .lark import Lark




lark/exceptions.py (+23 / -11)

@@ -19,14 +19,6 @@ class LexError(LarkError):
     pass
 
 
-class UnexpectedEOF(ParseError):
-    def __init__(self, expected):
-        self.expected = expected
-
-        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
-        super(UnexpectedEOF, self).__init__(message)
-
-
 class UnexpectedInput(LarkError):
     """UnexpectedInput Error.
 
@@ -47,6 +39,7 @@ class UnexpectedInput(LarkError):
         The parser doesn't hold a copy of the text it has to parse,
         so you have to provide it again
         """
+        assert self.pos_in_stream is not None, self
         pos = self.pos_in_stream
         start = max(pos - span, 0)
         end = pos + span
@@ -91,7 +84,7 @@ class UnexpectedInput(LarkError):
                     parse_fn(malformed)
                 except UnexpectedInput as ut:
                     if ut.state == self.state:
-                        if use_accepts and ut.accepts != self.accepts:
+                        if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts:
                             logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                          (self.state, self.accepts, ut.accepts, i, j))
                             continue
@@ -108,15 +101,29 @@ class UnexpectedInput(LarkError):
 
                         except AttributeError:
                             pass
-                        if not candidate[0]:
+                        if candidate[0] is None:
                             logger.debug("Same State match at example [%s][%s]" % (i, j))
                             candidate = label, False
 
         return candidate[0]
 
 
+class UnexpectedEOF(ParseError, UnexpectedInput):
+    def __init__(self, expected, state=None):
+        self.expected = expected
+        self.state = state
+        from .lexer import Token
+        self.token = Token("<EOF>", "") #, line=-1, column=-1, pos_in_stream=-1)
+        self.pos_in_stream = -1
+        self.line = -1
+        self.column = -1
+
+        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
+        super(UnexpectedEOF, self).__init__(message)
+
+
 class UnexpectedCharacters(LexError, UnexpectedInput):
     def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
+        # TODO considered_tokens and allowed can be figured out using state
         self.line = line
         self.column = column
         self.pos_in_stream = lex_pos
@@ -147,7 +154,8 @@ class UnexpectedToken(ParseError, UnexpectedInput):
 
     see: :ref:`ParserPuppet`.
     """
-    def __init__(self, token, expected, considered_rules=None, state=None, puppet=None):
+    def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, token_history=None):
+        # TODO considered_rules and expected can be figured out using state
         self.line = getattr(token, 'line', '?')
         self.column = getattr(token, 'column', '?')
         self.pos_in_stream = getattr(token, 'pos_in_stream', None)
@@ -157,6 +165,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
         self.expected = expected     # XXX deprecate? `accepts` is better
         self.considered_rules = considered_rules
         self.puppet = puppet
+        self.token_history = token_history
 
         # TODO Only calculate `accepts()` when we need to display it to the user
         # This will improve performance when doing automatic error handling
@@ -166,6 +175,9 @@ class UnexpectedToken(ParseError, UnexpectedInput):
                    "Expected one of: \n\t* %s\n"
                    % (token, self.line, self.column, '\n\t* '.join(self.accepts or self.expected)))
 
+        if self.token_history:
+            message += "Previous tokens: %r\n" % token_history
+
         super(UnexpectedToken, self).__init__(message)
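
The practical effect of moving `UnexpectedEOF` under `UnexpectedInput` (and giving it a placeholder `token`, `pos_in_stream`, `line` and `column`) is that a premature end of input from the Earley parser can now be handled by the same code path as other parse errors. A rough sketch of that behaviour; the toy grammar is an assumption, not part of the commit:

```python
from lark import Lark, UnexpectedInput, UnexpectedEOF   # UnexpectedEOF is now exported too

parser = Lark('start: "a" "b"')          # Earley by default; "b" is never supplied below

try:
    parser.parse("a")
except UnexpectedInput as e:             # also catches UnexpectedEOF after this change
    print(type(e).__name__)              # UnexpectedEOF
    print(e.get_context("a"))            # works because pos_in_stream is set (to -1)
```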






lark/lexer.py (+8 / -7)

@@ -338,12 +338,12 @@ class TraditionalLexer(Lexer):
         if m:
             return m.group(0), type_from_index[m.lastindex]
 
-    def lex(self, state, _parser_state):
+    def lex(self, state, parser_state):
         with suppress(EOFError):
             while True:
-                yield self.next_token(state)
+                yield self.next_token(state, parser_state)
 
-    def next_token(self, lex_state):
+    def next_token(self, lex_state, parser_state=None):
         line_ctr = lex_state.line_ctr
         while line_ctr.char_pos < len(lex_state.text):
             res = self.match(lex_state.text, line_ctr.char_pos)
@@ -352,7 +352,8 @@ class TraditionalLexer(Lexer):
                 if not allowed:
                     allowed = {"<END-OF-FILE>"}
                 raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
-                                           allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token])
+                                           allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
+                                           state=parser_state)
 
             value, type_ = res
 
@@ -428,14 +429,14 @@ class ContextualLexer(Lexer):
         try:
             while True:
                 lexer = self.lexers[parser_state.position]
-                yield lexer.next_token(lexer_state)
+                yield lexer.next_token(lexer_state, parser_state)
         except EOFError:
             pass
         except UnexpectedCharacters as e:
             # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context.
             # This tests the input against the global context, to provide a nicer error.
-            token = self.root_lexer.next_token(lexer_state)
-            raise UnexpectedToken(token, e.allowed, state=parser_state.position)
+            token = self.root_lexer.next_token(lexer_state, parser_state)
+            raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[lexer_state.last_token])
 
 
 class LexerThread:


lark/parser_frontends.py (+0 / -3)

@@ -179,9 +179,6 @@ class Earley_WithLexer(WithLexer):
         tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None
         self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class)
 
-    def make_lexer(self, text):
-        return WithLexer.make_lexer(self, text).lex(None)
-
     def match(self, term, token):
         return term.name == token.type




lark/parsers/earley.py (+10 / -6)

@@ -146,7 +146,7 @@ class Parser:
                     column.add(new_item)
                     items.append(new_item)
 
-    def _parse(self, stream, columns, to_scan, start_symbol=None):
+    def _parse(self, lexer, columns, to_scan, start_symbol=None):
         def is_quasi_complete(item):
             if item.is_complete:
                 return True
@@ -245,7 +245,7 @@ class Parser:
 
             if not next_set and not next_to_scan:
                 expect = {i.expect.name for i in to_scan}
-                raise UnexpectedToken(token, expect, considered_rules = set(to_scan))
+                raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan))
 
             return next_to_scan
 
@@ -261,20 +261,24 @@ class Parser:
         # Completions will be added to the SPPF tree, and predictions will be recursively
        # processed down to terminals/empty nodes to be added to the scanner for the next
        # step.
+        expects = {i.expect for i in to_scan}
         i = 0
-        for token in stream:
+        for token in lexer.lex(expects):
             self.predict_and_complete(i, to_scan, columns, transitives)
 
             to_scan = scan(i, token, to_scan)
             i += 1
 
+            expects.clear()
+            expects |= {i.expect for i in to_scan}
+
         self.predict_and_complete(i, to_scan, columns, transitives)
 
         ## Column is now the final column in the parse.
         assert i == len(columns)-1
         return to_scan
 
-    def parse(self, stream, start):
+    def parse(self, lexer, start):
         assert start, start
         start_symbol = NonTerminal(start)
 
@@ -291,7 +295,7 @@ class Parser:
         else:
             columns[0].add(item)
 
-        to_scan = self._parse(stream, columns, to_scan, start_symbol)
+        to_scan = self._parse(lexer, columns, to_scan, start_symbol)
 
         # If the parse was successful, the start
         # symbol should have been completed in the last step of the Earley cycle, and will be in
@@ -299,7 +303,7 @@ class Parser:
         solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
         if not solutions:
             expected_terminals = [t.expect for t in to_scan]
-            raise UnexpectedEOF(expected_terminals)
+            raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))
 
         if self.debug:
             from .earley_forest import ForestToPyDotVisitor
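
With this change the Earley parser no longer consumes a plain token stream: `parse()` receives a lexer-like object and calls its `lex()` method with the set of terminals the parser currently expects, and that same set is cleared and refilled in place after every scan so the lexer always sees up-to-date expectations. Any object exposing a matching `lex()` will do; the `ChildrenLexer` added to `lark/tree_matcher.py` below is the simplest case. A hedged sketch of the shape of that internal interface (the class name is made up):

```python
class PrecollectedTokens:
    """Feeds an existing list of tokens to the new Parser.parse(lexer, start) API."""
    def __init__(self, tokens):
        self.tokens = tokens

    def lex(self, expects):
        # `expects` is the live set of expected terminals; the parser mutates it
        # after each token, so a smarter lexer could consult it lazily.
        # Here we simply ignore it and replay the stored tokens.
        return iter(self.tokens)
```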


lark/parsers/lalr_parser.py (+8 / -2)

@@ -3,7 +3,7 @@
 # Author: Erez Shinan (2017)
 # Email : erezshin@gmail.com
 from copy import deepcopy, copy
-from ..exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+from ..exceptions import UnexpectedInput, UnexpectedToken
 from ..lexer import Token
 
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
@@ -62,6 +62,12 @@ class ParserState:
     def position(self):
         return self.state_stack[-1]
 
+    # Necessary for match_examples() to work
+    def __eq__(self, other):
+        if not isinstance(other, ParserState):
+            return False
+        return self.position == other.position
+
     def __copy__(self):
         return type(self)(
             self.parse_conf,
@@ -86,7 +92,7 @@ class ParserState:
                 action, arg = states[state][token.type]
             except KeyError:
                 expected = {s for s in states[state].keys() if s.isupper()}
-                raise UnexpectedToken(token, expected, state=state, puppet=None)
+                raise UnexpectedToken(token, expected, state=self, puppet=None)
 
             assert arg != end_state
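
Passing `state=self` (the live `ParserState`) instead of the bare state id, together with the position-based `__eq__`, means two errors raised at the same parser position compare equal on `.state`, which is exactly what `UnexpectedInput.match_examples()` checks. A small sketch with an assumed toy grammar:

```python
from lark import Lark, UnexpectedInput

p = Lark('start: "a" "b" "c"', parser='lalr')

def error_state(text):
    try:
        p.parse(text)
    except UnexpectedInput as e:
        return e.state          # now the parser state object, not just an int

# Both inputs fail at the same parser position, so their states compare equal,
# which lets match_examples() group them under one example.
assert error_state("abx") == error_state("aby")
```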




lark/parsers/xearley.py (+2 / -1)

@@ -113,7 +113,8 @@ class Parser(BaseParser):
                 del delayed_matches[i+1]    # No longer needed, so unburden memory
 
             if not next_set and not delayed_matches and not next_to_scan:
-                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan))
+                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan},
+                                           set(to_scan), state=frozenset(i.s for i in to_scan))
 
             return next_to_scan




lark/tree_matcher.py (+9 / -1)

@@ -69,6 +69,14 @@ def parse_rulename(s):
     return name, args
 
 
+
+class ChildrenLexer:
+    def __init__(self, children):
+        self.children = children
+
+    def lex(self, parser_state):
+        return self.children
+
 class TreeMatcher:
     """Match the elements of a tree node, based on an ontology
     provided by a Lark grammar.
@@ -173,6 +181,6 @@ class TreeMatcher:
             self._parser_cache[rulename] = parser
 
         # find a full derivation
-        unreduced_tree = parser.parse(tree.children, rulename)
+        unreduced_tree = parser.parse(ChildrenLexer(tree.children), rulename)
         assert unreduced_tree.data == rulename
         return unreduced_tree

setup.py (+2 / -2)

@@ -29,8 +29,8 @@ setup(
     description = "a modern parsing library",
     license = "MIT",
     keywords = "Earley LALR parser parsing ast",
-    url = "https://github.com/erezsh/lark",
-    download_url = "https://github.com/erezsh/lark/tarball/master",
+    url = "https://github.com/lark-parser/lark",
+    download_url = "https://github.com/lark-parser/lark/tarball/master",
     long_description='''
 Lark is a modern general-purpose parsing library for Python.




tests/__main__.py (+1 / -0)

@@ -9,6 +9,7 @@ from .test_tools import TestStandalone
 from .test_cache import TestCache
 from .test_grammar import TestGrammar
 from .test_reconstructor import TestReconstructor
+from .test_tree_forest_transformer import TestTreeForestTransformer
 
 try:
     from .test_nearley.test_nearley import TestNearley


tests/test_parser.py (+28 / -3)

@@ -322,7 +322,7 @@ class TestParsers(unittest.TestCase):
 
 
     def test_alias(self):
         Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)
     def test_backwards_custom_lexer(self):
         class OldCustomLexer(Lexer):
             def __init__(self, lexer_conf):
@@ -330,12 +330,12 @@ class TestParsers(unittest.TestCase):
 
 
             def lex(self, text):
                 yield Token('A', 'A')
         p = Lark("""
         start: A
         %declare A
         """, parser='lalr', lexer=OldCustomLexer)
         r = p.parse('')
         self.assertEqual(r, Tree('start', [Token('A', 'A')]))
 
 
@@ -2361,6 +2361,31 @@ def _make_parser_test(LEXER, PARSER):
             self.assertEqual(a.line, 1)
             self.assertEqual(b.line, 2)
 
+        @unittest.skipIf(PARSER=='cyk', "match_examples() not supported for CYK")
+        def test_match_examples(self):
+            p = _Lark(r"""
+                start: "a" "b" "c"
+            """)
+
+            def match_error(s):
+                try:
+                    _ = p.parse(s)
+                except UnexpectedInput as u:
+                    return u.match_examples(p.parse, {
+                        0: ['abe'],
+                        1: ['ab'],
+                        2: ['cbc', 'dbc'],
+                    })
+                assert False
+
+            assert match_error("abe") == 0
+            assert match_error("ab") == 1
+            assert match_error("bbc") == 2
+            assert match_error("cbc") == 2
+            self.assertEqual( match_error("dbc"), 2 )
+            self.assertEqual( match_error("ebc"), 2 )
+
+
         @unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
         def test_unicode_class(self):
             "Tests that character classes from the `regex` module work correctly."

