From fbba305a9ec6e0339f50abd41f0320bfdfd9ad4f Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Fri, 24 Feb 2017 00:45:34 +0200 Subject: [PATCH] Fixed grammars --- examples/json_parser.py | 4 ++-- lark/grammars/common.g | 9 +++++++-- tests/test_parser.py | 15 +++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/examples/json_parser.py b/examples/json_parser.py index 53b3afb..5b910ef 100644 --- a/examples/json_parser.py +++ b/examples/json_parser.py @@ -24,11 +24,11 @@ json_grammar = r""" object : "{" [pair ("," pair)*] "}" pair : string ":" value - number: FLOAT + number: SIGNED_NUMBER string : ESCAPED_STRING %import common.ESCAPED_STRING - %import common.FLOAT + %import common.SIGNED_NUMBER %import common.WS %ignore WS diff --git a/lark/grammars/common.g b/lark/grammars/common.g index b10fbce..3db6ec1 100644 --- a/lark/grammars/common.g +++ b/lark/grammars/common.g @@ -3,13 +3,18 @@ // DIGIT: "0".."9" +HEXDIGIT: "a".."f"|"A".."F"|DIGIT INT: DIGIT+ -DECIMAL: INT ("." INT)? +SIGNED_INT: ["+"|"-"] INT +DECIMAL: INT "." INT? | "." INT // float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/ -FLOAT: "-"? DECIMAL (("e"|"E")("+"|"-")? INT)? +_EXP: ("e"|"E") SIGNED_INT +FLOAT: INT _EXP | DECIMAL _EXP? 
+NUMBER: FLOAT | INT +SIGNED_NUMBER: ["+"|"-"] NUMBER // // Strings diff --git a/tests/test_parser.py b/tests/test_parser.py index 083577a..3b9a7b9 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -18,6 +18,7 @@ logging.basicConfig(level=logging.INFO) from lark.lark import Lark from lark.common import GrammarError, ParseError +from lark.lexer import LexError __path__ = os.path.dirname(__file__) def _read(n, *args): @@ -276,6 +277,20 @@ def _make_parser_test(PARSER): x = g.parse('Hello HelloWorld') self.assertSequenceEqual(x.children, ['HelloWorld']) + # def test_string_priority(self): + # g = _Lark("""start: (A | /a?bb/)+ + # A: "a" """) + # x = g.parse('abb') + # self.assertEqual(len(x.children), 2) + + # # This parse raises an exception because the lexer will always try to consume + # # "a" first and will never match the regular expression + # # This behavior is subject to change!! + # # This won't happen with ambiguity handling. + # g = _Lark("""start: (A | /a?ab/)+ + # A: "a" """) + # self.assertRaises(LexError, g.parse, 'aab') + def test_undefined_rule(self): self.assertRaises(GrammarError, _Lark, """start: a""")