Browse Source

Fixed grammars

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
fbba305a9e
3 changed files with 24 additions and 4 deletions
  1. +2
    -2
      examples/json_parser.py
  2. +7
    -2
      lark/grammars/common.g
  3. +15
    -0
      tests/test_parser.py

+ 2
- 2
examples/json_parser.py View File

@@ -24,11 +24,11 @@ json_grammar = r"""
object : "{" [pair ("," pair)*] "}" object : "{" [pair ("," pair)*] "}"
pair : string ":" value pair : string ":" value


number: FLOAT
number: SIGNED_NUMBER
string : ESCAPED_STRING string : ESCAPED_STRING


%import common.ESCAPED_STRING %import common.ESCAPED_STRING
%import common.FLOAT
%import common.SIGNED_NUMBER
%import common.WS %import common.WS


%ignore WS %ignore WS


+ 7
- 2
lark/grammars/common.g View File

@@ -3,13 +3,18 @@
// //


DIGIT: "0".."9" DIGIT: "0".."9"
HEXDIGIT: "a".."f"|"A".."F"|DIGIT


INT: DIGIT+ INT: DIGIT+
DECIMAL: INT ("." INT)?
SIGNED_INT: ["+"|"-"] INT
DECIMAL: INT "." INT? | "." INT


// float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/ // float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/
FLOAT: "-"? DECIMAL (("e"|"E")("+"|"-")? INT)?
_EXP: ("e"|"E") SIGNED_INT
FLOAT: INT _EXP | DECIMAL _EXP?


NUMBER: FLOAT | INT
SIGNED_NUMBER: ["+"|"-"] NUMBER


// //
// Strings // Strings


+ 15
- 0
tests/test_parser.py View File

@@ -18,6 +18,7 @@ logging.basicConfig(level=logging.INFO)


from lark.lark import Lark from lark.lark import Lark
from lark.common import GrammarError, ParseError from lark.common import GrammarError, ParseError
from lark.lexer import LexError


__path__ = os.path.dirname(__file__) __path__ = os.path.dirname(__file__)
def _read(n, *args): def _read(n, *args):
@@ -276,6 +277,20 @@ def _make_parser_test(PARSER):
x = g.parse('Hello HelloWorld') x = g.parse('Hello HelloWorld')
self.assertSequenceEqual(x.children, ['HelloWorld']) self.assertSequenceEqual(x.children, ['HelloWorld'])


# def test_string_priority(self):
# g = _Lark("""start: (A | /a?bb/)+
# A: "a" """)
# x = g.parse('abb')
# self.assertEqual(len(x.children), 2)

# # This parse raises an exception because the lexer will always try to consume
# # "a" first and will never match the regular expression
# # This behavior is subject to change!!
# # This won't happen with ambiguity handling.
# g = _Lark("""start: (A | /a?ab/)+
# A: "a" """)
# self.assertRaises(LexError, g.parse, 'aab')

def test_undefined_rule(self): def test_undefined_rule(self):
self.assertRaises(GrammarError, _Lark, """start: a""") self.assertRaises(GrammarError, _Lark, """start: a""")




Loading…
Cancel
Save