From 09a80ed2228f0d12e9a87b152a6211132796d94e Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 12 Feb 2017 00:35:38 +0200 Subject: [PATCH] Fixed bug in lexer where unidentical tokens got the same name --- README.md | 1 + lark/indenter.py | 2 +- lark/load_grammar.py | 4 ++-- lark/parse_tree_builder.py | 2 +- tests/test_parser.py | 11 +++++++++++ 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d8ce7a7..8cd812c 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ Lark has no dependencies. - Automatic token collision resolution (unless both tokens are regexps) - Python 2 & 3 compatible - Unicode fully supported + - Extensive test suite ## Coming soon diff --git a/lark/indenter.py b/lark/indenter.py index 594c45f..d6d27ed 100644 --- a/lark/indenter.py +++ b/lark/indenter.py @@ -18,7 +18,7 @@ class Indenter: if indent > self.indent_level[-1]: self.indent_level.append(indent) - yield Token(self.INDENT_type, indent_str) + yield Token.new_borrow_pos(self.INDENT_type, indent_str, token) else: while indent < self.indent_level[-1]: self.indent_level.pop() diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 9e2f0c4..0c4e878 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -252,8 +252,8 @@ class ExtractAnonTokens(InlineTransformer): try: token_name = _TOKEN_NAMES[value] except KeyError: - if value.isalnum() and value[0].isalpha(): - token_name = value.upper() + if value.isalnum() and value[0].isalpha() and ('__'+value.upper()) not in self.token_set: + token_name = value.upper() # This can create name duplications for unidentical tokens else: token_name = 'ANONSTR_%d' % self.i self.i += 1 diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 5561544..c1302a3 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -48,7 +48,7 @@ class ParseTreeBuilder: new_rules = [] for origin, expansions in rules.items(): expand1 = origin.startswith('?') - _origin = origin.lstrip('?*') + _origin = origin.lstrip('?') for expansion, alias in expansions: if alias and origin.startswith('_'): diff --git a/tests/test_parser.py b/tests/test_parser.py index 22d28d2..014d220 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -340,6 +340,17 @@ def _make_parser_test(PARSER): g = _Lark("""start: %s %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items()))) + def test_float_without_lexer(self): + g = _Lark("""start: ["+"|"-"] float + float: digit* "." digit+ exp? + | digit+ exp + exp: ("e"|"E") ["+"|"-"] digit+ + digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9" + """) + g.parse("1.2") + g.parse("-.2e9") + g.parse("+2e-9") + self.assertRaises(ParseError, g.parse, "+2e-9e") _NAME = "Test" + PARSER.capitalize() _TestParser.__name__ = _NAME