Browse Source

Fixed a bug in the lexer where non-identical tokens got the same name

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
09a80ed222
5 changed files with 16 additions and 4 deletions
  1. +1
    -0
      README.md
  2. +1
    -1
      lark/indenter.py
  3. +2
    -2
      lark/load_grammar.py
  4. +1
    -1
      lark/parse_tree_builder.py
  5. +11
    -0
      tests/test_parser.py

+ 1
- 0
README.md View File

@@ -63,6 +63,7 @@ Lark has no dependencies.
- Automatic token collision resolution (unless both tokens are regexps)
- Python 2 & 3 compatible
- Unicode fully supported
- Extensive test suite


## Coming soon




+ 1
- 1
lark/indenter.py View File

@@ -18,7 +18,7 @@ class Indenter:


if indent > self.indent_level[-1]: if indent > self.indent_level[-1]:
self.indent_level.append(indent) self.indent_level.append(indent)
yield Token(self.INDENT_type, indent_str)
yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
else: else:
while indent < self.indent_level[-1]: while indent < self.indent_level[-1]:
self.indent_level.pop() self.indent_level.pop()


+ 2
- 2
lark/load_grammar.py View File

@@ -252,8 +252,8 @@ class ExtractAnonTokens(InlineTransformer):
try: try:
token_name = _TOKEN_NAMES[value] token_name = _TOKEN_NAMES[value]
except KeyError: except KeyError:
if value.isalnum() and value[0].isalpha():
token_name = value.upper()
if value.isalnum() and value[0].isalpha() and ('__'+value.upper()) not in self.token_set:
token_name = value.upper() # This can create name duplications for unidentical tokens
else: else:
token_name = 'ANONSTR_%d' % self.i token_name = 'ANONSTR_%d' % self.i
self.i += 1 self.i += 1


+ 1
- 1
lark/parse_tree_builder.py View File

@@ -48,7 +48,7 @@ class ParseTreeBuilder:
new_rules = [] new_rules = []
for origin, expansions in rules.items(): for origin, expansions in rules.items():
expand1 = origin.startswith('?') expand1 = origin.startswith('?')
_origin = origin.lstrip('?*')
_origin = origin.lstrip('?')


for expansion, alias in expansions: for expansion, alias in expansions:
if alias and origin.startswith('_'): if alias and origin.startswith('_'):


+ 11
- 0
tests/test_parser.py View File

@@ -340,6 +340,17 @@ def _make_parser_test(PARSER):
g = _Lark("""start: %s g = _Lark("""start: %s
%s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items()))) %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))


def test_float_without_lexer(self):
g = _Lark("""start: ["+"|"-"] float
float: digit* "." digit+ exp?
| digit+ exp
exp: ("e"|"E") ["+"|"-"] digit+
digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
""")
g.parse("1.2")
g.parse("-.2e9")
g.parse("+2e-9")
self.assertRaises(ParseError, g.parse, "+2e-9e")


_NAME = "Test" + PARSER.capitalize() _NAME = "Test" + PARSER.capitalize()
_TestParser.__name__ = _NAME _TestParser.__name__ = _NAME


Loading…
Cancel
Save