From ee798f81f2d6136cbb8bee8d26ce5be4bb9880c7 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 11 Feb 2017 21:15:57 +0200 Subject: [PATCH] Fixed bug where identical anonymous tokens got separate names --- README.md | 2 +- lark/load_grammar.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7c92307..d8ce7a7 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ These features are planned to be implemented in the near future: ## Comparison to other parsers -This is a feature comparison. For benchmarks vs pyparsing, check out the [JSON tutorial](/docs/json_tutorial.md#conclusion). +This is a feature comparison. For benchmarks vs other parsers, check out the [JSON tutorial](/docs/json_tutorial.md#conclusion). | Library | Algorithm | LOC | Grammar | Builds tree? |:--------|:----------|:----|:--------|:------------ diff --git a/lark/load_grammar.py b/lark/load_grammar.py index e7d1bf4..9e2f0c4 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -261,6 +261,7 @@ class ExtractAnonTokens(InlineTransformer): elif token.type == 'REGEXP': token_name = 'ANONRE_%d' % self.i + value = token.value self.i += 1 else: assert False, x @@ -268,6 +269,8 @@ class ExtractAnonTokens(InlineTransformer): if token_name not in self.token_set: self.token_set.add(token_name) self.tokens.append((token_name, token, [])) + assert value not in self.token_reverse + self.token_reverse[value] = token_name return Token('TOKEN', token_name, -1)