Browse Source

Simplify PrepareAnonTerminals

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
Erez Shinan 7 years ago
parent
commit
ea413fd648
1 changed file with 11 additions and 14 deletions
  1. +11
    -14
      lark/load_grammar.py

+ 11
- 14
lark/load_grammar.py View File

@@ -13,7 +13,7 @@ from .parser_frontends import LALR
from .parsers.lalr_parser import UnexpectedToken from .parsers.lalr_parser import UnexpectedToken
from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
from .utils import classify
from .utils import classify, suppress


from .tree import Tree, Transformer, InlineTransformer, Visitor, SlottedTree as ST from .tree import Tree, Transformer, InlineTransformer, Visitor, SlottedTree as ST


@@ -269,35 +269,32 @@ class PrepareAnonTerminals(InlineTransformer):
if p in self.token_reverse and p.flags != self.token_reverse[p].pattern.flags: if p in self.token_reverse and p.flags != self.token_reverse[p].pattern.flags:
raise GrammarError(u'Conflicting flags for the same terminal: %s' % p) raise GrammarError(u'Conflicting flags for the same terminal: %s' % p)


token_name = None

if isinstance(p, PatternStr): if isinstance(p, PatternStr):
try: try:
# If already defined, use the user-defined token name # If already defined, use the user-defined token name
token_name = self.token_reverse[p].name token_name = self.token_reverse[p].name
except KeyError: except KeyError:
# Try to assign an indicative anon-token name, otherwise use a numbered name
# Try to assign an indicative anon-token name
try: try:
token_name = _TOKEN_NAMES[value] token_name = _TOKEN_NAMES[value]
except KeyError: except KeyError:
if value.isalnum() and value[0].isalpha() and value.upper() not in self.token_set: if value.isalnum() and value[0].isalpha() and value.upper() not in self.token_set:
token_name = '%s%d' % (value.upper(), self.i)
try:
# Make sure we don't have unicode in our token names
token_name.encode('ascii')
except UnicodeEncodeError:
token_name = 'ANONSTR_%d' % self.i
else:
token_name = 'ANONSTR_%d' % self.i
self.i += 1
with suppress(UnicodeEncodeError):
value.upper().encode('ascii') # Make sure we don't have unicode in our token names
token_name = value.upper()


elif isinstance(p, PatternRE): elif isinstance(p, PatternRE):
if p in self.token_reverse: # Kind of a weird placement.name if p in self.token_reverse: # Kind of a weird placement.name
token_name = self.token_reverse[p].name token_name = self.token_reverse[p].name
else:
token_name = 'ANONRE_%d' % self.i
self.i += 1
else: else:
assert False, p assert False, p


if token_name is None:
token_name = '__ANON_%d' % self.i
self.i += 1

if token_name not in self.token_set: if token_name not in self.token_set:
assert p not in self.token_reverse assert p not in self.token_reverse
self.token_set.add(token_name) self.token_set.add(token_name)


Loading…
Cancel
Save