From f8b0ca3ccc1f27bcaa9e7dedf6ae7374d677ac4f Mon Sep 17 00:00:00 2001 From: julienmalard Date: Tue, 10 Nov 2020 17:57:00 -0500 Subject: [PATCH] Code review 3 --- lark/load_grammar.py | 4 ++-- lark/utils.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index e45053e..ee38dc8 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -6,7 +6,7 @@ from copy import copy, deepcopy from io import open import pkgutil -from .utils import bfs, eval_escaping, Py36, logger, classify_bool, is_id_continue, isalpha +from .utils import bfs, eval_escaping, Py36, logger, classify_bool, is_id_continue, is_id_start from .lexer import Token, TerminalDef, PatternStr, PatternRE from .parse_tree_builder import ParseTreeBuilder @@ -328,7 +328,7 @@ class PrepareAnonTerminals(Transformer_InPlace): try: term_name = _TERMINAL_NAMES[value] except KeyError: - if is_id_continue(value) and isalpha(value[0]) and value.upper() not in self.term_set: + if is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set: term_name = value.upper() if term_name in self.term_set: diff --git a/lark/utils.py b/lark/utils.py index 29fa514..89498c6 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -20,14 +20,17 @@ def is_id_continue(x): """ if len(x) != 1: return all(is_id_continue(y) for y in x) - return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'] + return x == '_' or unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'] -def isalpha(x): - """See PEP 3131 for details.""" +def is_id_start(x): + """ + Checks if all characters in `x` are alphabetic characters (Unicode standard, so diacritics, Indian vowels, non-latin + numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details. 
+ """ if len(x) != 1: - return all(isalpha(y) for y in x) - return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'] + return all(is_id_start(y) for y in x) + return x == '_' or unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'] def classify(seq, key=None, value=None):