Code review 3

3 anni fa · f8b0ca3ccc
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -6,7 +6,7 @@ from copy import copy, deepcopy
 from io import open
 import pkgutil

 from .utils import bfs, eval_escaping, Py36, logger, classify_bool, is_id_continue, isalpha
 from .utils import bfs, eval_escaping, Py36, logger, classify_bool, is_id_continue, is_id_start
 from .lexer import Token, TerminalDef, PatternStr, PatternRE

 from .parse_tree_builder import ParseTreeBuilder
@@ -328,7 +328,7 @@ class PrepareAnonTerminals(Transformer_InPlace):
                try:
                    term_name = _TERMINAL_NAMES[value]
                except KeyError:
                    if is_id_continue(value) and isalpha(value[0]) and value.upper() not in self.term_set:
                    if is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set:
                        term_name = value.upper()

                if term_name in self.term_set:
--- a/lark/utils.py
+++ b/lark/utils.py
@@ -20,14 +20,17 @@ def is_id_continue(x):
    """
    if len(x) != 1:
        return all(is_id_continue(y) for y in x)
    return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc']
    return x == '_' or unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc']


 def isalpha(x):
    """See PEP 3131 for details."""
 def is_id_start(x):
    """
    Checks if all characters in `x` are alphabetic characters (Unicode standard, so diacritics, Indian vowels, non-latin
    numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details.
    """
    if len(x) != 1:
        return all(isalpha(y) for y in x)
    return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc']
        return all(is_id_start(y) for y in x)
    return x == '_' or unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc']


 def classify(seq, key=None, value=None):