浏览代码

Response to code review

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
julienmalard 3 年前
父节点
当前提交
364f9ae3a5
共有 3 个文件被更改，包括 11 次插入 和 9 次删除
  1. +3
    -5
      lark/load_grammar.py
  2. +2
    -2
      lark/reconstruct.py
  3. +6
    -2
      lark/utils.py

+ 3
- 5
lark/load_grammar.py 查看文件

@@ -6,7 +6,7 @@ from copy import copy, deepcopy
from io import open
import pkgutil

from .utils import bfs, eval_escaping, Py36, logger, classify_bool, isalnum, isalpha
from .utils import bfs, eval_escaping, Py36, logger, classify_bool, is_id_continue, isalpha
from .lexer import Token, TerminalDef, PatternStr, PatternRE

from .parse_tree_builder import ParseTreeBuilder
@@ -328,10 +328,8 @@ class PrepareAnonTerminals(Transformer_InPlace):
try:
term_name = _TERMINAL_NAMES[value]
except KeyError:
if isalnum(value) and isalpha(value[0]) and value.upper() not in self.term_set:
with suppress(UnicodeEncodeError):
value.upper().encode('utf8') # Why shouldn't we have unicode in our terminal names?
term_name = value.upper()
if is_id_continue(value) and isalpha(value[0]) and value.upper() not in self.term_set:
term_name = value.upper()

if term_name in self.term_set:
term_name = None


+ 2
- 2
lark/reconstruct.py 查看文件

@@ -8,7 +8,7 @@ from .lexer import Token, PatternStr
from .grammar import Terminal, NonTerminal

from .tree_matcher import TreeMatcher, is_discarded_terminal
from .utils import isalnum
from .utils import is_id_continue

def is_iter_empty(i):
try:
@@ -94,7 +94,7 @@ class Reconstructor(TreeMatcher):
y = []
prev_item = ''
for item in x:
if prev_item and item and isalnum(prev_item[-1]) and isalnum(item[0]):
if prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
y.append(' ')
y.append(item)
prev_item = item


+ 6
- 2
lark/utils.py 查看文件

@@ -13,9 +13,13 @@ logger.addHandler(logging.StreamHandler())
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)

def isalnum(x):
def is_id_continue(x):
"""
Checks if all characters in `x` are alphanumeric characters (Unicode standard, so diacritics, Indian vowels, non-Latin
numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier.
"""
if len(x) != 1:
return all(isalnum(y) for y in x)
return all(is_id_continue(y) for y in x)
return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc']




正在加载...
取消
保存