瀏覽代碼

Response to code review

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
julienmalard 3 年之前
父節點
當前提交
364f9ae3a5
共有 3 個檔案被更改,包括 11 行新增和 9 行刪除
  1. +3
    -5
      lark/load_grammar.py
  2. +2
    -2
      lark/reconstruct.py
  3. +6
    -2
      lark/utils.py

+ 3
- 5
lark/load_grammar.py 查看文件

@@ -6,7 +6,7 @@ from copy import copy, deepcopy
from io import open
import pkgutil

from .utils import bfs, eval_escaping, Py36, logger, classify_bool, isalnum, isalpha
from .utils import bfs, eval_escaping, Py36, logger, classify_bool, is_id_continue, isalpha
from .lexer import Token, TerminalDef, PatternStr, PatternRE

from .parse_tree_builder import ParseTreeBuilder
@@ -328,10 +328,8 @@ class PrepareAnonTerminals(Transformer_InPlace):
try:
term_name = _TERMINAL_NAMES[value]
except KeyError:
if isalnum(value) and isalpha(value[0]) and value.upper() not in self.term_set:
with suppress(UnicodeEncodeError):
value.upper().encode('utf8') # Why shouldn't we have unicode in our terminal names?
term_name = value.upper()
if is_id_continue(value) and isalpha(value[0]) and value.upper() not in self.term_set:
term_name = value.upper()

if term_name in self.term_set:
term_name = None


+ 2
- 2
lark/reconstruct.py 查看文件

@@ -8,7 +8,7 @@ from .lexer import Token, PatternStr
from .grammar import Terminal, NonTerminal

from .tree_matcher import TreeMatcher, is_discarded_terminal
from .utils import isalnum
from .utils import is_id_continue

def is_iter_empty(i):
try:
@@ -94,7 +94,7 @@ class Reconstructor(TreeMatcher):
y = []
prev_item = ''
for item in x:
if prev_item and item and isalnum(prev_item[-1]) and isalnum(item[0]):
if prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
y.append(' ')
y.append(item)
prev_item = item


+ 6
- 2
lark/utils.py 查看文件

@@ -13,9 +13,13 @@ logger.addHandler(logging.StreamHandler())
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)

def isalnum(x):
def is_id_continue(x):
"""
Checks if all characters in `x` are alphanumeric characters (Unicode standard, so diacritics, Indian vowels, non-Latin
numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier.
"""
if len(x) != 1:
return all(isalnum(y) for y in x)
return all(is_id_continue(y) for y in x)
return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc']




Loading…
取消
儲存