@@ -6,7 +6,7 @@ from copy import copy, deepcopy | |||||
from io import open | from io import open | ||||
import pkgutil | import pkgutil | ||||
from .utils import bfs, eval_escaping, Py36, logger, classify_bool | |||||
from .utils import bfs, eval_escaping, Py36, logger, classify_bool, isalnum, isalpha | |||||
from .lexer import Token, TerminalDef, PatternStr, PatternRE | from .lexer import Token, TerminalDef, PatternStr, PatternRE | ||||
from .parse_tree_builder import ParseTreeBuilder | from .parse_tree_builder import ParseTreeBuilder | ||||
@@ -328,9 +328,9 @@ class PrepareAnonTerminals(Transformer_InPlace): | |||||
try: | try: | ||||
term_name = _TERMINAL_NAMES[value] | term_name = _TERMINAL_NAMES[value] | ||||
except KeyError: | except KeyError: | ||||
if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set: | |||||
if isalnum(value) and isalpha(value[0]) and value.upper() not in self.term_set: | |||||
with suppress(UnicodeEncodeError): | with suppress(UnicodeEncodeError): | ||||
value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names | |||||
value.upper().encode('utf8') # Why shouldn't we have unicode in our terminal names? | |||||
term_name = value.upper() | term_name = value.upper() | ||||
if term_name in self.term_set: | if term_name in self.term_set: | ||||
@@ -8,6 +8,7 @@ from .lexer import Token, PatternStr | |||||
from .grammar import Terminal, NonTerminal | from .grammar import Terminal, NonTerminal | ||||
from .tree_matcher import TreeMatcher, is_discarded_terminal | from .tree_matcher import TreeMatcher, is_discarded_terminal | ||||
from .utils import isalnum | |||||
def is_iter_empty(i): | def is_iter_empty(i): | ||||
try: | try: | ||||
@@ -56,10 +57,6 @@ class WriteTokensTransformer(Transformer_InPlace): | |||||
return to_write | return to_write | ||||
def _isalnum(x): | |||||
# Categories defined here: https://www.python.org/dev/peps/pep-3131/ | |||||
return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'] | |||||
class Reconstructor(TreeMatcher): | class Reconstructor(TreeMatcher): | ||||
""" | """ | ||||
A Reconstructor that will, given a full parse Tree, generate source code. | A Reconstructor that will, given a full parse Tree, generate source code. | ||||
@@ -97,7 +94,7 @@ class Reconstructor(TreeMatcher): | |||||
y = [] | y = [] | ||||
prev_item = '' | prev_item = '' | ||||
for item in x: | for item in x: | ||||
if prev_item and item and _isalnum(prev_item[-1]) and _isalnum(item[0]): | |||||
if prev_item and item and isalnum(prev_item[-1]) and isalnum(item[0]): | |||||
y.append(' ') | y.append(' ') | ||||
y.append(item) | y.append(item) | ||||
prev_item = item | prev_item = item | ||||
@@ -1,4 +1,5 @@ | |||||
import sys | import sys | ||||
import unicodedata | |||||
import os | import os | ||||
from functools import reduce | from functools import reduce | ||||
from ast import literal_eval | from ast import literal_eval | ||||
@@ -12,6 +13,17 @@ logger.addHandler(logging.StreamHandler()) | |||||
# By default, we should not output any log messages | # By default, we should not output any log messages | ||||
logger.setLevel(logging.CRITICAL) | logger.setLevel(logging.CRITICAL) | ||||
def isalnum(x):
    """Return True if every character of ``x`` is an identifier character.

    A character qualifies when its Unicode general category is one of
    Lu, Ll, Lt, Lm, Lo, Nl, Mn, Mc, Nd or Pc, so letters, digits,
    combining marks and connector punctuation such as ``_`` are accepted.

    ``x`` may be a single character or a longer string. The empty string
    yields False (as ``str.isalnum`` does) instead of being vacuously
    True, so a caller can index ``x[0]`` after a successful check.
    """
    if not x:
        return False
    categories = ('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc')
    # unicodedata.category only accepts one character, so test each in turn.
    return all(unicodedata.category(ch) in categories for ch in x)
def isalpha(x):
    """Return True if every character of ``x`` is a letter-like character.

    A character qualifies when its Unicode general category is one of
    Lu, Ll, Lt, Lm, Lo, Mn, Mc or Pc. Unlike ``isalnum``, the numeric
    categories Nd and Nl are rejected, while connector punctuation such
    as ``_`` is still accepted.

    ``x`` may be a single character or a longer string. The empty string
    yields False (as ``str.isalpha`` does) instead of being vacuously True.
    """
    if not x:
        return False
    categories = ('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc')
    # unicodedata.category only accepts one character, so test each in turn.
    return all(unicodedata.category(ch) in categories for ch in x)
def classify(seq, key=None, value=None): | def classify(seq, key=None, value=None): | ||||
d = {} | d = {} | ||||
@@ -1 +1 @@ | |||||
Subproject commit a46b37471db486db0f6e1ce6a2934fb238346b44 | |||||
Subproject commit cf8925f729bde741a3076c5856c0c0862bc7f5de |