This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

388 lines
11 KiB

import hashlib
import unicodedata
import os
from functools import reduce
from collections import deque

###{standalone
import sys, re
import logging
from io import open

# Library-wide logger; a StreamHandler is attached so messages become visible
# as soon as a user lowers the level.
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)
  15. if sys.version_info[0]>2:
  16. from abc import ABC, abstractmethod
  17. else:
  18. from abc import ABCMeta, abstractmethod
  19. class ABC(object): # Provide Python27 compatibility
  20. __slots__ = ()
  21. __metclass__ = ABCMeta
  22. Py36 = (sys.version_info[:2] >= (3, 6))
  23. NO_VALUE = object()
  24. def classify(seq, key=None, value=None):
  25. d = {}
  26. for item in seq:
  27. k = key(item) if (key is not None) else item
  28. v = value(item) if (value is not None) else item
  29. if k in d:
  30. d[k].append(v)
  31. else:
  32. d[k] = [v]
  33. return d
  34. def _deserialize(data, namespace, memo):
  35. if isinstance(data, dict):
  36. if '__type__' in data: # Object
  37. class_ = namespace[data['__type__']]
  38. return class_.deserialize(data, memo)
  39. elif '@' in data:
  40. return memo[data['@']]
  41. return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
  42. elif isinstance(data, list):
  43. return [_deserialize(value, namespace, memo) for value in data]
  44. return data
  45. class Serialize(object):
  46. """Safe-ish serialization interface that doesn't rely on Pickle
  47. Attributes:
  48. __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
  49. __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
  50. Should include all field types that aren't builtin types.
  51. """
  52. def memo_serialize(self, types_to_memoize):
  53. memo = SerializeMemoizer(types_to_memoize)
  54. return self.serialize(memo), memo.serialize()
  55. def serialize(self, memo=None):
  56. if memo and memo.in_types(self):
  57. return {'@': memo.memoized.get(self)}
  58. fields = getattr(self, '__serialize_fields__')
  59. res = {f: _serialize(getattr(self, f), memo) for f in fields}
  60. res['__type__'] = type(self).__name__
  61. if hasattr(self, '_serialize'):
  62. self._serialize(res, memo)
  63. return res
  64. @classmethod
  65. def deserialize(cls, data, memo):
  66. namespace = getattr(cls, '__serialize_namespace__', [])
  67. namespace = {c.__name__:c for c in namespace}
  68. fields = getattr(cls, '__serialize_fields__')
  69. if '@' in data:
  70. return memo[data['@']]
  71. inst = cls.__new__(cls)
  72. for f in fields:
  73. try:
  74. setattr(inst, f, _deserialize(data[f], namespace, memo))
  75. except KeyError as e:
  76. raise KeyError("Cannot find key for class", cls, e)
  77. if hasattr(inst, '_deserialize'):
  78. inst._deserialize()
  79. return inst
  80. class SerializeMemoizer(Serialize):
  81. "A version of serialize that memoizes objects to reduce space"
  82. __serialize_fields__ = 'memoized',
  83. def __init__(self, types_to_memoize):
  84. self.types_to_memoize = tuple(types_to_memoize)
  85. self.memoized = Enumerator()
  86. def in_types(self, value):
  87. return isinstance(value, self.types_to_memoize)
  88. def serialize(self):
  89. return _serialize(self.memoized.reversed(), None)
  90. @classmethod
  91. def deserialize(cls, data, namespace, memo):
  92. return _deserialize(data, namespace, memo)
# Python 2/3 compatibility: the broadest "string" type available.
try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str

import types
from functools import wraps, partial
from contextlib import contextmanager

# The text type: unicode on Python 2, str on Python 3.
Str = type(u'')

# Old-style classes only exist on Python 2; fall back to `type` on Python 3.
try:
    classtype = types.ClassType # Python2
except AttributeError:
    classtype = type    # Python3
  105. def smart_decorator(f, create_decorator):
  106. if isinstance(f, types.FunctionType):
  107. return wraps(f)(create_decorator(f, True))
  108. elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
  109. return wraps(f)(create_decorator(f, False))
  110. elif isinstance(f, types.MethodType):
  111. return wraps(f)(create_decorator(f.__func__, True))
  112. elif isinstance(f, partial):
  113. # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
  114. return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
  115. else:
  116. return create_decorator(f.__func__.__call__, True)
# Optional dependency: the third-party `regex` module (needed for Unicode
# category escapes like \p{Mn}); None when unavailable.
try:
    import regex
except ImportError:
    regex = None

import sre_parse
import sre_constants

# Matches Unicode-category escapes of the form \p{Name}.
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
def get_regexp_width(expr):
    # Returns the minimum and maximum possible match length of `expr`,
    # as computed by sre_parse's getwidth().
    if regex:
        # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
        # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
        # match here below.
        regexp_final = re.sub(categ_pattern, 'A', expr)
    else:
        if re.search(categ_pattern, expr):
            raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
        regexp_final = expr
    try:
        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
    except sre_constants.error:
        if not regex:
            raise ValueError(expr)
        else:
            # sre_parse does not support the new features in regex. To not completely fail in that case,
            # we manually test for the most important info (whether the empty string is matched)
            c = regex.compile(regexp_final)
            # NOTE(review): these fallback branches return a tuple while the
            # success path returns a list — presumably callers only unpack
            # two values, so both shapes work; worth confirming.
            if c.match('') is None:
                return 1, sre_constants.MAXREPEAT
            else:
                return 0, sre_constants.MAXREPEAT
  147. ###}
# Unicode general categories accepted at the start of an identifier, and the
# superset accepted in the remainder of one (see is_id_start/is_id_continue,
# which reference PEP 3131).
_ID_START =    'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'
_ID_CONTINUE = _ID_START + ('Nd', 'Nl',)
  150. def _test_unicode_category(s, categories):
  151. if len(s) != 1:
  152. return all(_test_unicode_category(char, categories) for char in s)
  153. return s == '_' or unicodedata.category(s) in categories
def is_id_continue(s):
    """
    Checks if all characters in `s` are alphanumeric characters (Unicode standard, so diacritics, Indian vowels, non-latin
    numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details.
    """
    return _test_unicode_category(s, _ID_CONTINUE)
def is_id_start(s):
    """
    Checks if all characters in `s` are alphabetic characters (Unicode standard, so diacritics, Indian vowels, non-latin
    numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details.
    """
    return _test_unicode_category(s, _ID_START)
  166. def dedup_list(l):
  167. """Given a list (l) will removing duplicates from the list,
  168. preserving the original order of the list. Assumes that
  169. the list entries are hashable."""
  170. dedup = set()
  171. return [x for x in l if not (x in dedup or dedup.add(x))]
# contextlib.suppress exists on Python 3; provide a backport for Python 2.
try:
    from contextlib import suppress     # Python 3
except ImportError:
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception
        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass
  188. class Enumerator(Serialize):
  189. def __init__(self):
  190. self.enums = {}
  191. def get(self, item):
  192. if item not in self.enums:
  193. self.enums[item] = len(self.enums)
  194. return self.enums[item]
  195. def __len__(self):
  196. return len(self.enums)
  197. def reversed(self):
  198. r = {v: k for k, v in self.enums.items()}
  199. assert len(r) == len(self.enums)
  200. return r
  201. def combine_alternatives(lists):
  202. """
  203. Accepts a list of alternatives, and enumerates all their possible concatinations.
  204. Examples:
  205. >>> combine_alternatives([range(2), [4,5]])
  206. [[0, 4], [0, 5], [1, 4], [1, 5]]
  207. >>> combine_alternatives(["abc", "xy", '$'])
  208. [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]
  209. >>> combine_alternatives([])
  210. [[]]
  211. """
  212. if not lists:
  213. return [[]]
  214. assert all(l for l in lists), lists
  215. init = [[x] for x in lists[0]]
  216. return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)
# Optional dependency: atomicwrites provides crash-safe file replacement;
# None when unavailable (FS.open then falls back to plain open()).
try:
    import atomicwrites
except ImportError:
    atomicwrites = None
  221. class FS:
  222. exists = os.path.exists
  223. @staticmethod
  224. def open(name, mode="r", **kwargs):
  225. if atomicwrites and "w" in mode:
  226. return atomicwrites.atomic_write(name, mode=mode, overwrite=True, **kwargs)
  227. else:
  228. return open(name, mode, **kwargs)
  229. def isascii(s):
  230. """ str.isascii only exists in python3.7+ """
  231. try:
  232. return s.isascii()
  233. except AttributeError:
  234. try:
  235. s.encode('ascii')
  236. return True
  237. except (UnicodeDecodeError, UnicodeEncodeError):
  238. return False
  239. class fzset(frozenset):
  240. def __repr__(self):
  241. return '{%s}' % ', '.join(map(repr, self))
  242. def classify_bool(seq, pred):
  243. true_elems = []
  244. false_elems = []
  245. for elem in seq:
  246. if pred(elem):
  247. true_elems.append(elem)
  248. else:
  249. false_elems.append(elem)
  250. return true_elems, false_elems
  251. def bfs(initial, expand):
  252. open_q = deque(list(initial))
  253. visited = set(open_q)
  254. while open_q:
  255. node = open_q.popleft()
  256. yield node
  257. for next_node in expand(node):
  258. if next_node not in visited:
  259. visited.add(next_node)
  260. open_q.append(next_node)
  261. def bfs_all_unique(initial, expand):
  262. "bfs, but doesn't keep track of visited (aka seen), because there can be no repetitions"
  263. open_q = deque(list(initial))
  264. while open_q:
  265. node = open_q.popleft()
  266. yield node
  267. open_q += expand(node)
  268. def _serialize(value, memo):
  269. if isinstance(value, Serialize):
  270. return value.serialize(memo)
  271. elif isinstance(value, list):
  272. return [_serialize(elem, memo) for elem in value]
  273. elif isinstance(value, frozenset):
  274. return list(value) # TODO reversible?
  275. elif isinstance(value, dict):
  276. return {key:_serialize(elem, memo) for key, elem in value.items()}
  277. # assert value is None or isinstance(value, (int, float, str, tuple)), value
  278. return value
  279. def small_factors(n, max_factor):
  280. """
  281. Splits n up into smaller factors and summands <= max_factor.
  282. Returns a list of [(a, b), ...]
  283. so that the following code returns n:
  284. n = 1
  285. for a, b in values:
  286. n = n * a + b
  287. Currently, we also keep a + b <= max_factor, but that might change
  288. """
  289. assert n >= 0
  290. assert max_factor > 2
  291. if n <= max_factor:
  292. return [(n, 0)]
  293. for a in range(max_factor, 1, -1):
  294. r, b = divmod(n, a)
  295. if a + b <= max_factor:
  296. return small_factors(r, max_factor) + [(a, b)]
  297. assert False, "Failed to factorize %s" % n