This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Ви не можете вибрати більше 25 тем. Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

362 рядки
10 KiB

  1. import hashlib
  2. import unicodedata
  3. import os
  4. from functools import reduce
  5. from collections import deque
  6. ###{standalone
  7. import sys, re
  8. import logging
  9. from io import open
# Package-wide logger, registered under the name "lark".
logger = logging.getLogger("lark")
# Attach a StreamHandler built with its default arguments.
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)
  15. if sys.version_info[0]>2:
  16. from abc import ABC, abstractmethod
  17. else:
  18. from abc import ABCMeta, abstractmethod
  19. class ABC(object): # Provide Python27 compatibility
  20. __slots__ = ()
  21. __metclass__ = ABCMeta
# True when the running interpreter is Python 3.6 or newer.
Py36 = (sys.version_info[:2] >= (3, 6))
# Unique sentinel object.
# NOTE(review): presumably used to tell "no value supplied" apart from None — confirm against callers.
NO_VALUE = object()
  24. def classify(seq, key=None, value=None):
  25. d = {}
  26. for item in seq:
  27. k = key(item) if (key is not None) else item
  28. v = value(item) if (value is not None) else item
  29. if k in d:
  30. d[k].append(v)
  31. else:
  32. d[k] = [v]
  33. return d
  34. def _deserialize(data, namespace, memo):
  35. if isinstance(data, dict):
  36. if '__type__' in data: # Object
  37. class_ = namespace[data['__type__']]
  38. return class_.deserialize(data, memo)
  39. elif '@' in data:
  40. return memo[data['@']]
  41. return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
  42. elif isinstance(data, list):
  43. return [_deserialize(value, namespace, memo) for value in data]
  44. return data
  45. class Serialize(object):
  46. """Safe-ish serialization interface that doesn't rely on Pickle
  47. Attributes:
  48. __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
  49. __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
  50. Should include all field types that aren't builtin types.
  51. """
  52. def memo_serialize(self, types_to_memoize):
  53. memo = SerializeMemoizer(types_to_memoize)
  54. return self.serialize(memo), memo.serialize()
  55. def serialize(self, memo=None):
  56. if memo and memo.in_types(self):
  57. return {'@': memo.memoized.get(self)}
  58. fields = getattr(self, '__serialize_fields__')
  59. res = {f: _serialize(getattr(self, f), memo) for f in fields}
  60. res['__type__'] = type(self).__name__
  61. if hasattr(self, '_serialize'):
  62. self._serialize(res, memo)
  63. return res
  64. @classmethod
  65. def deserialize(cls, data, memo):
  66. namespace = getattr(cls, '__serialize_namespace__', [])
  67. namespace = {c.__name__:c for c in namespace}
  68. fields = getattr(cls, '__serialize_fields__')
  69. if '@' in data:
  70. return memo[data['@']]
  71. inst = cls.__new__(cls)
  72. for f in fields:
  73. try:
  74. setattr(inst, f, _deserialize(data[f], namespace, memo))
  75. except KeyError as e:
  76. raise KeyError("Cannot find key for class", cls, e)
  77. if hasattr(inst, '_deserialize'):
  78. inst._deserialize()
  79. return inst
  80. class SerializeMemoizer(Serialize):
  81. "A version of serialize that memoizes objects to reduce space"
  82. __serialize_fields__ = 'memoized',
  83. def __init__(self, types_to_memoize):
  84. self.types_to_memoize = tuple(types_to_memoize)
  85. self.memoized = Enumerator()
  86. def in_types(self, value):
  87. return isinstance(value, self.types_to_memoize)
  88. def serialize(self):
  89. return _serialize(self.memoized.reversed(), None)
  90. @classmethod
  91. def deserialize(cls, data, namespace, memo):
  92. return _deserialize(data, namespace, memo)
# STRING_TYPE is `basestring` where that builtin exists, and `str` otherwise.
try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str
  97. import types
  98. from functools import wraps, partial
  99. from contextlib import contextmanager
# Type of a unicode string literal on the running interpreter.
Str = type(u'')
# `types.ClassType` where the `types` module provides it, `type` otherwise.
try:
    classtype = types.ClassType    # Python2
except AttributeError:
    classtype = type    # Python3
  105. def smart_decorator(f, create_decorator):
  106. if isinstance(f, types.FunctionType):
  107. return wraps(f)(create_decorator(f, True))
  108. elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
  109. return wraps(f)(create_decorator(f, False))
  110. elif isinstance(f, types.MethodType):
  111. return wraps(f)(create_decorator(f.__func__, True))
  112. elif isinstance(f, partial):
  113. # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
  114. return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
  115. else:
  116. return create_decorator(f.__func__.__call__, True)
  117. try:
  118. import regex
  119. except ImportError:
  120. regex = None
  121. import sre_parse
  122. import sre_constants
# Matches a Unicode category escape of the form `\p{Name}`, where Name is made of ASCII letters and underscores.
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
  124. def get_regexp_width(expr):
  125. if regex:
  126. # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
  127. # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
  128. # match here below.
  129. regexp_final = re.sub(categ_pattern, 'A', expr)
  130. else:
  131. if re.search(categ_pattern, expr):
  132. raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
  133. regexp_final = expr
  134. try:
  135. return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
  136. except sre_constants.error:
  137. if not regex:
  138. raise ValueError(expr)
  139. else:
  140. # sre_parse does not support the new features in regex. To not completely fail in that case,
  141. # we manually test for the most important info (whether the empty string is matched)
  142. c = regex.compile(regexp_final)
  143. if c.match('') is None:
  144. return 1, sre_constants.MAXREPEAT
  145. else:
  146. return 0, sre_constants.MAXREPEAT
  147. ###}
# Unicode general-category codes accepted by `is_id_start`.
_ID_START = 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'
# Categories accepted by `is_id_continue`: everything above plus the number categories Nd and Nl.
_ID_CONTINUE = _ID_START + ('Nd', 'Nl',)
  150. def _test_unicode_category(s, categories):
  151. if len(s) != 1:
  152. return all(_test_unicode_category(char, categories) for char in s)
  153. return s == '_' or unicodedata.category(s) in categories
def is_id_continue(s):
    """
    Checks if all characters in `s` are alphanumeric characters (Unicode standard, so diacritics, indian vowels, non-latin
    numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details.

    Concretely, each character must be ``_`` or have a Unicode category listed in `_ID_CONTINUE`.
    """
    return _test_unicode_category(s, _ID_CONTINUE)
def is_id_start(s):
    """
    Checks if all characters in `s` are alphabetic characters (Unicode standard, so diacritics, indian vowels, etc. all
    pass; digits do not, since `_ID_START` contains no number category). Modelled on Python's `ID_START` identifier
    class. See PEP 3131 for details.

    Concretely, each character must be ``_`` or have a Unicode category listed in `_ID_START`.
    """
    return _test_unicode_category(s, _ID_START)
  166. def dedup_list(l):
  167. """Given a list (l) will removing duplicates from the list,
  168. preserving the original order of the list. Assumes that
  169. the list entries are hashable."""
  170. dedup = set()
  171. return [x for x in l if not (x in dedup or dedup.add(x))]
try:
    from contextlib import suppress     # Python 3
except ImportError:
    # Fallback used only when `contextlib` does not provide `suppress`.
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception

        Args:
            *excs: Exception classes to swallow when raised inside the `with` body.

        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass
  188. class Enumerator(Serialize):
  189. def __init__(self):
  190. self.enums = {}
  191. def get(self, item):
  192. if item not in self.enums:
  193. self.enums[item] = len(self.enums)
  194. return self.enums[item]
  195. def __len__(self):
  196. return len(self.enums)
  197. def reversed(self):
  198. r = {v: k for k, v in self.enums.items()}
  199. assert len(r) == len(self.enums)
  200. return r
  201. def combine_alternatives(lists):
  202. """
  203. Accepts a list of alternatives, and enumerates all their possible concatinations.
  204. Examples:
  205. >>> combine_alternatives([range(2), [4,5]])
  206. [[0, 4], [0, 5], [1, 4], [1, 5]]
  207. >>> combine_alternatives(["abc", "xy", '$'])
  208. [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]
  209. >>> combine_alternatives([])
  210. [[]]
  211. """
  212. if not lists:
  213. return [[]]
  214. assert all(l for l in lists), lists
  215. init = [[x] for x in lists[0]]
  216. return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)
  217. try:
  218. import atomicwrites
  219. except ImportError:
  220. atomicwrites = None
  221. class FS:
  222. exists = os.path.exists
  223. @staticmethod
  224. def open(name, mode="r", **kwargs):
  225. if atomicwrites and "w" in mode:
  226. return atomicwrites.atomic_write(name, mode=mode, overwrite=True, **kwargs)
  227. else:
  228. return open(name, mode, **kwargs)
  229. def isascii(s):
  230. """ str.isascii only exists in python3.7+ """
  231. try:
  232. return s.isascii()
  233. except AttributeError:
  234. try:
  235. s.encode('ascii')
  236. return True
  237. except (UnicodeDecodeError, UnicodeEncodeError):
  238. return False
  239. class fzset(frozenset):
  240. def __repr__(self):
  241. return '{%s}' % ', '.join(map(repr, self))
  242. def classify_bool(seq, pred):
  243. true_elems = []
  244. false_elems = []
  245. for elem in seq:
  246. if pred(elem):
  247. true_elems.append(elem)
  248. else:
  249. false_elems.append(elem)
  250. return true_elems, false_elems
  251. def bfs(initial, expand):
  252. open_q = deque(list(initial))
  253. visited = set(open_q)
  254. while open_q:
  255. node = open_q.popleft()
  256. yield node
  257. for next_node in expand(node):
  258. if next_node not in visited:
  259. visited.add(next_node)
  260. open_q.append(next_node)
  261. def bfs_all_unique(initial, expand):
  262. "bfs, but doesn't keep track of visited (aka seen), because there can be no repetitions"
  263. open_q = deque(list(initial))
  264. while open_q:
  265. node = open_q.popleft()
  266. yield node
  267. open_q += expand(node)
  268. def _serialize(value, memo):
  269. if isinstance(value, Serialize):
  270. return value.serialize(memo)
  271. elif isinstance(value, list):
  272. return [_serialize(elem, memo) for elem in value]
  273. elif isinstance(value, frozenset):
  274. return list(value) # TODO reversible?
  275. elif isinstance(value, dict):
  276. return {key:_serialize(elem, memo) for key, elem in value.items()}
  277. # assert value is None or isinstance(value, (int, float, str, tuple)), value
  278. return value