This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

336 lines
8.9 KiB

  1. import sys
  2. import os
  3. from functools import reduce
  4. from ast import literal_eval
  5. from collections import deque
  6. ###{standalone
import logging
# Module-wide logger, registered under the name "lark".
logger = logging.getLogger("lark")
# Attach a stream handler so that records passing the level filter are emitted.
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)
  13. def classify(seq, key=None, value=None):
  14. d = {}
  15. for item in seq:
  16. k = key(item) if (key is not None) else item
  17. v = value(item) if (value is not None) else item
  18. if k in d:
  19. d[k].append(v)
  20. else:
  21. d[k] = [v]
  22. return d
  23. def _deserialize(data, namespace, memo):
  24. if isinstance(data, dict):
  25. if '__type__' in data: # Object
  26. class_ = namespace[data['__type__']]
  27. return class_.deserialize(data, memo)
  28. elif '@' in data:
  29. return memo[data['@']]
  30. return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
  31. elif isinstance(data, list):
  32. return [_deserialize(value, namespace, memo) for value in data]
  33. return data
  34. class Serialize(object):
  35. """Safe-ish serialization interface that doesn't rely on Pickle
  36. Attributes:
  37. __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
  38. __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
  39. Should include all field types that aren't builtin types.
  40. """
  41. def memo_serialize(self, types_to_memoize):
  42. memo = SerializeMemoizer(types_to_memoize)
  43. return self.serialize(memo), memo.serialize()
  44. def serialize(self, memo=None):
  45. if memo and memo.in_types(self):
  46. return {'@': memo.memoized.get(self)}
  47. fields = getattr(self, '__serialize_fields__')
  48. res = {f: _serialize(getattr(self, f), memo) for f in fields}
  49. res['__type__'] = type(self).__name__
  50. postprocess = getattr(self, '_serialize', None)
  51. if postprocess:
  52. postprocess(res, memo)
  53. return res
  54. @classmethod
  55. def deserialize(cls, data, memo):
  56. namespace = getattr(cls, '__serialize_namespace__', {})
  57. namespace = {c.__name__:c for c in namespace}
  58. fields = getattr(cls, '__serialize_fields__')
  59. if '@' in data:
  60. return memo[data['@']]
  61. inst = cls.__new__(cls)
  62. for f in fields:
  63. try:
  64. setattr(inst, f, _deserialize(data[f], namespace, memo))
  65. except KeyError as e:
  66. raise KeyError("Cannot find key for class", cls, e)
  67. postprocess = getattr(inst, '_deserialize', None)
  68. if postprocess:
  69. postprocess()
  70. return inst
  71. class SerializeMemoizer(Serialize):
  72. "A version of serialize that memoizes objects to reduce space"
  73. __serialize_fields__ = 'memoized',
  74. def __init__(self, types_to_memoize):
  75. self.types_to_memoize = tuple(types_to_memoize)
  76. self.memoized = Enumerator()
  77. def in_types(self, value):
  78. return isinstance(value, self.types_to_memoize)
  79. def serialize(self):
  80. return _serialize(self.memoized.reversed(), None)
  81. @classmethod
  82. def deserialize(cls, data, namespace, memo):
  83. return _deserialize(data, namespace, memo)
# Base string type for isinstance checks: `basestring` where it exists,
# otherwise `str`.
try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str

import types
from functools import wraps, partial
from contextlib import contextmanager

# The type of a unicode literal on the running interpreter.
Str = type(u'')
# `types.ClassType` is only available on Python 2; fall back to `type` elsewhere.
try:
    classtype = types.ClassType    # Python2
except AttributeError:
    classtype = type    # Python3
  96. def smart_decorator(f, create_decorator):
  97. if isinstance(f, types.FunctionType):
  98. return wraps(f)(create_decorator(f, True))
  99. elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
  100. return wraps(f)(create_decorator(f, False))
  101. elif isinstance(f, types.MethodType):
  102. return wraps(f)(create_decorator(f.__func__, True))
  103. elif isinstance(f, partial):
  104. # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
  105. return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
  106. else:
  107. return create_decorator(f.__func__.__call__, True)
# `regex` is optional: when it cannot be imported, the name is bound to None
# so later code can test for its availability.
try:
    import regex
except ImportError:
    regex = None

import sys, re
# True when running on Python 3.6 or newer.
Py36 = (sys.version_info[:2] >= (3, 6))

import sre_parse
import sre_constants
# Matches Unicode category escapes of the form `\p{Name}`, where Name consists
# of ASCII letters and underscores.
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
  117. def get_regexp_width(expr):
  118. if regex:
  119. # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
  120. # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
  121. # match here below.
  122. regexp_final = re.sub(categ_pattern, 'A', expr)
  123. else:
  124. if re.search(categ_pattern, expr):
  125. raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
  126. regexp_final = expr
  127. try:
  128. return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
  129. except sre_constants.error:
  130. raise ValueError(expr)
  131. ###}
  132. def dedup_list(l):
  133. """Given a list (l) will removing duplicates from the list,
  134. preserving the original order of the list. Assumes that
  135. the list entries are hashable."""
  136. dedup = set()
  137. return [ x for x in l if not (x in dedup or dedup.add(x))]
# Use the standard-library `suppress` when `contextlib` provides it; otherwise
# define an equivalent context manager locally.
try:
    from contextlib import suppress     # Python 3
except ImportError:
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception

        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            # Swallow any exception that matches one of the given types.
            pass
  154. try:
  155. compare = cmp
  156. except NameError:
  157. def compare(a, b):
  158. if a == b:
  159. return 0
  160. elif a > b:
  161. return 1
  162. return -1
  163. class Enumerator(Serialize):
  164. def __init__(self):
  165. self.enums = {}
  166. def get(self, item):
  167. if item not in self.enums:
  168. self.enums[item] = len(self.enums)
  169. return self.enums[item]
  170. def __len__(self):
  171. return len(self.enums)
  172. def reversed(self):
  173. r = {v: k for k, v in self.enums.items()}
  174. assert len(r) == len(self.enums)
  175. return r
  176. def eval_escaping(s):
  177. w = ''
  178. i = iter(s)
  179. for n in i:
  180. w += n
  181. if n == '\\':
  182. try:
  183. n2 = next(i)
  184. except StopIteration:
  185. raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
  186. if n2 == '\\':
  187. w += '\\\\'
  188. elif n2 not in 'uxnftr':
  189. w += '\\'
  190. w += n2
  191. w = w.replace('\\"', '"').replace("'", "\\'")
  192. to_eval = "u'''%s'''" % w
  193. try:
  194. s = literal_eval(to_eval)
  195. except SyntaxError as e:
  196. raise ValueError(s, e)
  197. return s
  198. def combine_alternatives(lists):
  199. """
  200. Accepts a list of alternatives, and enumerates all their possible concatinations.
  201. Examples:
  202. >>> combine_alternatives([range(2), [4,5]])
  203. [[0, 4], [0, 5], [1, 4], [1, 5]]
  204. >>> combine_alternatives(["abc", "xy", '$'])
  205. [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]
  206. >>> combine_alternatives([])
  207. [[]]
  208. """
  209. if not lists:
  210. return [[]]
  211. assert all(l for l in lists), lists
  212. init = [[x] for x in lists[0]]
  213. return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)
  214. class FS:
  215. open = open
  216. exists = os.path.exists
  217. def isascii(s):
  218. """ str.isascii only exists in python3.7+ """
  219. try:
  220. return s.isascii()
  221. except AttributeError:
  222. try:
  223. s.encode('ascii')
  224. return True
  225. except (UnicodeDecodeError, UnicodeEncodeError):
  226. return False
  227. class fzset(frozenset):
  228. def __repr__(self):
  229. return '{%s}' % ', '.join(map(repr, self))
  230. def classify_bool(seq, pred):
  231. true_elems = []
  232. false_elems = []
  233. for elem in seq:
  234. if pred(elem):
  235. true_elems.append(elem)
  236. else:
  237. false_elems.append(elem)
  238. return true_elems, false_elems
  239. def bfs(initial, expand):
  240. open_q = deque(list(initial))
  241. visited = set(open_q)
  242. while open_q:
  243. node = open_q.popleft()
  244. yield node
  245. for next_node in expand(node):
  246. if next_node not in visited:
  247. visited.add(next_node)
  248. open_q.append(next_node)
  249. def _serialize(value, memo):
  250. if isinstance(value, Serialize):
  251. return value.serialize(memo)
  252. elif isinstance(value, list):
  253. return [_serialize(elem, memo) for elem in value]
  254. elif isinstance(value, frozenset):
  255. return list(value) # TODO reversible?
  256. elif isinstance(value, dict):
  257. return {key:_serialize(elem, memo) for key, elem in value.items()}
  258. return value