This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

325 lines
8.4 KiB

  1. import sys
  2. import os
  3. from functools import reduce
  4. from ast import literal_eval
  5. from collections import deque
  6. ###{standalone
  7. import logging
# Logger registered under the name "lark", with a stream handler attached.
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)
  13. def classify(seq, key=None, value=None):
  14. d = {}
  15. for item in seq:
  16. k = key(item) if (key is not None) else item
  17. v = value(item) if (value is not None) else item
  18. if k in d:
  19. d[k].append(v)
  20. else:
  21. d[k] = [v]
  22. return d
  23. def _deserialize(data, namespace, memo):
  24. if isinstance(data, dict):
  25. if '__type__' in data: # Object
  26. class_ = namespace[data['__type__']]
  27. return class_.deserialize(data, memo)
  28. elif '@' in data:
  29. return memo[data['@']]
  30. return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
  31. elif isinstance(data, list):
  32. return [_deserialize(value, namespace, memo) for value in data]
  33. return data
  34. class Serialize(object):
  35. def memo_serialize(self, types_to_memoize):
  36. memo = SerializeMemoizer(types_to_memoize)
  37. return self.serialize(memo), memo.serialize()
  38. def serialize(self, memo=None):
  39. if memo and memo.in_types(self):
  40. return {'@': memo.memoized.get(self)}
  41. fields = getattr(self, '__serialize_fields__')
  42. res = {f: _serialize(getattr(self, f), memo) for f in fields}
  43. res['__type__'] = type(self).__name__
  44. postprocess = getattr(self, '_serialize', None)
  45. if postprocess:
  46. postprocess(res, memo)
  47. return res
  48. @classmethod
  49. def deserialize(cls, data, memo):
  50. namespace = getattr(cls, '__serialize_namespace__', {})
  51. namespace = {c.__name__:c for c in namespace}
  52. fields = getattr(cls, '__serialize_fields__')
  53. if '@' in data:
  54. return memo[data['@']]
  55. inst = cls.__new__(cls)
  56. for f in fields:
  57. try:
  58. setattr(inst, f, _deserialize(data[f], namespace, memo))
  59. except KeyError as e:
  60. raise KeyError("Cannot find key for class", cls, e)
  61. postprocess = getattr(inst, '_deserialize', None)
  62. if postprocess:
  63. postprocess()
  64. return inst
  65. class SerializeMemoizer(Serialize):
  66. __serialize_fields__ = 'memoized',
  67. def __init__(self, types_to_memoize):
  68. self.types_to_memoize = tuple(types_to_memoize)
  69. self.memoized = Enumerator()
  70. def in_types(self, value):
  71. return isinstance(value, self.types_to_memoize)
  72. def serialize(self):
  73. return _serialize(self.memoized.reversed(), None)
  74. @classmethod
  75. def deserialize(cls, data, namespace, memo):
  76. return _deserialize(data, namespace, memo)
# Use `basestring` when the interpreter defines it; otherwise fall back to `str`.
try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str
  81. import types
  82. from functools import wraps, partial
  83. from contextlib import contextmanager
# The type of a unicode string literal on the running interpreter.
Str = type(u'')
# Use `types.ClassType` when the `types` module has it; otherwise use `type`.
try:
    classtype = types.ClassType # Python2
except AttributeError:
    classtype = type    # Python3
  89. def smart_decorator(f, create_decorator):
  90. if isinstance(f, types.FunctionType):
  91. return wraps(f)(create_decorator(f, True))
  92. elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
  93. return wraps(f)(create_decorator(f, False))
  94. elif isinstance(f, types.MethodType):
  95. return wraps(f)(create_decorator(f.__func__, True))
  96. elif isinstance(f, partial):
  97. # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
  98. return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
  99. else:
  100. return create_decorator(f.__func__.__call__, True)
# `regex` is optional: when it cannot be imported the name is bound to None.
try:
    import regex
except ImportError:
    regex = None
import sys, re
# True when the running interpreter is Python 3.6 or newer.
Py36 = (sys.version_info[:2] >= (3, 6))
import sre_parse
import sre_constants
# Matches Unicode category escapes written as `\p{Name}`.
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
  110. def get_regexp_width(expr):
  111. if regex:
  112. # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
  113. # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
  114. # match here below.
  115. regexp_final = re.sub(categ_pattern, 'A', expr)
  116. else:
  117. if re.search(categ_pattern, expr):
  118. raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
  119. regexp_final = expr
  120. try:
  121. return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
  122. except sre_constants.error:
  123. raise ValueError(expr)
  124. ###}
  125. def dedup_list(l):
  126. """Given a list (l) will removing duplicates from the list,
  127. preserving the original order of the list. Assumes that
  128. the list entries are hashable."""
  129. dedup = set()
  130. return [ x for x in l if not (x in dedup or dedup.add(x))]
# Use `contextlib.suppress` when it can be imported; otherwise define an
# equivalent context manager locally.
try:
    from contextlib import suppress     # Python 3
except ImportError:
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception

        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            # Swallowing is the whole point: these are the exception types the
            # caller asked to ignore.
            pass
  147. try:
  148. compare = cmp
  149. except NameError:
  150. def compare(a, b):
  151. if a == b:
  152. return 0
  153. elif a > b:
  154. return 1
  155. return -1
  156. class Enumerator(Serialize):
  157. def __init__(self):
  158. self.enums = {}
  159. def get(self, item):
  160. if item not in self.enums:
  161. self.enums[item] = len(self.enums)
  162. return self.enums[item]
  163. def __len__(self):
  164. return len(self.enums)
  165. def reversed(self):
  166. r = {v: k for k, v in self.enums.items()}
  167. assert len(r) == len(self.enums)
  168. return r
  169. def eval_escaping(s):
  170. w = ''
  171. i = iter(s)
  172. for n in i:
  173. w += n
  174. if n == '\\':
  175. try:
  176. n2 = next(i)
  177. except StopIteration:
  178. raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
  179. if n2 == '\\':
  180. w += '\\\\'
  181. elif n2 not in 'uxnftr':
  182. w += '\\'
  183. w += n2
  184. w = w.replace('\\"', '"').replace("'", "\\'")
  185. to_eval = "u'''%s'''" % w
  186. try:
  187. s = literal_eval(to_eval)
  188. except SyntaxError as e:
  189. raise ValueError(s, e)
  190. return s
  191. def combine_alternatives(lists):
  192. """
  193. Accepts a list of alternatives, and enumerates all their possible concatinations.
  194. Examples:
  195. >>> combine_alternatives([range(2), [4,5]])
  196. [[0, 4], [0, 5], [1, 4], [1, 5]]
  197. >>> combine_alternatives(["abc", "xy", '$'])
  198. [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]
  199. >>> combine_alternatives([])
  200. [[]]
  201. """
  202. if not lists:
  203. return [[]]
  204. assert all(l for l in lists), lists
  205. init = [[x] for x in lists[0]]
  206. return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)
class FS:
    # Groups two file-system entry points as class attributes:
    # the built-in `open` and `os.path.exists`.
    open = open
    exists = os.path.exists
  210. def isascii(s):
  211. """ str.isascii only exists in python3.7+ """
  212. try:
  213. return s.isascii()
  214. except AttributeError:
  215. try:
  216. s.encode('ascii')
  217. return True
  218. except (UnicodeDecodeError, UnicodeEncodeError):
  219. return False
  220. class fzset(frozenset):
  221. def __repr__(self):
  222. return '{%s}' % ', '.join(map(repr, self))
  223. def classify_bool(seq, pred):
  224. true_elems = []
  225. false_elems = []
  226. for elem in seq:
  227. if pred(elem):
  228. true_elems.append(elem)
  229. else:
  230. false_elems.append(elem)
  231. return true_elems, false_elems
  232. def bfs(initial, expand):
  233. open_q = deque(list(initial))
  234. visited = set(open_q)
  235. while open_q:
  236. node = open_q.popleft()
  237. yield node
  238. for next_node in expand(node):
  239. if next_node not in visited:
  240. visited.add(next_node)
  241. open_q.append(next_node)
  242. def _serialize(value, memo):
  243. if isinstance(value, Serialize):
  244. return value.serialize(memo)
  245. elif isinstance(value, list):
  246. return [_serialize(elem, memo) for elem in value]
  247. elif isinstance(value, frozenset):
  248. return list(value) # TODO reversible?
  249. elif isinstance(value, dict):
  250. return {key:_serialize(elem, memo) for key, elem in value.items()}
  251. return value