This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

308 lines
8.3 KiB

  1. import os
  2. from functools import reduce
  3. from collections import deque
  4. ###{standalone
  5. import sys, re
  6. import logging
# Package-wide logger, registered under the name "lark" with a stream handler attached.
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)
# True when the running interpreter is Python 3.6 or newer.
Py36 = (sys.version_info[:2] >= (3, 6))
# Unique placeholder object; presumably a "no value supplied" sentinel -- confirm against callers.
NO_VALUE = object()
  14. def classify(seq, key=None, value=None):
  15. d = {}
  16. for item in seq:
  17. k = key(item) if (key is not None) else item
  18. v = value(item) if (value is not None) else item
  19. if k in d:
  20. d[k].append(v)
  21. else:
  22. d[k] = [v]
  23. return d
  24. def _deserialize(data, namespace, memo):
  25. if isinstance(data, dict):
  26. if '__type__' in data: # Object
  27. class_ = namespace[data['__type__']]
  28. return class_.deserialize(data, memo)
  29. elif '@' in data:
  30. return memo[data['@']]
  31. return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
  32. elif isinstance(data, list):
  33. return [_deserialize(value, namespace, memo) for value in data]
  34. return data
  35. class Serialize(object):
  36. """Safe-ish serialization interface that doesn't rely on Pickle
  37. Attributes:
  38. __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
  39. __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
  40. Should include all field types that aren't builtin types.
  41. """
  42. def memo_serialize(self, types_to_memoize):
  43. memo = SerializeMemoizer(types_to_memoize)
  44. return self.serialize(memo), memo.serialize()
  45. def serialize(self, memo=None):
  46. if memo and memo.in_types(self):
  47. return {'@': memo.memoized.get(self)}
  48. fields = getattr(self, '__serialize_fields__')
  49. res = {f: _serialize(getattr(self, f), memo) for f in fields}
  50. res['__type__'] = type(self).__name__
  51. postprocess = getattr(self, '_serialize', None)
  52. if postprocess:
  53. postprocess(res, memo)
  54. return res
  55. @classmethod
  56. def deserialize(cls, data, memo):
  57. namespace = getattr(cls, '__serialize_namespace__', {})
  58. namespace = {c.__name__:c for c in namespace}
  59. fields = getattr(cls, '__serialize_fields__')
  60. if '@' in data:
  61. return memo[data['@']]
  62. inst = cls.__new__(cls)
  63. for f in fields:
  64. try:
  65. setattr(inst, f, _deserialize(data[f], namespace, memo))
  66. except KeyError as e:
  67. raise KeyError("Cannot find key for class", cls, e)
  68. postprocess = getattr(inst, '_deserialize', None)
  69. if postprocess:
  70. postprocess()
  71. return inst
  72. class SerializeMemoizer(Serialize):
  73. "A version of serialize that memoizes objects to reduce space"
  74. __serialize_fields__ = 'memoized',
  75. def __init__(self, types_to_memoize):
  76. self.types_to_memoize = tuple(types_to_memoize)
  77. self.memoized = Enumerator()
  78. def in_types(self, value):
  79. return isinstance(value, self.types_to_memoize)
  80. def serialize(self):
  81. return _serialize(self.memoized.reversed(), None)
  82. @classmethod
  83. def deserialize(cls, data, namespace, memo):
  84. return _deserialize(data, namespace, memo)
# Base string type for isinstance checks: ``basestring`` where that name
# exists, otherwise ``str``.
try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str
  89. import types
  90. from functools import wraps, partial
  91. from contextlib import contextmanager
# The type of a unicode string literal on the running interpreter.
Str = type(u'')
# Type of old-style classes where ``types.ClassType`` exists; plain ``type`` otherwise.
try:
    classtype = types.ClassType     # Python2
except AttributeError:
    classtype = type    # Python3
  97. def smart_decorator(f, create_decorator):
  98. if isinstance(f, types.FunctionType):
  99. return wraps(f)(create_decorator(f, True))
  100. elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
  101. return wraps(f)(create_decorator(f, False))
  102. elif isinstance(f, types.MethodType):
  103. return wraps(f)(create_decorator(f.__func__, True))
  104. elif isinstance(f, partial):
  105. # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
  106. return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
  107. else:
  108. return create_decorator(f.__func__.__call__, True)
  109. try:
  110. import regex
  111. except ImportError:
  112. regex = None
  113. import sre_parse
  114. import sre_constants
# Matches Unicode category escapes of the form ``\p{Name}`` inside a regexp source string.
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
  116. def get_regexp_width(expr):
  117. if regex:
  118. # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
  119. # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
  120. # match here below.
  121. regexp_final = re.sub(categ_pattern, 'A', expr)
  122. else:
  123. if re.search(categ_pattern, expr):
  124. raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
  125. regexp_final = expr
  126. try:
  127. return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
  128. except sre_constants.error:
  129. raise ValueError(expr)
  130. ###}
  131. def dedup_list(l):
  132. """Given a list (l) will removing duplicates from the list,
  133. preserving the original order of the list. Assumes that
  134. the list entries are hashable."""
  135. dedup = set()
  136. return [x for x in l if not (x in dedup or dedup.add(x))]
try:
    from contextlib import suppress     # Python 3
except ImportError:
    # Fallback used when ``contextlib`` does not provide ``suppress``.
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception

        Any exception raised inside the ``with`` body that matches one of
        ``excs`` is swallowed; other exceptions propagate.

        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass
  153. try:
  154. compare = cmp
  155. except NameError:
  156. def compare(a, b):
  157. if a == b:
  158. return 0
  159. elif a > b:
  160. return 1
  161. return -1
  162. class Enumerator(Serialize):
  163. def __init__(self):
  164. self.enums = {}
  165. def get(self, item):
  166. if item not in self.enums:
  167. self.enums[item] = len(self.enums)
  168. return self.enums[item]
  169. def __len__(self):
  170. return len(self.enums)
  171. def reversed(self):
  172. r = {v: k for k, v in self.enums.items()}
  173. assert len(r) == len(self.enums)
  174. return r
  175. def combine_alternatives(lists):
  176. """
  177. Accepts a list of alternatives, and enumerates all their possible concatinations.
  178. Examples:
  179. >>> combine_alternatives([range(2), [4,5]])
  180. [[0, 4], [0, 5], [1, 4], [1, 5]]
  181. >>> combine_alternatives(["abc", "xy", '$'])
  182. [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]
  183. >>> combine_alternatives([])
  184. [[]]
  185. """
  186. if not lists:
  187. return [[]]
  188. assert all(l for l in lists), lists
  189. init = [[x] for x in lists[0]]
  190. return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)
class FS:
    """Namespace holding the filesystem calls ``open`` and ``os.path.exists``.

    NOTE(review): presumably an indirection point so file access can be
    swapped out -- confirm against callers.
    """
    open = open
    exists = os.path.exists
  194. def isascii(s):
  195. """ str.isascii only exists in python3.7+ """
  196. try:
  197. return s.isascii()
  198. except AttributeError:
  199. try:
  200. s.encode('ascii')
  201. return True
  202. except (UnicodeDecodeError, UnicodeEncodeError):
  203. return False
  204. class fzset(frozenset):
  205. def __repr__(self):
  206. return '{%s}' % ', '.join(map(repr, self))
  207. def classify_bool(seq, pred):
  208. true_elems = []
  209. false_elems = []
  210. for elem in seq:
  211. if pred(elem):
  212. true_elems.append(elem)
  213. else:
  214. false_elems.append(elem)
  215. return true_elems, false_elems
  216. def bfs(initial, expand):
  217. open_q = deque(list(initial))
  218. visited = set(open_q)
  219. while open_q:
  220. node = open_q.popleft()
  221. yield node
  222. for next_node in expand(node):
  223. if next_node not in visited:
  224. visited.add(next_node)
  225. open_q.append(next_node)
  226. def _serialize(value, memo):
  227. if isinstance(value, Serialize):
  228. return value.serialize(memo)
  229. elif isinstance(value, list):
  230. return [_serialize(elem, memo) for elem in value]
  231. elif isinstance(value, frozenset):
  232. return list(value) # TODO reversible?
  233. elif isinstance(value, dict):
  234. return {key:_serialize(elem, memo) for key, elem in value.items()}
  235. # assert value is None or isinstance(value, (int, float, str, tuple)), value
  236. return value