import sys
import unicodedata
import os
from functools import reduce
from ast import literal_eval
from collections import deque

###{standalone
import logging
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)


def isalnum(x):
    if len(x) != 1:
        return all(isalnum(y) for y in x)
    return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc']


def isalpha(x):
    if len(x) != 1:
        return all(isalpha(y) for y in x)
    return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc']


def classify(seq, key=None, value=None):
    d = {}
    for item in seq:
        k = key(item) if (key is not None) else item
        v = value(item) if (value is not None) else item
        if k in d:
            d[k].append(v)
        else:
            d[k] = [v]
    return d
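
# Illustrative sketch (not part of the original module): `classify` groups the
# items of `seq` into a dict of lists, keyed by `key(item)`. The sample words
# below are made up for illustration.
#
#     classify(['apple', 'avocado', 'banana'], key=lambda w: w[0])
#     # -> {'a': ['apple', 'avocado'], 'b': ['banana']}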

def _deserialize(data, namespace, memo):
    if isinstance(data, dict):
        if '__type__' in data:  # Object
            class_ = namespace[data['__type__']]
            return class_.deserialize(data, memo)
        elif '@' in data:
            return memo[data['@']]
        return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
    elif isinstance(data, list):
        return [_deserialize(value, namespace, memo) for value in data]
    return data


class Serialize(object):
    """Safe-ish serialization interface that doesn't rely on Pickle

    Attributes:
        __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
        __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
                                        Should include all field types that aren't builtin types.
    """

    def memo_serialize(self, types_to_memoize):
        memo = SerializeMemoizer(types_to_memoize)
        return self.serialize(memo), memo.serialize()

    def serialize(self, memo=None):
        if memo and memo.in_types(self):
            return {'@': memo.memoized.get(self)}

        fields = getattr(self, '__serialize_fields__')
        res = {f: _serialize(getattr(self, f), memo) for f in fields}
        res['__type__'] = type(self).__name__
        postprocess = getattr(self, '_serialize', None)
        if postprocess:
            postprocess(res, memo)
        return res

    @classmethod
    def deserialize(cls, data, memo):
        namespace = getattr(cls, '__serialize_namespace__', {})
        namespace = {c.__name__:c for c in namespace}

        fields = getattr(cls, '__serialize_fields__')

        if '@' in data:
            return memo[data['@']]

        inst = cls.__new__(cls)
        for f in fields:
            try:
                setattr(inst, f, _deserialize(data[f], namespace, memo))
            except KeyError as e:
                raise KeyError("Cannot find key for class", cls, e)
        postprocess = getattr(inst, '_deserialize', None)
        if postprocess:
            postprocess()
        return inst
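
# A minimal usage sketch (hypothetical, not part of lark itself): a subclass only
# needs to declare `__serialize_fields__`. The `Point` class and its fields below
# are invented for illustration.
#
#     class Point(Serialize):
#         __serialize_fields__ = 'x', 'y'
#         def __init__(self, x, y):
#             self.x, self.y = x, y
#
#     data = Point(1, 2).serialize()          # {'x': 1, 'y': 2, '__type__': 'Point'}
#     restored = Point.deserialize(data, {})  # new Point with x=1, y=2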

class SerializeMemoizer(Serialize):
    "A version of serialize that memoizes objects to reduce space"

    __serialize_fields__ = 'memoized',

    def __init__(self, types_to_memoize):
        self.types_to_memoize = tuple(types_to_memoize)
        self.memoized = Enumerator()

    def in_types(self, value):
        return isinstance(value, self.types_to_memoize)

    def serialize(self):
        return _serialize(self.memoized.reversed(), None)

    @classmethod
    def deserialize(cls, data, namespace, memo):
        return _deserialize(data, namespace, memo)


try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str

import types
from functools import wraps, partial
from contextlib import contextmanager

Str = type(u'')
try:
    classtype = types.ClassType  # Python2
except AttributeError:
    classtype = type    # Python3


def smart_decorator(f, create_decorator):
    if isinstance(f, types.FunctionType):
        return wraps(f)(create_decorator(f, True))

    elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
        return wraps(f)(create_decorator(f, False))

    elif isinstance(f, types.MethodType):
        return wraps(f)(create_decorator(f.__func__, True))

    elif isinstance(f, partial):
        # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
        return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))

    else:
        return create_decorator(f.__func__.__call__, True)


try:
    import regex
except ImportError:
    regex = None

import sys, re
Py36 = (sys.version_info[:2] >= (3, 6))

import sre_parse
import sre_constants
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')

def get_regexp_width(expr):
    if regex:
        # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
        # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
        # match here below.
        regexp_final = re.sub(categ_pattern, 'A', expr)
    else:
        if re.search(categ_pattern, expr):
            raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
        regexp_final = expr
    try:
        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
    except sre_constants.error:
        raise ValueError(expr)

###}
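
# Illustrative examples (not part of the original file): the return value of
# get_regexp_width is the [minimum, maximum] length a match can have, as
# reported by sre_parse.
#
#     get_regexp_width('a{2,5}')  # -> [2, 5]
#     get_regexp_width('ab?c')    # -> [2, 3]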

def dedup_list(l):
    """Given a list (l), removes duplicates from it while preserving the
       original order of the list. Assumes that the list entries are hashable."""
    dedup = set()
    return [x for x in l if not (x in dedup or dedup.add(x))]
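
# Illustrative example (not from the original source): first occurrences win,
# later duplicates are dropped.
#
#     dedup_list([3, 1, 3, 2, 1])  # -> [3, 1, 2]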

try:
    from contextlib import suppress     # Python 3
except ImportError:
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception
        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass


try:
    compare = cmp
except NameError:
    def compare(a, b):
        if a == b:
            return 0
        elif a > b:
            return 1
        return -1


class Enumerator(Serialize):
    def __init__(self):
        self.enums = {}

    def get(self, item):
        if item not in self.enums:
            self.enums[item] = len(self.enums)
        return self.enums[item]

    def __len__(self):
        return len(self.enums)

    def reversed(self):
        r = {v: k for k, v in self.enums.items()}
        assert len(r) == len(self.enums)
        return r
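
# Illustrative sketch (not part of the original module): `Enumerator` hands out
# stable integer ids in first-seen order, and `reversed()` maps ids back to items.
#
#     e = Enumerator()
#     e.get('a'), e.get('b'), e.get('a')  # -> (0, 1, 0)
#     e.reversed()                        # -> {0: 'a', 1: 'b'}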

def eval_escaping(s):
    w = ''
    i = iter(s)
    for n in i:
        w += n
        if n == '\\':
            try:
                n2 = next(i)
            except StopIteration:
                raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
            if n2 == '\\':
                w += '\\\\'
            elif n2 not in 'uxnftr':
                w += '\\'
            w += n2
    w = w.replace('\\"', '"').replace("'", "\\'")

    to_eval = "u'''%s'''" % w
    try:
        s = literal_eval(to_eval)
    except SyntaxError as e:
        raise ValueError(s, e)

    return s

def combine_alternatives(lists):
    """
    Accepts a list of alternatives, and enumerates all their possible concatenations.

    Examples:
        >>> combine_alternatives([range(2), [4,5]])
        [[0, 4], [0, 5], [1, 4], [1, 5]]

        >>> combine_alternatives(["abc", "xy", '$'])
        [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]

        >>> combine_alternatives([])
        [[]]
    """
    if not lists:
        return [[]]
    assert all(l for l in lists), lists
    init = [[x] for x in lists[0]]
    return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)

class FS:
    open = open
    exists = os.path.exists


def isascii(s):
    """ str.isascii only exists in python3.7+ """
    try:
        return s.isascii()
    except AttributeError:
        try:
            s.encode('ascii')
            return True
        except (UnicodeDecodeError, UnicodeEncodeError):
            return False


class fzset(frozenset):
    def __repr__(self):
        return '{%s}' % ', '.join(map(repr, self))


def classify_bool(seq, pred):
    true_elems = []
    false_elems = []

    for elem in seq:
        if pred(elem):
            true_elems.append(elem)
        else:
            false_elems.append(elem)

    return true_elems, false_elems


def bfs(initial, expand):
    open_q = deque(list(initial))
    visited = set(open_q)
    while open_q:
        node = open_q.popleft()
        yield node
        for next_node in expand(node):
            if next_node not in visited:
                visited.add(next_node)
                open_q.append(next_node)
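
# Illustrative sketch (not from the original source): breadth-first traversal over
# a small adjacency map; `graph` below is made-up sample data.
#
#     graph = {1: [2, 3], 2: [4], 3: [4], 4: []}
#     list(bfs([1], graph.get))  # -> [1, 2, 3, 4]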

def _serialize(value, memo):
    if isinstance(value, Serialize):
        return value.serialize(memo)
    elif isinstance(value, list):
        return [_serialize(elem, memo) for elem in value]
    elif isinstance(value, frozenset):
        return list(value)  # TODO reversible?
    elif isinstance(value, dict):
        return {key:_serialize(elem, memo) for key, elem in value.items()}
    return value