This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

356 lines
9.8 KiB

  1. import sys
  2. import unicodedata
  3. import os
  4. from functools import reduce
  5. from ast import literal_eval
  6. from collections import deque
  7. ###{standalone
  8. import logging
  9. logger = logging.getLogger("lark")
  10. logger.addHandler(logging.StreamHandler())
  11. # Set to highest level, since we have some warnings amongst the code
  12. # By default, we should not output any log messages
  13. logger.setLevel(logging.CRITICAL)
  14. def is_id_continue(x):
  15. """
  16. Checks if all characters in `x` are alphanumeric characters (Unicode standard, so diactrics, Indian vowels, non-latin
  17. numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details.
  18. """
  19. if len(x) != 1:
  20. return all(is_id_continue(y) for y in x)
  21. return x == '_' or unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc']
  22. def is_id_start(x):
  23. """
  24. Checks if all characters in `x` are alphabetic characters (Unicode standard, so diactrics, Indian vowels, non-latin
  25. numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details.
  26. """
  27. if len(x) != 1:
  28. return all(is_id_start(y) for y in x)
  29. return x == '_' or unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc']
  30. def classify(seq, key=None, value=None):
  31. d = {}
  32. for item in seq:
  33. k = key(item) if (key is not None) else item
  34. v = value(item) if (value is not None) else item
  35. if k in d:
  36. d[k].append(v)
  37. else:
  38. d[k] = [v]
  39. return d
  40. def _deserialize(data, namespace, memo):
  41. if isinstance(data, dict):
  42. if '__type__' in data: # Object
  43. class_ = namespace[data['__type__']]
  44. return class_.deserialize(data, memo)
  45. elif '@' in data:
  46. return memo[data['@']]
  47. return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
  48. elif isinstance(data, list):
  49. return [_deserialize(value, namespace, memo) for value in data]
  50. return data
  51. class Serialize(object):
  52. """Safe-ish serialization interface that doesn't rely on Pickle
  53. Attributes:
  54. __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
  55. __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
  56. Should include all field types that aren't builtin types.
  57. """
  58. def memo_serialize(self, types_to_memoize):
  59. memo = SerializeMemoizer(types_to_memoize)
  60. return self.serialize(memo), memo.serialize()
  61. def serialize(self, memo=None):
  62. if memo and memo.in_types(self):
  63. return {'@': memo.memoized.get(self)}
  64. fields = getattr(self, '__serialize_fields__')
  65. res = {f: _serialize(getattr(self, f), memo) for f in fields}
  66. res['__type__'] = type(self).__name__
  67. postprocess = getattr(self, '_serialize', None)
  68. if postprocess:
  69. postprocess(res, memo)
  70. return res
  71. @classmethod
  72. def deserialize(cls, data, memo):
  73. namespace = getattr(cls, '__serialize_namespace__', {})
  74. namespace = {c.__name__:c for c in namespace}
  75. fields = getattr(cls, '__serialize_fields__')
  76. if '@' in data:
  77. return memo[data['@']]
  78. inst = cls.__new__(cls)
  79. for f in fields:
  80. try:
  81. setattr(inst, f, _deserialize(data[f], namespace, memo))
  82. except KeyError as e:
  83. raise KeyError("Cannot find key for class", cls, e)
  84. postprocess = getattr(inst, '_deserialize', None)
  85. if postprocess:
  86. postprocess()
  87. return inst
  88. class SerializeMemoizer(Serialize):
  89. "A version of serialize that memoizes objects to reduce space"
  90. __serialize_fields__ = 'memoized',
  91. def __init__(self, types_to_memoize):
  92. self.types_to_memoize = tuple(types_to_memoize)
  93. self.memoized = Enumerator()
  94. def in_types(self, value):
  95. return isinstance(value, self.types_to_memoize)
  96. def serialize(self):
  97. return _serialize(self.memoized.reversed(), None)
  98. @classmethod
  99. def deserialize(cls, data, namespace, memo):
  100. return _deserialize(data, namespace, memo)
  101. try:
  102. STRING_TYPE = basestring
  103. except NameError: # Python 3
  104. STRING_TYPE = str
  105. import types
  106. from functools import wraps, partial
  107. from contextlib import contextmanager
  108. Str = type(u'')
  109. try:
  110. classtype = types.ClassType # Python2
  111. except AttributeError:
  112. classtype = type # Python3
  113. def smart_decorator(f, create_decorator):
  114. if isinstance(f, types.FunctionType):
  115. return wraps(f)(create_decorator(f, True))
  116. elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
  117. return wraps(f)(create_decorator(f, False))
  118. elif isinstance(f, types.MethodType):
  119. return wraps(f)(create_decorator(f.__func__, True))
  120. elif isinstance(f, partial):
  121. # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
  122. return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
  123. else:
  124. return create_decorator(f.__func__.__call__, True)
  125. try:
  126. import regex
  127. except ImportError:
  128. regex = None
  129. import sys, re
  130. Py36 = (sys.version_info[:2] >= (3, 6))
  131. import sre_parse
  132. import sre_constants
  133. categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
  134. def get_regexp_width(expr):
  135. if regex:
  136. # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
  137. # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
  138. # match here below.
  139. regexp_final = re.sub(categ_pattern, 'A', expr)
  140. else:
  141. if re.search(categ_pattern, expr):
  142. raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
  143. regexp_final = expr
  144. try:
  145. return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
  146. except sre_constants.error:
  147. raise ValueError(expr)
  148. ###}
  149. def dedup_list(l):
  150. """Given a list (l) will removing duplicates from the list,
  151. preserving the original order of the list. Assumes that
  152. the list entries are hashable."""
  153. dedup = set()
  154. return [ x for x in l if not (x in dedup or dedup.add(x))]
try:
    from contextlib import suppress     # Python 3
except ImportError:
    # Fallback for interpreters whose contextlib does not provide `suppress`.
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception(s)

        Any exception raised inside the `with` body that matches one of `excs`
        is swallowed; other exceptions propagate as usual.

        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass
  171. try:
  172. compare = cmp
  173. except NameError:
  174. def compare(a, b):
  175. if a == b:
  176. return 0
  177. elif a > b:
  178. return 1
  179. return -1
  180. class Enumerator(Serialize):
  181. def __init__(self):
  182. self.enums = {}
  183. def get(self, item):
  184. if item not in self.enums:
  185. self.enums[item] = len(self.enums)
  186. return self.enums[item]
  187. def __len__(self):
  188. return len(self.enums)
  189. def reversed(self):
  190. r = {v: k for k, v in self.enums.items()}
  191. assert len(r) == len(self.enums)
  192. return r
  193. def eval_escaping(s):
  194. w = ''
  195. i = iter(s)
  196. for n in i:
  197. w += n
  198. if n == '\\':
  199. try:
  200. n2 = next(i)
  201. except StopIteration:
  202. raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
  203. if n2 == '\\':
  204. w += '\\\\'
  205. elif n2 not in 'uxnftr':
  206. w += '\\'
  207. w += n2
  208. w = w.replace('\\"', '"').replace("'", "\\'")
  209. to_eval = "u'''%s'''" % w
  210. try:
  211. s = literal_eval(to_eval)
  212. except SyntaxError as e:
  213. raise ValueError(s, e)
  214. return s
  215. def combine_alternatives(lists):
  216. """
  217. Accepts a list of alternatives, and enumerates all their possible concatinations.
  218. Examples:
  219. >>> combine_alternatives([range(2), [4,5]])
  220. [[0, 4], [0, 5], [1, 4], [1, 5]]
  221. >>> combine_alternatives(["abc", "xy", '$'])
  222. [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]
  223. >>> combine_alternatives([])
  224. [[]]
  225. """
  226. if not lists:
  227. return [[]]
  228. assert all(l for l in lists), lists
  229. init = [[x] for x in lists[0]]
  230. return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)
  231. class FS:
  232. open = open
  233. exists = os.path.exists
  234. def isascii(s):
  235. """ str.isascii only exists in python3.7+ """
  236. try:
  237. return s.isascii()
  238. except AttributeError:
  239. try:
  240. s.encode('ascii')
  241. return True
  242. except (UnicodeDecodeError, UnicodeEncodeError):
  243. return False
  244. class fzset(frozenset):
  245. def __repr__(self):
  246. return '{%s}' % ', '.join(map(repr, self))
  247. def classify_bool(seq, pred):
  248. true_elems = []
  249. false_elems = []
  250. for elem in seq:
  251. if pred(elem):
  252. true_elems.append(elem)
  253. else:
  254. false_elems.append(elem)
  255. return true_elems, false_elems
  256. def bfs(initial, expand):
  257. open_q = deque(list(initial))
  258. visited = set(open_q)
  259. while open_q:
  260. node = open_q.popleft()
  261. yield node
  262. for next_node in expand(node):
  263. if next_node not in visited:
  264. visited.add(next_node)
  265. open_q.append(next_node)
  266. def _serialize(value, memo):
  267. if isinstance(value, Serialize):
  268. return value.serialize(memo)
  269. elif isinstance(value, list):
  270. return [_serialize(elem, memo) for elem in value]
  271. elif isinstance(value, frozenset):
  272. return list(value) # TODO reversible?
  273. elif isinstance(value, dict):
  274. return {key:_serialize(elem, memo) for key, elem in value.items()}
  275. return value