This repo contains code to mirror other repos. It also contains the code that is getting mirrored.

  1. # The file was automatically generated by Lark v0.9.0
  2. __version__ = "0.9.0"
  3. #
  4. #
  5. # Lark Stand-alone Generator Tool
  6. # ----------------------------------
  7. # Generates a stand-alone LALR(1) parser with a standard lexer
  8. #
  9. # Git: https://github.com/erezsh/lark
  10. # Author: Erez Shinan (erezshin@gmail.com)
  11. #
  12. #
  13. # >>> LICENSE
  14. #
  15. # This tool and its generated code use a separate license from Lark,
  16. # and are subject to the terms of the Mozilla Public License, v. 2.0.
  17. # If a copy of the MPL was not distributed with this
  18. # file, You can obtain one at https://mozilla.org/MPL/2.0/.
  19. #
  20. # If you wish to purchase a commercial license for this tool and its
  21. # generated code, you may contact me via email or otherwise.
  22. #
  23. # If MPL2 is incompatible with your free or open-source project,
  24. # contact me and we'll work it out.
  25. #
  26. #
  27. import os
  28. from io import open
  29. class LarkError(Exception):
  30. pass
  31. class GrammarError(LarkError):
  32. pass
  33. class ParseError(LarkError):
  34. pass
  35. class LexError(LarkError):
  36. pass
  37. class UnexpectedEOF(ParseError):
  38. def __init__(self, expected):
  39. self.expected = expected
  40. message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
  41. super(UnexpectedEOF, self).__init__(message)
  42. class UnexpectedInput(LarkError):
  43. #--
  44. pos_in_stream = None
  45. def get_context(self, text, span=40):
  46. #--
  47. pos = self.pos_in_stream
  48. start = max(pos - span, 0)
  49. end = pos + span
  50. if not isinstance(text, bytes):
  51. before = text[start:pos].rsplit('\n', 1)[-1]
  52. after = text[pos:end].split('\n', 1)[0]
  53. return before + after + '\n' + ' ' * len(before) + '^\n'
  54. else:
  55. before = text[start:pos].rsplit(b'\n', 1)[-1]
  56. after = text[pos:end].split(b'\n', 1)[0]
  57. return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace")
  58. def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
  59. #--
  60. assert self.state is not None, "Not supported for this exception"
  61. if isinstance(examples, dict):
  62. examples = examples.items()
  63. candidate = (None, False)
  64. for i, (label, example) in enumerate(examples):
  65. assert not isinstance(example, STRING_TYPE)
  66. for j, malformed in enumerate(example):
  67. try:
  68. parse_fn(malformed)
  69. except UnexpectedInput as ut:
  70. if ut.state == self.state:
  71. if use_accepts and ut.accepts != self.accepts:
  72. logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
  73. (self.state, self.accepts, ut.accepts, i, j))
  74. continue
  75. try:
  76. if ut.token == self.token: ##
  77. logger.debug("Exact Match at example [%s][%s]" % (i, j))
  78. return label
  79. if token_type_match_fallback:
  80. ##
  81. if (ut.token.type == self.token.type) and not candidate[-1]:
  82. logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
  83. candidate = label, True
  84. except AttributeError:
  85. pass
  86. if not candidate[0]:
  87. logger.debug("Same State match at example [%s][%s]" % (i, j))
  88. candidate = label, False
  89. return candidate[0]
  90. class UnexpectedCharacters(LexError, UnexpectedInput):
  91. def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
  92. self.line = line
  93. self.column = column
  94. self.pos_in_stream = lex_pos
  95. self.state = state
  96. self.allowed = allowed
  97. self.considered_tokens = considered_tokens
  98. if isinstance(seq, bytes):
  99. _s = seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace")
  100. else:
  101. _s = seq[lex_pos]
  102. message = "No terminal defined for '%s' at line %d col %d" % (_s, line, column)
  103. message += '\n\n' + self.get_context(seq)
  104. if allowed:
  105. message += '\nExpecting: %s\n' % allowed
  106. if token_history:
  107. message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in token_history)
  108. super(UnexpectedCharacters, self).__init__(message)
  109. class UnexpectedToken(ParseError, UnexpectedInput):
  110. #--
  111. def __init__(self, token, expected, considered_rules=None, state=None, puppet=None):
  112. self.line = getattr(token, 'line', '?')
  113. self.column = getattr(token, 'column', '?')
  114. self.pos_in_stream = getattr(token, 'pos_in_stream', None)
  115. self.state = state
  116. self.token = token
  117. self.expected = expected ##
  118. self.considered_rules = considered_rules
  119. self.puppet = puppet
  120. ##
  121. ##
  122. self.accepts = puppet and puppet.accepts()
  123. message = ("Unexpected token %r at line %s, column %s.\n"
  124. "Expected one of: \n\t* %s\n"
  125. % (token, self.line, self.column, '\n\t* '.join(self.accepts or self.expected)))
  126. super(UnexpectedToken, self).__init__(message)
  127. class VisitError(LarkError):
  128. #--
  129. def __init__(self, rule, obj, orig_exc):
  130. self.obj = obj
  131. self.orig_exc = orig_exc
  132. message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
  133. super(VisitError, self).__init__(message)
  134. import logging
  135. logger = logging.getLogger("lark")
  136. logger.addHandler(logging.StreamHandler())
  137. ##
  138. ##
  139. logger.setLevel(logging.CRITICAL)
  140. def classify(seq, key=None, value=None):
  141. d = {}
  142. for item in seq:
  143. k = key(item) if (key is not None) else item
  144. v = value(item) if (value is not None) else item
  145. if k in d:
  146. d[k].append(v)
  147. else:
  148. d[k] = [v]
  149. return d
  150. def _deserialize(data, namespace, memo):
  151. if isinstance(data, dict):
  152. if '__type__' in data: ##
  153. class_ = namespace[data['__type__']]
  154. return class_.deserialize(data, memo)
  155. elif '@' in data:
  156. return memo[data['@']]
  157. return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
  158. elif isinstance(data, list):
  159. return [_deserialize(value, namespace, memo) for value in data]
  160. return data
  161. class Serialize(object):
  162. def memo_serialize(self, types_to_memoize):
  163. memo = SerializeMemoizer(types_to_memoize)
  164. return self.serialize(memo), memo.serialize()
  165. def serialize(self, memo=None):
  166. if memo and memo.in_types(self):
  167. return {'@': memo.memoized.get(self)}
  168. fields = getattr(self, '__serialize_fields__')
  169. res = {f: _serialize(getattr(self, f), memo) for f in fields}
  170. res['__type__'] = type(self).__name__
  171. postprocess = getattr(self, '_serialize', None)
  172. if postprocess:
  173. postprocess(res, memo)
  174. return res
  175. @classmethod
  176. def deserialize(cls, data, memo):
  177. namespace = getattr(cls, '__serialize_namespace__', {})
  178. namespace = {c.__name__:c for c in namespace}
  179. fields = getattr(cls, '__serialize_fields__')
  180. if '@' in data:
  181. return memo[data['@']]
  182. inst = cls.__new__(cls)
  183. for f in fields:
  184. try:
  185. setattr(inst, f, _deserialize(data[f], namespace, memo))
  186. except KeyError as e:
  187. raise KeyError("Cannot find key for class", cls, e)
  188. postprocess = getattr(inst, '_deserialize', None)
  189. if postprocess:
  190. postprocess()
  191. return inst
  192. class SerializeMemoizer(Serialize):
  193. __serialize_fields__ = 'memoized',
  194. def __init__(self, types_to_memoize):
  195. self.types_to_memoize = tuple(types_to_memoize)
  196. self.memoized = Enumerator()
  197. def in_types(self, value):
  198. return isinstance(value, self.types_to_memoize)
  199. def serialize(self):
  200. return _serialize(self.memoized.reversed(), None)
  201. @classmethod
  202. def deserialize(cls, data, namespace, memo):
  203. return _deserialize(data, namespace, memo)
  204. try:
  205. STRING_TYPE = basestring
  206. except NameError: ##
  207. STRING_TYPE = str
  208. import types
  209. from functools import wraps, partial
  210. from contextlib import contextmanager
  211. Str = type(u'')
  212. try:
  213. classtype = types.ClassType ##
  214. except AttributeError:
  215. classtype = type ##
  216. def smart_decorator(f, create_decorator):
  217. if isinstance(f, types.FunctionType):
  218. return wraps(f)(create_decorator(f, True))
  219. elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
  220. return wraps(f)(create_decorator(f, False))
  221. elif isinstance(f, types.MethodType):
  222. return wraps(f)(create_decorator(f.__func__, True))
  223. elif isinstance(f, partial):
  224. ##
  225. return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
  226. else:
  227. return create_decorator(f.__func__.__call__, True)
  228. try:
  229. import regex
  230. except ImportError:
  231. regex = None
  232. import sys, re
  233. Py36 = (sys.version_info[:2] >= (3, 6))
  234. import sre_parse
  235. import sre_constants
  236. categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
  237. def get_regexp_width(expr):
  238. if regex:
  239. ##
  240. ##
  241. ##
  242. regexp_final = re.sub(categ_pattern, 'A', expr)
  243. else:
  244. if re.search(categ_pattern, expr):
  245. raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
  246. regexp_final = expr
  247. try:
  248. return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
  249. except sre_constants.error:
  250. raise ValueError(expr)
  251. from collections import OrderedDict
  252. class Meta:
  253. def __init__(self):
  254. self.empty = True
  255. class Tree(object):
  256. #--
  257. def __init__(self, data, children, meta=None):
  258. self.data = data
  259. self.children = children
  260. self._meta = meta
  261. @property
  262. def meta(self):
  263. if self._meta is None:
  264. self._meta = Meta()
  265. return self._meta
  266. def __repr__(self):
  267. return 'Tree(%s, %s)' % (self.data, self.children)
  268. def _pretty_label(self):
  269. return self.data
  270. def _pretty(self, level, indent_str):
  271. if len(self.children) == 1 and not isinstance(self.children[0], Tree):
  272. return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
  273. l = [ indent_str*level, self._pretty_label(), '\n' ]
  274. for n in self.children:
  275. if isinstance(n, Tree):
  276. l += n._pretty(level+1, indent_str)
  277. else:
  278. l += [ indent_str*(level+1), '%s' % (n,), '\n' ]
  279. return l
  280. def pretty(self, indent_str=' '):
  281. #--
  282. return ''.join(self._pretty(0, indent_str))
  283. def __eq__(self, other):
  284. try:
  285. return self.data == other.data and self.children == other.children
  286. except AttributeError:
  287. return False
  288. def __ne__(self, other):
  289. return not (self == other)
  290. def __hash__(self):
  291. return hash((self.data, tuple(self.children)))
  292. def iter_subtrees(self):
  293. #--
  294. queue = [self]
  295. subtrees = OrderedDict()
  296. for subtree in queue:
  297. subtrees[id(subtree)] = subtree
  298. queue += [c for c in reversed(subtree.children)
  299. if isinstance(c, Tree) and id(c) not in subtrees]
  300. del queue
  301. return reversed(list(subtrees.values()))
  302. def find_pred(self, pred):
  303. #--
  304. return filter(pred, self.iter_subtrees())
  305. def find_data(self, data):
  306. #--
  307. return self.find_pred(lambda t: t.data == data)
  308. from inspect import getmembers, getmro
  309. class Discard(Exception):
  310. #--
  311. pass
  312. ##
  313. class _Decoratable:
  314. #--
  315. @classmethod
  316. def _apply_decorator(cls, decorator, **kwargs):
  317. mro = getmro(cls)
  318. assert mro[0] is cls
  319. libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
  320. for name, value in getmembers(cls):
  321. ##
  322. if name.startswith('_') or (name in libmembers and name not in cls.__dict__):
  323. continue
  324. if not callable(value):
  325. continue
  326. ##
  327. if hasattr(cls.__dict__[name], 'vargs_applied') or hasattr(value, 'vargs_applied'):
  328. continue
  329. static = isinstance(cls.__dict__[name], (staticmethod, classmethod))
  330. setattr(cls, name, decorator(value, static=static, **kwargs))
  331. return cls
  332. def __class_getitem__(cls, _):
  333. return cls
  334. class Transformer(_Decoratable):
  335. #--
  336. __visit_tokens__ = True ##
  337. def __init__(self, visit_tokens=True):
  338. self.__visit_tokens__ = visit_tokens
  339. def _call_userfunc(self, tree, new_children=None):
  340. ##
  341. children = new_children if new_children is not None else tree.children
  342. try:
  343. f = getattr(self, tree.data)
  344. except AttributeError:
  345. return self.__default__(tree.data, children, tree.meta)
  346. else:
  347. try:
  348. wrapper = getattr(f, 'visit_wrapper', None)
  349. if wrapper is not None:
  350. return f.visit_wrapper(f, tree.data, children, tree.meta)
  351. else:
  352. return f(children)
  353. except (GrammarError, Discard):
  354. raise
  355. except Exception as e:
  356. raise VisitError(tree.data, tree, e)
  357. def _call_userfunc_token(self, token):
  358. try:
  359. f = getattr(self, token.type)
  360. except AttributeError:
  361. return self.__default_token__(token)
  362. else:
  363. try:
  364. return f(token)
  365. except (GrammarError, Discard):
  366. raise
  367. except Exception as e:
  368. raise VisitError(token.type, token, e)
  369. def _transform_children(self, children):
  370. for c in children:
  371. try:
  372. if isinstance(c, Tree):
  373. yield self._transform_tree(c)
  374. elif self.__visit_tokens__ and isinstance(c, Token):
  375. yield self._call_userfunc_token(c)
  376. else:
  377. yield c
  378. except Discard:
  379. pass
  380. def _transform_tree(self, tree):
  381. children = list(self._transform_children(tree.children))
  382. return self._call_userfunc(tree, children)
  383. def transform(self, tree):
  384. return self._transform_tree(tree)
  385. def __mul__(self, other):
  386. return TransformerChain(self, other)
  387. def __default__(self, data, children, meta):
  388. #--
  389. return Tree(data, children, meta)
  390. def __default_token__(self, token):
  391. #--
  392. return token
  393. class InlineTransformer(Transformer): ##
  394. def _call_userfunc(self, tree, new_children=None):
  395. ##
  396. children = new_children if new_children is not None else tree.children
  397. try:
  398. f = getattr(self, tree.data)
  399. except AttributeError:
  400. return self.__default__(tree.data, children, tree.meta)
  401. else:
  402. return f(*children)
  403. class TransformerChain(object):
  404. def __init__(self, *transformers):
  405. self.transformers = transformers
  406. def transform(self, tree):
  407. for t in self.transformers:
  408. tree = t.transform(tree)
  409. return tree
  410. def __mul__(self, other):
  411. return TransformerChain(*self.transformers + (other,))
  412. class Transformer_InPlace(Transformer):
  413. #--
  414. def _transform_tree(self, tree): ##
  415. return self._call_userfunc(tree)
  416. def transform(self, tree):
  417. for subtree in tree.iter_subtrees():
  418. subtree.children = list(self._transform_children(subtree.children))
  419. return self._transform_tree(tree)
  420. class Transformer_NonRecursive(Transformer):
  421. #--
  422. def transform(self, tree):
  423. ##
  424. rev_postfix = []
  425. q = [tree]
  426. while q:
  427. t = q.pop()
  428. rev_postfix.append( t )
  429. if isinstance(t, Tree):
  430. q += t.children
  431. ##
  432. stack = []
  433. for x in reversed(rev_postfix):
  434. if isinstance(x, Tree):
  435. size = len(x.children)
  436. if size:
  437. args = stack[-size:]
  438. del stack[-size:]
  439. else:
  440. args = []
  441. stack.append(self._call_userfunc(x, args))
  442. else:
  443. stack.append(x)
  444. t ,= stack ##
  445. return t
  446. class Transformer_InPlaceRecursive(Transformer):
  447. #--
  448. def _transform_tree(self, tree):
  449. tree.children = list(self._transform_children(tree.children))
  450. return self._call_userfunc(tree)
  451. ##
  452. class VisitorBase:
  453. def _call_userfunc(self, tree):
  454. return getattr(self, tree.data, self.__default__)(tree)
  455. def __default__(self, tree):
  456. #--
  457. return tree
  458. def __class_getitem__(cls, _):
  459. return cls
  460. class Visitor(VisitorBase):
  461. #--
  462. def visit(self, tree):
  463. for subtree in tree.iter_subtrees():
  464. self._call_userfunc(subtree)
  465. return tree
  466. def visit_topdown(self,tree):
  467. for subtree in tree.iter_subtrees_topdown():
  468. self._call_userfunc(subtree)
  469. return tree
  470. class Visitor_Recursive(VisitorBase):
  471. #--
  472. def visit(self, tree):
  473. for child in tree.children:
  474. if isinstance(child, Tree):
  475. self.visit(child)
  476. self._call_userfunc(tree)
  477. return tree
  478. def visit_topdown(self,tree):
  479. self._call_userfunc(tree)
  480. for child in tree.children:
  481. if isinstance(child, Tree):
  482. self.visit_topdown(child)
  483. return tree
  484. def visit_children_decor(func):
  485. #--
  486. @wraps(func)
  487. def inner(cls, tree):
  488. values = cls.visit_children(tree)
  489. return func(cls, values)
  490. return inner
  491. class Interpreter(_Decoratable):
  492. #--
  493. def visit(self, tree):
  494. f = getattr(self, tree.data)
  495. wrapper = getattr(f, 'visit_wrapper', None)
  496. if wrapper is not None:
  497. return f.visit_wrapper(f, tree.data, tree.children, tree.meta)
  498. else:
  499. return f(tree)
  500. def visit_children(self, tree):
  501. return [self.visit(child) if isinstance(child, Tree) else child
  502. for child in tree.children]
  503. def __getattr__(self, name):
  504. return self.__default__
  505. def __default__(self, tree):
  506. return self.visit_children(tree)
  507. ##
  508. def _apply_decorator(obj, decorator, **kwargs):
  509. try:
  510. _apply = obj._apply_decorator
  511. except AttributeError:
  512. return decorator(obj, **kwargs)
  513. else:
  514. return _apply(decorator, **kwargs)
  515. def _inline_args__func(func):
  516. @wraps(func)
  517. def create_decorator(_f, with_self):
  518. if with_self:
  519. def f(self, children):
  520. return _f(self, *children)
  521. else:
  522. def f(self, children):
  523. return _f(*children)
  524. return f
  525. return smart_decorator(func, create_decorator)
  526. def inline_args(obj): ##
  527. return _apply_decorator(obj, _inline_args__func)
  528. def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
  529. def create_decorator(_f, with_self):
  530. if with_self:
  531. def f(self, *args, **kwargs):
  532. return _f(self, *args, **kwargs)
  533. else:
  534. def f(self, *args, **kwargs):
  535. return _f(*args, **kwargs)
  536. return f
  537. if static:
  538. f = wraps(func)(create_decorator(func, False))
  539. else:
  540. f = smart_decorator(func, create_decorator)
  541. f.vargs_applied = True
  542. f.visit_wrapper = visit_wrapper
  543. return f
  544. def _vargs_inline(f, data, children, meta):
  545. return f(*children)
  546. def _vargs_meta_inline(f, data, children, meta):
  547. return f(meta, *children)
  548. def _vargs_meta(f, data, children, meta):
  549. return f(children, meta) ##
  550. def _vargs_tree(f, data, children, meta):
  551. return f(Tree(data, children, meta))
  552. def v_args(inline=False, meta=False, tree=False, wrapper=None):
  553. #--
  554. if tree and (meta or inline):
  555. raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.")
  556. func = None
  557. if meta:
  558. if inline:
  559. func = _vargs_meta_inline
  560. else:
  561. func = _vargs_meta
  562. elif inline:
  563. func = _vargs_inline
  564. elif tree:
  565. func = _vargs_tree
  566. if wrapper is not None:
  567. if func is not None:
  568. raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.")
  569. func = wrapper
  570. def _visitor_args_dec(obj):
  571. return _apply_decorator(obj, _visitor_args_func_dec, visit_wrapper=func)
  572. return _visitor_args_dec
  573. class Indenter:
  574. def __init__(self):
  575. self.paren_level = None
  576. self.indent_level = None
  577. assert self.tab_len > 0
  578. def handle_NL(self, token):
  579. if self.paren_level > 0:
  580. return
  581. yield token
  582. indent_str = token.rsplit('\n', 1)[1] ##
  583. indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
  584. if indent > self.indent_level[-1]:
  585. self.indent_level.append(indent)
  586. yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
  587. else:
  588. while indent < self.indent_level[-1]:
  589. self.indent_level.pop()
  590. yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
  591. assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1])
  592. def _process(self, stream):
  593. for token in stream:
  594. if token.type == self.NL_type:
  595. for t in self.handle_NL(token):
  596. yield t
  597. else:
  598. yield token
  599. if token.type in self.OPEN_PAREN_types:
  600. self.paren_level += 1
  601. elif token.type in self.CLOSE_PAREN_types:
  602. self.paren_level -= 1
  603. assert self.paren_level >= 0
  604. while len(self.indent_level) > 1:
  605. self.indent_level.pop()
  606. yield Token(self.DEDENT_type, '')
  607. assert self.indent_level == [0], self.indent_level
  608. def process(self, stream):
  609. self.paren_level = 0
  610. self.indent_level = [0]
  611. return self._process(stream)
  612. ##
  613. @property
  614. def always_accept(self):
  615. return (self.NL_type,)
  616. class Symbol(Serialize):
  617. __slots__ = ('name',)
  618. is_term = NotImplemented
  619. def __init__(self, name):
  620. self.name = name
  621. def __eq__(self, other):
  622. assert isinstance(other, Symbol), other
  623. return self.is_term == other.is_term and self.name == other.name
  624. def __ne__(self, other):
  625. return not (self == other)
  626. def __hash__(self):
  627. return hash(self.name)
  628. def __repr__(self):
  629. return '%s(%r)' % (type(self).__name__, self.name)
  630. fullrepr = property(__repr__)
  631. class Terminal(Symbol):
  632. __serialize_fields__ = 'name', 'filter_out'
  633. is_term = True
  634. def __init__(self, name, filter_out=False):
  635. self.name = name
  636. self.filter_out = filter_out
  637. @property
  638. def fullrepr(self):
  639. return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
  640. class NonTerminal(Symbol):
  641. __serialize_fields__ = 'name',
  642. is_term = False
  643. class RuleOptions(Serialize):
  644. __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
  645. def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()):
  646. self.keep_all_tokens = keep_all_tokens
  647. self.expand1 = expand1
  648. self.priority = priority
  649. self.template_source = template_source
  650. self.empty_indices = empty_indices
  651. def __repr__(self):
  652. return 'RuleOptions(%r, %r, %r, %r)' % (
  653. self.keep_all_tokens,
  654. self.expand1,
  655. self.priority,
  656. self.template_source
  657. )
  658. class Rule(Serialize):
  659. #--
  660. __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
  661. __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
  662. __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
  663. def __init__(self, origin, expansion, order=0, alias=None, options=None):
  664. self.origin = origin
  665. self.expansion = expansion
  666. self.alias = alias
  667. self.order = order
  668. self.options = options or RuleOptions()
  669. self._hash = hash((self.origin, tuple(self.expansion)))
  670. def _deserialize(self):
  671. self._hash = hash((self.origin, tuple(self.expansion)))
  672. def __str__(self):
  673. return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
  674. def __repr__(self):
  675. return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
  676. def __hash__(self):
  677. return self._hash
  678. def __eq__(self, other):
  679. if not isinstance(other, Rule):
  680. return False
  681. return self.origin == other.origin and self.expansion == other.expansion
  682. from copy import copy
  683. class Pattern(Serialize):
  684. def __init__(self, value, flags=()):
  685. self.value = value
  686. self.flags = frozenset(flags)
  687. def __repr__(self):
  688. return repr(self.to_regexp())
  689. ##
  690. def __hash__(self):
  691. return hash((type(self), self.value, self.flags))
  692. def __eq__(self, other):
  693. return type(self) == type(other) and self.value == other.value and self.flags == other.flags
  694. def to_regexp(self):
  695. raise NotImplementedError()
  696. if Py36:
  697. ##
  698. def _get_flags(self, value):
  699. for f in self.flags:
  700. value = ('(?%s:%s)' % (f, value))
  701. return value
  702. else:
  703. def _get_flags(self, value):
  704. for f in self.flags:
  705. value = ('(?%s)' % f) + value
  706. return value
  707. class PatternStr(Pattern):
  708. __serialize_fields__ = 'value', 'flags'
  709. type = "str"
  710. def to_regexp(self):
  711. return self._get_flags(re.escape(self.value))
  712. @property
  713. def min_width(self):
  714. return len(self.value)
  715. max_width = min_width
  716. class PatternRE(Pattern):
  717. __serialize_fields__ = 'value', 'flags', '_width'
  718. type = "re"
  719. def to_regexp(self):
  720. return self._get_flags(self.value)
  721. _width = None
  722. def _get_width(self):
  723. if self._width is None:
  724. self._width = get_regexp_width(self.to_regexp())
  725. return self._width
  726. @property
  727. def min_width(self):
  728. return self._get_width()[0]
  729. @property
  730. def max_width(self):
  731. return self._get_width()[1]
  732. class TerminalDef(Serialize):
  733. __serialize_fields__ = 'name', 'pattern', 'priority'
  734. __serialize_namespace__ = PatternStr, PatternRE
  735. def __init__(self, name, pattern, priority=1):
  736. assert isinstance(pattern, Pattern), pattern
  737. self.name = name
  738. self.pattern = pattern
  739. self.priority = priority
  740. def __repr__(self):
  741. return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
  742. class Token(Str):
  743. #--
  744. __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
  745. def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
  746. try:
  747. self = super(Token, cls).__new__(cls, value)
  748. except UnicodeDecodeError:
  749. value = value.decode('latin1')
  750. self = super(Token, cls).__new__(cls, value)
  751. self.type = type_
  752. self.pos_in_stream = pos_in_stream
  753. self.value = value
  754. self.line = line
  755. self.column = column
  756. self.end_line = end_line
  757. self.end_column = end_column
  758. self.end_pos = end_pos
  759. return self
  760. def update(self, type_=None, value=None):
  761. return Token.new_borrow_pos(
  762. type_ if type_ is not None else self.type,
  763. value if value is not None else self.value,
  764. self
  765. )
  766. @classmethod
  767. def new_borrow_pos(cls, type_, value, borrow_t):
  768. return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
  769. def __reduce__(self):
  770. return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))
  771. def __repr__(self):
  772. return 'Token(%s, %r)' % (self.type, self.value)
  773. def __deepcopy__(self, memo):
  774. return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)
  775. def __eq__(self, other):
  776. if isinstance(other, Token) and self.type != other.type:
  777. return False
  778. return Str.__eq__(self, other)
  779. __hash__ = Str.__hash__
  780. class LineCounter:
  781. def __init__(self, newline_char):
  782. self.newline_char = newline_char
  783. self.char_pos = 0
  784. self.line = 1
  785. self.column = 1
  786. self.line_start_pos = 0
  787. def feed(self, token, test_newline=True):
  788. #--
  789. if test_newline:
  790. newlines = token.count(self.newline_char)
  791. if newlines:
  792. self.line += newlines
  793. self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
  794. self.char_pos += len(token)
  795. self.column = self.char_pos - self.line_start_pos + 1
  796. class _Lex:
  797. #--
  798. def __init__(self, lexer, state=None):
  799. self.lexer = lexer
  800. self.state = state
  801. def lex(self, stream, newline_types, ignore_types):
  802. newline_types = frozenset(newline_types)
  803. ignore_types = frozenset(ignore_types)
  804. line_ctr = LineCounter('\n' if not self.lexer.use_bytes else b'\n')
  805. last_token = None
  806. while line_ctr.char_pos < len(stream):
  807. lexer = self.lexer
  808. res = lexer.match(stream, line_ctr.char_pos)
  809. if not res:
  810. allowed = {v for m, tfi in lexer.mres for v in tfi.values()} - ignore_types
  811. if not allowed:
  812. allowed = {"<END-OF-FILE>"}
  813. raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token])
  814. value, type_ = res
  815. if type_ not in ignore_types:
  816. t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
  817. line_ctr.feed(value, type_ in newline_types)
  818. t.end_line = line_ctr.line
  819. t.end_column = line_ctr.column
  820. t.end_pos = line_ctr.char_pos
  821. if t.type in lexer.callback:
  822. t = lexer.callback[t.type](t)
  823. if not isinstance(t, Token):
  824. raise ValueError("Callbacks must return a token (returned %r)" % t)
  825. yield t
  826. last_token = t
  827. else:
  828. if type_ in lexer.callback:
  829. t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
  830. lexer.callback[type_](t2)
  831. line_ctr.feed(value, type_ in newline_types)
  832. class UnlessCallback:
  833. def __init__(self, mres):
  834. self.mres = mres
  835. def __call__(self, t):
  836. for mre, type_from_index in self.mres:
  837. m = mre.match(t.value)
  838. if m:
  839. t.type = type_from_index[m.lastindex]
  840. break
  841. return t
  842. class CallChain:
  843. def __init__(self, callback1, callback2, cond):
  844. self.callback1 = callback1
  845. self.callback2 = callback2
  846. self.cond = cond
  847. def __call__(self, t):
  848. t2 = self.callback1(t)
  849. return self.callback2(t) if self.cond(t2) else t2
  850. def _create_unless(terminals, g_regex_flags, re_, use_bytes):
  851. tokens_by_type = classify(terminals, lambda t: type(t.pattern))
  852. assert len(tokens_by_type) <= 2, tokens_by_type.keys()
  853. embedded_strs = set()
  854. callback = {}
  855. for retok in tokens_by_type.get(PatternRE, []):
  856. unless = [] ##
  857. for strtok in tokens_by_type.get(PatternStr, []):
  858. if strtok.priority > retok.priority:
  859. continue
  860. s = strtok.pattern.value
  861. m = re_.match(retok.pattern.to_regexp(), s, g_regex_flags)
  862. if m and m.group(0) == s:
  863. unless.append(strtok)
  864. if strtok.pattern.flags <= retok.pattern.flags:
  865. embedded_strs.add(strtok)
  866. if unless:
  867. callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes))
  868. terminals = [t for t in terminals if t not in embedded_strs]
  869. return terminals, callback
  870. def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes):
  871. ##
  872. ##
  873. ##
  874. postfix = '$' if match_whole else ''
  875. mres = []
  876. while terminals:
  877. pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size])
  878. if use_bytes:
  879. pattern = pattern.encode('latin-1')
  880. try:
  881. mre = re_.compile(pattern, g_regex_flags)
  882. except AssertionError: ##
  883. return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes)
  884. ##
  885. mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
  886. terminals = terminals[max_size:]
  887. return mres
  888. def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False):
  889. return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes)
  890. def _regexp_has_newline(r):
  891. #--
  892. return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
  893. class Lexer(object):
  894. #--
  895. lex = NotImplemented
  896. class TraditionalLexer(Lexer):
  897. def __init__(self, conf):
  898. terminals = list(conf.tokens)
  899. assert all(isinstance(t, TerminalDef) for t in terminals), terminals
  900. self.re = conf.re_module
  901. if not conf.skip_validation:
  902. ##
  903. for t in terminals:
  904. try:
  905. self.re.compile(t.pattern.to_regexp(), conf.g_regex_flags)
  906. except self.re.error:
  907. raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
  908. if t.pattern.min_width == 0:
  909. raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
  910. assert set(conf.ignore) <= {t.name for t in terminals}
  911. ##
  912. self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())]
  913. self.ignore_types = list(conf.ignore)
  914. terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
  915. self.terminals = terminals
  916. self.user_callbacks = conf.callbacks
  917. self.g_regex_flags = conf.g_regex_flags
  918. self.use_bytes = conf.use_bytes
  919. self._mres = None
  920. ##
  921. def _build(self):
  922. terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re, use_bytes=self.use_bytes)
  923. assert all(self.callback.values())
  924. for type_, f in self.user_callbacks.items():
  925. if type_ in self.callback:
  926. ##
  927. self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
  928. else:
  929. self.callback[type_] = f
  930. self._mres = build_mres(terminals, self.g_regex_flags, self.re, self.use_bytes)
  931. @property
  932. def mres(self):
  933. if self._mres is None:
  934. self._build()
  935. return self._mres
  936. def match(self, stream, pos):
  937. for mre, type_from_index in self.mres:
  938. m = mre.match(stream, pos)
  939. if m:
  940. return m.group(0), type_from_index[m.lastindex]
  941. def lex(self, stream):
  942. return _Lex(self).lex(stream, self.newline_types, self.ignore_types)
  943. class ContextualLexer(Lexer):
  944. def __init__(self, conf, states, always_accept=()):
  945. terminals = list(conf.tokens)
  946. tokens_by_name = {}
  947. for t in terminals:
  948. assert t.name not in tokens_by_name, t
  949. tokens_by_name[t.name] = t
  950. trad_conf = copy(conf)
  951. trad_conf.tokens = terminals
  952. lexer_by_tokens = {}
  953. self.lexers = {}
  954. for state, accepts in states.items():
  955. key = frozenset(accepts)
  956. try:
  957. lexer = lexer_by_tokens[key]
  958. except KeyError:
  959. accepts = set(accepts) | set(conf.ignore) | set(always_accept)
  960. state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name]
  961. lexer_conf = copy(trad_conf)
  962. lexer_conf.tokens = state_tokens
  963. lexer = TraditionalLexer(lexer_conf)
  964. lexer_by_tokens[key] = lexer
  965. self.lexers[state] = lexer
  966. assert trad_conf.tokens is terminals
  967. self.root_lexer = TraditionalLexer(trad_conf)
  968. def lex(self, stream, get_parser_state):
  969. parser_state = get_parser_state()
  970. l = _Lex(self.lexers[parser_state], parser_state)
  971. try:
  972. for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
  973. yield x
  974. parser_state = get_parser_state()
  975. l.lexer = self.lexers[parser_state]
  976. l.state = parser_state ##
  977. except UnexpectedCharacters as e:
  978. ##
  979. ##
  980. ##
  981. root_match = self.root_lexer.match(stream, e.pos_in_stream)
  982. if not root_match:
  983. raise
  984. value, type_ = root_match
  985. t = Token(type_, value, e.pos_in_stream, e.line, e.column)
  986. raise UnexpectedToken(t, e.allowed, state=e.state)
  987. class LexerConf(Serialize):
  988. __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes'
  989. __serialize_namespace__ = TerminalDef,
  990. def __init__(self, tokens, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False):
  991. self.tokens = tokens ##
  992. self.ignore = ignore
  993. self.postlex = postlex
  994. self.callbacks = callbacks or {}
  995. self.g_regex_flags = g_regex_flags
  996. self.re_module = re_module
  997. self.skip_validation = skip_validation
  998. self.use_bytes = use_bytes
  999. from functools import partial, wraps
  1000. from itertools import repeat, product
  1001. class ExpandSingleChild:
  1002. def __init__(self, node_builder):
  1003. self.node_builder = node_builder
  1004. def __call__(self, children):
  1005. if len(children) == 1:
  1006. return children[0]
  1007. else:
  1008. return self.node_builder(children)
  1009. class PropagatePositions:
  1010. def __init__(self, node_builder):
  1011. self.node_builder = node_builder
  1012. def __call__(self, children):
  1013. res = self.node_builder(children)
  1014. ##
  1015. if isinstance(res, Tree):
  1016. res_meta = res.meta
  1017. for c in children:
  1018. if isinstance(c, Tree):
  1019. child_meta = c.meta
  1020. if not child_meta.empty:
  1021. res_meta.line = child_meta.line
  1022. res_meta.column = child_meta.column
  1023. res_meta.start_pos = child_meta.start_pos
  1024. res_meta.empty = False
  1025. break
  1026. elif isinstance(c, Token):
  1027. res_meta.line = c.line
  1028. res_meta.column = c.column
  1029. res_meta.start_pos = c.pos_in_stream
  1030. res_meta.empty = False
  1031. break
  1032. for c in reversed(children):
  1033. if isinstance(c, Tree):
  1034. child_meta = c.meta
  1035. if not child_meta.empty:
  1036. res_meta.end_line = child_meta.end_line
  1037. res_meta.end_column = child_meta.end_column
  1038. res_meta.end_pos = child_meta.end_pos
  1039. res_meta.empty = False
  1040. break
  1041. elif isinstance(c, Token):
  1042. res_meta.end_line = c.end_line
  1043. res_meta.end_column = c.end_column
  1044. res_meta.end_pos = c.end_pos
  1045. res_meta.empty = False
  1046. break
  1047. return res
  1048. class ChildFilter:
  1049. def __init__(self, to_include, append_none, node_builder):
  1050. self.node_builder = node_builder
  1051. self.to_include = to_include
  1052. self.append_none = append_none
  1053. def __call__(self, children):
  1054. filtered = []
  1055. for i, to_expand, add_none in self.to_include:
  1056. if add_none:
  1057. filtered += [None] * add_none
  1058. if to_expand:
  1059. filtered += children[i].children
  1060. else:
  1061. filtered.append(children[i])
  1062. if self.append_none:
  1063. filtered += [None] * self.append_none
  1064. return self.node_builder(filtered)
  1065. class ChildFilterLALR(ChildFilter):
  1066. #--
  1067. def __call__(self, children):
  1068. filtered = []
  1069. for i, to_expand, add_none in self.to_include:
  1070. if add_none:
  1071. filtered += [None] * add_none
  1072. if to_expand:
  1073. if filtered:
  1074. filtered += children[i].children
  1075. else: ##
  1076. filtered = children[i].children
  1077. else:
  1078. filtered.append(children[i])
  1079. if self.append_none:
  1080. filtered += [None] * self.append_none
  1081. return self.node_builder(filtered)
  1082. class ChildFilterLALR_NoPlaceholders(ChildFilter):
  1083. #--
  1084. def __init__(self, to_include, node_builder):
  1085. self.node_builder = node_builder
  1086. self.to_include = to_include
  1087. def __call__(self, children):
  1088. filtered = []
  1089. for i, to_expand in self.to_include:
  1090. if to_expand:
  1091. if filtered:
  1092. filtered += children[i].children
  1093. else: ##
  1094. filtered = children[i].children
  1095. else:
  1096. filtered.append(children[i])
  1097. return self.node_builder(filtered)
  1098. def _should_expand(sym):
  1099. return not sym.is_term and sym.name.startswith('_')
  1100. def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
  1101. ##
  1102. if _empty_indices:
  1103. assert _empty_indices.count(False) == len(expansion)
  1104. s = ''.join(str(int(b)) for b in _empty_indices)
  1105. empty_indices = [len(ones) for ones in s.split('0')]
  1106. assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
  1107. else:
  1108. empty_indices = [0] * (len(expansion)+1)
  1109. to_include = []
  1110. nones_to_add = 0
  1111. for i, sym in enumerate(expansion):
  1112. nones_to_add += empty_indices[i]
  1113. if keep_all_tokens or not (sym.is_term and sym.filter_out):
  1114. to_include.append((i, _should_expand(sym), nones_to_add))
  1115. nones_to_add = 0
  1116. nones_to_add += empty_indices[len(expansion)]
  1117. if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
  1118. if _empty_indices or ambiguous:
  1119. return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
  1120. else:
  1121. ##
  1122. return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
  1123. class AmbiguousExpander:
  1124. #--
  1125. def __init__(self, to_expand, tree_class, node_builder):
  1126. self.node_builder = node_builder
  1127. self.tree_class = tree_class
  1128. self.to_expand = to_expand
  1129. def __call__(self, children):
  1130. def _is_ambig_tree(child):
  1131. return hasattr(child, 'data') and child.data == '_ambig'
  1132. ##
  1133. ##
  1134. ##
  1135. ##
  1136. ambiguous = []
  1137. for i, child in enumerate(children):
  1138. if _is_ambig_tree(child):
  1139. if i in self.to_expand:
  1140. ambiguous.append(i)
  1141. to_expand = [j for j, grandchild in enumerate(child.children) if _is_ambig_tree(grandchild)]
  1142. child.expand_kids_by_index(*to_expand)
  1143. if not ambiguous:
  1144. return self.node_builder(children)
  1145. expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ]
  1146. return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])
  1147. def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
  1148. to_expand = [i for i, sym in enumerate(expansion)
  1149. if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
  1150. if to_expand:
  1151. return partial(AmbiguousExpander, to_expand, tree_class)
  1152. def ptb_inline_args(func):
  1153. @wraps(func)
  1154. def f(children):
  1155. return func(*children)
  1156. return f
  1157. def inplace_transformer(func):
  1158. @wraps(func)
  1159. def f(children):
  1160. ##
  1161. tree = Tree(func.__name__, children)
  1162. return func(tree)
  1163. return f
  1164. def apply_visit_wrapper(func, name, wrapper):
  1165. if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
  1166. raise NotImplementedError("Meta args not supported for internal transformer")
  1167. @wraps(func)
  1168. def f(children):
  1169. return wrapper(func, name, children, None)
  1170. return f
  1171. class ParseTreeBuilder:
  1172. def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
  1173. self.tree_class = tree_class
  1174. self.propagate_positions = propagate_positions
  1175. self.always_keep_all_tokens = keep_all_tokens
  1176. self.ambiguous = ambiguous
  1177. self.maybe_placeholders = maybe_placeholders
  1178. self.rule_builders = list(self._init_builders(rules))
  1179. def _init_builders(self, rules):
  1180. for rule in rules:
  1181. options = rule.options
  1182. keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens
  1183. expand_single_child = options.expand1
  1184. wrapper_chain = list(filter(None, [
  1185. (expand_single_child and not rule.alias) and ExpandSingleChild,
  1186. maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
  1187. self.propagate_positions and PropagatePositions,
  1188. self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
  1189. ]))
  1190. yield rule, wrapper_chain
  1191. def create_callback(self, transformer=None):
  1192. callbacks = {}
  1193. for rule, wrapper_chain in self.rule_builders:
  1194. user_callback_name = rule.alias or rule.options.template_source or rule.origin.name
  1195. try:
  1196. f = getattr(transformer, user_callback_name)
  1197. ##
  1198. wrapper = getattr(f, 'visit_wrapper', None)
  1199. if wrapper is not None:
  1200. f = apply_visit_wrapper(f, user_callback_name, wrapper)
  1201. else:
  1202. if isinstance(transformer, InlineTransformer):
  1203. f = ptb_inline_args(f)
  1204. elif isinstance(transformer, Transformer_InPlace):
  1205. f = inplace_transformer(f)
  1206. except AttributeError:
  1207. f = partial(self.tree_class, user_callback_name)
  1208. for w in wrapper_chain:
  1209. f = w(f)
  1210. if rule in callbacks:
  1211. raise GrammarError("Rule '%s' already exists" % (rule,))
  1212. callbacks[rule] = f
  1213. return callbacks
  1214. class LALR_Parser(object):
  1215. def __init__(self, parser_conf, debug=False):
  1216. assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization"
  1217. analysis = LALR_Analyzer(parser_conf, debug=debug)
  1218. analysis.compute_lalr()
  1219. callbacks = parser_conf.callbacks
  1220. self._parse_table = analysis.parse_table
  1221. self.parser_conf = parser_conf
  1222. self.parser = _Parser(analysis.parse_table, callbacks, debug)
  1223. @classmethod
  1224. def deserialize(cls, data, memo, callbacks):
  1225. inst = cls.__new__(cls)
  1226. inst._parse_table = IntParseTable.deserialize(data, memo)
  1227. inst.parser = _Parser(inst._parse_table, callbacks)
  1228. return inst
  1229. def serialize(self, memo):
  1230. return self._parse_table.serialize(memo)
  1231. def parse(self, *args):
  1232. return self.parser.parse(*args)
  1233. class _Parser:
  1234. def __init__(self, parse_table, callbacks, debug=False):
  1235. self.parse_table = parse_table
  1236. self.callbacks = callbacks
  1237. self.debug = debug
  1238. def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None):
  1239. token = None
  1240. stream = iter(seq)
  1241. states = self.parse_table.states
  1242. start_state = self.parse_table.start_states[start]
  1243. end_state = self.parse_table.end_states[start]
  1244. state_stack = state_stack or [start_state]
  1245. value_stack = value_stack or []
  1246. if set_state: set_state(start_state)
  1247. def get_action(token):
  1248. state = state_stack[-1]
  1249. try:
  1250. return states[state][token.type]
  1251. except KeyError:
  1252. expected = {s for s in states[state].keys() if s.isupper()}
  1253. try:
  1254. puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state)
  1255. except NameError: ##
  1256. puppet = None
  1257. raise UnexpectedToken(token, expected, state=state, puppet=puppet)
  1258. def reduce(rule):
  1259. size = len(rule.expansion)
  1260. if size:
  1261. s = value_stack[-size:]
  1262. del state_stack[-size:]
  1263. del value_stack[-size:]
  1264. else:
  1265. s = []
  1266. value = self.callbacks[rule](s)
  1267. _action, new_state = states[state_stack[-1]][rule.origin.name]
  1268. assert _action is Shift
  1269. state_stack.append(new_state)
  1270. value_stack.append(value)
  1271. ##
  1272. try:
  1273. for token in stream:
  1274. while True:
  1275. action, arg = get_action(token)
  1276. assert arg != end_state
  1277. if action is Shift:
  1278. state_stack.append(arg)
  1279. value_stack.append(token)
  1280. if set_state: set_state(arg)
  1281. break ##
  1282. else:
  1283. reduce(arg)
  1284. except Exception as e:
  1285. if self.debug:
  1286. print("")
  1287. print("STATE STACK DUMP")
  1288. print("----------------")
  1289. for i, s in enumerate(state_stack):
  1290. print('%d)' % i , s)
  1291. print("")
  1292. raise
  1293. token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
  1294. while True:
  1295. _action, arg = get_action(token)
  1296. assert(_action is Reduce)
  1297. reduce(arg)
  1298. if state_stack[-1] == end_state:
  1299. return value_stack[-1]
  1300. class Action:
  1301. def __init__(self, name):
  1302. self.name = name
  1303. def __str__(self):
  1304. return self.name
  1305. def __repr__(self):
  1306. return str(self)
  1307. Shift = Action('Shift')
  1308. Reduce = Action('Reduce')
  1309. class ParseTable:
  1310. def __init__(self, states, start_states, end_states):
  1311. self.states = states
  1312. self.start_states = start_states
  1313. self.end_states = end_states
  1314. def serialize(self, memo):
  1315. tokens = Enumerator()
  1316. rules = Enumerator()
  1317. states = {
  1318. state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
  1319. for token, (action, arg) in actions.items()}
  1320. for state, actions in self.states.items()
  1321. }
  1322. return {
  1323. 'tokens': tokens.reversed(),
  1324. 'states': states,
  1325. 'start_states': self.start_states,
  1326. 'end_states': self.end_states,
  1327. }
  1328. @classmethod
  1329. def deserialize(cls, data, memo):
  1330. tokens = data['tokens']
  1331. states = {
  1332. state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
  1333. for token, (action, arg) in actions.items()}
  1334. for state, actions in data['states'].items()
  1335. }
  1336. return cls(states, data['start_states'], data['end_states'])
  1337. class IntParseTable(ParseTable):
  1338. @classmethod
  1339. def from_ParseTable(cls, parse_table):
  1340. enum = list(parse_table.states)
  1341. state_to_idx = {s:i for i,s in enumerate(enum)}
  1342. int_states = {}
  1343. for s, la in parse_table.states.items():
  1344. la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
  1345. for k,v in la.items()}
  1346. int_states[ state_to_idx[s] ] = la
  1347. start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
  1348. end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
  1349. return cls(int_states, start_states, end_states)
  1350. def get_frontend(parser, lexer):
  1351. if parser=='lalr':
  1352. if lexer is None:
  1353. raise ValueError('The LALR parser requires use of a lexer')
  1354. elif lexer == 'standard':
  1355. return LALR_TraditionalLexer
  1356. elif lexer == 'contextual':
  1357. return LALR_ContextualLexer
  1358. elif issubclass(lexer, Lexer):
  1359. class LALR_CustomLexerWrapper(LALR_CustomLexer):
  1360. def __init__(self, lexer_conf, parser_conf, options=None):
  1361. super(LALR_CustomLexerWrapper, self).__init__(
  1362. lexer, lexer_conf, parser_conf, options=options)
  1363. def init_lexer(self):
  1364. self.lexer = lexer(self.lexer_conf)
  1365. return LALR_CustomLexerWrapper
  1366. else:
  1367. raise ValueError('Unknown lexer: %s' % lexer)
  1368. elif parser=='earley':
  1369. if lexer=='standard':
  1370. return Earley
  1371. elif lexer=='dynamic':
  1372. return XEarley
  1373. elif lexer=='dynamic_complete':
  1374. return XEarley_CompleteLex
  1375. elif lexer=='contextual':
  1376. raise ValueError('The Earley parser does not support the contextual lexer')
  1377. else:
  1378. raise ValueError('Unknown lexer: %s' % lexer)
  1379. elif parser == 'cyk':
  1380. if lexer == 'standard':
  1381. return CYK
  1382. else:
  1383. raise ValueError('The CYK parser requires using the standard lexer.')
  1384. else:
  1385. raise ValueError('Unknown parser: %s' % parser)
  1386. class _ParserFrontend(Serialize):
  1387. def _parse(self, input, start, *args):
  1388. if start is None:
  1389. start = self.start
  1390. if len(start) > 1:
  1391. raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
  1392. start ,= start
  1393. return self.parser.parse(input, start, *args)
  1394. def _get_lexer_callbacks(transformer, terminals):
  1395. result = {}
  1396. for terminal in terminals:
  1397. callback = getattr(transformer, terminal.name, None)
  1398. if callback is not None:
  1399. result[terminal.name] = callback
  1400. return result
  1401. class WithLexer(_ParserFrontend):
  1402. lexer = None
  1403. parser = None
  1404. lexer_conf = None
  1405. start = None
  1406. __serialize_fields__ = 'parser', 'lexer_conf', 'start'
  1407. __serialize_namespace__ = LexerConf,
  1408. def __init__(self, lexer_conf, parser_conf, options=None):
  1409. self.lexer_conf = lexer_conf
  1410. self.start = parser_conf.start
  1411. self.postlex = lexer_conf.postlex
  1412. @classmethod
  1413. def deserialize(cls, data, memo, callbacks, postlex, transformer, re_module):
  1414. inst = super(WithLexer, cls).deserialize(data, memo)
  1415. inst.postlex = postlex
  1416. inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks)
  1417. terminals = [item for item in memo.values() if isinstance(item, TerminalDef)]
  1418. inst.lexer_conf.callbacks = _get_lexer_callbacks(transformer, terminals)
  1419. inst.lexer_conf.re_module = re_module
  1420. inst.lexer_conf.skip_validation=True
  1421. inst.init_lexer()
  1422. return inst
  1423. def _serialize(self, data, memo):
  1424. data['parser'] = data['parser'].serialize(memo)
  1425. def lex(self, *args):
  1426. stream = self.lexer.lex(*args)
  1427. return self.postlex.process(stream) if self.postlex else stream
  1428. def parse(self, text, start=None):
  1429. token_stream = self.lex(text)
  1430. return self._parse(token_stream, start)
  1431. def init_traditional_lexer(self):
  1432. self.lexer = TraditionalLexer(self.lexer_conf)
  1433. class LALR_WithLexer(WithLexer):
  1434. def __init__(self, lexer_conf, parser_conf, options=None):
  1435. debug = options.debug if options else False
  1436. self.parser = LALR_Parser(parser_conf, debug=debug)
  1437. WithLexer.__init__(self, lexer_conf, parser_conf, options)
  1438. self.init_lexer()
  1439. def init_lexer(self, **kw):
  1440. raise NotImplementedError()
  1441. class LALR_TraditionalLexer(LALR_WithLexer):
  1442. def init_lexer(self):
  1443. self.init_traditional_lexer()
  1444. class LALR_ContextualLexer(LALR_WithLexer):
  1445. def init_lexer(self):
  1446. states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
  1447. always_accept = self.postlex.always_accept if self.postlex else ()
  1448. self.lexer = ContextualLexer(self.lexer_conf, states, always_accept=always_accept)
  1449. def parse(self, text, start=None):
  1450. parser_state = [None]
  1451. def set_parser_state(s):
  1452. parser_state[0] = s
  1453. token_stream = self.lex(text, lambda: parser_state[0])
  1454. return self._parse(token_stream, start, set_parser_state)
  1455. class LarkOptions(Serialize):
  1456. #--
  1457. OPTIONS_DOC = """
  1458. **=== General ===**
  1459. start
  1460. The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
  1461. debug
  1462. Display debug information, such as warnings (default: False)
  1463. transformer
  1464. Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
  1465. propagate_positions
  1466. Propagates (line, column, end_line, end_column) attributes into all tree branches.
  1467. maybe_placeholders
  1468. When True, the ``[]`` operator returns ``None`` when not matched.
  1469. When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
  1470. (default: ``False``. Recommended to set to ``True``)
  1471. regex
  1472. When True, uses the ``regex`` module instead of the stdlib ``re``.
  1473. cache
  1474. Cache the results of the Lark grammar analysis, for 2x to 3x faster loading. LALR only for now.
  1475. - When ``False``, does nothing (default)
  1476. - When ``True``, caches to a temporary file in the local directory
  1477. - When given a string, caches to the path pointed by the string
  1478. g_regex_flags
  1479. Flags that are applied to all terminals (both regex and strings)
  1480. keep_all_tokens
  1481. Prevent the tree builder from automagically removing "punctuation" tokens (default: False)
  1482. **=== Algorithm ===**
  1483. parser
  1484. Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
  1485. (there is also a "cyk" option for legacy)
  1486. lexer
  1487. Decides whether or not to use a lexer stage
  1488. - "auto" (default): Choose for me based on the parser
  1489. - "standard": Use a standard lexer
  1490. - "contextual": Stronger lexer (only works with parser="lalr")
  1491. - "dynamic": Flexible and powerful (only with parser="earley")
  1492. - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
  1493. ambiguity
  1494. Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
  1495. - "resolve" - The parser will automatically choose the simplest derivation
  1496. (it chooses consistently: greedy for tokens, non-greedy for rules)
  1497. - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
  1498. **=== Misc. / Domain Specific ===**
  1499. postlex
  1500. Lexer post-processing (Default: None). Only works with the standard and contextual lexers.
  1501. priority
  1502. How priorities should be evaluated - auto, none, normal, invert (Default: auto)
  1503. lexer_callbacks
  1504. Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
  1505. use_bytes
  1506. Accept an input of type ``bytes`` instead of ``str`` (Python 3 only).
  1507. edit_terminals
  1508. A callback for editing the terminals before parse.
  1509. """
  1510. if __doc__:
  1511. __doc__ += OPTIONS_DOC
  1512. _defaults = {
  1513. 'debug': False,
  1514. 'keep_all_tokens': False,
  1515. 'tree_class': None,
  1516. 'cache': False,
  1517. 'postlex': None,
  1518. 'parser': 'earley',
  1519. 'lexer': 'auto',
  1520. 'transformer': None,
  1521. 'start': 'start',
  1522. 'priority': 'auto',
  1523. 'ambiguity': 'auto',
  1524. 'regex': False,
  1525. 'propagate_positions': False,
  1526. 'lexer_callbacks': {},
  1527. 'maybe_placeholders': False,
  1528. 'edit_terminals': None,
  1529. 'g_regex_flags': 0,
  1530. 'use_bytes': False,
  1531. }
  1532. def __init__(self, options_dict):
  1533. o = dict(options_dict)
  1534. options = {}
  1535. for name, default in self._defaults.items():
  1536. if name in o:
  1537. value = o.pop(name)
  1538. if isinstance(default, bool) and name not in ('cache', 'use_bytes'):
  1539. value = bool(value)
  1540. else:
  1541. value = default
  1542. options[name] = value
  1543. if isinstance(options['start'], STRING_TYPE):
  1544. options['start'] = [options['start']]
  1545. self.__dict__['options'] = options
  1546. assert self.parser in ('earley', 'lalr', 'cyk', None)
  1547. if self.parser == 'earley' and self.transformer:
  1548. raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm. '
  1549. 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
  1550. if o:
  1551. raise ValueError("Unknown options: %s" % o.keys())
  1552. def __getattr__(self, name):
  1553. try:
  1554. return self.options[name]
  1555. except KeyError as e:
  1556. raise AttributeError(e)
  1557. def __setattr__(self, name, value):
  1558. assert name in self.options
  1559. self.options[name] = value
  1560. def serialize(self, memo):
  1561. return self.options
  1562. @classmethod
  1563. def deserialize(cls, data, memo):
  1564. return cls(data)
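# Editor's note: LarkOptions.serialize() returns the plain options dict and deserialize()
# rebuilds the object from it; this is the 'options' mapping embedded in the DATA blob at
# the bottom of this module.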
  1565. class Lark(Serialize):
  1566. #--
  1567. def __init__(self, grammar, **options):
  1568. self.options = LarkOptions(options)
  1569. ##
  1570. use_regex = self.options.regex
  1571. if use_regex:
  1572. if regex:
  1573. re_module = regex
  1574. else:
  1575. raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.')
  1576. else:
  1577. re_module = re
  1578. ##
  1579. try:
  1580. self.source = grammar.name
  1581. except AttributeError:
  1582. self.source = '<string>'
  1583. ##
  1584. try:
  1585. read = grammar.read
  1586. except AttributeError:
  1587. pass
  1588. else:
  1589. grammar = read()
  1590. assert isinstance(grammar, STRING_TYPE)
  1591. self.grammar_source = grammar
  1592. if self.options.use_bytes:
  1593. if not isascii(grammar):
  1594. raise ValueError("Grammar must be ascii only, when use_bytes=True")
  1595. if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
  1596. raise NotImplementedError("`use_bytes=True` may have issues on python2. "
  1597. "Use `use_bytes='force'` to use it at your own risk.")
  1598. cache_fn = None
  1599. if self.options.cache:
  1600. if self.options.parser != 'lalr':
  1601. raise NotImplementedError("cache only works with parser='lalr' for now")
  1602. if isinstance(self.options.cache, STRING_TYPE):
  1603. cache_fn = self.options.cache
  1604. else:
  1605. if self.options.cache is not True:
  1606. raise ValueError("cache argument must be bool or str")
  1607. unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals')
  1608. from . import __version__
  1609. options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
  1610. s = grammar + options_str + __version__
  1611. md5 = hashlib.md5(s.encode()).hexdigest()
  1612. cache_fn = '.lark_cache_%s.tmp' % md5
  1613. if FS.exists(cache_fn):
  1614. logger.debug('Loading grammar from cache: %s', cache_fn)
  1615. with FS.open(cache_fn, 'rb') as f:
  1616. self._load(f, self.options.transformer, self.options.postlex)
  1617. return
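# Editor's note: on a cache hit the pickled parser is loaded above and __init__ returns
# early, skipping grammar analysis entirely. The cache key is an MD5 of the grammar text,
# the hashable options and the Lark version, so changing any of them yields a new cache
# file. A hedged usage sketch:
#
#     parser = Lark(grammar_text, parser="lalr", cache=True)          # temporary file in the local directory
#     parser = Lark(grammar_text, parser="lalr", cache="my.cache")    # explicit path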
  1618. if self.options.lexer == 'auto':
  1619. if self.options.parser == 'lalr':
  1620. self.options.lexer = 'contextual'
  1621. elif self.options.parser == 'earley':
  1622. self.options.lexer = 'dynamic'
  1623. elif self.options.parser == 'cyk':
  1624. self.options.lexer = 'standard'
  1625. else:
  1626. assert False, self.options.parser
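# Editor's note: 'auto' lexer resolution -- lalr -> contextual, earley -> dynamic,
# cyk -> standard. This standalone module was generated with parser='lalr', so the
# embedded DATA below carries a '__type__' of 'LALR_ContextualLexer'.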
  1627. lexer = self.options.lexer
  1628. assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)
  1629. if self.options.ambiguity == 'auto':
  1630. if self.options.parser == 'earley':
  1631. self.options.ambiguity = 'resolve'
  1632. else:
  1633. disambig_parsers = ['earley', 'cyk']
  1634. assert self.options.parser in disambig_parsers, (
  1635. 'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
  1636. if self.options.priority == 'auto':
  1637. if self.options.parser in ('earley', 'cyk', ):
  1638. self.options.priority = 'normal'
  1639. elif self.options.parser in ('lalr', ):
  1640. self.options.priority = None
  1641. elif self.options.priority in ('invert', 'normal'):
  1642. assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time"
  1643. assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
  1644. assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
  1645. assert self.options.ambiguity in ('resolve', 'explicit', 'auto', )
  1646. ##
  1647. self.grammar = load_grammar(grammar, self.source, re_module)
  1648. ##
  1649. self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
  1650. if self.options.edit_terminals:
  1651. for t in self.terminals:
  1652. self.options.edit_terminals(t)
  1653. self._terminals_dict = {t.name: t for t in self.terminals}
  1654. ##
  1655. ##
  1656. if self.options.priority == 'invert':
  1657. for rule in self.rules:
  1658. if rule.options.priority is not None:
  1659. rule.options.priority = -rule.options.priority
  1660. ##
  1661. ##
  1662. ##
  1663. elif self.options.priority is None:
  1664. for rule in self.rules:
  1665. if rule.options.priority is not None:
  1666. rule.options.priority = None
  1667. ##
  1668. lexer_callbacks = (_get_lexer_callbacks(self.options.transformer, self.terminals)
  1669. if self.options.transformer
  1670. else {})
  1671. lexer_callbacks.update(self.options.lexer_callbacks)
  1672. self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes)
  1673. if self.options.parser:
  1674. self.parser = self._build_parser()
  1675. elif lexer:
  1676. self.lexer = self._build_lexer()
  1677. if cache_fn:
  1678. logger.debug('Saving grammar to cache: %s', cache_fn)
  1679. with FS.open(cache_fn, 'wb') as f:
  1680. self.save(f)
  1681. ##
  1682. __doc__ += "\nOptions:\n" + LarkOptions.OPTIONS_DOC
  1683. __serialize_fields__ = 'parser', 'rules', 'options'
  1684. def _build_lexer(self):
  1685. return TraditionalLexer(self.lexer_conf)
  1686. def _prepare_callbacks(self):
  1687. self.parser_class = get_frontend(self.options.parser, self.options.lexer)
  1688. self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
  1689. self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
  1690. def _build_parser(self):
  1691. self._prepare_callbacks()
  1692. parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
  1693. return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
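# Editor's note: the build pipeline is load_grammar() -> grammar.compile() into terminals
# and rules -> ParseTreeBuilder.create_callback() turns each rule into a tree-building
# callback (optionally fused with the user transformer) -> get_frontend() selects a
# parser/lexer combination (e.g. LALR_ContextualLexer), which is constructed from the
# lexer and parser configurations.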
  1694. def save(self, f):
  1695. #--
  1696. data, m = self.memo_serialize([TerminalDef, Rule])
  1697. pickle.dump({'data': data, 'memo': m}, f)
  1698. @classmethod
  1699. def load(cls, f):
  1700. #--
  1701. inst = cls.__new__(cls)
  1702. return inst._load(f)
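# Editor's sketch: a save()/load() round-trip avoids re-analysing the grammar at startup.
# The file name is the caller's choice; binary mode because the payload is a pickle:
#
#     with open("parser.pkl", "wb") as f:
#         Lark(grammar_text, parser="lalr").save(f)
#
#     with open("parser.pkl", "rb") as f:
#         parser = Lark.load(f)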
  1703. def _load(self, f, transformer=None, postlex=None):
  1704. if isinstance(f, dict):
  1705. d = f
  1706. else:
  1707. d = pickle.load(f)
  1708. memo = d['memo']
  1709. data = d['data']
  1710. assert memo
  1711. memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
  1712. options = dict(data['options'])
  1713. if transformer is not None:
  1714. options['transformer'] = transformer
  1715. if postlex is not None:
  1716. options['postlex'] = postlex
  1717. self.options = LarkOptions.deserialize(options, memo)
  1718. re_module = regex if self.options.regex else re
  1719. self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
  1720. self.source = '<deserialized>'
  1721. self._prepare_callbacks()
  1722. self.parser = self.parser_class.deserialize(
  1723. data['parser'],
  1724. memo,
  1725. self._callbacks,
  1726. self.options.postlex,
  1727. self.options.transformer,
  1728. re_module
  1729. )
  1730. return self
  1731. @classmethod
  1732. def _load_from_dict(cls, data, memo, transformer=None, postlex=None):
  1733. inst = cls.__new__(cls)
  1734. return inst._load({'data': data, 'memo': memo}, transformer, postlex)
  1735. @classmethod
  1736. def open(cls, grammar_filename, rel_to=None, **options):
  1737. #--
  1738. if rel_to:
  1739. basepath = os.path.dirname(rel_to)
  1740. grammar_filename = os.path.join(basepath, grammar_filename)
  1741. with open(grammar_filename, encoding='utf8') as f:
  1742. return cls(f, **options)
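# Editor's sketch: Lark.open() reads the grammar from a file instead of a string. `rel_to`
# is typically __file__, so the path is resolved relative to the calling module:
#
#     parser = Lark.open("my_grammar.lark", rel_to=__file__, parser="lalr")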
  1743. def __repr__(self):
  1744. return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)
  1745. def lex(self, text):
  1746. #--
  1747. if not hasattr(self, 'lexer'):
  1748. self.lexer = self._build_lexer()
  1749. stream = self.lexer.lex(text)
  1750. if self.options.postlex:
  1751. return self.options.postlex.process(stream)
  1752. return stream
  1753. def get_terminal(self, name):
  1754. #--
  1755. return self._terminals_dict[name]
  1756. def parse(self, text, start=None, on_error=None):
  1757. #--
  1758. try:
  1759. return self.parser.parse(text, start=start)
  1760. except UnexpectedToken as e:
  1761. if on_error is None:
  1762. raise
  1763. while True:
  1764. if not on_error(e):
  1765. raise e
  1766. try:
  1767. return e.puppet.resume_parse()
  1768. except UnexpectedToken as e2:
  1769. e = e2
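# Editor's sketch: `on_error` enables simple error recovery with the LALR parser. The
# callback receives the UnexpectedToken exception; a falsy return re-raises it, a truthy
# return resumes parsing from `e.puppet` (the parser state captured at the error point),
# which the callback may also repair first, e.g. by feeding replacement tokens. A minimal
# handler that just logs and carries on:
#
#     def ignore_errors(e):
#         logger.debug("recovering from %r", e.token)
#         return True
#
#     tree = parser.parse(malformed_text, on_error=ignore_errors)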
  1770. DATA = (
  1771. {'parser': {'parser': {'tokens': {0: 'RBRACE', 1: 'COMMA', 2: 'RSQB', 3: '$END', 4: '__object_star_1', 5: 'COLON', 6: 'LBRACE', 7: 'value', 8: 'string', 9: 'object', 10: 'TRUE', 11: 'SIGNED_NUMBER', 12: 'LSQB', 13: 'NULL', 14: 'FALSE', 15: 'array', 16: 'ESCAPED_STRING', 17: '__array_star_0', 18: 'pair', 19: 'start'}, 'states': {0: {0: (1, {'@': 12}), 1: (1, {'@': 12})}, 1: {1: (1, {'@': 13}), 2: (1, {'@': 13}), 0: (1, {'@': 13}), 3: (1, {'@': 13})}, 2: {1: (1, {'@': 14}), 2: (1, {'@': 14}), 0: (1, {'@': 14}), 3: (1, {'@': 14})}, 3: {0: (0, 25), 1: (0, 32)}, 4: {4: (0, 3), 1: (0, 27), 0: (0, 33)}, 5: {0: (1, {'@': 15}), 1: (1, {'@': 15})}, 6: {}, 7: {1: (0, 23), 2: (0, 2)}, 8: {1: (1, {'@': 16}), 2: (1, {'@': 16})}, 9: {1: (1, {'@': 17}), 2: (1, {'@': 17}), 5: (1, {'@': 17}), 0: (1, {'@': 17}), 3: (1, {'@': 17})}, 10: {1: (1, {'@': 18}), 2: (1, {'@': 18}), 0: (1, {'@': 18}), 3: (1, {'@': 18})}, 11: {1: (1, {'@': 19}), 2: (1, {'@': 19}), 0: (1, {'@': 19}), 3: (1, {'@': 19})}, 12: {1: (1, {'@': 20}), 2: (1, {'@': 20}), 0: (1, {'@': 20}), 3: (1, {'@': 20})}, 13: {5: (0, 22)}, 14: {6: (0, 21), 7: (0, 29), 8: (0, 12), 9: (0, 1), 10: (0, 16), 11: (0, 11), 12: (0, 26), 13: (0, 30), 14: (0, 15), 15: (0, 10), 16: (0, 9)}, 15: {1: (1, {'@': 21}), 2: (1, {'@': 21}), 0: (1, {'@': 21}), 3: (1, {'@': 21})}, 16: {1: (1, {'@': 22}), 2: (1, {'@': 22}), 0: (1, {'@': 22}), 3: (1, {'@': 22})}, 17: {1: (1, {'@': 23}), 2: (1, {'@': 23}), 0: (1, {'@': 23}), 3: (1, {'@': 23})}, 18: {2: (0, 24), 1: (0, 14), 17: (0, 7)}, 19: {1: (1, {'@': 24}), 2: (1, {'@': 24}), 0: (1, {'@': 24}), 3: (1, {'@': 24})}, 20: {0: (1, {'@': 25}), 1: (1, {'@': 25})}, 21: {8: (0, 13), 18: (0, 4), 16: (0, 9), 0: (0, 19)}, 22: {6: (0, 21), 8: (0, 12), 9: (0, 1), 10: (0, 16), 11: (0, 11), 12: (0, 26), 13: (0, 30), 14: (0, 15), 15: (0, 10), 7: (0, 20), 16: (0, 9)}, 23: {6: (0, 21), 7: (0, 8), 9: (0, 1), 8: (0, 12), 10: (0, 16), 11: (0, 11), 12: (0, 26), 13: (0, 30), 14: (0, 15), 15: (0, 10), 16: (0, 9)}, 24: {1: (1, {'@': 26}), 2: (1, {'@': 26}), 0: (1, {'@': 26}), 3: (1, {'@': 26})}, 25: {1: (1, {'@': 27}), 2: (1, {'@': 27}), 0: (1, {'@': 27}), 3: (1, {'@': 27})}, 26: {6: (0, 21), 10: (0, 16), 12: (0, 26), 13: (0, 30), 14: (0, 15), 7: (0, 18), 8: (0, 12), 16: (0, 9), 9: (0, 1), 11: (0, 11), 15: (0, 10), 2: (0, 17)}, 27: {8: (0, 13), 18: (0, 0), 16: (0, 9)}, 28: {6: (0, 21), 10: (0, 16), 12: (0, 26), 13: (0, 30), 8: (0, 12), 16: (0, 9), 19: (0, 6), 9: (0, 1), 11: (0, 11), 7: (0, 31), 15: (0, 10), 14: (0, 15)}, 29: {1: (1, {'@': 28}), 2: (1, {'@': 28})}, 30: {1: (1, {'@': 29}), 2: (1, {'@': 29}), 0: (1, {'@': 29}), 3: (1, {'@': 29})}, 31: {3: (1, {'@': 30})}, 32: {18: (0, 5), 8: (0, 13), 16: (0, 9)}, 33: {1: (1, {'@': 31}), 2: (1, {'@': 31}), 0: (1, {'@': 31}), 3: (1, {'@': 31})}}, 'start_states': {'start': 28}, 'end_states': {'start': 6}}, 'lexer_conf': {'tokens': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], 'ignore': ['WS'], 'g_regex_flags': 0, 'use_bytes': False, '__type__': 'LexerConf'}, 'start': ['start'], '__type__': 'LALR_ContextualLexer'}, 'rules': [{'@': 30}, {'@': 13}, {'@': 18}, {'@': 20}, {'@': 19}, {'@': 22}, {'@': 21}, {'@': 29}, {'@': 14}, {'@': 26}, {'@': 23}, {'@': 27}, {'@': 31}, {'@': 24}, {'@': 25}, {'@': 17}, {'@': 28}, {'@': 16}, {'@': 12}, {'@': 15}], 'options': {'debug': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, 'postlex': None, 'parser': 'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 
'priority': None, 'ambiguity': 'auto', 'regex': False, 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False}, '__type__': 'Lark'}
  1772. )
  1773. MEMO = (
  1774. {0: {'name': 'ESCAPED_STRING', 'pattern': {'value': '\\".*?(?<!\\\\)(\\\\\\\\)*?\\"', 'flags': [], '_width': [2, 4294967295], '__type__': 'PatternRE'}, 'priority': 1, '__type__': 'TerminalDef'}, 1: {'name': 'SIGNED_NUMBER', 'pattern': {'value': '(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+)', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 1, '__type__': 'TerminalDef'}, 2: {'name': 'WS', 'pattern': {'value': '(?:[ \t\x0c\r\n])+', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 1, '__type__': 'TerminalDef'}, 3: {'name': 'TRUE', 'pattern': {'value': 'true', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 4: {'name': 'FALSE', 'pattern': {'value': 'false', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 5: {'name': 'NULL', 'pattern': {'value': 'null', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 6: {'name': 'COMMA', 'pattern': {'value': ',', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 7: {'name': 'LSQB', 'pattern': {'value': '[', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 8: {'name': 'RSQB', 'pattern': {'value': ']', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 9: {'name': 'LBRACE', 'pattern': {'value': '{', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 10: {'name': 'RBRACE', 'pattern': {'value': '}', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 11: {'name': 'COLON', 'pattern': {'value': ':', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 12: {'origin': {'name': '__object_star_1', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'pair', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 13: {'origin': {'name': 'value', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'object', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 14: {'origin': {'name': 'array', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}, {'name': '__array_star_0', '__type__': 'NonTerminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 15: {'origin': {'name': '__object_star_1', '__type__': 'NonTerminal'}, 'expansion': [{'name': '__object_star_1', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'pair', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 16: {'origin': {'name': 
'__array_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': '__array_star_0', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 17: {'origin': {'name': 'string', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'ESCAPED_STRING', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 18: {'origin': {'name': 'value', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'array', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 19: {'origin': {'name': 'value', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'SIGNED_NUMBER', 'filter_out': False, '__type__': 'Terminal'}], 'order': 3, 'alias': 'number', 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 20: {'origin': {'name': 'value', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'string', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 21: {'origin': {'name': 'value', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'FALSE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 5, 'alias': 'false', 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 22: {'origin': {'name': 'value', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'TRUE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 4, 'alias': 'true', 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 23: {'origin': {'name': 'array', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': [False, True, False], '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 24: {'origin': {'name': 'object', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LBRACE', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'RBRACE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': [False, True, False], '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 25: {'origin': {'name': 'pair', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'string', '__type__': 'NonTerminal'}, {'name': 'COLON', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 
'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 26: {'origin': {'name': 'array', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 27: {'origin': {'name': 'object', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LBRACE', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'pair', '__type__': 'NonTerminal'}, {'name': '__object_star_1', '__type__': 'NonTerminal'}, {'name': 'RBRACE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 28: {'origin': {'name': '__array_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 29: {'origin': {'name': 'value', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'NULL', 'filter_out': True, '__type__': 'Terminal'}], 'order': 6, 'alias': 'null', 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 30: {'origin': {'name': 'start', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'value', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 31: {'origin': {'name': 'object', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LBRACE', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'pair', '__type__': 'NonTerminal'}, {'name': 'RBRACE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}}
  1775. )
  1776. Shift = 0
  1777. Reduce = 1
  1778. def Lark_StandAlone(transformer=None, postlex=None):
  1779. return Lark._load_from_dict(DATA, MEMO, transformer=transformer, postlex=postlex)
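# Editor's sketch (not generated by Lark): the DATA/MEMO blobs above encode a JSON grammar
# (object / array / pair / string / number / true / false / null), so the standalone entry
# point can be exercised directly:
if __name__ == '__main__':
    json_parser = Lark_StandAlone()
    tree = json_parser.parse('{"key": [1, true, null]}')
    print(tree.pretty())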