This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2075 lines
81 KiB

  1. # The file was automatically generated by Lark v0.8.1
  2. #
  3. #
  4. # Lark Stand-alone Generator Tool
  5. # ----------------------------------
  6. # Generates a stand-alone LALR(1) parser with a standard lexer
  7. #
  8. # Git: https://github.com/erezsh/lark
  9. # Author: Erez Shinan (erezshin@gmail.com)
  10. #
  11. #
  12. # >>> LICENSE
  13. #
  14. # This tool and its generated code use a separate license from Lark,
  15. # and are subject to the terms of the Mozilla Public License, v. 2.0.
  16. # If a copy of the MPL was not distributed with this
  17. # file, You can obtain one at https://mozilla.org/MPL/2.0/.
  18. #
  19. # If you wish to purchase a commercial license for this tool and its
  20. # generated code, you may contact me via email or otherwise.
  21. #
  22. # If MPL2 is incompatible with your free or open-source project,
  23. # contact me and we'll work it out.
  24. #
  25. #
  26. import os
  27. from io import open
class LarkError(Exception):
    """Base class for all exceptions raised by this parser runtime."""
    pass
class GrammarError(LarkError):
    """Raised for errors in the grammar definition itself."""
    pass
class ParseError(LarkError):
    """Base class for errors raised while parsing."""
    pass
class LexError(LarkError):
    """Base class for errors raised while lexing."""
    pass
class UnexpectedEOF(ParseError):
    """Raised when the input ends while the parser still expects tokens."""
    def __init__(self, expected):
        # ``expected``: the terminals (objects with a ``.name``) that would
        # have been legal at the end of the input.
        self.expected = expected
        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
        super(UnexpectedEOF, self).__init__(message)
class UnexpectedInput(LarkError):
    """Base for lexer/parser errors that carry a position in the input.

    Subclasses set ``pos_in_stream`` so ``get_context`` can point at the
    offending character.
    """
    pos_in_stream = None
    def get_context(self, text, span=40):
        # Return the failing line (up to ``span`` chars each way) with a
        # caret (^) marking the error column.
        pos = self.pos_in_stream
        start = max(pos - span, 0)
        end = pos + span
        before = text[start:pos].rsplit('\n', 1)[-1]    # tail of the current line
        after = text[pos:end].split('\n', 1)[0]         # head of the current line
        return before + after + '\n' + ' ' * len(before) + '^\n'
    def match_examples(self, parse_fn, examples):
        """Given a parser instance and a dictionary mapping some label with
        some malformed syntax examples, return the label for the example
        that best matches the current error.
        """
        assert self.state is not None, "Not supported for this exception"
        candidate = None
        for label, example in examples.items():
            # Each value must be a collection of examples, not a bare string.
            assert not isinstance(example, STRING_TYPE)
            for malformed in example:
                try:
                    parse_fn(malformed)
                except UnexpectedInput as ut:
                    if ut.state == self.state:
                        try:
                            if ut.token == self.token:  # Try exact match first
                                return label
                        except AttributeError:
                            pass
                        if not candidate:
                            # Same parser state: remember as a weaker match.
                            candidate = label
        return candidate
class UnexpectedCharacters(LexError, UnexpectedInput):
    """Raised by the lexer when no terminal matches at the current position."""
    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
        message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column)
        self.line = line
        self.column = column
        self.allowed = allowed
        self.considered_tokens = considered_tokens
        self.pos_in_stream = lex_pos
        self.state = state
        # Show the offending line with a caret under the bad character.
        message += '\n\n' + self.get_context(seq)
        if allowed:
            message += '\nExpecting: %s\n' % allowed
        if token_history:
            message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in token_history)
        super(UnexpectedCharacters, self).__init__(message)
class UnexpectedToken(ParseError, UnexpectedInput):
    """Raised by the parser when it receives a token it cannot accept."""
    def __init__(self, token, expected, considered_rules=None, state=None):
        self.token = token
        self.expected = expected # XXX str shouldn't necessary
        # Tokens may lack position info; fall back to '?'.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
        self.considered_rules = considered_rules
        self.state = state
        self.pos_in_stream = getattr(token, 'pos_in_stream', None)
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected one of: \n\t* %s\n"
                   % (token, self.line, self.column, '\n\t* '.join(self.expected)))
        super(UnexpectedToken, self).__init__(message)
class VisitError(LarkError):
    """Wraps an exception raised inside a user visitor/transformer callback."""
    def __init__(self, rule, obj, orig_exc):
        # ``obj`` is the tree/token being processed; ``orig_exc`` the cause.
        self.obj = obj
        self.orig_exc = orig_exc
        message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
        super(VisitError, self).__init__(message)
  106. def classify(seq, key=None, value=None):
  107. d = {}
  108. for item in seq:
  109. k = key(item) if (key is not None) else item
  110. v = value(item) if (value is not None) else item
  111. if k in d:
  112. d[k].append(v)
  113. else:
  114. d[k] = [v]
  115. return d
  116. def _deserialize(data, namespace, memo):
  117. if isinstance(data, dict):
  118. if '__type__' in data: # Object
  119. class_ = namespace[data['__type__']]
  120. return class_.deserialize(data, memo)
  121. elif '@' in data:
  122. return memo[data['@']]
  123. return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
  124. elif isinstance(data, list):
  125. return [_deserialize(value, namespace, memo) for value in data]
  126. return data
class Serialize(object):
    """Mixin providing dict-based (de)serialization driven by class fields.

    Subclasses declare ``__serialize_fields__`` (attribute names to dump) and
    optionally ``__serialize_namespace__`` (classes needed to rebuild nested
    objects), plus ``_serialize``/``_deserialize`` post-processing hooks.
    """
    def memo_serialize(self, types_to_memoize):
        # Serialize self while memoizing shared sub-objects of given types.
        memo = SerializeMemoizer(types_to_memoize)
        return self.serialize(memo), memo.serialize()
    def serialize(self, memo=None):
        if memo and memo.in_types(self):
            # Memoized objects are emitted as a back-reference.
            return {'@': memo.memoized.get(self)}
        fields = getattr(self, '__serialize_fields__')
        res = {f: _serialize(getattr(self, f), memo) for f in fields}
        res['__type__'] = type(self).__name__
        postprocess = getattr(self, '_serialize', None)
        if postprocess:
            postprocess(res, memo)
        return res
    @classmethod
    def deserialize(cls, data, memo):
        namespace = getattr(cls, '__serialize_namespace__', {})
        namespace = {c.__name__:c for c in namespace}
        fields = getattr(cls, '__serialize_fields__')
        if '@' in data:
            return memo[data['@']]
        # Bypass __init__; the serialized fields are restored directly.
        inst = cls.__new__(cls)
        for f in fields:
            try:
                setattr(inst, f, _deserialize(data[f], namespace, memo))
            except KeyError as e:
                raise KeyError("Cannot find key for class", cls, e)
        postprocess = getattr(inst, '_deserialize', None)
        if postprocess:
            postprocess()
        return inst
class SerializeMemoizer(Serialize):
    """Tracks objects of chosen types during serialization so repeated
    references can be emitted as '@'-style back-references."""
    __serialize_fields__ = 'memoized',
    def __init__(self, types_to_memoize):
        self.types_to_memoize = tuple(types_to_memoize)
        # Enumerator (defined elsewhere in this file) maps objects to ids.
        self.memoized = Enumerator()
    def in_types(self, value):
        return isinstance(value, self.types_to_memoize)
    def serialize(self):
        # Emit the id -> object mapping (reversed view of the enumerator).
        return _serialize(self.memoized.reversed(), None)
    @classmethod
    def deserialize(cls, data, namespace, memo):
        return _deserialize(data, namespace, memo)
# Widest string type: ``basestring`` on Python 2, ``str`` on Python 3.
try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str
import types
from functools import wraps, partial
from contextlib import contextmanager
Str = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
try:
    classtype = types.ClassType # Python2: old-style class type
except AttributeError:
    classtype = type # Python3
def smart_decorator(f, create_decorator):
    """Wrap *f* with ``create_decorator(func, with_self)``, choosing the
    calling convention (whether a ``self`` argument is expected) based on
    what kind of callable *f* is."""
    if isinstance(f, types.FunctionType):
        return wraps(f)(create_decorator(f, True))
    elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
        # Classes and builtins have no bound ``self``.
        return wraps(f)(create_decorator(f, False))
    elif isinstance(f, types.MethodType):
        return wraps(f)(create_decorator(f.__func__, True))
    elif isinstance(f, partial):
        # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
        return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
    else:
        # Fallback: arbitrary callable object.
        return create_decorator(f.__func__.__call__, True)
import sys, re
# Python 3.6 changed the inline-flag regexp syntax (see Pattern._get_flags).
Py36 = (sys.version_info[:2] >= (3, 6))
import sre_parse
import sre_constants
  198. def get_regexp_width(regexp):
  199. try:
  200. return [int(x) for x in sre_parse.parse(regexp).getwidth()]
  201. except sre_constants.error:
  202. raise ValueError(regexp)
class Meta:
    """Holder for a tree node's metadata; created lazily by Tree.meta."""
    def __init__(self):
        self.empty = True   # no metadata recorded yet
class Tree(object):
    """A parse-tree node: a ``data`` label plus a list of ``children``
    (sub-Trees and/or leaf values such as tokens)."""
    def __init__(self, data, children, meta=None):
        self.data = data
        self.children = children
        self._meta = meta
    @property
    def meta(self):
        # Lazily create the metadata holder on first access.
        if self._meta is None:
            self._meta = Meta()
        return self._meta
    def __repr__(self):
        return 'Tree(%s, %s)' % (self.data, self.children)
    def _pretty_label(self):
        return self.data
    def _pretty(self, level, indent_str):
        # A single non-Tree child is printed on the same line as the label.
        if len(self.children) == 1 and not isinstance(self.children[0], Tree):
            return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
        l = [ indent_str*level, self._pretty_label(), '\n' ]
        for n in self.children:
            if isinstance(n, Tree):
                l += n._pretty(level+1, indent_str)
            else:
                l += [ indent_str*(level+1), '%s' % (n,), '\n' ]
        return l
    def pretty(self, indent_str=' '):
        """Return an indented, multi-line rendering of the tree."""
        return ''.join(self._pretty(0, indent_str))
    def __eq__(self, other):
        try:
            return self.data == other.data and self.children == other.children
        except AttributeError:
            # ``other`` isn't tree-like.
            return False
    def __ne__(self, other):
        return not (self == other)
    def __hash__(self):
        return hash((self.data, tuple(self.children)))
    def iter_subtrees(self):
        # Collect nodes depth-first with a stack, then yield the reversed
        # list with duplicates removed — children before their parent, each
        # node exactly once (used by the bottom-up visitors).
        # TODO: Re-write as a more efficient version
        visited = set()
        q = [self]
        l = []
        while q:
            subtree = q.pop()
            l.append( subtree )
            if id(subtree) in visited:
                continue # already been here from another branch
            visited.add(id(subtree))
            q += [c for c in subtree.children if isinstance(c, Tree)]
        seen = set()
        for x in reversed(l):
            if id(x) not in seen:
                yield x
                seen.add(id(x))
    def find_pred(self, pred):
        "Find all nodes where pred(tree) == True"
        return filter(pred, self.iter_subtrees())
    def find_data(self, data):
        "Find all nodes where tree.data == data"
        return self.find_pred(lambda t: t.data == data)
  264. from inspect import getmembers, getmro
class Discard(Exception):
    """Raised inside a transformer callback to drop the current node
    (caught in Transformer._transform_children)."""
    pass
  267. # Transformers
class _Decoratable:
    """Base for classes whose public methods can be batch-decorated
    (used by the v_args machinery)."""
    @classmethod
    def _apply_decorator(cls, decorator, **kwargs):
        mro = getmro(cls)
        assert mro[0] is cls
        # Names provided by base classes (the library itself); user methods
        # that shadow them are still decorated via the cls.__dict__ check.
        libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
        for name, value in getmembers(cls):
            # Make sure the function isn't inherited (unless it's overwritten)
            if name.startswith('_') or (name in libmembers and name not in cls.__dict__):
                continue
            if not callable(cls.__dict__[name]):
                continue
            # Skip if v_args already applied (at the function level)
            if hasattr(cls.__dict__[name], 'vargs_applied'):
                continue
            static = isinstance(cls.__dict__[name], (staticmethod, classmethod))
            setattr(cls, name, decorator(value, static=static, **kwargs))
        return cls
    def __class_getitem__(cls, _):
        # Allow subscripting the class (e.g. in type annotations).
        return cls
class Transformer(_Decoratable):
    """Visits the tree recursively, starting with the leaves and finally the root (bottom-up)
    Calls its methods (provided by user via inheritance) according to tree.data
    The returned value replaces the old one in the structure.
    Can be used to implement map or reduce.
    """
    __visit_tokens__ = True # For backwards compatibility
    def __init__(self, visit_tokens=True):
        self.__visit_tokens__ = visit_tokens
    def _call_userfunc(self, tree, new_children=None):
        # Assumes tree is already transformed
        children = new_children if new_children is not None else tree.children
        try:
            f = getattr(self, tree.data)
        except AttributeError:
            # No user method for this rule name.
            return self.__default__(tree.data, children, tree.meta)
        else:
            try:
                # Honor a v_args-style wrapper if the user method carries one.
                wrapper = getattr(f, 'visit_wrapper', None)
                if wrapper is not None:
                    return f.visit_wrapper(f, tree.data, children, tree.meta)
                else:
                    return f(children)
            except (GrammarError, Discard):
                raise   # control-flow exceptions pass through untouched
            except Exception as e:
                raise VisitError(tree.data, tree, e)
    def _call_userfunc_token(self, token):
        # Same dispatch as _call_userfunc, but keyed on token.type.
        try:
            f = getattr(self, token.type)
        except AttributeError:
            return self.__default_token__(token)
        else:
            try:
                return f(token)
            except (GrammarError, Discard):
                raise
            except Exception as e:
                raise VisitError(token.type, token, e)
    def _transform_children(self, children):
        # Transform each child; children whose callback raises Discard are
        # silently dropped from the result.
        for c in children:
            try:
                if isinstance(c, Tree):
                    yield self._transform_tree(c)
                elif self.__visit_tokens__ and isinstance(c, Token):
                    yield self._call_userfunc_token(c)
                else:
                    yield c
            except Discard:
                pass
    def _transform_tree(self, tree):
        children = list(self._transform_children(tree.children))
        return self._call_userfunc(tree, children)
    def transform(self, tree):
        """Transform *tree* bottom-up and return the result."""
        return self._transform_tree(tree)
    def __mul__(self, other):
        # ``t1 * t2`` chains two transformers.
        return TransformerChain(self, other)
    def __default__(self, data, children, meta):
        "Default operation on tree (for override)"
        return Tree(data, children, meta)
    def __default_token__(self, token):
        "Default operation on token (for override)"
        return token
class InlineTransformer(Transformer): # XXX Deprecated
    """Like Transformer, but user callbacks receive the children spread as
    positional arguments instead of as one list."""
    def _call_userfunc(self, tree, new_children=None):
        # Assumes tree is already transformed
        children = new_children if new_children is not None else tree.children
        try:
            f = getattr(self, tree.data)
        except AttributeError:
            return self.__default__(tree.data, children, tree.meta)
        else:
            return f(*children)
  361. class TransformerChain(object):
  362. def __init__(self, *transformers):
  363. self.transformers = transformers
  364. def transform(self, tree):
  365. for t in self.transformers:
  366. tree = t.transform(tree)
  367. return tree
  368. def __mul__(self, other):
  369. return TransformerChain(*self.transformers + (other,))
class Transformer_InPlace(Transformer):
    "Non-recursive. Changes the tree in-place instead of returning new instances"
    def _transform_tree(self, tree): # Cancel recursion
        return self._call_userfunc(tree)
    def transform(self, tree):
        # iter_subtrees yields children before parents, so each subtree's
        # children are already rewritten when its own callback runs.
        for subtree in tree.iter_subtrees():
            subtree.children = list(self._transform_children(subtree.children))
        return self._transform_tree(tree)
class Transformer_InPlaceRecursive(Transformer):
    "Recursive. Changes the tree in-place instead of returning new instances"
    def _transform_tree(self, tree):
        # Mutate the children list, then run the node's own callback.
        tree.children = list(self._transform_children(tree.children))
        return self._call_userfunc(tree)
  383. # Visitors
  384. class VisitorBase:
  385. def _call_userfunc(self, tree):
  386. return getattr(self, tree.data, self.__default__)(tree)
  387. def __default__(self, tree):
  388. "Default operation on tree (for override)"
  389. return tree
  390. def __class_getitem__(cls, _):
  391. return cls
class Visitor(VisitorBase):
    """Bottom-up visitor, non-recursive
    Visits the tree, starting with the leaves and finally the root (bottom-up)
    Calls its methods (provided by user via inheritance) according to tree.data
    """
    def visit(self, tree):
        for subtree in tree.iter_subtrees():
            self._call_userfunc(subtree)
        return tree
    def visit_topdown(self,tree):
        # NOTE(review): relies on Tree.iter_subtrees_topdown, which is not
        # defined in this part of the file — confirm it exists elsewhere.
        for subtree in tree.iter_subtrees_topdown():
            self._call_userfunc(subtree)
        return tree
class Visitor_Recursive(VisitorBase):
    """Bottom-up visitor, recursive
    Visits the tree, starting with the leaves and finally the root (bottom-up)
    Calls its methods (provided by user via inheritance) according to tree.data
    """
    def visit(self, tree):
        # Children first, then the node itself (bottom-up).
        for child in tree.children:
            if isinstance(child, Tree):
                self.visit(child)
        self._call_userfunc(tree)
        return tree
    def visit_topdown(self,tree):
        # Node first, then its children (top-down).
        self._call_userfunc(tree)
        for child in tree.children:
            if isinstance(child, Tree):
                self.visit_topdown(child)
        return tree
  422. def visit_children_decor(func):
  423. "See Interpreter"
  424. @wraps(func)
  425. def inner(cls, tree):
  426. values = cls.visit_children(tree)
  427. return func(cls, values)
  428. return inner
class Interpreter(_Decoratable):
    """Top-down visitor, recursive
    Visits the tree, starting with the root and finally the leaves (top-down)
    Calls its methods (provided by user via inheritance) according to tree.data
    Unlike Transformer and Visitor, the Interpreter doesn't automatically visit its sub-branches.
    The user has to explicitly call visit_children, or use the @visit_children_decor
    """
    def visit(self, tree):
        f = getattr(self, tree.data)
        # v_args-wrapped methods get (data, children, meta) unpacked for them.
        wrapper = getattr(f, 'visit_wrapper', None)
        if wrapper is not None:
            return f.visit_wrapper(f, tree.data, tree.children, tree.meta)
        else:
            return f(tree)
    def visit_children(self, tree):
        # Visit sub-trees; leaf values are passed through unchanged.
        return [self.visit(child) if isinstance(child, Tree) else child
                for child in tree.children]
    def __getattr__(self, name):
        # Unknown rule names fall back to __default__.
        return self.__default__
    def __default__(self, tree):
        return self.visit_children(tree)
  450. # Decorators
  451. def _apply_decorator(obj, decorator, **kwargs):
  452. try:
  453. _apply = obj._apply_decorator
  454. except AttributeError:
  455. return decorator(obj, **kwargs)
  456. else:
  457. return _apply(decorator, **kwargs)
def _inline_args__func(func):
    """Adapt *func* so it is called with the children spread as positional
    arguments (with or without ``self``, as smart_decorator determines)."""
    @wraps(func)
    def create_decorator(_f, with_self):
        if with_self:
            def f(self, children):
                return _f(self, *children)
        else:
            def f(self, children):
                return _f(*children)
        return f
    return smart_decorator(func, create_decorator)
def inline_args(obj): # XXX Deprecated
    # Deprecated: see v_args(inline=True).
    return _apply_decorator(obj, _inline_args__func)
def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
    """Wrap a visitor callback, recording the chosen ``visit_wrapper``
    calling convention and tagging it so v_args is applied only once."""
    def create_decorator(_f, with_self):
        if with_self:
            def f(self, *args, **kwargs):
                return _f(self, *args, **kwargs)
        else:
            def f(self, *args, **kwargs):
                return _f(*args, **kwargs)
        return f
    if static:
        # static/classmethod targets never receive ``self``.
        f = wraps(func)(create_decorator(func, False))
    else:
        f = smart_decorator(func, create_decorator)
    f.vargs_applied = True  # checked by _Decoratable._apply_decorator
    f.visit_wrapper = visit_wrapper
    return f
def _vargs_inline(f, data, children, meta):
    # visit_wrapper: spread the children as positional arguments.
    return f(*children)
def _vargs_meta_inline(f, data, children, meta):
    # visit_wrapper: meta first, then the children spread.
    return f(meta, *children)
def _vargs_meta(f, data, children, meta):
    # visit_wrapper: children list and meta.
    return f(children, meta) # TODO swap these for consistency? Backwards incompatible!
def _vargs_tree(f, data, children, meta):
    # visit_wrapper: rebuild a Tree node and pass it whole.
    return f(Tree(data, children, meta))
  495. def v_args(inline=False, meta=False, tree=False, wrapper=None):
  496. "A convenience decorator factory, for modifying the behavior of user-supplied visitor methods"
  497. if tree and (meta or inline):
  498. raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.")
  499. func = None
  500. if meta:
  501. if inline:
  502. func = _vargs_meta_inline
  503. else:
  504. func = _vargs_meta
  505. elif inline:
  506. func = _vargs_inline
  507. elif tree:
  508. func = _vargs_tree
  509. if wrapper is not None:
  510. if func is not None:
  511. raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.")
  512. func = wrapper
  513. def _visitor_args_dec(obj):
  514. return _apply_decorator(obj, _visitor_args_func_dec, visit_wrapper=func)
  515. return _visitor_args_dec
class Indenter:
    """Postlexer that turns newline tokens into INDENT/DEDENT tokens
    (Python-style significant whitespace).

    Subclasses must define: NL_type, OPEN_PAREN_types, CLOSE_PAREN_types,
    INDENT_type, DEDENT_type and tab_len.
    """
    def __init__(self):
        # Real values are installed by process(); None until then.
        self.paren_level = None
        self.indent_level = None
        assert self.tab_len > 0
    def handle_NL(self, token):
        if self.paren_level > 0:
            return  # newlines inside brackets are not significant
        yield token
        indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces
        indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
        if indent > self.indent_level[-1]:
            self.indent_level.append(indent)
            yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
        else:
            # Emit one DEDENT per indentation level being closed.
            while indent < self.indent_level[-1]:
                self.indent_level.pop()
                yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
            assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1])
    def _process(self, stream):
        for token in stream:
            if token.type == self.NL_type:
                for t in self.handle_NL(token):
                    yield t
            else:
                yield token
            # Track bracket nesting so newlines inside brackets are ignored.
            if token.type in self.OPEN_PAREN_types:
                self.paren_level += 1
            elif token.type in self.CLOSE_PAREN_types:
                self.paren_level -= 1
                assert self.paren_level >= 0
        # Close any indentation still open at end of input.
        while len(self.indent_level) > 1:
            self.indent_level.pop()
            yield Token(self.DEDENT_type, '')
        assert self.indent_level == [0], self.indent_level
    def process(self, stream):
        # Reset state, then lazily process the token stream.
        self.paren_level = 0
        self.indent_level = [0]
        return self._process(stream)
    # XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
    @property
    def always_accept(self):
        return (self.NL_type,)
class Symbol(Serialize):
    """Base grammar symbol, identified by name; subclasses set ``is_term``."""
    __slots__ = ('name',)
    is_term = NotImplemented
    def __init__(self, name):
        self.name = name
    def __eq__(self, other):
        assert isinstance(other, Symbol), other
        return self.is_term == other.is_term and self.name == other.name
    def __ne__(self, other):
        return not (self == other)
    def __hash__(self):
        # Hashes by name only; __eq__ additionally distinguishes terminals.
        return hash(self.name)
    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self.name)
    fullrepr = property(__repr__)
class Terminal(Symbol):
    """Grammar terminal symbol.

    ``filter_out`` presumably marks tokens excluded from parse trees —
    confirm against the tree-building code elsewhere in this file.
    """
    __serialize_fields__ = 'name', 'filter_out'
    is_term = True
    def __init__(self, name, filter_out=False):
        self.name = name
        self.filter_out = filter_out
    @property
    def fullrepr(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
class NonTerminal(Symbol):
    """Grammar nonterminal (rule) symbol."""
    __serialize_fields__ = 'name',
    is_term = False
class RuleOptions(Serialize):
    """Per-rule flags carried alongside a Rule."""
    __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'empty_indices'
    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.priority = priority
        self.empty_indices = empty_indices
    def __repr__(self):
        # Note: empty_indices is not included in the repr.
        return 'RuleOptions(%r, %r, %r)' % (
            self.keep_all_tokens,
            self.expand1,
            self.priority,
        )
class Rule(Serialize):
    """
    origin : a symbol
    expansion : a list of symbols
    order : index of this expansion amongst all rules of the same name
    """
    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
    __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
    __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
    def __init__(self, origin, expansion, order=0, alias=None, options=None):
        self.origin = origin
        self.expansion = expansion
        self.alias = alias
        self.order = order
        self.options = options or RuleOptions()
        # Hash is precomputed; _deserialize() rebuilds it after loading.
        self._hash = hash((self.origin, tuple(self.expansion)))
    def _deserialize(self):
        self._hash = hash((self.origin, tuple(self.expansion)))
    def __str__(self):
        return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
    def __repr__(self):
        return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
    def __hash__(self):
        return self._hash
    def __eq__(self, other):
        # Equality considers origin + expansion only (not alias/options/order).
        if not isinstance(other, Rule):
            return False
        return self.origin == other.origin and self.expansion == other.expansion
class Pattern(Serialize):
    """Abstract terminal pattern (literal string or regexp) plus regex flags."""
    def __init__(self, value, flags=()):
        self.value = value
        self.flags = frozenset(flags)
    def __repr__(self):
        return repr(self.to_regexp())
    # Pattern Hashing assumes all subclasses have a different priority!
    def __hash__(self):
        return hash((type(self), self.value, self.flags))
    def __eq__(self, other):
        return type(self) == type(other) and self.value == other.value and self.flags == other.flags
    def to_regexp(self):
        raise NotImplementedError()
    if Py36:
        # Python 3.6 changed syntax for flags in regular expression
        def _get_flags(self, value):
            for f in self.flags:
                value = ('(?%s:%s)' % (f, value))   # scoped inline flag
            return value
    else:
        def _get_flags(self, value):
            for f in self.flags:
                value = ('(?%s)' % f) + value   # global inline flag prefix
            return value
class PatternStr(Pattern):
    """Literal-string terminal pattern."""
    __serialize_fields__ = 'value', 'flags'
    type = "str"
    def to_regexp(self):
        # Escape the literal so it matches itself as a regexp.
        return self._get_flags(re.escape(self.value))
    @property
    def min_width(self):
        return len(self.value)
    max_width = min_width   # literals have a fixed width
class PatternRE(Pattern):
    """Regexp terminal pattern; match widths are computed lazily and cached."""
    __serialize_fields__ = 'value', 'flags', '_width'
    type = "re"
    def to_regexp(self):
        return self._get_flags(self.value)
    _width = None
    def _get_width(self):
        if self._width is None:
            self._width = get_regexp_width(self.to_regexp())
        return self._width
    @property
    def min_width(self):
        return self._get_width()[0]
    @property
    def max_width(self):
        return self._get_width()[1]
class TerminalDef(Serialize):
    """A named terminal definition: its pattern plus a match priority."""
    __serialize_fields__ = 'name', 'pattern', 'priority'
    __serialize_namespace__ = PatternStr, PatternRE
    def __init__(self, name, pattern, priority=1):
        assert isinstance(pattern, Pattern), pattern
        self.name = name
        self.pattern = pattern
        self.priority = priority
    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
class Token(Str):
    """A lexed token: behaves as a string (its value) but also carries the
    token type and position information."""
    __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
    def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
        try:
            self = super(Token, cls).__new__(cls, value)
        except UnicodeDecodeError:
            # Py2 non-ASCII bytes: fall back to a latin1 decoding.
            value = value.decode('latin1')
            self = super(Token, cls).__new__(cls, value)
        self.type = type_
        self.pos_in_stream = pos_in_stream
        self.value = value
        self.line = line
        self.column = column
        self.end_line = end_line
        self.end_column = end_column
        self.end_pos = end_pos
        return self
    def update(self, type_=None, value=None):
        # Return a copy with type and/or value replaced, position preserved.
        return Token.new_borrow_pos(
            type_ if type_ is not None else self.type,
            value if value is not None else self.value,
            self
        )
    @classmethod
    def new_borrow_pos(cls, type_, value, borrow_t):
        # Build a token copying all position info from ``borrow_t``.
        return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
    def __reduce__(self):
        # NOTE(review): pickling drops the end_* attributes.
        return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))
    def __repr__(self):
        return 'Token(%s, %r)' % (self.type, self.value)
    def __deepcopy__(self, memo):
        # NOTE(review): deep copies also drop the end_* attributes.
        return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)
    def __eq__(self, other):
        # Tokens compare to plain strings by value; two Tokens must also
        # agree on type.
        if isinstance(other, Token) and self.type != other.type:
            return False
        return Str.__eq__(self, other)
    __hash__ = Str.__hash__
  723. class LineCounter:
  724. def __init__(self):
  725. self.newline_char = '\n'
  726. self.char_pos = 0
  727. self.line = 1
  728. self.column = 1
  729. self.line_start_pos = 0
  730. def feed(self, token, test_newline=True):
  731. """Consume a token and calculate the new line & column.
  732. As an optional optimization, set test_newline=False is token doesn't contain a newline.
  733. """
  734. if test_newline:
  735. newlines = token.count(self.newline_char)
  736. if newlines:
  737. self.line += newlines
  738. self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
  739. self.char_pos += len(token)
  740. self.column = self.char_pos - self.line_start_pos + 1
class _Lex:
    "Built to serve both Lexer and ContextualLexer"
    def __init__(self, lexer, state=None):
        self.lexer = lexer
        self.state = state
    def lex(self, stream, newline_types, ignore_types):
        """Generate Tokens from *stream*, skipping ignored types and keeping
        line/column bookkeeping via LineCounter."""
        newline_types = frozenset(newline_types)
        ignore_types = frozenset(ignore_types)
        line_ctr = LineCounter()
        last_token = None
        while line_ctr.char_pos < len(stream):
            # Re-read each iteration: self.lexer may change between tokens.
            lexer = self.lexer
            res = lexer.match(stream, line_ctr.char_pos)
            if not res:
                # Nothing matched: report what would have been acceptable.
                allowed = {v for m, tfi in lexer.mres for v in tfi.values()} - ignore_types
                if not allowed:
                    allowed = {"<END-OF-FILE>"}
                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token])
            value, type_ = res
            if type_ not in ignore_types:
                t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                # Feed before reading end_* so they point past the token.
                line_ctr.feed(value, type_ in newline_types)
                t.end_line = line_ctr.line
                t.end_column = line_ctr.column
                t.end_pos = line_ctr.char_pos
                if t.type in lexer.callback:
                    t = lexer.callback[t.type](t)
                    if not isinstance(t, Token):
                        raise ValueError("Callbacks must return a token (returned %r)" % t)
                yield t
                last_token = t
            else:
                # Ignored tokens may still have callbacks (side effects only).
                if type_ in lexer.callback:
                    t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                    lexer.callback[type_](t2)
                line_ctr.feed(value, type_ in newline_types)
  777. class UnlessCallback:
  778. def __init__(self, mres):
  779. self.mres = mres
  780. def __call__(self, t):
  781. for mre, type_from_index in self.mres:
  782. m = mre.match(t.value)
  783. if m:
  784. t.type = type_from_index[m.lastindex]
  785. break
  786. return t
  787. class CallChain:
  788. def __init__(self, callback1, callback2, cond):
  789. self.callback1 = callback1
  790. self.callback2 = callback2
  791. self.cond = cond
  792. def __call__(self, t):
  793. t2 = self.callback1(t)
  794. return self.callback2(t) if self.cond(t2) else t2
def _create_unless(terminals):
    """Build callbacks that re-type regexp matches which are exactly equal
    to a literal (string-pattern) terminal of lower-or-equal priority.
    Literal terminals fully covered this way are removed from the list.

    Returns (remaining_terminals, {regexp_terminal_name: UnlessCallback}).
    """
    tokens_by_type = classify(terminals, lambda t: type(t.pattern))
    assert len(tokens_by_type) <= 2, tokens_by_type.keys()
    embedded_strs = set()
    callback = {}
    for retok in tokens_by_type.get(PatternRE, []):
        unless = [] # {}
        for strtok in tokens_by_type.get(PatternStr, []):
            if strtok.priority > retok.priority:
                continue
            s = strtok.pattern.value
            m = re.match(retok.pattern.to_regexp(), s)
            if m and m.group(0) == s:
                # The regexp matches the literal in full, so the literal can
                # be recognized via a callback on the regexp terminal.
                unless.append(strtok)
                if strtok.pattern.flags <= retok.pattern.flags:
                    embedded_strs.add(strtok)
        if unless:
            callback[retok.name] = UnlessCallback(build_mres(unless, match_whole=True))
    terminals = [t for t in terminals if t not in embedded_strs]
    return terminals, callback
  815. def _build_mres(terminals, max_size, match_whole):
  816. # Python sets an unreasonable group limit (currently 100) in its re module
  817. # Worse, the only way to know we reached it is by catching an AssertionError!
  818. # This function recursively tries less and less groups until it's successful.
  819. postfix = '$' if match_whole else ''
  820. mres = []
  821. while terminals:
  822. try:
  823. mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]))
  824. except AssertionError: # Yes, this is what Python provides us.. :/
  825. return _build_mres(terminals, max_size//2, match_whole)
  826. # terms_from_name = {t.name: t for t in terminals[:max_size]}
  827. mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
  828. terminals = terminals[max_size:]
  829. return mres
def build_mres(terminals, match_whole=False):
    # Convenience wrapper: start by trying all terminals in a single regexp;
    # _build_mres splits into smaller chunks if Python's group limit is hit.
    # match_whole=True appends '$' so each terminal must match the entire string.
    return _build_mres(terminals, len(terminals), match_whole)
  832. def _regexp_has_newline(r):
  833. r"""Expressions that may indicate newlines in a regexp:
  834. - newlines (\n)
  835. - escaped newline (\\n)
  836. - anything but ([^...])
  837. - any-char (.) when the flag (?s) exists
  838. - spaces (\s)
  839. """
  840. return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
class Lexer(object):
    """Lexer interface

    Method Signatures:
        lex(self, stream) -> Iterator[Token]
    """
    # Abstract: subclasses must provide `lex`; left as NotImplemented so that
    # accidentally calling it on the base class fails loudly.
    lex = NotImplemented
  847. class TraditionalLexer(Lexer):
  848. def __init__(self, terminals, ignore=(), user_callbacks={}):
  849. assert all(isinstance(t, TerminalDef) for t in terminals), terminals
  850. terminals = list(terminals)
  851. # Sanitization
  852. for t in terminals:
  853. try:
  854. re.compile(t.pattern.to_regexp())
  855. except re.error:
  856. raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
  857. if t.pattern.min_width == 0:
  858. raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
  859. assert set(ignore) <= {t.name for t in terminals}
  860. # Init
  861. self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())]
  862. self.ignore_types = list(ignore)
  863. terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
  864. self.terminals = terminals
  865. self.user_callbacks = user_callbacks
  866. self.build()
  867. def build(self):
  868. terminals, self.callback = _create_unless(self.terminals)
  869. assert all(self.callback.values())
  870. for type_, f in self.user_callbacks.items():
  871. if type_ in self.callback:
  872. # Already a callback there, probably UnlessCallback
  873. self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
  874. else:
  875. self.callback[type_] = f
  876. self.mres = build_mres(terminals)
  877. def match(self, stream, pos):
  878. for mre, type_from_index in self.mres:
  879. m = mre.match(stream, pos)
  880. if m:
  881. return m.group(0), type_from_index[m.lastindex]
  882. def lex(self, stream):
  883. return _Lex(self).lex(stream, self.newline_types, self.ignore_types)
  884. class ContextualLexer(Lexer):
  885. def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
  886. tokens_by_name = {}
  887. for t in terminals:
  888. assert t.name not in tokens_by_name, t
  889. tokens_by_name[t.name] = t
  890. lexer_by_tokens = {}
  891. self.lexers = {}
  892. for state, accepts in states.items():
  893. key = frozenset(accepts)
  894. try:
  895. lexer = lexer_by_tokens[key]
  896. except KeyError:
  897. accepts = set(accepts) | set(ignore) | set(always_accept)
  898. state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name]
  899. lexer = TraditionalLexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
  900. lexer_by_tokens[key] = lexer
  901. self.lexers[state] = lexer
  902. self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks)
  903. def lex(self, stream, get_parser_state):
  904. parser_state = get_parser_state()
  905. l = _Lex(self.lexers[parser_state], parser_state)
  906. try:
  907. for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
  908. yield x
  909. parser_state = get_parser_state()
  910. l.lexer = self.lexers[parser_state]
  911. l.state = parser_state # For debug only, no need to worry about multithreading
  912. except UnexpectedCharacters as e:
  913. # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined,
  914. # but not in the current context.
  915. # This tests the input against the global context, to provide a nicer error.
  916. root_match = self.root_lexer.match(stream, e.pos_in_stream)
  917. if not root_match:
  918. raise
  919. value, type_ = root_match
  920. t = Token(type_, value, e.pos_in_stream, e.line, e.column)
  921. raise UnexpectedToken(t, e.allowed, state=e.state)
  922. class LexerConf(Serialize):
  923. __serialize_fields__ = 'tokens', 'ignore'
  924. __serialize_namespace__ = TerminalDef,
  925. def __init__(self, tokens, ignore=(), postlex=None, callbacks=None):
  926. self.tokens = tokens
  927. self.ignore = ignore
  928. self.postlex = postlex
  929. self.callbacks = callbacks or {}
  930. def _deserialize(self):
  931. self.callbacks = {} # TODO
  932. from functools import partial, wraps
  933. from itertools import repeat, product
  934. class ExpandSingleChild:
  935. def __init__(self, node_builder):
  936. self.node_builder = node_builder
  937. def __call__(self, children):
  938. if len(children) == 1:
  939. return children[0]
  940. else:
  941. return self.node_builder(children)
class PropagatePositions:
    # Node-builder wrapper: after building the node, copy source positions
    # (line/column/pos) onto the new node's meta from the first and last
    # children that carry position info.
    def __init__(self, node_builder):
        self.node_builder = node_builder

    def __call__(self, children):
        res = self.node_builder(children)
        if isinstance(res, Tree):
            # Forward scan: first child with start-position info sets the start.
            # A Tree with empty meta is skipped; a Token always has positions.
            for c in children:
                if isinstance(c, Tree) and not c.meta.empty:
                    res.meta.line = c.meta.line
                    res.meta.column = c.meta.column
                    res.meta.start_pos = c.meta.start_pos
                    res.meta.empty = False
                    break
                elif isinstance(c, Token):
                    res.meta.line = c.line
                    res.meta.column = c.column
                    res.meta.start_pos = c.pos_in_stream
                    res.meta.empty = False
                    break
            # Backward scan: last positioned child sets the end.
            for c in reversed(children):
                if isinstance(c, Tree) and not c.meta.empty:
                    res.meta.end_line = c.meta.end_line
                    res.meta.end_column = c.meta.end_column
                    res.meta.end_pos = c.meta.end_pos
                    res.meta.empty = False
                    break
                elif isinstance(c, Token):
                    res.meta.end_line = c.end_line
                    res.meta.end_column = c.end_column
                    res.meta.end_pos = c.end_pos
                    res.meta.empty = False
                    break
        return res
  975. class ChildFilter:
  976. def __init__(self, to_include, append_none, node_builder):
  977. self.node_builder = node_builder
  978. self.to_include = to_include
  979. self.append_none = append_none
  980. def __call__(self, children):
  981. filtered = []
  982. for i, to_expand, add_none in self.to_include:
  983. if add_none:
  984. filtered += [None] * add_none
  985. if to_expand:
  986. filtered += children[i].children
  987. else:
  988. filtered.append(children[i])
  989. if self.append_none:
  990. filtered += [None] * self.append_none
  991. return self.node_builder(filtered)
  992. class ChildFilterLALR(ChildFilter):
  993. "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
  994. def __call__(self, children):
  995. filtered = []
  996. for i, to_expand, add_none in self.to_include:
  997. if add_none:
  998. filtered += [None] * add_none
  999. if to_expand:
  1000. if filtered:
  1001. filtered += children[i].children
  1002. else: # Optimize for left-recursion
  1003. filtered = children[i].children
  1004. else:
  1005. filtered.append(children[i])
  1006. if self.append_none:
  1007. filtered += [None] * self.append_none
  1008. return self.node_builder(filtered)
  1009. class ChildFilterLALR_NoPlaceholders(ChildFilter):
  1010. "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
  1011. def __init__(self, to_include, node_builder):
  1012. self.node_builder = node_builder
  1013. self.to_include = to_include
  1014. def __call__(self, children):
  1015. filtered = []
  1016. for i, to_expand in self.to_include:
  1017. if to_expand:
  1018. if filtered:
  1019. filtered += children[i].children
  1020. else: # Optimize for left-recursion
  1021. filtered = children[i].children
  1022. else:
  1023. filtered.append(children[i])
  1024. return self.node_builder(filtered)
  1025. def _should_expand(sym):
  1026. return not sym.is_term and sym.name.startswith('_')
  1027. def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
  1028. # Prepare empty_indices as: How many Nones to insert at each index?
  1029. if _empty_indices:
  1030. assert _empty_indices.count(False) == len(expansion)
  1031. s = ''.join(str(int(b)) for b in _empty_indices)
  1032. empty_indices = [len(ones) for ones in s.split('0')]
  1033. assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
  1034. else:
  1035. empty_indices = [0] * (len(expansion)+1)
  1036. to_include = []
  1037. nones_to_add = 0
  1038. for i, sym in enumerate(expansion):
  1039. nones_to_add += empty_indices[i]
  1040. if keep_all_tokens or not (sym.is_term and sym.filter_out):
  1041. to_include.append((i, _should_expand(sym), nones_to_add))
  1042. nones_to_add = 0
  1043. nones_to_add += empty_indices[len(expansion)]
  1044. if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
  1045. if _empty_indices or ambiguous:
  1046. return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
  1047. else:
  1048. # LALR without placeholders
  1049. return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
class AmbiguousExpander:
    """Deal with the case where we're expanding children ('_rule') into a parent but the children
    are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
    ambiguous with as many copies as their are ambiguous children, and then copy the ambiguous children
    into the right parents in the right places, essentially shifting the ambiguiuty up the tree."""
    def __init__(self, to_expand, tree_class, node_builder):
        self.node_builder = node_builder
        self.tree_class = tree_class
        # Child indices that are candidates for inlining ('_rule' positions).
        self.to_expand = to_expand

    def __call__(self, children):
        def _is_ambig_tree(child):
            # Duck-typed check: only Trees have .data; '_ambig' marks ambiguity nodes.
            return hasattr(child, 'data') and child.data == '_ambig'
        #### When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
        # All children of an _ambig node should be a derivation of that ambig node, hence
        # it is safe to assume that if we see an _ambig node nested within an ambig node
        # it is safe to simply expand it into the parent _ambig node as an alternative derivation.
        ambiguous = []
        for i, child in enumerate(children):
            if _is_ambig_tree(child):
                if i in self.to_expand:
                    ambiguous.append(i)
                # Flatten nested _ambig grandchildren into this _ambig node.
                to_expand = [j for j, grandchild in enumerate(child.children) if _is_ambig_tree(grandchild)]
                child.expand_kids_by_index(*to_expand)
        if not ambiguous:
            return self.node_builder(children)
        # Build one parent copy per combination of ambiguous alternatives:
        # ambiguous positions iterate over their derivations, others repeat.
        expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ]
        return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])
  1077. def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
  1078. to_expand = [i for i, sym in enumerate(expansion)
  1079. if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
  1080. if to_expand:
  1081. return partial(AmbiguousExpander, to_expand, tree_class)
  1082. def ptb_inline_args(func):
  1083. @wraps(func)
  1084. def f(children):
  1085. return func(*children)
  1086. return f
  1087. def inplace_transformer(func):
  1088. @wraps(func)
  1089. def f(children):
  1090. # function name in a Transformer is a rule name.
  1091. tree = Tree(func.__name__, children)
  1092. return func(tree)
  1093. return f
  1094. def apply_visit_wrapper(func, name, wrapper):
  1095. if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
  1096. raise NotImplementedError("Meta args not supported for internal transformer")
  1097. @wraps(func)
  1098. def f(children):
  1099. return wrapper(func, name, children, None)
  1100. return f
class ParseTreeBuilder:
    # Builds, per grammar rule, the callback chain that turns a matched
    # expansion's children into a tree node: child filtering/inlining,
    # position propagation and ambiguity expansion, optionally routed
    # through a user Transformer method named after the rule.
    def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
        self.tree_class = tree_class
        self.propagate_positions = propagate_positions
        self.always_keep_all_tokens = keep_all_tokens
        self.ambiguous = ambiguous
        self.maybe_placeholders = maybe_placeholders
        self.rule_builders = list(self._init_builders(rules))

    def _init_builders(self, rules):
        # Yields (rule, wrapper_chain). Wrappers are applied in list order in
        # create_callback, each wrapping the previous callable, so the order
        # of this list determines nesting and must not change.
        for rule in rules:
            options = rule.options
            keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens
            expand_single_child = options.expand1
            # Falsy entries (False / None) are dropped by filter(None, ...).
            wrapper_chain = list(filter(None, [
                (expand_single_child and not rule.alias) and ExpandSingleChild,
                maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
                self.propagate_positions and PropagatePositions,
                self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
            ]))
            yield rule, wrapper_chain

    def create_callback(self, transformer=None):
        # Returns {rule: callable(children) -> node}.
        callbacks = {}
        for rule, wrapper_chain in self.rule_builders:
            user_callback_name = rule.alias or rule.origin.name
            try:
                # AttributeError from getattr (no handler on the transformer)
                # falls through to the default tree-building callback below.
                f = getattr(transformer, user_callback_name)
                # XXX InlineTransformer is deprecated!
                wrapper = getattr(f, 'visit_wrapper', None)
                if wrapper is not None:
                    f = apply_visit_wrapper(f, user_callback_name, wrapper)
                else:
                    if isinstance(transformer, InlineTransformer):
                        f = ptb_inline_args(f)
                    elif isinstance(transformer, Transformer_InPlace):
                        f = inplace_transformer(f)
            except AttributeError:
                f = partial(self.tree_class, user_callback_name)
            for w in wrapper_chain:
                f = w(f)
            if rule in callbacks:
                raise GrammarError("Rule '%s' already exists" % (rule,))
            callbacks[rule] = f
        return callbacks
  1144. class LALR_Parser(object):
  1145. def __init__(self, parser_conf, debug=False):
  1146. assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization"
  1147. analysis = LALR_Analyzer(parser_conf, debug=debug)
  1148. analysis.compute_lalr()
  1149. callbacks = parser_conf.callbacks
  1150. self._parse_table = analysis.parse_table
  1151. self.parser_conf = parser_conf
  1152. self.parser = _Parser(analysis.parse_table, callbacks)
  1153. @classmethod
  1154. def deserialize(cls, data, memo, callbacks):
  1155. inst = cls.__new__(cls)
  1156. inst._parse_table = IntParseTable.deserialize(data, memo)
  1157. inst.parser = _Parser(inst._parse_table, callbacks)
  1158. return inst
  1159. def serialize(self, memo):
  1160. return self._parse_table.serialize(memo)
  1161. def parse(self, *args):
  1162. return self.parser.parse(*args)
class _Parser:
    # Engine that drives the LALR(1) automaton over a token stream.
    def __init__(self, parse_table, callbacks):
        self.states = parse_table.states
        self.start_states = parse_table.start_states
        self.end_states = parse_table.end_states
        self.callbacks = callbacks

    def parse(self, seq, start, set_state=None):
        # seq: iterable of Tokens; start: start-symbol name;
        # set_state: optional hook that publishes the current state
        # (used by the contextual lexer).
        token = None
        stream = iter(seq)
        states = self.states
        start_state = self.start_states[start]
        end_state = self.end_states[start]
        state_stack = [start_state]
        value_stack = []
        if set_state: set_state(start_state)
        def get_action(token):
            state = state_stack[-1]
            try:
                return states[state][token.type]
            except KeyError:
                # Uppercase keys in the action row are terminals -> the expected set.
                expected = [s for s in states[state].keys() if s.isupper()]
                raise UnexpectedToken(token, expected, state=state)
        def reduce(rule):
            # Pop the rule body off both stacks, build the node, then GOTO.
            size = len(rule.expansion)
            if size:
                s = value_stack[-size:]
                del state_stack[-size:]
                del value_stack[-size:]
            else:
                s = []
            value = self.callbacks[rule](s)
            _action, new_state = states[state_stack[-1]][rule.origin.name]
            assert _action is Shift  # GOTO entries are stored as Shift actions
            state_stack.append(new_state)
            value_stack.append(value)
        # Main LALR-parser loop
        for token in stream:
            while True:
                action, arg = get_action(token)
                assert arg != end_state
                if action is Shift:
                    state_stack.append(arg)
                    value_stack.append(token)
                    if set_state: set_state(arg)
                    break # next token
                else:
                    reduce(arg)
        # Input exhausted: feed a synthetic $END token and reduce to acceptance.
        token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
        while True:
            _action, arg = get_action(token)
            assert(_action is Reduce)
            reduce(arg)
            if state_stack[-1] == end_state:
                return value_stack[-1]
  1217. class Action:
  1218. def __init__(self, name):
  1219. self.name = name
  1220. def __str__(self):
  1221. return self.name
  1222. def __repr__(self):
  1223. return str(self)
# The two singleton LALR actions; the parser compares them with `is`.
Shift = Action('Shift')
Reduce = Action('Reduce')
  1226. class ParseTable:
  1227. def __init__(self, states, start_states, end_states):
  1228. self.states = states
  1229. self.start_states = start_states
  1230. self.end_states = end_states
  1231. def serialize(self, memo):
  1232. tokens = Enumerator()
  1233. rules = Enumerator()
  1234. states = {
  1235. state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
  1236. for token, (action, arg) in actions.items()}
  1237. for state, actions in self.states.items()
  1238. }
  1239. return {
  1240. 'tokens': tokens.reversed(),
  1241. 'states': states,
  1242. 'start_states': self.start_states,
  1243. 'end_states': self.end_states,
  1244. }
  1245. @classmethod
  1246. def deserialize(cls, data, memo):
  1247. tokens = data['tokens']
  1248. states = {
  1249. state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
  1250. for token, (action, arg) in actions.items()}
  1251. for state, actions in data['states'].items()
  1252. }
  1253. return cls(states, data['start_states'], data['end_states'])
  1254. class IntParseTable(ParseTable):
  1255. @classmethod
  1256. def from_ParseTable(cls, parse_table):
  1257. enum = list(parse_table.states)
  1258. state_to_idx = {s:i for i,s in enumerate(enum)}
  1259. int_states = {}
  1260. for s, la in parse_table.states.items():
  1261. la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
  1262. for k,v in la.items()}
  1263. int_states[ state_to_idx[s] ] = la
  1264. start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
  1265. end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
  1266. return cls(int_states, start_states, end_states)
  1267. def get_frontend(parser, lexer):
  1268. if parser=='lalr':
  1269. if lexer is None:
  1270. raise ValueError('The LALR parser requires use of a lexer')
  1271. elif lexer == 'standard':
  1272. return LALR_TraditionalLexer
  1273. elif lexer == 'contextual':
  1274. return LALR_ContextualLexer
  1275. elif issubclass(lexer, Lexer):
  1276. return partial(LALR_CustomLexer, lexer)
  1277. else:
  1278. raise ValueError('Unknown lexer: %s' % lexer)
  1279. elif parser=='earley':
  1280. if lexer=='standard':
  1281. return Earley
  1282. elif lexer=='dynamic':
  1283. return XEarley
  1284. elif lexer=='dynamic_complete':
  1285. return XEarley_CompleteLex
  1286. elif lexer=='contextual':
  1287. raise ValueError('The Earley parser does not support the contextual parser')
  1288. else:
  1289. raise ValueError('Unknown lexer: %s' % lexer)
  1290. elif parser == 'cyk':
  1291. if lexer == 'standard':
  1292. return CYK
  1293. else:
  1294. raise ValueError('CYK parser requires using standard parser.')
  1295. else:
  1296. raise ValueError('Unknown parser: %s' % parser)
  1297. class _ParserFrontend(Serialize):
  1298. def _parse(self, input, start, *args):
  1299. if start is None:
  1300. start = self.start
  1301. if len(start) > 1:
  1302. raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
  1303. start ,= start
  1304. return self.parser.parse(input, start, *args)
  1305. class WithLexer(_ParserFrontend):
  1306. lexer = None
  1307. parser = None
  1308. lexer_conf = None
  1309. start = None
  1310. __serialize_fields__ = 'parser', 'lexer_conf', 'start'
  1311. __serialize_namespace__ = LexerConf,
  1312. def __init__(self, lexer_conf, parser_conf, options=None):
  1313. self.lexer_conf = lexer_conf
  1314. self.start = parser_conf.start
  1315. self.postlex = lexer_conf.postlex
  1316. @classmethod
  1317. def deserialize(cls, data, memo, callbacks, postlex):
  1318. inst = super(WithLexer, cls).deserialize(data, memo)
  1319. inst.postlex = postlex
  1320. inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks)
  1321. inst.init_lexer()
  1322. return inst
  1323. def _serialize(self, data, memo):
  1324. data['parser'] = data['parser'].serialize(memo)
  1325. def lex(self, *args):
  1326. stream = self.lexer.lex(*args)
  1327. return self.postlex.process(stream) if self.postlex else stream
  1328. def parse(self, text, start=None):
  1329. token_stream = self.lex(text)
  1330. return self._parse(token_stream, start)
  1331. def init_traditional_lexer(self):
  1332. self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)
  1333. class LALR_WithLexer(WithLexer):
  1334. def __init__(self, lexer_conf, parser_conf, options=None):
  1335. debug = options.debug if options else False
  1336. self.parser = LALR_Parser(parser_conf, debug=debug)
  1337. WithLexer.__init__(self, lexer_conf, parser_conf, options)
  1338. self.init_lexer()
  1339. def init_lexer(self):
  1340. raise NotImplementedError()
class LALR_TraditionalLexer(LALR_WithLexer):
    # LALR parser paired with the standard (single-state) lexer.
    def init_lexer(self):
        self.init_traditional_lexer()
  1344. class LALR_ContextualLexer(LALR_WithLexer):
  1345. def init_lexer(self):
  1346. states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
  1347. always_accept = self.postlex.always_accept if self.postlex else ()
  1348. self.lexer = ContextualLexer(self.lexer_conf.tokens, states,
  1349. ignore=self.lexer_conf.ignore,
  1350. always_accept=always_accept,
  1351. user_callbacks=self.lexer_conf.callbacks)
  1352. def parse(self, text, start=None):
  1353. parser_state = [None]
  1354. def set_parser_state(s):
  1355. parser_state[0] = s
  1356. token_stream = self.lex(text, lambda: parser_state[0])
  1357. return self._parse(token_stream, start, set_parser_state)
class LarkOptions(Serialize):
    """Specifies the options for Lark
    """
    OPTIONS_DOC = """
        parser - Decides which parser engine to use, "earley" or "lalr". (Default: "earley")
                 Note: "lalr" requires a lexer
        lexer - Decides whether or not to use a lexer stage
            "standard": Use a standard lexer
            "contextual": Stronger lexer (only works with parser="lalr")
            "dynamic": Flexible and powerful (only with parser="earley")
            "dynamic_complete": Same as dynamic, but tries *every* variation
                                of tokenizing possible. (only with parser="earley")
            "auto" (default): Choose for me based on grammar and parser
        ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
            "resolve": The parser will automatically choose the simplest derivation
                       (it chooses consistently: greedy for tokens, non-greedy for rules)
            "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
        transformer - Applies the transformer to every parse tree
        debug - Affects verbosity (default: False)
        keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
        cache_grammar - Cache the Lark grammar (Default: False)
        postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers.
        start - The start symbol, either a string, or a list of strings for multiple possible starts (Default: "start")
        priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto)
        propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches.
        lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
        maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None
    """
    if __doc__:
        __doc__ += OPTIONS_DOC
    # Known option names and their default values; unknown keys are rejected.
    _defaults = {
        'debug': False,
        'keep_all_tokens': False,
        'tree_class': None,
        'cache_grammar': False,
        'postlex': None,
        'parser': 'earley',
        'lexer': 'auto',
        'transformer': None,
        'start': 'start',
        'priority': 'auto',
        'ambiguity': 'auto',
        'propagate_positions': False,
        'lexer_callbacks': {},
        'maybe_placeholders': False,
        'edit_terminals': None,
    }

    def __init__(self, options_dict):
        o = dict(options_dict)
        options = {}
        for name, default in self._defaults.items():
            if name in o:
                value = o.pop(name)
                # Coerce to bool where the default is a bool (e.g. "debug=1").
                if isinstance(default, bool):
                    value = bool(value)
            else:
                value = default
            options[name] = value
        # Normalize `start` to a list of start-symbol names.
        if isinstance(options['start'], STRING_TYPE):
            options['start'] = [options['start']]
        # Write through __dict__ to bypass our own __setattr__ validation.
        self.__dict__['options'] = options
        assert self.parser in ('earley', 'lalr', 'cyk', None)
        if self.parser == 'earley' and self.transformer:
            raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.'
                             'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
        if o:
            raise ValueError("Unknown options: %s" % o.keys())

    def __getattr__(self, name):
        # Expose option entries as attributes (options.parser, options.lexer, ...).
        try:
            return self.options[name]
        except KeyError as e:
            raise AttributeError(e)

    def __setattr__(self, name, value):
        # Only existing option names may be assigned.
        assert name in self.options
        self.options[name] = value

    def serialize(self, memo):
        return self.options

    @classmethod
    def deserialize(cls, data, memo):
        return cls(data)
  1438. class Lark(Serialize):
    def __init__(self, grammar, **options):
        """
        grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax)
        options : a dictionary controlling various aspects of Lark.
        """
        self.options = LarkOptions(options)
        # Some, but not all file-like objects have a 'name' attribute
        try:
            self.source = grammar.name
        except AttributeError:
            self.source = '<string>'
        # Drain file-like objects to get their contents
        try:
            read = grammar.read
        except AttributeError:
            pass
        else:
            grammar = read()
        assert isinstance(grammar, STRING_TYPE)
        if self.options.cache_grammar:
            raise NotImplementedError("Not available yet")
        # Resolve lexer='auto' based on the chosen parser engine.
        if self.options.lexer == 'auto':
            if self.options.parser == 'lalr':
                self.options.lexer = 'contextual'
            elif self.options.parser == 'earley':
                self.options.lexer = 'dynamic'
            elif self.options.parser == 'cyk':
                self.options.lexer = 'standard'
            else:
                assert False, self.options.parser
        lexer = self.options.lexer
        assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)
        # Resolve ambiguity handling; only Earley/CYK can disambiguate.
        if self.options.ambiguity == 'auto':
            if self.options.parser == 'earley':
                self.options.ambiguity = 'resolve'
        else:
            disambig_parsers = ['earley', 'cyk']
            assert self.options.parser in disambig_parsers, (
                'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
        # Resolve priority evaluation mode.
        if self.options.priority == 'auto':
            if self.options.parser in ('earley', 'cyk', ):
                self.options.priority = 'normal'
            elif self.options.parser in ('lalr', ):
                self.options.priority = None
        elif self.options.priority in ('invert', 'normal'):
            assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time"
        assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
        assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
        assert self.options.ambiguity in ('resolve', 'explicit', 'auto', )
        # Parse the grammar file and compose the grammars (TODO)
        self.grammar = load_grammar(grammar, self.source)
        # Compile the EBNF grammar into BNF
        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
        if self.options.edit_terminals:
            for t in self.terminals:
                self.options.edit_terminals(t)
        self._terminals_dict = {t.name: t for t in self.terminals}
        # If the user asked to invert the priorities, negate them all here.
        # This replaces the old 'resolve__antiscore_sum' option.
        if self.options.priority == 'invert':
            for rule in self.rules:
                if rule.options.priority is not None:
                    rule.options.priority = -rule.options.priority
        # Else, if the user asked to disable priorities, strip them from the
        # rules. This allows the Earley parsers to skip an extra forest walk
        # for improved performance, if you don't need them (or didn't specify any).
        elif self.options.priority == None:
            for rule in self.rules:
                if rule.options.priority is not None:
                    rule.options.priority = None
        # TODO Deprecate lexer_callbacks?
        lexer_callbacks = dict(self.options.lexer_callbacks)
        if self.options.transformer:
            # Transformer methods named after terminals act as lexer callbacks.
            t = self.options.transformer
            for term in self.terminals:
                if hasattr(t, term.name):
                    lexer_callbacks[term.name] = getattr(t, term.name)
        self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, lexer_callbacks)
        if self.options.parser:
            self.parser = self._build_parser()
        elif lexer:
            self.lexer = self._build_lexer()
# Append the auto-generated options reference to __init__'s docstring.
# Guarded because docstrings may be stripped (e.g. under `python -OO`).
if __init__.__doc__:
    __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC
# Attributes included when a Lark instance is serialized — presumably consumed
# by the Serialize machinery earlier in the file (see save()/deserialize()).
__serialize_fields__ = 'parser', 'rules', 'options'
  1524. def _build_lexer(self):
  1525. return TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)
  1526. def _prepare_callbacks(self):
  1527. self.parser_class = get_frontend(self.options.parser, self.options.lexer)
  1528. self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
  1529. self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
  1530. def _build_parser(self):
  1531. self._prepare_callbacks()
  1532. parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
  1533. return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
  1534. @classmethod
  1535. def deserialize(cls, data, namespace, memo, transformer=None, postlex=None):
  1536. if memo:
  1537. memo = SerializeMemoizer.deserialize(memo, namespace, {})
  1538. inst = cls.__new__(cls)
  1539. options = dict(data['options'])
  1540. if transformer is not None:
  1541. options['transformer'] = transformer
  1542. if postlex is not None:
  1543. options['postlex'] = postlex
  1544. inst.options = LarkOptions.deserialize(options, memo)
  1545. inst.rules = [Rule.deserialize(r, memo) for r in data['rules']]
  1546. inst.source = '<deserialized>'
  1547. inst._prepare_callbacks()
  1548. inst.parser = inst.parser_class.deserialize(data['parser'], memo, inst._callbacks, inst.options.postlex)
  1549. return inst
  1550. def save(self, f):
  1551. data, m = self.memo_serialize([TerminalDef, Rule])
  1552. pickle.dump({'data': data, 'memo': m}, f)
  1553. @classmethod
  1554. def load(cls, f):
  1555. d = pickle.load(f)
  1556. namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
  1557. memo = d['memo']
  1558. return Lark.deserialize(d['data'], namespace, memo)
  1559. @classmethod
  1560. def open(cls, grammar_filename, rel_to=None, **options):
  1561. """Create an instance of Lark with the grammar given by its filename
  1562. If rel_to is provided, the function will find the grammar filename in relation to it.
  1563. Example:
  1564. >>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr")
  1565. Lark(...)
  1566. """
  1567. if rel_to:
  1568. basepath = os.path.dirname(rel_to)
  1569. grammar_filename = os.path.join(basepath, grammar_filename)
  1570. with open(grammar_filename, encoding='utf8') as f:
  1571. return cls(f, **options)
  1572. def __repr__(self):
  1573. return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)
  1574. def lex(self, text):
  1575. "Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'"
  1576. if not hasattr(self, 'lexer'):
  1577. self.lexer = self._build_lexer()
  1578. stream = self.lexer.lex(text)
  1579. if self.options.postlex:
  1580. return self.options.postlex.process(stream)
  1581. return stream
  1582. def get_terminal(self, name):
  1583. "Get information about a terminal"
  1584. return self._terminals_dict[name]
  1585. def parse(self, text, start=None):
  1586. """Parse the given text, according to the options provided.
  1587. The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option).
  1588. Returns a tree, unless specified otherwise.
  1589. """
  1590. return self.parser.parse(text, start=start)
# Serialized grammar + LALR(1) parser tables emitted by the Lark standalone
# generator.  Entries of the form {'@': N} are references into MEMO below,
# resolved by SerializeMemoizer during Lark.deserialize().  Generated data —
# do not edit by hand.
DATA = (
{'rules': [{'@': 27}, {'@': 31}, {'@': 26}, {'@': 13}, {'@': 24}, {'@': 18}, {'@': 14}, {'@': 22}, {'@': 28}, {'@': 23}, {'@': 29}, {'@': 12}, {'@': 25}, {'@': 30}, {'@': 19}, {'@': 21}, {'@': 15}, {'@': 20}, {'@': 16}, {'@': 17}], 'parser': {'lexer_conf': {'tokens': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], 'ignore': [u'WS'], '__type__': 'LexerConf'}, 'parser': {'tokens': {0: 'COMMA', 1: 'RSQB', 2: 'RBRACE', 3: '$END', 4: 'LBRACE', 5: u'FALSE', 6: u'string', 7: u'object', 8: u'NULL', 9: u'SIGNED_NUMBER', 10: u'value', 11: u'array', 12: u'ESCAPED_STRING', 13: u'TRUE', 14: 'LSQB', 15: 'COLON', 16: u'pair', 17: u'__array_star_0', 18: u'__object_star_1', 19: 'start'}, 'states': {0: {0: (1, {'@': 12}), 1: (1, {'@': 12}), 2: (1, {'@': 12}), 3: (1, {'@': 12})}, 1: {1: (0, 29), 4: (0, 33), 5: (0, 8), 6: (0, 5), 7: (0, 31), 8: (0, 22), 9: (0, 24), 10: (0, 6), 11: (0, 26), 12: (0, 21), 13: (0, 16), 14: (0, 1)}, 2: {0: (0, 11), 2: (0, 0)}, 3: {15: (0, 12)}, 4: {16: (0, 13), 12: (0, 21), 6: (0, 3)}, 5: {0: (1, {'@': 13}), 1: (1, {'@': 13}), 2: (1, {'@': 13}), 3: (1, {'@': 13})}, 6: {0: (0, 7), 1: (0, 23), 17: (0, 17)}, 7: {4: (0, 33), 5: (0, 8), 6: (0, 5), 7: (0, 31), 8: (0, 22), 9: (0, 24), 10: (0, 9), 11: (0, 26), 12: (0, 21), 13: (0, 16), 14: (0, 1)}, 8: {0: (1, {'@': 14}), 1: (1, {'@': 14}), 2: (1, {'@': 14}), 3: (1, {'@': 14})}, 9: {0: (1, {'@': 15}), 1: (1, {'@': 15})}, 10: {4: (0, 33), 5: (0, 8), 6: (0, 5), 7: (0, 31), 8: (0, 22), 9: (0, 24), 10: (0, 20), 11: (0, 26), 12: (0, 21), 13: (0, 16), 14: (0, 1)}, 11: {16: (0, 15), 12: (0, 21), 6: (0, 3)}, 12: {4: (0, 33), 5: (0, 8), 6: (0, 5), 7: (0, 31), 8: (0, 22), 9: (0, 24), 10: (0, 18), 11: (0, 26), 12: (0, 21), 13: (0, 16), 14: (0, 1)}, 13: {0: (1, {'@': 16}), 2: (1, {'@': 16})}, 14: {}, 15: {0: (1, {'@': 17}), 2: (1, {'@': 17})}, 16: {0: (1, {'@': 18}), 1: (1, {'@': 18}), 2: (1, {'@': 18}), 3: (1, {'@': 18})}, 17: {0: (0, 10), 1: (0, 28)}, 18: {0: (1, {'@': 19}), 2: (1, {'@': 19})}, 19: {0: (0, 4), 18: (0, 2), 2: (0, 25)}, 20: {0: (1, {'@': 20}), 1: (1, {'@': 20})}, 21: {0: (1, {'@': 21}), 1: (1, {'@': 21}), 2: (1, {'@': 21}), 3: (1, {'@': 21}), 15: (1, {'@': 21})}, 22: {0: (1, {'@': 22}), 1: (1, {'@': 22}), 2: (1, {'@': 22}), 3: (1, {'@': 22})}, 23: {0: (1, {'@': 23}), 1: (1, {'@': 23}), 2: (1, {'@': 23}), 3: (1, {'@': 23})}, 24: {0: (1, {'@': 24}), 1: (1, {'@': 24}), 2: (1, {'@': 24}), 3: (1, {'@': 24})}, 25: {0: (1, {'@': 25}), 1: (1, {'@': 25}), 2: (1, {'@': 25}), 3: (1, {'@': 25})}, 26: {0: (1, {'@': 26}), 1: (1, {'@': 26}), 2: (1, {'@': 26}), 3: (1, {'@': 26})}, 27: {3: (1, {'@': 27})}, 28: {0: (1, {'@': 28}), 1: (1, {'@': 28}), 2: (1, {'@': 28}), 3: (1, {'@': 28})}, 29: {0: (1, {'@': 29}), 1: (1, {'@': 29}), 2: (1, {'@': 29}), 3: (1, {'@': 29})}, 30: {0: (1, {'@': 30}), 1: (1, {'@': 30}), 2: (1, {'@': 30}), 3: (1, {'@': 30})}, 31: {0: (1, {'@': 31}), 1: (1, {'@': 31}), 2: (1, {'@': 31}), 3: (1, {'@': 31})}, 32: {4: (0, 33), 5: (0, 8), 6: (0, 5), 7: (0, 31), 8: (0, 22), 9: (0, 24), 10: (0, 27), 11: (0, 26), 12: (0, 21), 13: (0, 16), 14: (0, 1), 19: (0, 14)}, 33: {16: (0, 19), 2: (0, 30), 12: (0, 21), 6: (0, 3)}}, 'end_states': {'start': 14}, 'start_states': {'start': 32}}, '__type__': 'LALR_ContextualLexer', 'start': ['start']}, '__type__': 'Lark', 'options': {'transformer': None, 'lexer': 'contextual', 'lexer_callbacks': {}, 'debug': False, 'postlex': None, 'parser': 'lalr', 'cache_grammar': False, 'tree_class': None, 'priority': None, 'start': ['start'], 'keep_all_tokens': False, 'ambiguity': 'auto', 'edit_terminals': None, 'propagate_positions': False, 'maybe_placeholders': False}}
)
# Memoized TerminalDef (keys 0-11) and Rule (keys 12-31) definitions that the
# {'@': N} references inside DATA point at.  Generated data — do not edit by
# hand.
MEMO = (
{0: {'priority': 1, 'pattern': {'__type__': 'PatternRE', '_width': [2, 4294967295], 'flags': [], 'value': u'\\".*?(?<!\\\\)(\\\\\\\\)*?\\"'}, '__type__': 'TerminalDef', 'name': u'ESCAPED_STRING'}, 1: {'priority': 1, 'pattern': {'__type__': 'PatternRE', '_width': [1, 4294967295], 'flags': [], 'value': u'(?:[ \t\x0c\r\n])+'}, '__type__': 'TerminalDef', 'name': u'WS'}, 2: {'priority': 1, 'pattern': {'__type__': 'PatternRE', '_width': [1, 4294967295], 'flags': [], 'value': u'(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+)'}, '__type__': 'TerminalDef', 'name': u'SIGNED_NUMBER'}, 3: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'true'}, '__type__': 'TerminalDef', 'name': u'TRUE'}, 4: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'false'}, '__type__': 'TerminalDef', 'name': u'FALSE'}, 5: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'null'}, '__type__': 'TerminalDef', 'name': u'NULL'}, 6: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u','}, '__type__': 'TerminalDef', 'name': 'COMMA'}, 7: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'['}, '__type__': 'TerminalDef', 'name': 'LSQB'}, 8: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u']'}, '__type__': 'TerminalDef', 'name': 'RSQB'}, 9: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'{'}, '__type__': 'TerminalDef', 'name': 'LBRACE'}, 10: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'}'}, '__type__': 'TerminalDef', 'name': 'RBRACE'}, 11: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u':'}, '__type__': 'TerminalDef', 'name': 'COLON'}, 12: {'origin': {'__type__': 'NonTerminal', 'name': u'object'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LBRACE'}, {'__type__': 'NonTerminal', 'name': u'pair'}, {'__type__': 'NonTerminal', 'name': u'__object_star_1'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RBRACE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 13: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'string'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 2}, 14: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': u'FALSE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'false', 'order': 5}, 15: {'origin': {'__type__': 'NonTerminal', 'name': u'__array_star_0'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'value'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 16: {'origin': {'__type__': 'NonTerminal', 'name': u'__object_star_1'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'pair'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 17: {'origin': {'__type__': 'NonTerminal', 'name': u'__object_star_1'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'__object_star_1'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'pair'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 1}, 18: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': u'TRUE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'true', 'order': 4}, 19: {'origin': {'__type__': 'NonTerminal', 'name': u'pair'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'string'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'COLON'}, {'__type__': 'NonTerminal', 'name': u'value'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 20: {'origin': {'__type__': 'NonTerminal', 'name': u'__array_star_0'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'__array_star_0'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'value'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 1}, 21: {'origin': {'__type__': 'NonTerminal', 'name': u'string'}, '__type__': 'Rule', 'expansion': [{'filter_out': False, '__type__': 'Terminal', 'name': u'ESCAPED_STRING'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 22: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': u'NULL'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'null', 'order': 6}, 23: {'origin': {'__type__': 'NonTerminal', 'name': u'array'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LSQB'}, {'__type__': 'NonTerminal', 'name': u'value'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RSQB'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 1}, 24: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': False, '__type__': 'Terminal', 'name': u'SIGNED_NUMBER'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'number', 'order': 3}, 25: {'origin': {'__type__': 'NonTerminal', 'name': u'object'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LBRACE'}, {'__type__': 'NonTerminal', 'name': u'pair'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RBRACE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 1}, 26: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'array'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 1}, 27: {'origin': {'__type__': 'NonTerminal', 'name': u'start'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'value'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 0}, 28: {'origin': {'__type__': 'NonTerminal', 'name': u'array'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LSQB'}, {'__type__': 'NonTerminal', 'name': u'value'}, {'__type__': 'NonTerminal', 'name': u'__array_star_0'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RSQB'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 29: {'origin': {'__type__': 'NonTerminal', 'name': u'array'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LSQB'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RSQB'}], 'options': {'priority': None, 'empty_indices': [False, True, False], 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 2}, 30: {'origin': {'__type__': 'NonTerminal', 'name': u'object'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LBRACE'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RBRACE'}], 'options': {'priority': None, 'empty_indices': [False, True, False], 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 2}, 31: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'object'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 0}}
)
# Parser-action tags used in the serialized LALR tables above: entries of the
# form (0, n) appear to encode a Shift to state n, and (1, {'@': r}) a Reduce
# by memoized rule r — NOTE(review): inferred from DATA's state tables; confirm
# against the LALR parser implementation earlier in the file.
Shift = 0
Reduce = 1
  1599. def Lark_StandAlone(transformer=None, postlex=None):
  1600. namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
  1601. return Lark.deserialize(DATA, namespace, MEMO, transformer=transformer, postlex=postlex)