# The file was automatically generated by Lark v0.8.0rc1
#
#
# Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser with a standard lexer
#
# Git: https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
# >>> LICENSE
#
# This tool and its generated code use a separate license from Lark.
#
# It is licensed under GPLv2 or above.
#
# If you wish to purchase a commercial license for this tool and its
# generated code, contact me via email.
#
# If GPL is incompatible with your free or open-source project,
# contact me and we'll work it out (for free).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# See <http://www.gnu.org/licenses/>.
#
#
import os
from io import open
class LarkError(Exception):
    pass
class GrammarError(LarkError):
    pass
class ParseError(LarkError):
    pass
class LexError(LarkError):
    pass
class UnexpectedEOF(ParseError):
    def __init__(self, expected):
        self.expected = expected
        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
        super(UnexpectedEOF, self).__init__(message)
class UnexpectedInput(LarkError):
    pos_in_stream = None
    def get_context(self, text, span=40):
        pos = self.pos_in_stream
        start = max(pos - span, 0)
        end = pos + span
        before = text[start:pos].rsplit('\n', 1)[-1]
        after = text[pos:end].split('\n', 1)[0]
        return before + after + '\n' + ' ' * len(before) + '^\n'
    def match_examples(self, parse_fn, examples):
        """Given a parser instance and a dictionary mapping labels to collections of
        malformed syntax examples, return the label of the example that best matches
        the current error.
        """
        assert self.state is not None, "Not supported for this exception"
        candidate = None
        for label, example in examples.items():
            assert not isinstance(example, STRING_TYPE)
            for malformed in example:
                try:
                    parse_fn(malformed)
                except UnexpectedInput as ut:
                    if ut.state == self.state:
                        try:
                            if ut.token == self.token:  # Try exact match first
                                return label
                        except AttributeError:
                            pass
                        if not candidate:
                            candidate = label
        return candidate
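# Usage sketch (illustrative; not part of the generated module): after catching an
# UnexpectedInput from parse(), match_examples() can map the error to a friendlier
# label. The labels and example strings below are hypothetical:
#
#     try:
#         parser.parse(text)
#     except UnexpectedInput as e:
#         label = e.match_examples(parser.parse, {'missing value': ['x = \n', 'y = ,\n']})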
class UnexpectedCharacters(LexError, UnexpectedInput):
    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
        message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column)
        self.line = line
        self.column = column
        self.allowed = allowed
        self.considered_tokens = considered_tokens
        self.pos_in_stream = lex_pos
        self.state = state
        message += '\n\n' + self.get_context(seq)
        if allowed:
            message += '\nExpecting: %s\n' % allowed
        if token_history:
            message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in token_history)
        super(UnexpectedCharacters, self).__init__(message)
class UnexpectedToken(ParseError, UnexpectedInput):
    def __init__(self, token, expected, considered_rules=None, state=None):
        self.token = token
        self.expected = expected    # XXX str shouldn't be necessary
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
        self.considered_rules = considered_rules
        self.state = state
        self.pos_in_stream = getattr(token, 'pos_in_stream', None)
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected one of: \n\t* %s\n"
                   % (token, self.line, self.column, '\n\t* '.join(self.expected)))
        super(UnexpectedToken, self).__init__(message)
class VisitError(LarkError):
    def __init__(self, rule, obj, orig_exc):
        self.obj = obj
        self.orig_exc = orig_exc
        message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
        super(VisitError, self).__init__(message)
def classify(seq, key=None, value=None):
    d = {}
    for item in seq:
        k = key(item) if (key is not None) else item
        v = value(item) if (value is not None) else item
        if k in d:
            d[k].append(v)
        else:
            d[k] = [v]
    return d
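# For example (comment added for illustration, not part of the generated module),
# classify('abracadabra') groups the characters by identity:
# {'a': ['a', 'a', 'a', 'a', 'a'], 'b': ['b', 'b'], 'r': ['r', 'r'], 'c': ['c'], 'd': ['d']}.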
def _deserialize(data, namespace, memo):
    if isinstance(data, dict):
        if '__type__' in data:  # Object
            class_ = namespace[data['__type__']]
            return class_.deserialize(data, memo)
        elif '@' in data:
            return memo[data['@']]
        return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
    elif isinstance(data, list):
        return [_deserialize(value, namespace, memo) for value in data]
    return data
class Serialize(object):
    def memo_serialize(self, types_to_memoize):
        memo = SerializeMemoizer(types_to_memoize)
        return self.serialize(memo), memo.serialize()
    def serialize(self, memo=None):
        if memo and memo.in_types(self):
            return {'@': memo.memoized.get(self)}
        fields = getattr(self, '__serialize_fields__')
        res = {f: _serialize(getattr(self, f), memo) for f in fields}
        res['__type__'] = type(self).__name__
        postprocess = getattr(self, '_serialize', None)
        if postprocess:
            postprocess(res, memo)
        return res
    @classmethod
    def deserialize(cls, data, memo):
        namespace = getattr(cls, '__serialize_namespace__', {})
        namespace = {c.__name__:c for c in namespace}
        fields = getattr(cls, '__serialize_fields__')
        if '@' in data:
            return memo[data['@']]
        inst = cls.__new__(cls)
        for f in fields:
            try:
                setattr(inst, f, _deserialize(data[f], namespace, memo))
            except KeyError as e:
                raise KeyError("Cannot find key for class", cls, e)
        postprocess = getattr(inst, '_deserialize', None)
        if postprocess:
            postprocess()
        return inst
class SerializeMemoizer(Serialize):
    __serialize_fields__ = 'memoized',
    def __init__(self, types_to_memoize):
        self.types_to_memoize = tuple(types_to_memoize)
        self.memoized = Enumerator()
    def in_types(self, value):
        return isinstance(value, self.types_to_memoize)
    def serialize(self):
        return _serialize(self.memoized.reversed(), None)
    @classmethod
    def deserialize(cls, data, namespace, memo):
        return _deserialize(data, namespace, memo)
try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str
import types
from functools import wraps, partial
from contextlib import contextmanager
Str = type(u'')
try:
    classtype = types.ClassType  # Python2
except AttributeError:
    classtype = type  # Python3
def smart_decorator(f, create_decorator):
    if isinstance(f, types.FunctionType):
        return wraps(f)(create_decorator(f, True))
    elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
        return wraps(f)(create_decorator(f, False))
    elif isinstance(f, types.MethodType):
        return wraps(f)(create_decorator(f.__func__, True))
    elif isinstance(f, partial):
        # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
        return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
    else:
        return create_decorator(f.__func__.__call__, True)
import sys, re
Py36 = (sys.version_info[:2] >= (3, 6))
import sre_parse
import sre_constants
def get_regexp_width(regexp):
    try:
        return [int(x) for x in sre_parse.parse(regexp).getwidth()]
    except sre_constants.error:
        raise ValueError(regexp)
class Meta:
    def __init__(self):
        self.empty = True
class Tree(object):
    def __init__(self, data, children, meta=None):
        self.data = data
        self.children = children
        self._meta = meta
    @property
    def meta(self):
        if self._meta is None:
            self._meta = Meta()
        return self._meta
    def __repr__(self):
        return 'Tree(%s, %s)' % (self.data, self.children)
    def _pretty_label(self):
        return self.data
    def _pretty(self, level, indent_str):
        if len(self.children) == 1 and not isinstance(self.children[0], Tree):
            return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
        l = [ indent_str*level, self._pretty_label(), '\n' ]
        for n in self.children:
            if isinstance(n, Tree):
                l += n._pretty(level+1, indent_str)
            else:
                l += [ indent_str*(level+1), '%s' % (n,), '\n' ]
        return l
    def pretty(self, indent_str=' '):
        return ''.join(self._pretty(0, indent_str))
    def __eq__(self, other):
        try:
            return self.data == other.data and self.children == other.children
        except AttributeError:
            return False
    def __ne__(self, other):
        return not (self == other)
    def __hash__(self):
        return hash((self.data, tuple(self.children)))
    def iter_subtrees(self):
        # TODO: Re-write as a more efficient version
        visited = set()
        q = [self]
        l = []
        while q:
            subtree = q.pop()
            l.append( subtree )
            if id(subtree) in visited:
                continue    # already been here from another branch
            visited.add(id(subtree))
            q += [c for c in subtree.children if isinstance(c, Tree)]
        seen = set()
        for x in reversed(l):
            if id(x) not in seen:
                yield x
                seen.add(id(x))
    def find_pred(self, pred):
        "Find all nodes where pred(tree) == True"
        return filter(pred, self.iter_subtrees())
    def find_data(self, data):
        "Find all nodes where tree.data == data"
        return self.find_pred(lambda t: t.data == data)
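# Usage sketch (illustrative; not part of the generated module): given a parse result
# `tree`, tree.pretty() renders an indented view, tree.iter_subtrees() walks every
# subtree (children before their parents, as used by Visitor below), and
# list(tree.find_data('assignment')) collects all nodes of a hypothetical rule named
# 'assignment'.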
from inspect import getmembers, getmro
class Discard(Exception):
    pass
# Transformers
class Transformer:
    """Visits the tree recursively, starting with the leaves and finally the root (bottom-up)
    Calls its methods (provided by user via inheritance) according to tree.data
    The returned value replaces the old one in the structure.
    Can be used to implement map or reduce.
    """
    __visit_tokens__ = True  # For backwards compatibility
    def __init__(self, visit_tokens=True):
        self.__visit_tokens__ = visit_tokens
    def _call_userfunc(self, tree, new_children=None):
        # Assumes tree is already transformed
        children = new_children if new_children is not None else tree.children
        try:
            f = getattr(self, tree.data)
        except AttributeError:
            return self.__default__(tree.data, children, tree.meta)
        else:
            try:
                wrapper = getattr(f, 'visit_wrapper', None)
                if wrapper is not None:
                    return f.visit_wrapper(f, tree.data, children, tree.meta)
                else:
                    return f(children)
            except (GrammarError, Discard):
                raise
            except Exception as e:
                raise VisitError(tree.data, tree, e)
    def _call_userfunc_token(self, token):
        try:
            f = getattr(self, token.type)
        except AttributeError:
            return self.__default_token__(token)
        else:
            try:
                return f(token)
            except (GrammarError, Discard):
                raise
            except Exception as e:
                raise VisitError(token.type, token, e)
    def _transform_children(self, children):
        for c in children:
            try:
                if isinstance(c, Tree):
                    yield self._transform_tree(c)
                elif self.__visit_tokens__ and isinstance(c, Token):
                    yield self._call_userfunc_token(c)
                else:
                    yield c
            except Discard:
                pass
    def _transform_tree(self, tree):
        children = list(self._transform_children(tree.children))
        return self._call_userfunc(tree, children)
    def transform(self, tree):
        return self._transform_tree(tree)
    def __mul__(self, other):
        return TransformerChain(self, other)
    def __default__(self, data, children, meta):
        "Default operation on tree (for override)"
        return Tree(data, children, meta)
    def __default_token__(self, token):
        "Default operation on token (for override)"
        return token
    @classmethod
    def _apply_decorator(cls, decorator, **kwargs):
        mro = getmro(cls)
        assert mro[0] is cls
        libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
        for name, value in getmembers(cls):
            # Make sure the function isn't inherited (unless it's overwritten)
            if name.startswith('_') or (name in libmembers and name not in cls.__dict__):
                continue
            if not callable(cls.__dict__[name]):
                continue
            # Skip if v_args already applied (at the function level)
            if hasattr(cls.__dict__[name], 'vargs_applied'):
                continue
            static = isinstance(cls.__dict__[name], (staticmethod, classmethod))
            setattr(cls, name, decorator(value, static=static, **kwargs))
        return cls
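# Usage sketch (illustrative; not part of the generated module): Transformer methods
# are looked up by tree.data, so for a hypothetical grammar rule "number: INT" one
# might write
#
#     class CalcTransformer(Transformer):
#         def number(self, children):
#             return int(children[0])
#
# and apply it with CalcTransformer().transform(parse_tree).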
class InlineTransformer(Transformer):  # XXX Deprecated
    def _call_userfunc(self, tree, new_children=None):
        # Assumes tree is already transformed
        children = new_children if new_children is not None else tree.children
        try:
            f = getattr(self, tree.data)
        except AttributeError:
            return self.__default__(tree.data, children, tree.meta)
        else:
            return f(*children)
class TransformerChain(object):
    def __init__(self, *transformers):
        self.transformers = transformers
    def transform(self, tree):
        for t in self.transformers:
            tree = t.transform(tree)
        return tree
    def __mul__(self, other):
        return TransformerChain(*self.transformers + (other,))
class Transformer_InPlace(Transformer):
    "Non-recursive. Changes the tree in-place instead of returning new instances"
    def _transform_tree(self, tree):  # Cancel recursion
        return self._call_userfunc(tree)
    def transform(self, tree):
        for subtree in tree.iter_subtrees():
            subtree.children = list(self._transform_children(subtree.children))
        return self._transform_tree(tree)
class Transformer_InPlaceRecursive(Transformer):
    "Recursive. Changes the tree in-place instead of returning new instances"
    def _transform_tree(self, tree):
        tree.children = list(self._transform_children(tree.children))
        return self._call_userfunc(tree)
# Visitors
class VisitorBase:
    def _call_userfunc(self, tree):
        return getattr(self, tree.data, self.__default__)(tree)
    def __default__(self, tree):
        "Default operation on tree (for override)"
        return tree
class Visitor(VisitorBase):
    """Bottom-up visitor, non-recursive
    Visits the tree, starting with the leaves and finally the root (bottom-up)
    Calls its methods (provided by user via inheritance) according to tree.data
    """
    def visit(self, tree):
        for subtree in tree.iter_subtrees():
            self._call_userfunc(subtree)
        return tree
    def visit_topdown(self, tree):
        for subtree in tree.iter_subtrees_topdown():
            self._call_userfunc(subtree)
        return tree
class Visitor_Recursive(VisitorBase):
    """Bottom-up visitor, recursive
    Visits the tree, starting with the leaves and finally the root (bottom-up)
    Calls its methods (provided by user via inheritance) according to tree.data
    """
    def visit(self, tree):
        for child in tree.children:
            if isinstance(child, Tree):
                self.visit(child)
        self._call_userfunc(tree)
        return tree
    def visit_topdown(self, tree):
        self._call_userfunc(tree)
        for child in tree.children:
            if isinstance(child, Tree):
                self.visit_topdown(child)
        return tree
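# Usage sketch (illustrative; not part of the generated module): a Visitor works by
# side effect and returns the original tree unchanged, e.g. counting the nodes of a
# hypothetical 'assignment' rule:
#
#     class AssignmentCounter(Visitor):
#         def __init__(self):
#             self.count = 0
#         def assignment(self, tree):
#             self.count += 1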
def visit_children_decor(func):
    "See Interpreter"
    @wraps(func)
    def inner(cls, tree):
        values = cls.visit_children(tree)
        return func(cls, values)
    return inner
class Interpreter:
    """Top-down visitor, recursive
    Visits the tree, starting with the root and finally the leaves (top-down)
    Calls its methods (provided by user via inheritance) according to tree.data
    Unlike Transformer and Visitor, the Interpreter doesn't automatically visit its sub-branches.
    The user has to explicitly call visit_children, or use the @visit_children_decor
    """
    def visit(self, tree):
        return getattr(self, tree.data)(tree)
    def visit_children(self, tree):
        return [self.visit(child) if isinstance(child, Tree) else child
                for child in tree.children]
    def __getattr__(self, name):
        return self.__default__
    def __default__(self, tree):
        return self.visit_children(tree)
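# Usage sketch (illustrative; not part of the generated module): unlike Transformer,
# an Interpreter method receives the tree node itself and decides when (or whether)
# to recurse, e.g. for a hypothetical binary 'add' rule:
#
#     class Eval(Interpreter):
#         def add(self, tree):
#             left, right = self.visit_children(tree)
#             return left + right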
# Decorators
def _apply_decorator(obj, decorator, **kwargs):
    try:
        _apply = obj._apply_decorator
    except AttributeError:
        return decorator(obj, **kwargs)
    else:
        return _apply(decorator, **kwargs)
def _inline_args__func(func):
    @wraps(func)
    def create_decorator(_f, with_self):
        if with_self:
            def f(self, children):
                return _f(self, *children)
        else:
            def f(self, children):
                return _f(*children)
        return f
    return smart_decorator(func, create_decorator)
def inline_args(obj):  # XXX Deprecated
    return _apply_decorator(obj, _inline_args__func)
def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
    def create_decorator(_f, with_self):
        if with_self:
            def f(self, *args, **kwargs):
                return _f(self, *args, **kwargs)
        else:
            def f(self, *args, **kwargs):
                return _f(*args, **kwargs)
        return f
    if static:
        f = wraps(func)(create_decorator(func, False))
    else:
        f = smart_decorator(func, create_decorator)
    f.vargs_applied = True
    f.visit_wrapper = visit_wrapper
    return f
def _vargs_inline(f, data, children, meta):
    return f(*children)
def _vargs_meta_inline(f, data, children, meta):
    return f(meta, *children)
def _vargs_meta(f, data, children, meta):
    return f(children, meta)  # TODO swap these for consistency? Backwards incompatible!
def _vargs_tree(f, data, children, meta):
    return f(Tree(data, children, meta))
def v_args(inline=False, meta=False, tree=False, wrapper=None):
    "A convenience decorator factory, for modifying the behavior of user-supplied visitor methods"
    if tree and (meta or inline):
        raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.")
    func = None
    if meta:
        if inline:
            func = _vargs_meta_inline
        else:
            func = _vargs_meta
    elif inline:
        func = _vargs_inline
    elif tree:
        func = _vargs_tree
    if wrapper is not None:
        if func is not None:
            raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.")
        func = wrapper
    def _visitor_args_dec(obj):
        return _apply_decorator(obj, _visitor_args_func_dec, visit_wrapper=func)
    return _visitor_args_dec
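# Usage sketch (illustrative; not part of the generated module): v_args changes how
# children reach visitor methods. Applied to a whole (hypothetical) transformer class:
#
#     @v_args(inline=True)
#     class CalcTransformer(Transformer):
#         def add(self, left, right):  # children are passed as positional arguments
#             return left + right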
class Indenter:
    def __init__(self):
        self.paren_level = None
        self.indent_level = None
        assert self.tab_len > 0
    def handle_NL(self, token):
        if self.paren_level > 0:
            return
        yield token
        indent_str = token.rsplit('\n', 1)[1]  # Tabs and spaces
        indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
        if indent > self.indent_level[-1]:
            self.indent_level.append(indent)
            yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
        else:
            while indent < self.indent_level[-1]:
                self.indent_level.pop()
                yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
            assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1])
    def _process(self, stream):
        for token in stream:
            if token.type == self.NL_type:
                for t in self.handle_NL(token):
                    yield t
            else:
                yield token
            if token.type in self.OPEN_PAREN_types:
                self.paren_level += 1
            elif token.type in self.CLOSE_PAREN_types:
                self.paren_level -= 1
                assert self.paren_level >= 0
        while len(self.indent_level) > 1:
            self.indent_level.pop()
            yield Token(self.DEDENT_type, '')
        assert self.indent_level == [0], self.indent_level
    def process(self, stream):
        self.paren_level = 0
        self.indent_level = [0]
        return self._process(stream)
    # XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
    @property
    def always_accept(self):
        return (self.NL_type,)
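# Usage sketch (illustrative, following Lark's documented postlex pattern): a concrete
# subclass supplies the class-level attributes the methods above rely on, and is
# passed to the parser as its postlex processor. The terminal names are hypothetical:
#
#     class TreeIndenter(Indenter):
#         NL_type = '_NEWLINE'
#         OPEN_PAREN_types = ['LPAR', 'LBRACE', 'LSQB']
#         CLOSE_PAREN_types = ['RPAR', 'RBRACE', 'RSQB']
#         INDENT_type = '_INDENT'
#         DEDENT_type = '_DEDENT'
#         tab_len = 8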
class Symbol(Serialize):
    __slots__ = ('name',)
    is_term = NotImplemented
    def __init__(self, name):
        self.name = name
    def __eq__(self, other):
        assert isinstance(other, Symbol), other
        return self.is_term == other.is_term and self.name == other.name
    def __ne__(self, other):
        return not (self == other)
    def __hash__(self):
        return hash(self.name)
    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self.name)
    fullrepr = property(__repr__)
class Terminal(Symbol):
    __serialize_fields__ = 'name', 'filter_out'
    is_term = True
    def __init__(self, name, filter_out=False):
        self.name = name
        self.filter_out = filter_out
    @property
    def fullrepr(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
class NonTerminal(Symbol):
    __serialize_fields__ = 'name',
    is_term = False
class RuleOptions(Serialize):
    __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'empty_indices'
    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.priority = priority
        self.empty_indices = empty_indices
    def __repr__(self):
        return 'RuleOptions(%r, %r, %r)' % (
            self.keep_all_tokens,
            self.expand1,
            self.priority,
        )
class Rule(Serialize):
    """
        origin : a symbol
        expansion : a list of symbols
        order : index of this expansion amongst all rules of the same name
    """
    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
    __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
    __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
    def __init__(self, origin, expansion, order=0, alias=None, options=None):
        self.origin = origin
        self.expansion = expansion
        self.alias = alias
        self.order = order
        self.options = options or RuleOptions()
        self._hash = hash((self.origin, tuple(self.expansion)))
    def _deserialize(self):
        self._hash = hash((self.origin, tuple(self.expansion)))
    def __str__(self):
        return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
    def __repr__(self):
        return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
    def __hash__(self):
        return self._hash
    def __eq__(self, other):
        if not isinstance(other, Rule):
            return False
        return self.origin == other.origin and self.expansion == other.expansion
class Pattern(Serialize):
    def __init__(self, value, flags=()):
        self.value = value
        self.flags = frozenset(flags)
    def __repr__(self):
        return repr(self.to_regexp())
    # Pattern Hashing assumes all subclasses have a different priority!
    def __hash__(self):
        return hash((type(self), self.value, self.flags))
    def __eq__(self, other):
        return type(self) == type(other) and self.value == other.value and self.flags == other.flags
    def to_regexp(self):
        raise NotImplementedError()
    if Py36:
        # Python 3.6 changed syntax for flags in regular expression
        def _get_flags(self, value):
            for f in self.flags:
                value = ('(?%s:%s)' % (f, value))
            return value
    else:
        def _get_flags(self, value):
            for f in self.flags:
                value = ('(?%s)' % f) + value
            return value
class PatternStr(Pattern):
    __serialize_fields__ = 'value', 'flags'
    type = "str"
    def to_regexp(self):
        return self._get_flags(re.escape(self.value))
    @property
    def min_width(self):
        return len(self.value)
    max_width = min_width
class PatternRE(Pattern):
    __serialize_fields__ = 'value', 'flags', '_width'
    type = "re"
    def to_regexp(self):
        return self._get_flags(self.value)
    _width = None
    def _get_width(self):
        if self._width is None:
            self._width = get_regexp_width(self.to_regexp())
        return self._width
    @property
    def min_width(self):
        return self._get_width()[0]
    @property
    def max_width(self):
        return self._get_width()[1]
class TerminalDef(Serialize):
    __serialize_fields__ = 'name', 'pattern', 'priority'
    __serialize_namespace__ = PatternStr, PatternRE
    def __init__(self, name, pattern, priority=1):
        assert isinstance(pattern, Pattern), pattern
        self.name = name
        self.pattern = pattern
        self.priority = priority
    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
class Token(Str):
    __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
    def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
        try:
            self = super(Token, cls).__new__(cls, value)
        except UnicodeDecodeError:
            value = value.decode('latin1')
            self = super(Token, cls).__new__(cls, value)
        self.type = type_
        self.pos_in_stream = pos_in_stream
        self.value = value
        self.line = line
        self.column = column
        self.end_line = end_line
        self.end_column = end_column
        self.end_pos = end_pos
        return self
    def update(self, type_=None, value=None):
        return Token.new_borrow_pos(
            type_ if type_ is not None else self.type,
            value if value is not None else self.value,
            self
        )
    @classmethod
    def new_borrow_pos(cls, type_, value, borrow_t):
        return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
    def __reduce__(self):
        return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))
    def __repr__(self):
        return 'Token(%s, %r)' % (self.type, self.value)
    def __deepcopy__(self, memo):
        return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)
    def __eq__(self, other):
        if isinstance(other, Token) and self.type != other.type:
            return False
        return Str.__eq__(self, other)
    __hash__ = Str.__hash__
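# Usage sketch (illustrative; not part of the generated module): Token subclasses str,
# so a token compares equal to its text while still carrying lexing metadata:
#
#     tok = Token('NUMBER', '42', pos_in_stream=0, line=1, column=1)
#     assert tok == '42' and tok.type == 'NUMBER'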
class LineCounter:
    def __init__(self):
        self.newline_char = '\n'
        self.char_pos = 0
        self.line = 1
        self.column = 1
        self.line_start_pos = 0
    def feed(self, token, test_newline=True):
        """Consume a token and calculate the new line & column.
        As an optional optimization, set test_newline=False if token doesn't contain a newline.
        """
        if test_newline:
            newlines = token.count(self.newline_char)
            if newlines:
                self.line += newlines
                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
        self.char_pos += len(token)
        self.column = self.char_pos - self.line_start_pos + 1
class _Lex:
    "Built to serve both Lexer and ContextualLexer"
    def __init__(self, lexer, state=None):
        self.lexer = lexer
        self.state = state
    def lex(self, stream, newline_types, ignore_types):
        newline_types = frozenset(newline_types)
        ignore_types = frozenset(ignore_types)
        line_ctr = LineCounter()
        last_token = None
        while line_ctr.char_pos < len(stream):
            lexer = self.lexer
            res = lexer.match(stream, line_ctr.char_pos)
            if not res:
                allowed = {v for m, tfi in lexer.mres for v in tfi.values()} - ignore_types
                if not allowed:
                    allowed = {"<END-OF-FILE>"}
                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token])
            value, type_ = res
            if type_ not in ignore_types:
                t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                line_ctr.feed(value, type_ in newline_types)
                t.end_line = line_ctr.line
                t.end_column = line_ctr.column
                t.end_pos = line_ctr.char_pos
                if t.type in lexer.callback:
                    t = lexer.callback[t.type](t)
                    if not isinstance(t, Token):
                        raise ValueError("Callbacks must return a token (returned %r)" % t)
                yield t
                last_token = t
            else:
                if type_ in lexer.callback:
                    t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                    lexer.callback[type_](t2)
                line_ctr.feed(value, type_ in newline_types)
class UnlessCallback:
    def __init__(self, mres):
        self.mres = mres
    def __call__(self, t):
        for mre, type_from_index in self.mres:
            m = mre.match(t.value)
            if m:
                t.type = type_from_index[m.lastindex]
                break
        return t
class CallChain:
    def __init__(self, callback1, callback2, cond):
        self.callback1 = callback1
        self.callback2 = callback2
        self.cond = cond
    def __call__(self, t):
        t2 = self.callback1(t)
        return self.callback2(t) if self.cond(t2) else t2
def _create_unless(terminals):
    tokens_by_type = classify(terminals, lambda t: type(t.pattern))
    assert len(tokens_by_type) <= 2, tokens_by_type.keys()
    embedded_strs = set()
    callback = {}
    for retok in tokens_by_type.get(PatternRE, []):
        unless = []  # {}
        for strtok in tokens_by_type.get(PatternStr, []):
            if strtok.priority > retok.priority:
                continue
            s = strtok.pattern.value
            m = re.match(retok.pattern.to_regexp(), s)
            if m and m.group(0) == s:
                unless.append(strtok)
                if strtok.pattern.flags <= retok.pattern.flags:
                    embedded_strs.add(strtok)
        if unless:
            callback[retok.name] = UnlessCallback(build_mres(unless, match_whole=True))
    terminals = [t for t in terminals if t not in embedded_strs]
    return terminals, callback
def _build_mres(terminals, max_size, match_whole):
    # Python sets an unreasonable group limit (currently 100) in its re module
    # Worse, the only way to know we reached it is by catching an AssertionError!
    # This function recursively tries fewer and fewer groups until it succeeds.
    postfix = '$' if match_whole else ''
    mres = []
    while terminals:
        try:
            mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]))
        except AssertionError:  # Yes, this is what Python provides us.. :/
            return _build_mres(terminals, max_size//2, match_whole)
        # terms_from_name = {t.name: t for t in terminals[:max_size]}
        mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
        terminals = terminals[max_size:]
    return mres
def build_mres(terminals, match_whole=False):
    return _build_mres(terminals, len(terminals), match_whole)
def _regexp_has_newline(r):
    r"""Expressions that may indicate newlines in a regexp:
        - newlines (\n)
        - escaped newline (\\n)
        - anything but ([^...])
        - any-char (.) when the flag (?s) exists
        - spaces (\s)
    """
    return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
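# For example (comment added for illustration), _regexp_has_newline(r'\s+') and
# _regexp_has_newline(r'[^"]+') are truthy, since both patterns can match a newline,
# while _regexp_has_newline('abc') is not.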
class Lexer(object):
    """Lexer interface
    Method Signatures:
        lex(self, stream) -> Iterator[Token]
    """
    lex = NotImplemented
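# Note (illustrative, based on get_frontend() further below): a custom lexer is a
# Lexer subclass whose lex(self, stream) yields Token objects; such a class can be
# passed as the lexer when the 'lalr' parser is selected.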
class TraditionalLexer(Lexer):
    def __init__(self, terminals, ignore=(), user_callbacks={}):
        assert all(isinstance(t, TerminalDef) for t in terminals), terminals
        terminals = list(terminals)
        # Sanitization
        for t in terminals:
            try:
                re.compile(t.pattern.to_regexp())
            except re.error:
                raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
            if t.pattern.min_width == 0:
                raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
        assert set(ignore) <= {t.name for t in terminals}
        # Init
        self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())]
        self.ignore_types = list(ignore)
        terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
        self.terminals = terminals
        self.user_callbacks = user_callbacks
        self.build()
    def build(self):
        terminals, self.callback = _create_unless(self.terminals)
        assert all(self.callback.values())
        for type_, f in self.user_callbacks.items():
            if type_ in self.callback:
                # Already a callback there, probably UnlessCallback
                self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
            else:
                self.callback[type_] = f
        self.mres = build_mres(terminals)
    def match(self, stream, pos):
        for mre, type_from_index in self.mres:
            m = mre.match(stream, pos)
            if m:
                return m.group(0), type_from_index[m.lastindex]
    def lex(self, stream):
        return _Lex(self).lex(stream, self.newline_types, self.ignore_types)
class ContextualLexer(Lexer):
    def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
        tokens_by_name = {}
        for t in terminals:
            assert t.name not in tokens_by_name, t
            tokens_by_name[t.name] = t
        lexer_by_tokens = {}
        self.lexers = {}
        for state, accepts in states.items():
            key = frozenset(accepts)
            try:
                lexer = lexer_by_tokens[key]
            except KeyError:
                accepts = set(accepts) | set(ignore) | set(always_accept)
                state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name]
                lexer = TraditionalLexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
                lexer_by_tokens[key] = lexer
            self.lexers[state] = lexer
        self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks)
    def lex(self, stream, get_parser_state):
        parser_state = get_parser_state()
        l = _Lex(self.lexers[parser_state], parser_state)
        try:
            for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
                yield x
                parser_state = get_parser_state()
                l.lexer = self.lexers[parser_state]
                l.state = parser_state  # For debug only, no need to worry about multithreading
        except UnexpectedCharacters as e:
            # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined,
            # but not in the current context.
            # This tests the input against the global context, to provide a nicer error.
            root_match = self.root_lexer.match(stream, e.pos_in_stream)
            if not root_match:
                raise
            value, type_ = root_match
            t = Token(type_, value, e.pos_in_stream, e.line, e.column)
            raise UnexpectedToken(t, e.allowed, state=e.state)
class LexerConf(Serialize):
    __serialize_fields__ = 'tokens', 'ignore'
    __serialize_namespace__ = TerminalDef,
    def __init__(self, tokens, ignore=(), postlex=None, callbacks=None):
        self.tokens = tokens
        self.ignore = ignore
        self.postlex = postlex
        self.callbacks = callbacks or {}
    def _deserialize(self):
        self.callbacks = {}  # TODO
from functools import partial, wraps
from itertools import repeat, product
class ExpandSingleChild:
    def __init__(self, node_builder):
        self.node_builder = node_builder
    def __call__(self, children):
        if len(children) == 1:
            return children[0]
        else:
            return self.node_builder(children)
class PropagatePositions:
    def __init__(self, node_builder):
        self.node_builder = node_builder
    def __call__(self, children):
        res = self.node_builder(children)
        if isinstance(res, Tree):
            for c in children:
                if isinstance(c, Tree) and not c.meta.empty:
                    res.meta.line = c.meta.line
                    res.meta.column = c.meta.column
                    res.meta.start_pos = c.meta.start_pos
                    res.meta.empty = False
                    break
                elif isinstance(c, Token):
                    res.meta.line = c.line
                    res.meta.column = c.column
                    res.meta.start_pos = c.pos_in_stream
                    res.meta.empty = False
                    break
            for c in reversed(children):
                if isinstance(c, Tree) and not c.meta.empty:
                    res.meta.end_line = c.meta.end_line
                    res.meta.end_column = c.meta.end_column
                    res.meta.end_pos = c.meta.end_pos
                    res.meta.empty = False
                    break
                elif isinstance(c, Token):
                    res.meta.end_line = c.end_line
                    res.meta.end_column = c.end_column
                    res.meta.end_pos = c.end_pos
                    res.meta.empty = False
                    break
        return res
class ChildFilter:
    def __init__(self, to_include, append_none, node_builder):
        self.node_builder = node_builder
        self.to_include = to_include
        self.append_none = append_none
    def __call__(self, children):
        filtered = []
        for i, to_expand, add_none in self.to_include:
            if add_none:
                filtered += [None] * add_none
            if to_expand:
                filtered += children[i].children
            else:
                filtered.append(children[i])
        if self.append_none:
            filtered += [None] * self.append_none
        return self.node_builder(filtered)
class ChildFilterLALR(ChildFilter):
    "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
    def __call__(self, children):
        filtered = []
        for i, to_expand, add_none in self.to_include:
            if add_none:
                filtered += [None] * add_none
            if to_expand:
                if filtered:
                    filtered += children[i].children
                else:  # Optimize for left-recursion
                    filtered = children[i].children
            else:
                filtered.append(children[i])
        if self.append_none:
            filtered += [None] * self.append_none
        return self.node_builder(filtered)
class ChildFilterLALR_NoPlaceholders(ChildFilter):
    "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
    def __init__(self, to_include, node_builder):
        self.node_builder = node_builder
        self.to_include = to_include
    def __call__(self, children):
        filtered = []
        for i, to_expand in self.to_include:
            if to_expand:
                if filtered:
                    filtered += children[i].children
                else:  # Optimize for left-recursion
                    filtered = children[i].children
            else:
                filtered.append(children[i])
        return self.node_builder(filtered)
def _should_expand(sym):
    return not sym.is_term and sym.name.startswith('_')
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
    # Prepare empty_indices as: How many Nones to insert at each index?
    if _empty_indices:
        assert _empty_indices.count(False) == len(expansion)
        s = ''.join(str(int(b)) for b in _empty_indices)
        empty_indices = [len(ones) for ones in s.split('0')]
        assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
    else:
        empty_indices = [0] * (len(expansion)+1)
    to_include = []
    nones_to_add = 0
    for i, sym in enumerate(expansion):
        nones_to_add += empty_indices[i]
        if keep_all_tokens or not (sym.is_term and sym.filter_out):
            to_include.append((i, _should_expand(sym), nones_to_add))
            nones_to_add = 0
    nones_to_add += empty_indices[len(expansion)]
    if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
        if _empty_indices or ambiguous:
            return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
        else:
            # LALR without placeholders
            return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
class AmbiguousExpander:
    """Deal with the case where we're expanding children ('_rule') into a parent but the children
       are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
       ambiguous with as many copies as there are ambiguous children, and then copy the ambiguous children
       into the right parents in the right places, essentially shifting the ambiguity up the tree."""
    def __init__(self, to_expand, tree_class, node_builder):
        self.node_builder = node_builder
        self.tree_class = tree_class
        self.to_expand = to_expand
    def __call__(self, children):
        def _is_ambig_tree(child):
            return hasattr(child, 'data') and child.data == '_ambig'
        #### When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
        # All children of an _ambig node should be a derivation of that ambig node, hence
        # it is safe to assume that if we see an _ambig node nested within an ambig node
        # it is safe to simply expand it into the parent _ambig node as an alternative derivation.
        ambiguous = []
        for i, child in enumerate(children):
            if _is_ambig_tree(child):
                if i in self.to_expand:
                    ambiguous.append(i)
                to_expand = [j for j, grandchild in enumerate(child.children) if _is_ambig_tree(grandchild)]
                child.expand_kids_by_index(*to_expand)
        if not ambiguous:
            return self.node_builder(children)
        expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ]
        return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])
def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
    to_expand = [i for i, sym in enumerate(expansion)
                 if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
    if to_expand:
        return partial(AmbiguousExpander, to_expand, tree_class)
def ptb_inline_args(func):
    @wraps(func)
    def f(children):
        return func(*children)
    return f
def inplace_transformer(func):
    @wraps(func)
    def f(children):
        # function name in a Transformer is a rule name.
        tree = Tree(func.__name__, children)
        return func(tree)
    return f
def apply_visit_wrapper(func, name, wrapper):
    # In this stand-alone module the _vargs_* helpers are defined at module level
    # (there is no separate 'visitors' module to qualify them with).
    if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
        raise NotImplementedError("Meta args not supported for internal transformer")
    @wraps(func)
    def f(children):
        return wrapper(func, name, children, None)
    return f
class ParseTreeBuilder:
    def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
        self.tree_class = tree_class
        self.propagate_positions = propagate_positions
        self.always_keep_all_tokens = keep_all_tokens
        self.ambiguous = ambiguous
        self.maybe_placeholders = maybe_placeholders
        self.rule_builders = list(self._init_builders(rules))
    def _init_builders(self, rules):
        for rule in rules:
            options = rule.options
            keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens
            expand_single_child = options.expand1
            wrapper_chain = list(filter(None, [
                (expand_single_child and not rule.alias) and ExpandSingleChild,
                maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
                self.propagate_positions and PropagatePositions,
                self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
            ]))
            yield rule, wrapper_chain
    def create_callback(self, transformer=None):
        callbacks = {}
        for rule, wrapper_chain in self.rule_builders:
            user_callback_name = rule.alias or rule.origin.name
            try:
                f = getattr(transformer, user_callback_name)
                # XXX InlineTransformer is deprecated!
                wrapper = getattr(f, 'visit_wrapper', None)
                if wrapper is not None:
                    f = apply_visit_wrapper(f, user_callback_name, wrapper)
                else:
                    if isinstance(transformer, InlineTransformer):
                        f = ptb_inline_args(f)
                    elif isinstance(transformer, Transformer_InPlace):
                        f = inplace_transformer(f)
            except AttributeError:
                f = partial(self.tree_class, user_callback_name)
            for w in wrapper_chain:
                f = w(f)
            if rule in callbacks:
                raise GrammarError("Rule '%s' already exists" % (rule,))
            callbacks[rule] = f
        return callbacks
class LALR_Parser(object):
    def __init__(self, parser_conf, debug=False):
        assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization"
        analysis = LALR_Analyzer(parser_conf, debug=debug)
        analysis.compute_lalr()
        callbacks = parser_conf.callbacks
        self._parse_table = analysis.parse_table
        self.parser_conf = parser_conf
        self.parser = _Parser(analysis.parse_table, callbacks)
    @classmethod
    def deserialize(cls, data, memo, callbacks):
        inst = cls.__new__(cls)
        inst._parse_table = IntParseTable.deserialize(data, memo)
        inst.parser = _Parser(inst._parse_table, callbacks)
        return inst
    def serialize(self, memo):
        return self._parse_table.serialize(memo)
    def parse(self, *args):
        return self.parser.parse(*args)
class _Parser:
    def __init__(self, parse_table, callbacks):
        self.states = parse_table.states
        self.start_states = parse_table.start_states
        self.end_states = parse_table.end_states
        self.callbacks = callbacks
    def parse(self, seq, start, set_state=None):
        token = None
        stream = iter(seq)
        states = self.states
        start_state = self.start_states[start]
        end_state = self.end_states[start]
        state_stack = [start_state]
        value_stack = []
        if set_state: set_state(start_state)
        def get_action(token):
            state = state_stack[-1]
            try:
                return states[state][token.type]
            except KeyError:
                expected = [s for s in states[state].keys() if s.isupper()]
                raise UnexpectedToken(token, expected, state=state)
        def reduce(rule):
            size = len(rule.expansion)
            if size:
                s = value_stack[-size:]
                del state_stack[-size:]
                del value_stack[-size:]
            else:
                s = []
            value = self.callbacks[rule](s)
            _action, new_state = states[state_stack[-1]][rule.origin.name]
            assert _action is Shift
            state_stack.append(new_state)
            value_stack.append(value)
        # Main LALR-parser loop
        for token in stream:
            while True:
                action, arg = get_action(token)
                assert arg != end_state
                if action is Shift:
                    state_stack.append(arg)
                    value_stack.append(token)
                    if set_state: set_state(arg)
                    break  # next token
                else:
                    reduce(arg)
        token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
        while True:
            _action, arg = get_action(token)
            assert(_action is Reduce)
            reduce(arg)
            if state_stack[-1] == end_state:
                return value_stack[-1]
class Action:
    def __init__(self, name):
        self.name = name
    def __str__(self):
        return self.name
    def __repr__(self):
        return str(self)
Shift = Action('Shift')
Reduce = Action('Reduce')
class ParseTable:
    def __init__(self, states, start_states, end_states):
        self.states = states
        self.start_states = start_states
        self.end_states = end_states
    def serialize(self, memo):
        tokens = Enumerator()
        rules = Enumerator()
        states = {
            state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
                    for token, (action, arg) in actions.items()}
            for state, actions in self.states.items()
        }
        return {
            'tokens': tokens.reversed(),
            'states': states,
            'start_states': self.start_states,
            'end_states': self.end_states,
        }
    @classmethod
    def deserialize(cls, data, memo):
        tokens = data['tokens']
        states = {
            state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
                    for token, (action, arg) in actions.items()}
            for state, actions in data['states'].items()
        }
        return cls(states, data['start_states'], data['end_states'])
class IntParseTable(ParseTable):
    @classmethod
    def from_ParseTable(cls, parse_table):
        enum = list(parse_table.states)
        state_to_idx = {s:i for i,s in enumerate(enum)}
        int_states = {}
        for s, la in parse_table.states.items():
            la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
                  for k,v in la.items()}
            int_states[ state_to_idx[s] ] = la
        start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
        end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
        return cls(int_states, start_states, end_states)
def get_frontend(parser, lexer):
    if parser=='lalr':
        if lexer is None:
            raise ValueError('The LALR parser requires use of a lexer')
        elif lexer == 'standard':
            return LALR_TraditionalLexer
        elif lexer == 'contextual':
            return LALR_ContextualLexer
        elif issubclass(lexer, Lexer):
            return partial(LALR_CustomLexer, lexer)
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser=='earley':
        if lexer=='standard':
            return Earley
        elif lexer=='dynamic':
            return XEarley
        elif lexer=='dynamic_complete':
            return XEarley_CompleteLex
        elif lexer=='contextual':
            raise ValueError('The Earley parser does not support the contextual lexer')
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'cyk':
        if lexer == 'standard':
            return CYK
        else:
            raise ValueError('The CYK parser requires using the standard lexer.')
    else:
        raise ValueError('Unknown parser: %s' % parser)
class _ParserFrontend(Serialize):
    def _parse(self, input, start, *args):
        if start is None:
            start = self.start
            if len(start) > 1:
                raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
            start ,= start
        return self.parser.parse(input, start, *args)
class WithLexer(_ParserFrontend):
    lexer = None
    parser = None
    lexer_conf = None
    start = None
    __serialize_fields__ = 'parser', 'lexer_conf', 'start'
    __serialize_namespace__ = LexerConf,
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.lexer_conf = lexer_conf
        self.start = parser_conf.start
        self.postlex = lexer_conf.postlex
    @classmethod
    def deserialize(cls, data, memo, callbacks, postlex):
        inst = super(WithLexer, cls).deserialize(data, memo)
        inst.postlex = postlex
        inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks)
        inst.init_lexer()
        return inst
    def _serialize(self, data, memo):
        data['parser'] = data['parser'].serialize(memo)
    def lex(self, *args):
        stream = self.lexer.lex(*args)
        return self.postlex.process(stream) if self.postlex else stream
    def parse(self, text, start=None):
        token_stream = self.lex(text)
        return self._parse(token_stream, start)
    def init_traditional_lexer(self):
        self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)
class LALR_WithLexer(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        debug = options.debug if options else False
        self.parser = LALR_Parser(parser_conf, debug=debug)
        WithLexer.__init__(self, lexer_conf, parser_conf, options)
        self.init_lexer()
    def init_lexer(self):
        raise NotImplementedError()
class LALR_TraditionalLexer(LALR_WithLexer):
    def init_lexer(self):
        self.init_traditional_lexer()
class LALR_ContextualLexer(LALR_WithLexer):
    def init_lexer(self):
        states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
        always_accept = self.postlex.always_accept if self.postlex else ()
        self.lexer = ContextualLexer(self.lexer_conf.tokens, states,
                                     ignore=self.lexer_conf.ignore,
                                     always_accept=always_accept,
                                     user_callbacks=self.lexer_conf.callbacks)
    def parse(self, text, start=None):
        parser_state = [None]
        def set_parser_state(s):
            parser_state[0] = s
        token_stream = self.lex(text, lambda: parser_state[0])
        return self._parse(token_stream, start, set_parser_state)
  1359. class LarkOptions(Serialize):
  1360. """Specifies the options for Lark
  1361. """
  1362. OPTIONS_DOC = """
  1363. parser - Decides which parser engine to use, "earley" or "lalr". (Default: "earley")
  1364. Note: "lalr" requires a lexer
  1365. lexer - Decides whether or not to use a lexer stage
  1366. "standard": Use a standard lexer
  1367. "contextual": Stronger lexer (only works with parser="lalr")
  1368. "dynamic": Flexible and powerful (only with parser="earley")
  1369. "dynamic_complete": Same as dynamic, but tries *every* variation
  1370. of tokenizing possible. (only with parser="earley")
  1371. "auto" (default): Choose for me based on grammar and parser
  1372. ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
  1373. "resolve": The parser will automatically choose the simplest derivation
  1374. (it chooses consistently: greedy for tokens, non-greedy for rules)
  1375. "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
  1376. transformer - Applies the transformer to every parse tree
  1377. debug - Affects verbosity (default: False)
  1378. keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
  1379. cache_grammar - Cache the Lark grammar (Default: False). Not implemented yet.
  1380. postlex - Lexer post-processing (Default: None). Only works with the standard and contextual lexers.
  1381. start - The start symbol, either a string, or a list of strings for multiple possible starts (Default: "start")
  1382. priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto)
  1383. propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches.
  1384. lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
  1385. maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None
  1386. """
  1387. if __doc__:
  1388. __doc__ += OPTIONS_DOC
  1389. _defaults = {
  1390. 'debug': False,
  1391. 'keep_all_tokens': False,
  1392. 'tree_class': None,
  1393. 'cache_grammar': False,
  1394. 'postlex': None,
  1395. 'parser': 'earley',
  1396. 'lexer': 'auto',
  1397. 'transformer': None,
  1398. 'start': 'start',
  1399. 'priority': 'auto',
  1400. 'ambiguity': 'auto',
  1401. 'propagate_positions': True,
  1402. 'lexer_callbacks': {},
  1403. 'maybe_placeholders': True,
  1404. 'edit_terminals': None,
  1405. }
  1406. def __init__(self, options_dict):
  1407. o = dict(options_dict)
  1408. options = {}
  1409. for name, default in self._defaults.items():
  1410. if name in o:
  1411. value = o.pop(name)
  1412. if isinstance(default, bool):
  1413. value = bool(value)
  1414. else:
  1415. value = default
  1416. options[name] = value
  1417. if isinstance(options['start'], STRING_TYPE):
  1418. options['start'] = [options['start']]
  1419. self.__dict__['options'] = options
  1420. assert self.parser in ('earley', 'lalr', 'cyk', None)
  1421. if self.parser == 'earley' and self.transformer:
  1422. raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.'
  1423. ' Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
  1424. if o:
  1425. raise ValueError("Unknown options: %s" % o.keys())
  1426. def __getattr__(self, name):
  1427. try:
  1428. return self.options[name]
  1429. except KeyError as e:
  1430. raise AttributeError(e)
  1431. def __setattr__(self, name, value):
  1432. assert name in self.options
  1433. self.options[name] = value
  1434. def serialize(self, memo):
  1435. return self.options
  1436. @classmethod
  1437. def deserialize(cls, data, memo):
  1438. return cls(data)
  1439. class Lark(Serialize):
  1440. def __init__(self, grammar, **options):
  1441. """
  1442. grammar : a string or file-object containing the grammar spec (using Lark's EBNF syntax)
  1443. options : keyword options controlling various aspects of Lark.
  1444. """
  1445. self.options = LarkOptions(options)
  1446. # Some, but not all file-like objects have a 'name' attribute
  1447. try:
  1448. self.source = grammar.name
  1449. except AttributeError:
  1450. self.source = '<string>'
  1451. # Drain file-like objects to get their contents
  1452. try:
  1453. read = grammar.read
  1454. except AttributeError:
  1455. pass
  1456. else:
  1457. grammar = read()
  1458. assert isinstance(grammar, STRING_TYPE)
  1459. if self.options.cache_grammar:
  1460. raise NotImplementedError("Not available yet")
  1461. if self.options.lexer == 'auto':
  1462. if self.options.parser == 'lalr':
  1463. self.options.lexer = 'contextual'
  1464. elif self.options.parser == 'earley':
  1465. self.options.lexer = 'dynamic'
  1466. elif self.options.parser == 'cyk':
  1467. self.options.lexer = 'standard'
  1468. else:
  1469. assert False, self.options.parser
  1470. lexer = self.options.lexer
  1471. assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)
  1472. if self.options.ambiguity == 'auto':
  1473. if self.options.parser == 'earley':
  1474. self.options.ambiguity = 'resolve'
  1475. else:
  1476. disambig_parsers = ['earley', 'cyk']
  1477. assert self.options.parser in disambig_parsers, (
  1478. 'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
  1479. if self.options.priority == 'auto':
  1480. if self.options.parser in ('earley', 'cyk', ):
  1481. self.options.priority = 'normal'
  1482. elif self.options.parser in ('lalr', ):
  1483. self.options.priority = None
  1484. elif self.options.priority in ('invert', 'normal'):
  1485. assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time"
  1486. assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
  1487. assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
  1488. assert self.options.ambiguity in ('resolve', 'explicit', 'auto', )
  1489. # Parse the grammar file and compose the grammars (TODO)
  1490. self.grammar = load_grammar(grammar, self.source)
  1491. # Compile the EBNF grammar into BNF
  1492. self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
  1493. if self.options.edit_terminals:
  1494. for t in self.terminals:
  1495. self.options.edit_terminals(t)
  1496. self._terminals_dict = {t.name:t for t in self.terminals}
  1497. # If the user asked to invert the priorities, negate them all here.
  1498. # This replaces the old 'resolve__antiscore_sum' option.
  1499. if self.options.priority == 'invert':
  1500. for rule in self.rules:
  1501. if rule.options.priority is not None:
  1502. rule.options.priority = -rule.options.priority
  1503. # Else, if the user asked to disable priorities, strip them from the
  1504. # rules. This allows the Earley parsers to skip an extra forest walk
  1505. # for improved performance, if you don't need them (or didn't specify any).
  1506. elif self.options.priority is None:
  1507. for rule in self.rules:
  1508. if rule.options.priority is not None:
  1509. rule.options.priority = None
  1510. # TODO Deprecate lexer_callbacks?
  1511. lexer_callbacks = dict(self.options.lexer_callbacks)
  1512. if self.options.transformer:
  1513. t = self.options.transformer
  1514. for term in self.terminals:
  1515. if hasattr(t, term.name):
  1516. lexer_callbacks[term.name] = getattr(t, term.name)
  1517. self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, lexer_callbacks)
  1518. if self.options.parser:
  1519. self.parser = self._build_parser()
  1520. elif lexer:
  1521. self.lexer = self._build_lexer()
  1522. if __init__.__doc__:
  1523. __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC
  1524. __serialize_fields__ = 'parser', 'rules', 'options'
  1525. def _build_lexer(self):
  1526. return TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)
  1527. def _prepare_callbacks(self):
  1528. self.parser_class = get_frontend(self.options.parser, self.options.lexer)
  1529. self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
  1530. self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
  1531. def _build_parser(self):
  1532. self._prepare_callbacks()
  1533. parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
  1534. return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
  1535. @classmethod
  1536. def deserialize(cls, data, namespace, memo, transformer=None, postlex=None):
  1537. if memo:
  1538. memo = SerializeMemoizer.deserialize(memo, namespace, {})
  1539. inst = cls.__new__(cls)
  1540. options = dict(data['options'])
  1541. options['transformer'] = transformer
  1542. options['postlex'] = postlex
  1543. inst.options = LarkOptions.deserialize(options, memo)
  1544. inst.rules = [Rule.deserialize(r, memo) for r in data['rules']]
  1545. inst.source = '<deserialized>'
  1546. inst._prepare_callbacks()
  1547. inst.parser = inst.parser_class.deserialize(data['parser'], memo, inst._callbacks, inst.options.postlex)
  1548. return inst
  1549. @classmethod
  1550. def open(cls, grammar_filename, rel_to=None, **options):
  1551. """Create an instance of Lark with the grammar given by its filename
  1552. If rel_to is provided, the function will find the grammar filename in relation to it.
  1553. Example:
  1554. >>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr")
  1555. Lark(...)
  1556. """
  1557. if rel_to:
  1558. basepath = os.path.dirname(rel_to)
  1559. grammar_filename = os.path.join(basepath, grammar_filename)
  1560. with open(grammar_filename, encoding='utf8') as f:
  1561. return cls(f, **options)
  1562. def __repr__(self):
  1563. return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)
  1564. def lex(self, text):
  1565. "Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'"
  1566. if not hasattr(self, 'lexer'):
  1567. self.lexer = self._build_lexer()
  1568. stream = self.lexer.lex(text)
  1569. if self.options.postlex:
  1570. return self.options.postlex.process(stream)
  1571. return stream
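# Sketch of lexing without parsing, assuming an instance built with
# lexer='standard' (the grammar is illustrative only):
#
#     l = Lark(r'''
#         start: WORD+
#         WORD: /\w+/
#         %ignore " "
#     ''', parser='lalr', lexer='standard')
#     for tok in l.lex('one two three'):
#         print(tok.type, tok.value)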
  1572. def get_terminal(self, name):
  1573. "Get information about a terminal"
  1574. return self._terminals_dict[name]
  1575. def parse(self, text, start=None):
  1576. """Parse the given text, according to the options provided.
  1577. The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option).
  1578. Returns a tree, unless specified otherwise.
  1579. """
  1580. return self.parser.parse(text, start=start)
  1581. DATA = (
  1582. {'rules': [{'@': 27}, {'@': 31}, {'@': 26}, {'@': 13}, {'@': 25}, {'@': 18}, {'@': 16}, {'@': 24}, {'@': 22}, {'@': 17}, {'@': 28}, {'@': 30}, {'@': 20}, {'@': 29}, {'@': 21}, {'@': 23}, {'@': 15}, {'@': 19}, {'@': 12}, {'@': 14}], 'parser': {'lexer_conf': {'tokens': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], 'ignore': [u'WS'], '__type__': 'LexerConf'}, 'parser': {'tokens': {0: 'LBRACE', 1: u'FALSE', 2: u'string', 3: u'object', 4: u'NULL', 5: u'SIGNED_NUMBER', 6: u'value', 7: 'start', 8: 'LSQB', 9: u'ESCAPED_STRING', 10: u'TRUE', 11: u'array', 12: 'COMMA', 13: 'RBRACE', 14: u'pair', 15: 'COLON', 16: 'RSQB', 17: '$END', 18: '__anon_star_1', 19: '__anon_star_0'}, 'states': {0: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 28), 7: (0, 11), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 1: {12: (0, 2), 13: (0, 31)}, 2: {9: (0, 20), 2: (0, 4), 14: (0, 6)}, 3: {12: (1, {'@': 12}), 13: (1, {'@': 12})}, 4: {15: (0, 15)}, 5: {16: (1, {'@': 13}), 17: (1, {'@': 13}), 12: (1, {'@': 13}), 13: (1, {'@': 13})}, 6: {12: (1, {'@': 14}), 13: (1, {'@': 14})}, 7: {16: (1, {'@': 15}), 12: (1, {'@': 15})}, 8: {16: (1, {'@': 16}), 17: (1, {'@': 16}), 12: (1, {'@': 16}), 13: (1, {'@': 16})}, 9: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 14), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 10: {16: (1, {'@': 17}), 17: (1, {'@': 17}), 12: (1, {'@': 17}), 13: (1, {'@': 17})}, 11: {}, 12: {18: (0, 1), 12: (0, 21), 13: (0, 16)}, 13: {16: (1, {'@': 18}), 17: (1, {'@': 18}), 12: (1, {'@': 18}), 13: (1, {'@': 18})}, 14: {16: (1, {'@': 19}), 12: (1, {'@': 19})}, 15: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 17), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 16: {16: (1, {'@': 20}), 17: (1, {'@': 20}), 12: (1, {'@': 20}), 13: (1, {'@': 20})}, 17: {12: (1, {'@': 21}), 13: (1, {'@': 21})}, 18: {16: (1, {'@': 22}), 17: (1, {'@': 22}), 12: (1, {'@': 22}), 13: (1, {'@': 22})}, 19: {16: (0, 18), 12: (0, 9)}, 20: {16: (1, {'@': 23}), 17: (1, {'@': 23}), 12: (1, {'@': 23}), 13: (1, {'@': 23}), 15: (1, {'@': 23})}, 21: {9: (0, 20), 2: (0, 4), 14: (0, 3)}, 22: {16: (0, 10), 19: (0, 19), 12: (0, 27)}, 23: {16: (1, {'@': 24}), 17: (1, {'@': 24}), 12: (1, {'@': 24}), 13: (1, {'@': 24})}, 24: {16: (1, {'@': 25}), 17: (1, {'@': 25}), 12: (1, {'@': 25}), 13: (1, {'@': 25})}, 25: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 22), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26), 16: (0, 29)}, 26: {16: (1, {'@': 26}), 17: (1, {'@': 26}), 12: (1, {'@': 26}), 13: (1, {'@': 26})}, 27: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 7), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 28: {17: (1, {'@': 27})}, 29: {16: (1, {'@': 28}), 17: (1, {'@': 28}), 12: (1, {'@': 28}), 13: (1, {'@': 28})}, 30: {16: (1, {'@': 29}), 17: (1, {'@': 29}), 12: (1, {'@': 29}), 13: (1, {'@': 29})}, 31: {16: (1, {'@': 30}), 17: (1, {'@': 30}), 12: (1, {'@': 30}), 13: (1, {'@': 30})}, 32: {16: (1, {'@': 31}), 17: (1, {'@': 31}), 12: (1, {'@': 31}), 13: (1, {'@': 31})}, 33: {9: (0, 20), 2: (0, 4), 13: (0, 30), 14: (0, 12)}}, 'end_states': {'start': 11}, 'start_states': {'start': 0}}, '__type__': 'LALR_ContextualLexer', 'start': ['start']}, '__type__': 'Lark', 'options': {'transformer': None, 'lexer': 'contextual', 'lexer_callbacks': {}, 'debug': False, 'postlex': None, 'parser': 'lalr', 'cache_grammar': 
False, 'tree_class': None, 'priority': None, 'start': ['start'], 'keep_all_tokens': False, 'ambiguity': 'auto', 'edit_terminals': None, 'propagate_positions': True, 'maybe_placeholders': True}}
  1583. )
  1584. MEMO = (
  1585. {0: {'priority': 1, 'pattern': {'__type__': 'PatternRE', '_width': [2, 4294967295], 'flags': [], 'value': u'\\".*?(?<!\\\\)(\\\\\\\\)*?\\"'}, '__type__': 'TerminalDef', 'name': u'ESCAPED_STRING'}, 1: {'priority': 1, 'pattern': {'__type__': 'PatternRE', '_width': [1, 4294967295], 'flags': [], 'value': u'(?:[ \t\x0c\r\n])+'}, '__type__': 'TerminalDef', 'name': u'WS'}, 2: {'priority': 1, 'pattern': {'__type__': 'PatternRE', '_width': [1, 4294967295], 'flags': [], 'value': u'(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+)'}, '__type__': 'TerminalDef', 'name': u'SIGNED_NUMBER'}, 3: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'true'}, '__type__': 'TerminalDef', 'name': u'TRUE'}, 4: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'false'}, '__type__': 'TerminalDef', 'name': u'FALSE'}, 5: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'null'}, '__type__': 'TerminalDef', 'name': u'NULL'}, 6: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u','}, '__type__': 'TerminalDef', 'name': 'COMMA'}, 7: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'['}, '__type__': 'TerminalDef', 'name': 'LSQB'}, 8: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u']'}, '__type__': 'TerminalDef', 'name': 'RSQB'}, 9: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'{'}, '__type__': 'TerminalDef', 'name': 'LBRACE'}, 10: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'}'}, '__type__': 'TerminalDef', 'name': 'RBRACE'}, 11: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u':'}, '__type__': 'TerminalDef', 'name': 'COLON'}, 12: {'origin': {'__type__': 'NonTerminal', 'name': '__anon_star_1'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'pair'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 13: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'string'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 2}, 14: {'origin': {'__type__': 'NonTerminal', 'name': '__anon_star_1'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': '__anon_star_1'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'pair'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 1}, 15: {'origin': {'__type__': 'NonTerminal', 'name': '__anon_star_0'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'value'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 16: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': u'FALSE'}], 'options': {'priority': None, 'empty_indices': (), 
'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'false', 'order': 5}, 17: {'origin': {'__type__': 'NonTerminal', 'name': u'array'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LSQB'}, {'__type__': 'NonTerminal', 'name': u'value'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RSQB'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 1}, 18: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': u'TRUE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'true', 'order': 4}, 19: {'origin': {'__type__': 'NonTerminal', 'name': '__anon_star_0'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': '__anon_star_0'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'value'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 1}, 20: {'origin': {'__type__': 'NonTerminal', 'name': u'object'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LBRACE'}, {'__type__': 'NonTerminal', 'name': u'pair'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RBRACE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 1}, 21: {'origin': {'__type__': 'NonTerminal', 'name': u'pair'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'string'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'COLON'}, {'__type__': 'NonTerminal', 'name': u'value'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 22: {'origin': {'__type__': 'NonTerminal', 'name': u'array'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LSQB'}, {'__type__': 'NonTerminal', 'name': u'value'}, {'__type__': 'NonTerminal', 'name': '__anon_star_0'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RSQB'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 23: {'origin': {'__type__': 'NonTerminal', 'name': u'string'}, '__type__': 'Rule', 'expansion': [{'filter_out': False, '__type__': 'Terminal', 'name': u'ESCAPED_STRING'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 24: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': u'NULL'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'null', 'order': 6}, 25: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': False, '__type__': 'Terminal', 'name': u'SIGNED_NUMBER'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'number', 'order': 3}, 26: {'origin': {'__type__': 
'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'array'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 1}, 27: {'origin': {'__type__': 'NonTerminal', 'name': u'start'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'value'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 0}, 28: {'origin': {'__type__': 'NonTerminal', 'name': u'array'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LSQB'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RSQB'}], 'options': {'priority': None, 'empty_indices': [False, True, False], 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 2}, 29: {'origin': {'__type__': 'NonTerminal', 'name': u'object'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LBRACE'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RBRACE'}], 'options': {'priority': None, 'empty_indices': [False, True, False], 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 2}, 30: {'origin': {'__type__': 'NonTerminal', 'name': u'object'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LBRACE'}, {'__type__': 'NonTerminal', 'name': u'pair'}, {'__type__': 'NonTerminal', 'name': '__anon_star_1'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RBRACE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 31: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'object'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 0}}
  1586. )
  1587. Shift = 0
  1588. Reduce = 1
  1589. def Lark_StandAlone(transformer=None, postlex=None):
  1590. namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
  1591. return Lark.deserialize(DATA, namespace, MEMO, transformer=transformer, postlex=postlex)
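# A minimal end-to-end sketch of using this generated module. The module name
# 'json_parser' is only an example of how the file might be saved; the input
# matches the JSON grammar serialized in DATA/MEMO above.
#
#     from json_parser import Lark_StandAlone
#
#     parser = Lark_StandAlone()
#     tree = parser.parse('{"items": [1, 2.5, true, null, "text"]}')
#     print(tree.pretty())
#
# A transformer or post-lexer can be attached at construction time, e.g.
# Lark_StandAlone(transformer=MyTransformer()), where MyTransformer would be a
# user-defined Transformer subclass (hypothetical here).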