# The file was automatically generated by Lark v0.7.0
#
#
# Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser with a standard lexer
#
# Git: https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
# >>> LICENSE
#
# This tool and its generated code use a separate license from Lark.
#
# It is licensed under GPLv2 or above.
#
# If you wish to purchase a commercial license for this tool and its
# generated code, contact me via email.
#
# If GPL is incompatible with your free or open-source project,
# contact me and we'll work it out (for free).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# See <http://www.gnu.org/licenses/>.
#
#
class LarkError(Exception):
    pass

class GrammarError(LarkError):
    pass

class ParseError(LarkError):
    pass

class LexError(LarkError):
    pass

class UnexpectedInput(LarkError):
    pos_in_stream = None

    def get_context(self, text, span=40):
        pos = self.pos_in_stream
        start = max(pos - span, 0)
        end = pos + span
        before = text[start:pos].rsplit('\n', 1)[-1]
        after = text[pos:end].split('\n', 1)[0]
        return before + after + '\n' + ' ' * len(before) + '^\n'

    def match_examples(self, parse_fn, examples):
        """Given a parser instance and a dictionary mapping some label with
        some malformed syntax examples, it'll return the label for the
        example that best matches the current error.
        """
        assert self.state is not None, "Not supported for this exception"

        candidate = None
        for label, example in examples.items():
            assert not isinstance(example, STRING_TYPE)

            for malformed in example:
                try:
                    parse_fn(malformed)
                except UnexpectedInput as ut:
                    if ut.state == self.state:
                        try:
                            if ut.token == self.token:  # Try exact match first
                                return label
                        except AttributeError:
                            pass
                        if not candidate:
                            candidate = label

        return candidate

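# Illustrative sketch (not part of the generated parser): how match_examples()
# is typically used to map a parse failure to a friendlier label. The `parser`
# object and the example strings below are hypothetical stand-ins.
#
#     try:
#         parser.parse('(unbalanced')
#     except UnexpectedInput as u:
#         label = u.match_examples(parser.parse, {
#             'missing closing parenthesis': ['(a', '(a b'],
#             'missing opening parenthesis': ['a)', 'a b)'],
#         })
#         print(label)
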
class UnexpectedCharacters(LexError, UnexpectedInput):
    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None):
        message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column)

        self.line = line
        self.column = column
        self.allowed = allowed
        self.considered_tokens = considered_tokens
        self.pos_in_stream = lex_pos
        self.state = state

        message += '\n\n' + self.get_context(seq)
        if allowed:
            message += '\nExpecting: %s\n' % allowed

        super(UnexpectedCharacters, self).__init__(message)

class UnexpectedToken(ParseError, UnexpectedInput):
    def __init__(self, token, expected, considered_rules=None, state=None):
        self.token = token
        self.expected = expected  # XXX str shouldn't be necessary
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
        self.considered_rules = considered_rules
        self.state = state
        self.pos_in_stream = getattr(token, 'pos_in_stream', None)

        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected one of: \n\t* %s\n"
                   % (token, self.line, self.column, '\n\t* '.join(self.expected)))

        super(UnexpectedToken, self).__init__(message)

class VisitError(LarkError):
    def __init__(self, tree, orig_exc):
        self.tree = tree
        self.orig_exc = orig_exc

        message = 'Error trying to process rule "%s":\n\n%s' % (tree.data, orig_exc)
        super(VisitError, self).__init__(message)

def _deserialize(data, namespace, memo):
    if isinstance(data, dict):
        if '__type__' in data:  # Object
            class_ = namespace[data['__type__']]
            return class_.deserialize(data, memo)
        elif '@' in data:
            return memo[data['@']]
        return {key: _deserialize(value, namespace, memo) for key, value in data.items()}
    elif isinstance(data, list):
        return [_deserialize(value, namespace, memo) for value in data]
    return data

class Serialize(object):
    def memo_serialize(self, types_to_memoize):
        memo = SerializeMemoizer(types_to_memoize)
        return self.serialize(memo), memo.serialize()

    def serialize(self, memo=None):
        if memo and memo.in_types(self):
            return {'@': memo.memoized.get(self)}

        fields = getattr(self, '__serialize_fields__')
        res = {f: _serialize(getattr(self, f), memo) for f in fields}
        res['__type__'] = type(self).__name__
        postprocess = getattr(self, '_serialize', None)
        if postprocess:
            postprocess(res, memo)
        return res

    @classmethod
    def deserialize(cls, data, memo):
        namespace = getattr(cls, '__serialize_namespace__', {})
        namespace = {c.__name__: c for c in namespace}

        fields = getattr(cls, '__serialize_fields__')

        if '@' in data:
            return memo[data['@']]

        inst = cls.__new__(cls)
        for f in fields:
            setattr(inst, f, _deserialize(data[f], namespace, memo))
        postprocess = getattr(inst, '_deserialize', None)
        if postprocess:
            postprocess()
        return inst

class SerializeMemoizer(Serialize):
    __serialize_fields__ = 'memoized',

    def __init__(self, types_to_memoize):
        self.types_to_memoize = tuple(types_to_memoize)
        self.memoized = Enumerator()

    def in_types(self, value):
        return isinstance(value, self.types_to_memoize)

    def serialize(self):
        return _serialize(self.memoized.reversed(), None)

    @classmethod
    def deserialize(cls, data, namespace, memo):
        return _deserialize(data, namespace, memo)

try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str

import types
from functools import wraps, partial
from contextlib import contextmanager

Str = type(u'')
try:
    classtype = types.ClassType  # Python 2
except AttributeError:
    classtype = type  # Python 3

def smart_decorator(f, create_decorator):
    if isinstance(f, types.FunctionType):
        return wraps(f)(create_decorator(f, True))
    elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
        return wraps(f)(create_decorator(f, False))
    elif isinstance(f, types.MethodType):
        return wraps(f)(create_decorator(f.__func__, True))
    elif isinstance(f, partial):
        # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
        return create_decorator(f.__func__, True)
    else:
        return create_decorator(f.__func__.__call__, True)

import sys, re

Py36 = (sys.version_info[:2] >= (3, 6))

class Meta:
    def __init__(self):
        self.empty = True

class Tree(object):
    def __init__(self, data, children, meta=None):
        self.data = data
        self.children = children
        self._meta = meta

    @property
    def meta(self):
        if self._meta is None:
            self._meta = Meta()
        return self._meta

    def __repr__(self):
        return 'Tree(%s, %s)' % (self.data, self.children)

    def _pretty_label(self):
        return self.data

    def _pretty(self, level, indent_str):
        if len(self.children) == 1 and not isinstance(self.children[0], Tree):
            return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']

        l = [indent_str*level, self._pretty_label(), '\n']
        for n in self.children:
            if isinstance(n, Tree):
                l += n._pretty(level+1, indent_str)
            else:
                l += [indent_str*(level+1), '%s' % (n,), '\n']

        return l

    def pretty(self, indent_str=' '):
        return ''.join(self._pretty(0, indent_str))

    def __eq__(self, other):
        try:
            return self.data == other.data and self.children == other.children
        except AttributeError:
            return False

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash((self.data, tuple(self.children)))

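# Illustrative sketch (not part of the generated parser): Tree is a plain
# container of a rule name plus children (sub-trees or tokens), and pretty()
# renders it with indentation (output shown approximately).
#
#     t = Tree('add', [Tree('number', ['1']), Tree('number', ['2'])])
#     print(t.pretty())
#     # add
#     #  number  1
#     #  number  2
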
from inspect import getmembers, getmro

class Discard(Exception):
    pass

# Transformers

class Transformer:
    """Visits the tree recursively, starting with the leaves and finally the root (bottom-up)

    Calls its methods (provided by user via inheritance) according to tree.data
    The returned value replaces the old one in the structure.

    Can be used to implement map or reduce.
    """

    def _call_userfunc(self, tree, new_children=None):
        # Assumes tree is already transformed
        children = new_children if new_children is not None else tree.children
        try:
            f = getattr(self, tree.data)
        except AttributeError:
            return self.__default__(tree.data, children, tree.meta)
        else:
            try:
                if getattr(f, 'meta', False):
                    return f(children, tree.meta)
                elif getattr(f, 'inline', False):
                    return f(*children)
                elif getattr(f, 'whole_tree', False):
                    if new_children is not None:
                        raise NotImplementedError("Doesn't work with the base Transformer class")
                    return f(tree)
                else:
                    return f(children)
            except (GrammarError, Discard):
                raise
            except Exception as e:
                raise VisitError(tree, e)

    def _transform_children(self, children):
        for c in children:
            try:
                yield self._transform_tree(c) if isinstance(c, Tree) else c
            except Discard:
                pass

    def _transform_tree(self, tree):
        children = list(self._transform_children(tree.children))
        return self._call_userfunc(tree, children)

    def transform(self, tree):
        return self._transform_tree(tree)

    def __mul__(self, other):
        return TransformerChain(self, other)

    def __default__(self, data, children, meta):
        "Default operation on tree (for override)"
        return Tree(data, children, meta)

    @classmethod
    def _apply_decorator(cls, decorator, **kwargs):
        mro = getmro(cls)
        assert mro[0] is cls
        libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
        for name, value in getmembers(cls):
            if name.startswith('_') or name in libmembers:
                continue
            if not callable(cls.__dict__[name]):
                continue

            # Skip if v_args already applied (at the function level)
            if hasattr(cls.__dict__[name], 'vargs_applied'):
                continue

            static = isinstance(cls.__dict__[name], (staticmethod, classmethod))
            setattr(cls, name, decorator(value, static=static, **kwargs))
        return cls

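# Illustrative sketch (not part of the generated parser): a Transformer maps
# each node, bottom-up, to the return value of the method named after its rule.
# The rule names below ('number', 'add') are hypothetical.
#
#     class CalcTransformer(Transformer):
#         def number(self, children):
#             return int(children[0])
#         def add(self, children):
#             return children[0] + children[1]
#
#     CalcTransformer().transform(
#         Tree('add', [Tree('number', ['1']), Tree('number', ['2'])]))   # -> 3
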
class InlineTransformer(Transformer):   # XXX Deprecated
    def _call_userfunc(self, tree, new_children=None):
        # Assumes tree is already transformed
        children = new_children if new_children is not None else tree.children
        try:
            f = getattr(self, tree.data)
        except AttributeError:
            return self.__default__(tree.data, children, tree.meta)
        else:
            return f(*children)

class TransformerChain(object):
    def __init__(self, *transformers):
        self.transformers = transformers

    def transform(self, tree):
        for t in self.transformers:
            tree = t.transform(tree)
        return tree

    def __mul__(self, other):
        return TransformerChain(*self.transformers + (other,))

class Transformer_InPlace(Transformer):
    "Non-recursive. Changes the tree in-place instead of returning new instances"
    def _transform_tree(self, tree):  # Cancel recursion
        return self._call_userfunc(tree)

    def transform(self, tree):
        for subtree in tree.iter_subtrees():
            subtree.children = list(self._transform_children(subtree.children))

        return self._transform_tree(tree)

class Transformer_InPlaceRecursive(Transformer):
    "Recursive. Changes the tree in-place instead of returning new instances"
    def _transform_tree(self, tree):
        tree.children = list(self._transform_children(tree.children))
        return self._call_userfunc(tree)

# Visitors

class VisitorBase:
    def _call_userfunc(self, tree):
        return getattr(self, tree.data, self.__default__)(tree)

    def __default__(self, tree):
        "Default operation on tree (for override)"
        return tree

class Visitor(VisitorBase):
    """Bottom-up visitor, non-recursive

    Visits the tree, starting with the leaves and finally the root (bottom-up)
    Calls its methods (provided by user via inheritance) according to tree.data
    """
    def visit(self, tree):
        for subtree in tree.iter_subtrees():
            self._call_userfunc(subtree)
        return tree

class Visitor_Recursive(VisitorBase):
    """Bottom-up visitor, recursive

    Visits the tree, starting with the leaves and finally the root (bottom-up)
    Calls its methods (provided by user via inheritance) according to tree.data
    """
    def visit(self, tree):
        for child in tree.children:
            if isinstance(child, Tree):
                self.visit(child)

        f = getattr(self, tree.data, self.__default__)
        f(tree)
        return tree

def visit_children_decor(func):
    "See Interpreter"
    @wraps(func)
    def inner(cls, tree):
        values = cls.visit_children(tree)
        return func(cls, values)
    return inner

class Interpreter:
    """Top-down visitor, recursive

    Visits the tree, starting with the root and finally the leaves (top-down)
    Calls its methods (provided by user via inheritance) according to tree.data

    Unlike Transformer and Visitor, the Interpreter doesn't automatically visit its sub-branches.
    The user has to explicitly call visit_children, or use the @visit_children_decor
    """
    def visit(self, tree):
        return getattr(self, tree.data)(tree)

    def visit_children(self, tree):
        return [self.visit(child) if isinstance(child, Tree) else child
                for child in tree.children]

    def __getattr__(self, name):
        return self.__default__

    def __default__(self, tree):
        return self.visit_children(tree)

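# Illustrative sketch (not part of the generated parser): unlike Transformer,
# an Interpreter starts at the root and only descends when visit_children() is
# called (directly or via @visit_children_decor). Names are hypothetical.
#
#     class CalcInterpreter(Interpreter):
#         def add(self, tree):
#             left, right = self.visit_children(tree)
#             return left + right
#         def number(self, tree):
#             return int(tree.children[0])
#
#     CalcInterpreter().visit(
#         Tree('add', [Tree('number', ['1']), Tree('number', ['2'])]))   # -> 3
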
# Decorators
def _apply_decorator(obj, decorator, **kwargs):
    try:
        _apply = obj._apply_decorator
    except AttributeError:
        return decorator(obj, **kwargs)
    else:
        return _apply(decorator, **kwargs)

def _inline_args__func(func):
    @wraps(func)
    def create_decorator(_f, with_self):
        if with_self:
            def f(self, children):
                return _f(self, *children)
        else:
            def f(self, children):
                return _f(*children)
        return f

    return smart_decorator(func, create_decorator)

def inline_args(obj):   # XXX Deprecated
    return _apply_decorator(obj, _inline_args__func)

def _visitor_args_func_dec(func, inline=False, meta=False, whole_tree=False, static=False):
    assert [whole_tree, meta, inline].count(True) <= 1
    def create_decorator(_f, with_self):
        if with_self:
            def f(self, *args, **kwargs):
                return _f(self, *args, **kwargs)
        else:
            def f(self, *args, **kwargs):
                return _f(*args, **kwargs)
        return f

    if static:
        f = wraps(func)(create_decorator(func, False))
    else:
        f = smart_decorator(func, create_decorator)
    f.vargs_applied = True
    f.inline = inline
    f.meta = meta
    f.whole_tree = whole_tree
    return f

def v_args(inline=False, meta=False, tree=False):
    "A convenience decorator factory, for modifying the behavior of user-supplied visitor methods"
    if [tree, meta, inline].count(True) > 1:
        raise ValueError("Visitor functions can either accept tree, or meta, or be inlined. These cannot be combined.")
    def _visitor_args_dec(obj):
        return _apply_decorator(obj, _visitor_args_func_dec, inline=inline, meta=meta, whole_tree=tree)
    return _visitor_args_dec

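# Illustrative sketch (not part of the generated parser): v_args(inline=True)
# makes a rule's children arrive as positional arguments instead of a single
# list. The transformer and rule names below are hypothetical.
#
#     @v_args(inline=True)
#     class CalcTransformer(Transformer):
#         def number(self, n):
#             return int(n)
#         def add(self, left, right):
#             return left + right
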
class Indenter:
    def __init__(self):
        self.paren_level = None
        self.indent_level = None
        assert self.tab_len > 0

    def handle_NL(self, token):
        if self.paren_level > 0:
            return

        yield token

        indent_str = token.rsplit('\n', 1)[1]  # Tabs and spaces
        indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len

        if indent > self.indent_level[-1]:
            self.indent_level.append(indent)
            yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
        else:
            while indent < self.indent_level[-1]:
                self.indent_level.pop()
                yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)

            assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1])

    def _process(self, stream):
        for token in stream:
            if token.type == self.NL_type:
                for t in self.handle_NL(token):
                    yield t
            else:
                yield token

            if token.type in self.OPEN_PAREN_types:
                self.paren_level += 1
            elif token.type in self.CLOSE_PAREN_types:
                self.paren_level -= 1
                assert self.paren_level >= 0

        while len(self.indent_level) > 1:
            self.indent_level.pop()
            yield Token(self.DEDENT_type, '')

        assert self.indent_level == [0], self.indent_level

    def process(self, stream):
        self.paren_level = 0
        self.indent_level = [0]
        return self._process(stream)

    # XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
    @property
    def always_accept(self):
        return (self.NL_type,)

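# Illustrative sketch (not part of the generated parser): Indenter is meant to
# be subclassed and passed as a postlexer; the subclass supplies the token-type
# names. The values below mirror a typical Python-style setup and are
# hypothetical for any particular grammar.
#
#     class MyIndenter(Indenter):
#         NL_type = '_NEWLINE'
#         OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
#         CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
#         INDENT_type = '_INDENT'
#         DEDENT_type = '_DEDENT'
#         tab_len = 8
#
#     # e.g. Lark(grammar, parser='lalr', postlex=MyIndenter())
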
class Symbol(Serialize):
    is_term = NotImplemented

    def __init__(self, name):
        self.name = name

    def __eq__(self, other):
        assert isinstance(other, Symbol), other
        return self.is_term == other.is_term and self.name == other.name

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash(self.name)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self.name)

    fullrepr = property(__repr__)

class Terminal(Symbol):
    __serialize_fields__ = 'name', 'filter_out'

    is_term = True

    def __init__(self, name, filter_out=False):
        self.name = name
        self.filter_out = filter_out

    @property
    def fullrepr(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)

class NonTerminal(Symbol):
    __serialize_fields__ = 'name',

    is_term = False

class RuleOptions(Serialize):
    __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'empty_indices'

    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.priority = priority
        self.empty_indices = empty_indices

    def __repr__(self):
        return 'RuleOptions(%r, %r, %r)' % (
            self.keep_all_tokens,
            self.expand1,
            self.priority,
        )

class Rule(Serialize):
    """
        origin : a symbol
        expansion : a list of symbols
        order : index of this expansion amongst all rules of the same name
    """
    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')

    __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
    __serialize_namespace__ = Terminal, NonTerminal, RuleOptions

    def __init__(self, origin, expansion, order=0, alias=None, options=None):
        self.origin = origin
        self.expansion = expansion
        self.alias = alias
        self.order = order
        self.options = options
        self._hash = hash((self.origin, tuple(self.expansion)))

    def _deserialize(self):
        self._hash = hash((self.origin, tuple(self.expansion)))

    def __str__(self):
        return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))

    def __repr__(self):
        return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        if not isinstance(other, Rule):
            return False
        return self.origin == other.origin and self.expansion == other.expansion

class Pattern(Serialize):
    __serialize_fields__ = 'value', 'flags'

    def __init__(self, value, flags=()):
        self.value = value
        self.flags = frozenset(flags)

    def __repr__(self):
        return repr(self.to_regexp())

    # Pattern Hashing assumes all subclasses have a different priority!
    def __hash__(self):
        return hash((type(self), self.value, self.flags))

    def __eq__(self, other):
        return type(self) == type(other) and self.value == other.value and self.flags == other.flags

    def to_regexp(self):
        raise NotImplementedError()

    if Py36:
        # Python 3.6 changed syntax for flags in regular expressions
        def _get_flags(self, value):
            for f in self.flags:
                value = ('(?%s:%s)' % (f, value))
            return value
    else:
        def _get_flags(self, value):
            for f in self.flags:
                value = ('(?%s)' % f) + value
            return value

class PatternStr(Pattern):
    def to_regexp(self):
        return self._get_flags(re.escape(self.value))

    @property
    def min_width(self):
        return len(self.value)
    max_width = min_width

class PatternRE(Pattern):
    def to_regexp(self):
        return self._get_flags(self.value)

    @property
    def min_width(self):
        return get_regexp_width(self.to_regexp())[0]

    @property
    def max_width(self):
        return get_regexp_width(self.to_regexp())[1]

class TerminalDef(Serialize):
    __serialize_fields__ = 'name', 'pattern', 'priority'
    __serialize_namespace__ = PatternStr, PatternRE

    def __init__(self, name, pattern, priority=1):
        assert isinstance(pattern, Pattern), pattern
        self.name = name
        self.pattern = pattern
        self.priority = priority

    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)

class Token(Str):
    __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column')

    def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None):
        try:
            self = super(Token, cls).__new__(cls, value)
        except UnicodeDecodeError:
            value = value.decode('latin1')
            self = super(Token, cls).__new__(cls, value)

        self.type = type_
        self.pos_in_stream = pos_in_stream
        self.value = value
        self.line = line
        self.column = column
        self.end_line = end_line
        self.end_column = end_column
        return self

    @classmethod
    def new_borrow_pos(cls, type_, value, borrow_t):
        return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column)

    def __reduce__(self):
        return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))

    def __repr__(self):
        return 'Token(%s, %r)' % (self.type, self.value)

    def __deepcopy__(self, memo):
        return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)

    def __eq__(self, other):
        if isinstance(other, Token) and self.type != other.type:
            return False

        return Str.__eq__(self, other)

    __hash__ = Str.__hash__

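# Illustrative sketch (not part of the generated parser): Token subclasses str,
# so it compares and slices like its text while also carrying the terminal name
# and position data.
#
#     tok = Token('NUMBER', '42', pos_in_stream=0, line=1, column=1)
#     tok == '42'             # True (plain string comparison)
#     tok.type                # 'NUMBER'
#     tok.line, tok.column    # (1, 1)
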
class LineCounter:
    def __init__(self):
        self.newline_char = '\n'
        self.char_pos = 0
        self.line = 1
        self.column = 1
        self.line_start_pos = 0

    def feed(self, token, test_newline=True):
        """Consume a token and calculate the new line & column.

        As an optional optimization, set test_newline=False if the token doesn't contain a newline.
        """
        if test_newline:
            newlines = token.count(self.newline_char)
            if newlines:
                self.line += newlines
                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1

        self.char_pos += len(token)
        self.column = self.char_pos - self.line_start_pos + 1

class _Lex:
    "Built to serve both Lexer and ContextualLexer"
    def __init__(self, lexer, state=None):
        self.lexer = lexer
        self.state = state

    def lex(self, stream, newline_types, ignore_types):
        newline_types = frozenset(newline_types)
        ignore_types = frozenset(ignore_types)
        line_ctr = LineCounter()

        while line_ctr.char_pos < len(stream):
            lexer = self.lexer
            for mre, type_from_index in lexer.mres:
                m = mre.match(stream, line_ctr.char_pos)
                if not m:
                    continue
                t = None
                value = m.group(0)
                type_ = type_from_index[m.lastindex]
                if type_ not in ignore_types:
                    t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                    if t.type in lexer.callback:
                        t = lexer.callback[t.type](t)
                        if not isinstance(t, Token):
                            raise ValueError("Callbacks must return a token (returned %r)" % t)
                    yield t
                else:
                    if type_ in lexer.callback:
                        t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                        lexer.callback[type_](t)

                line_ctr.feed(value, type_ in newline_types)
                if t:
                    t.end_line = line_ctr.line
                    t.end_column = line_ctr.column

                break
            else:
                allowed = [v for m, tfi in lexer.mres for v in tfi.values()]
                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state)

class UnlessCallback:
    def __init__(self, mres):
        self.mres = mres

    def __call__(self, t):
        for mre, type_from_index in self.mres:
            m = mre.match(t.value)
            if m:
                t.type = type_from_index[m.lastindex]
                break
        return t

class CallChain:
    def __init__(self, callback1, callback2, cond):
        self.callback1 = callback1
        self.callback2 = callback2
        self.cond = cond

    def __call__(self, t):
        t2 = self.callback1(t)
        return self.callback2(t) if self.cond(t2) else t2

def _create_unless(terminals):
    tokens_by_type = classify(terminals, lambda t: type(t.pattern))
    assert len(tokens_by_type) <= 2, tokens_by_type.keys()
    embedded_strs = set()
    callback = {}
    for retok in tokens_by_type.get(PatternRE, []):
        unless = []  # {}
        for strtok in tokens_by_type.get(PatternStr, []):
            if strtok.priority > retok.priority:
                continue
            s = strtok.pattern.value
            m = re.match(retok.pattern.to_regexp(), s)
            if m and m.group(0) == s:
                unless.append(strtok)
                if strtok.pattern.flags <= retok.pattern.flags:
                    embedded_strs.add(strtok)
        if unless:
            callback[retok.name] = UnlessCallback(build_mres(unless, match_whole=True))

    terminals = [t for t in terminals if t not in embedded_strs]
    return terminals, callback

def _build_mres(terminals, max_size, match_whole):
    # Python sets an unreasonable group limit (currently 100) in its re module
    # Worse, the only way to know we reached it is by catching an AssertionError!
    # This function recursively tries fewer and fewer groups until it's successful.
    postfix = '$' if match_whole else ''
    mres = []
    while terminals:
        try:
            mre = re.compile(u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]))
        except AssertionError:  # Yes, this is what Python provides us.. :/
            return _build_mres(terminals, max_size//2, match_whole)

        # terms_from_name = {t.name: t for t in terminals[:max_size]}
        mres.append((mre, {i: n for n, i in mre.groupindex.items()}))
        terminals = terminals[max_size:]
    return mres

def build_mres(terminals, match_whole=False):
    return _build_mres(terminals, len(terminals), match_whole)

def _regexp_has_newline(r):
    """Expressions that may indicate newlines in a regexp:
        - newlines (\n)
        - escaped newline (\\n)
        - anything but ([^...])
        - any-char (.) when the flag (?s) exists
    """
    return '\n' in r or '\\n' in r or '[^' in r or ('(?s' in r and '.' in r)

class Lexer(Serialize):
    """Lexer interface

    Method Signatures:
        lex(self, stream) -> Iterator[Token]

        set_parser_state(self, state)   # Optional
    """
    set_parser_state = NotImplemented
    lex = NotImplemented

class TraditionalLexer(Lexer):
    __serialize_fields__ = 'terminals', 'ignore_types', 'newline_types'
    __serialize_namespace__ = TerminalDef,

    def _deserialize(self):
        self.mres = build_mres(self.terminals)
        self.callback = {}  # TODO implement

    def __init__(self, terminals, ignore=(), user_callbacks={}):
        assert all(isinstance(t, TerminalDef) for t in terminals), terminals

        terminals = list(terminals)

        # Sanitization
        for t in terminals:
            try:
                re.compile(t.pattern.to_regexp())
            except:
                raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))

            if t.pattern.min_width == 0:
                raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))

        assert set(ignore) <= {t.name for t in terminals}

        # Init
        self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())]
        self.ignore_types = list(ignore)

        terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))

        terminals, self.callback = _create_unless(terminals)
        assert all(self.callback.values())

        for type_, f in user_callbacks.items():
            if type_ in self.callback:
                # Already a callback there, probably UnlessCallback
                self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
            else:
                self.callback[type_] = f

        self.terminals = terminals

        self.mres = build_mres(terminals)

    def lex(self, stream):
        return _Lex(self).lex(stream, self.newline_types, self.ignore_types)

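# Illustrative sketch (not part of the generated parser): a TraditionalLexer
# can also be built directly from TerminalDef objects. The terminal names and
# patterns below are hypothetical.
#
#     terminals = [
#         TerminalDef('NUMBER', PatternRE(r'\d+')),
#         TerminalDef('PLUS', PatternStr('+')),
#         TerminalDef('WS', PatternRE(r'\s+')),
#     ]
#     lexer = TraditionalLexer(terminals, ignore=['WS'])
#     [(t.type, t.value) for t in lexer.lex('1 + 2')]
#     # -> [('NUMBER', '1'), ('PLUS', '+'), ('NUMBER', '2')]
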
class ContextualLexer(Lexer):
    __serialize_fields__ = 'root_lexer', 'lexers'
    __serialize_namespace__ = TraditionalLexer,

    def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
        tokens_by_name = {}
        for t in terminals:
            assert t.name not in tokens_by_name, t
            tokens_by_name[t.name] = t

        lexer_by_tokens = {}
        self.lexers = {}
        for state, accepts in states.items():
            key = frozenset(accepts)
            try:
                lexer = lexer_by_tokens[key]
            except KeyError:
                accepts = set(accepts) | set(ignore) | set(always_accept)
                state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name]
                lexer = TraditionalLexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
                lexer_by_tokens[key] = lexer

            self.lexers[state] = lexer

        self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks)

        self.set_parser_state(None)  # Needs to be set on the outside

    def set_parser_state(self, state):
        self.parser_state = state

    def lex(self, stream):
        l = _Lex(self.lexers[self.parser_state], self.parser_state)
        for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
            yield x
            l.lexer = self.lexers[self.parser_state]
            l.state = self.parser_state

from functools import partial, wraps
from itertools import repeat, product

class ExpandSingleChild:
    def __init__(self, node_builder):
        self.node_builder = node_builder

    def __call__(self, children):
        if len(children) == 1:
            return children[0]
        else:
            return self.node_builder(children)

class PropagatePositions:
    def __init__(self, node_builder):
        self.node_builder = node_builder

    def __call__(self, children):
        res = self.node_builder(children)

        if isinstance(res, Tree):
            for c in children:
                if isinstance(c, Tree) and c.children and not c.meta.empty:
                    res.meta.line = c.meta.line
                    res.meta.column = c.meta.column
                    res.meta.start_pos = c.meta.start_pos
                    res.meta.empty = False
                    break
                elif isinstance(c, Token):
                    res.meta.line = c.line
                    res.meta.column = c.column
                    res.meta.start_pos = c.pos_in_stream
                    res.meta.empty = False
                    break

            for c in reversed(children):
                if isinstance(c, Tree) and c.children and not c.meta.empty:
                    res.meta.end_line = c.meta.end_line
                    res.meta.end_column = c.meta.end_column
                    res.meta.end_pos = c.meta.end_pos
                    res.meta.empty = False
                    break
                elif isinstance(c, Token):
                    res.meta.end_line = c.end_line
                    res.meta.end_column = c.end_column
                    res.meta.end_pos = c.pos_in_stream + len(c.value)
                    res.meta.empty = False
                    break

        return res

class ChildFilter:
    def __init__(self, to_include, append_none, node_builder):
        self.node_builder = node_builder
        self.to_include = to_include
        self.append_none = append_none

    def __call__(self, children):
        filtered = []

        for i, to_expand, add_none in self.to_include:
            if add_none:
                filtered += [None] * add_none
            if to_expand:
                filtered += children[i].children
            else:
                filtered.append(children[i])

        if self.append_none:
            filtered += [None] * self.append_none

        return self.node_builder(filtered)

class ChildFilterLALR(ChildFilter):
    "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"

    def __call__(self, children):
        filtered = []
        for i, to_expand, add_none in self.to_include:
            if add_none:
                filtered += [None] * add_none
            if to_expand:
                if filtered:
                    filtered += children[i].children
                else:   # Optimize for left-recursion
                    filtered = children[i].children
            else:
                filtered.append(children[i])

        if self.append_none:
            filtered += [None] * self.append_none

        return self.node_builder(filtered)

class ChildFilterLALR_NoPlaceholders(ChildFilter):
    "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
    def __init__(self, to_include, node_builder):
        self.node_builder = node_builder
        self.to_include = to_include

    def __call__(self, children):
        filtered = []
        for i, to_expand in self.to_include:
            if to_expand:
                if filtered:
                    filtered += children[i].children
                else:   # Optimize for left-recursion
                    filtered = children[i].children
            else:
                filtered.append(children[i])
        return self.node_builder(filtered)

def _should_expand(sym):
    return not sym.is_term and sym.name.startswith('_')

def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
    # Prepare empty_indices as: How many Nones to insert at each index?
    if _empty_indices:
        assert _empty_indices.count(False) == len(expansion)
        s = ''.join(str(int(b)) for b in _empty_indices)
        empty_indices = [len(ones) for ones in s.split('0')]
        assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
    else:
        empty_indices = [0] * (len(expansion)+1)

    to_include = []
    nones_to_add = 0
    for i, sym in enumerate(expansion):
        nones_to_add += empty_indices[i]
        if keep_all_tokens or not (sym.is_term and sym.filter_out):
            to_include.append((i, _should_expand(sym), nones_to_add))
            nones_to_add = 0

    nones_to_add += empty_indices[len(expansion)]

    if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand, _ in to_include):
        if _empty_indices or ambiguous:
            return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
        else:
            # LALR without placeholders
            return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i, x, _ in to_include])

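# Worked example (comments only, not part of the generated parser): with
# maybe_placeholders, _empty_indices marks where optional symbols were dropped.
# For an expansion of two kept symbols and _empty_indices = [False, True, False],
# s == '010', s.split('0') == ['', '1', ''], so empty_indices == [0, 1, 0]:
# one None placeholder is inserted before the second kept child.
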
class AmbiguousExpander:
    """Deal with the case where we're expanding children ('_rule') into a parent but the children
       are ambiguous, i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
       ambiguous with as many copies as there are ambiguous children, and then copy the ambiguous children
       into the right parents in the right places, essentially shifting the ambiguity up the tree."""
    def __init__(self, to_expand, tree_class, node_builder):
        self.node_builder = node_builder
        self.tree_class = tree_class
        self.to_expand = to_expand

    def __call__(self, children):
        def _is_ambig_tree(child):
            return hasattr(child, 'data') and child.data == '_ambig'

        #### When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
        # All children of an _ambig node should be a derivation of that ambig node, hence
        # it is safe to assume that if we see an _ambig node nested within an ambig node
        # it is safe to simply expand it into the parent _ambig node as an alternative derivation.
        ambiguous = []
        for i, child in enumerate(children):
            if _is_ambig_tree(child):
                if i in self.to_expand:
                    ambiguous.append(i)

                to_expand = [j for j, grandchild in enumerate(child.children) if _is_ambig_tree(grandchild)]
                child.expand_kids_by_index(*to_expand)

        if not ambiguous:
            return self.node_builder(children)

        expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)]
        return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])

def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
    to_expand = [i for i, sym in enumerate(expansion)
                 if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
    if to_expand:
        return partial(AmbiguousExpander, to_expand, tree_class)

def ptb_inline_args(func):
    @wraps(func)
    def f(children):
        return func(*children)
    return f

class ParseTreeBuilder:
    def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
        self.tree_class = tree_class
        self.propagate_positions = propagate_positions
        self.always_keep_all_tokens = keep_all_tokens
        self.ambiguous = ambiguous
        self.maybe_placeholders = maybe_placeholders

        self.rule_builders = list(self._init_builders(rules))

    def _init_builders(self, rules):
        for rule in rules:
            options = rule.options
            keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False)
            expand_single_child = options.expand1 if options else False

            wrapper_chain = list(filter(None, [
                (expand_single_child and not rule.alias) and ExpandSingleChild,
                maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None),
                self.propagate_positions and PropagatePositions,
                self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
            ]))

            yield rule, wrapper_chain

    def create_callback(self, transformer=None):
        callbacks = {}

        for rule, wrapper_chain in self.rule_builders:
            user_callback_name = rule.alias or rule.origin.name
            try:
                f = getattr(transformer, user_callback_name)
                assert not getattr(f, 'meta', False), "Meta args not supported for internal transformer"
                # XXX InlineTransformer is deprecated!
                if getattr(f, 'inline', False) or isinstance(transformer, InlineTransformer):
                    f = ptb_inline_args(f)
            except AttributeError:
                f = partial(self.tree_class, user_callback_name)

            for w in wrapper_chain:
                f = w(f)

            if rule in callbacks:
                raise GrammarError("Rule '%s' already exists" % (rule,))

            callbacks[rule] = f

        return callbacks

class LALR_Parser(object):
    def __init__(self, parser_conf, debug=False):
        assert all(r.options is None or r.options.priority is None
                   for r in parser_conf.rules), "LALR doesn't yet support prioritization"
        analysis = LALR_Analyzer(parser_conf, debug=debug)
        analysis.compute_lookahead()
        callbacks = parser_conf.callbacks

        self._parse_table = analysis.parse_table
        self.parser_conf = parser_conf
        self.parser = _Parser(analysis.parse_table, callbacks)

    @classmethod
    def deserialize(cls, data, memo, callbacks):
        inst = cls.__new__(cls)
        inst.parser = _Parser(IntParseTable.deserialize(data, memo), callbacks)
        return inst

    def serialize(self, memo):
        return self._parse_table.serialize(memo)

    def parse(self, *args):
        return self.parser.parse(*args)

class _Parser:
    def __init__(self, parse_table, callbacks):
        self.states = parse_table.states
        self.start_state = parse_table.start_state
        self.end_state = parse_table.end_state
        self.callbacks = callbacks

    def parse(self, seq, set_state=None):
        token = None
        stream = iter(seq)
        states = self.states

        state_stack = [self.start_state]
        value_stack = []

        if set_state: set_state(self.start_state)

        def get_action(token):
            state = state_stack[-1]
            try:
                return states[state][token.type]
            except KeyError:
                expected = [s for s in states[state].keys() if s.isupper()]
                raise UnexpectedToken(token, expected, state=state)

        def reduce(rule):
            size = len(rule.expansion)
            if size:
                s = value_stack[-size:]
                del state_stack[-size:]
                del value_stack[-size:]
            else:
                s = []

            value = self.callbacks[rule](s)

            _action, new_state = states[state_stack[-1]][rule.origin.name]
            assert _action is Shift
            state_stack.append(new_state)
            value_stack.append(value)

        # Main LALR-parser loop
        for token in stream:
            while True:
                action, arg = get_action(token)
                assert arg != self.end_state

                if action is Shift:
                    state_stack.append(arg)
                    value_stack.append(token)
                    if set_state: set_state(arg)
                    break  # next token
                else:
                    reduce(arg)

        token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
        while True:
            _action, arg = get_action(token)
            if _action is Shift:
                assert arg == self.end_state
                val ,= value_stack
                return val
            else:
                reduce(arg)

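# Illustrative note (not part of the generated parser): parse() above is a
# standard shift/reduce driver. For each input token it repeatedly consults the
# parse table: a Shift action pushes the token and the new state; a Reduce
# action pops len(rule.expansion) values, runs the rule's callback on them, and
# pushes the result under the state reached via the rule's origin (the "goto").
# After the stream ends, a synthetic '$END' token drives the final reductions
# until the end state is shifted and the single remaining value is returned.
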
class Action:
    def __init__(self, name):
        self.name = name
    def __str__(self):
        return self.name
    def __repr__(self):
        return str(self)

Shift = Action('Shift')
Reduce = Action('Reduce')

class ParseTable:
    def __init__(self, states, start_state, end_state):
        self.states = states
        self.start_state = start_state
        self.end_state = end_state

    def serialize(self, memo):
        tokens = Enumerator()
        rules = Enumerator()

        states = {
            state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
                    for token, (action, arg) in actions.items()}
            for state, actions in self.states.items()
        }

        return {
            'tokens': tokens.reversed(),
            'states': states,
            'start_state': self.start_state,
            'end_state': self.end_state,
        }

    @classmethod
    def deserialize(cls, data, memo):
        tokens = data['tokens']
        states = {
            state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action == 1 else (Shift, arg))
                    for token, (action, arg) in actions.items()}
            for state, actions in data['states'].items()
        }
        return cls(states, data['start_state'], data['end_state'])

class IntParseTable(ParseTable):
    @classmethod
    def from_ParseTable(cls, parse_table):
        enum = list(parse_table.states)
        state_to_idx = {s: i for i, s in enumerate(enum)}
        int_states = {}

        for s, la in parse_table.states.items():
            la = {k: (v[0], state_to_idx[v[1]]) if v[0] is Shift else v
                  for k, v in la.items()}
            int_states[state_to_idx[s]] = la

        start_state = state_to_idx[parse_table.start_state]
        end_state = state_to_idx[parse_table.end_state]
        return cls(int_states, start_state, end_state)

def get_frontend(parser, lexer):
    if parser == 'lalr':
        if lexer is None:
            raise ValueError('The LALR parser requires use of a lexer')
        elif lexer == 'standard':
            return LALR_TraditionalLexer
        elif lexer == 'contextual':
            return LALR_ContextualLexer
        elif issubclass(lexer, Lexer):
            return partial(LALR_CustomLexer, lexer)
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'earley':
        if lexer == 'standard':
            return Earley
        elif lexer == 'dynamic':
            return XEarley
        elif lexer == 'dynamic_complete':
            return XEarley_CompleteLex
        elif lexer == 'contextual':
            raise ValueError('The Earley parser does not support the contextual lexer')
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'cyk':
        if lexer == 'standard':
            return CYK
        else:
            raise ValueError('The CYK parser requires using the standard lexer.')
    else:
        raise ValueError('Unknown parser: %s' % parser)

class WithLexer(Serialize):
    lexer = None
    parser = None
    lexer_conf = None

    __serialize_fields__ = 'parser', 'lexer'
    __serialize_namespace__ = Rule, ContextualLexer, TraditionalLexer

    @classmethod
    def deserialize(cls, data, memo, callbacks, postlex):
        inst = super(WithLexer, cls).deserialize(data, memo)
        inst.postlex = postlex
        inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks)
        return inst

    def _serialize(self, data, memo):
        data['parser'] = data['parser'].serialize(memo)

    def init_traditional_lexer(self, lexer_conf):
        self.lexer_conf = lexer_conf
        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)
        self.postlex = lexer_conf.postlex

    def init_contextual_lexer(self, lexer_conf):
        self.lexer_conf = lexer_conf
        self.postlex = lexer_conf.postlex
        states = {idx: list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
        always_accept = self.postlex.always_accept if self.postlex else ()
        self.lexer = ContextualLexer(lexer_conf.tokens, states,
                                     ignore=lexer_conf.ignore,
                                     always_accept=always_accept,
                                     user_callbacks=lexer_conf.callbacks)

    def lex(self, text):
        stream = self.lexer.lex(text)
        return self.postlex.process(stream) if self.postlex else stream

    def parse(self, text):
        token_stream = self.lex(text)
        sps = self.lexer.set_parser_state
        return self.parser.parse(token_stream, *[sps] if sps is not NotImplemented else [])

class LALR_TraditionalLexer(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        debug = options.debug if options else False
        self.parser = LALR_Parser(parser_conf, debug=debug)
        self.init_traditional_lexer(lexer_conf)

class LALR_ContextualLexer(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        debug = options.debug if options else False
        self.parser = LALR_Parser(parser_conf, debug=debug)
        self.init_contextual_lexer(lexer_conf)

class LarkOptions(Serialize):
    """Specifies the options for Lark
    """
    OPTIONS_DOC = """
        parser - Decides which parser engine to use, "earley" or "lalr". (Default: "earley")
                 Note: "lalr" requires a lexer
        lexer - Decides whether or not to use a lexer stage
            "standard": Use a standard lexer
            "contextual": Stronger lexer (only works with parser="lalr")
            "dynamic": Flexible and powerful (only with parser="earley")
            "dynamic_complete": Same as dynamic, but tries *every* variation
                                of tokenizing possible. (only with parser="earley")
            "auto" (default): Choose for me based on grammar and parser
        ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
            "resolve": The parser will automatically choose the simplest derivation
                       (it chooses consistently: greedy for tokens, non-greedy for rules)
            "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
        transformer - Applies the transformer to every parse tree
        debug - Affects verbosity (default: False)
        keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
        cache_grammar - Cache the Lark grammar (Default: False)
        postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers.
        start - The start symbol (Default: start)
        profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False)
        priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto)
        propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches.
        lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
        maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None
    """
    if __doc__:
        __doc__ += OPTIONS_DOC

    _defaults = {
        'debug': False,
        'keep_all_tokens': False,
        'tree_class': None,
        'cache_grammar': False,
        'postlex': None,
        'parser': 'earley',
        'lexer': 'auto',
        'transformer': None,
        'start': 'start',
        'profile': False,
        'priority': 'auto',
        'ambiguity': 'auto',
        'propagate_positions': False,
        'lexer_callbacks': {},
        'maybe_placeholders': False,
    }

    def __init__(self, options_dict):
        o = dict(options_dict)

        options = {}
        for name, default in self._defaults.items():
            if name in o:
                value = o.pop(name)
                if isinstance(default, bool):
                    value = bool(value)
            else:
                value = default

            options[name] = value

        self.__dict__['options'] = options

        assert self.parser in ('earley', 'lalr', 'cyk', None)

        if self.parser == 'earley' and self.transformer:
            raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm. '
                             'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')

        if o:
            raise ValueError("Unknown options: %s" % o.keys())

    def __getattr__(self, name):
        return self.options[name]

    def __setattr__(self, name, value):
        assert name in self.options
        self.options[name] = value

    def serialize(self, memo):
        return self.options

    @classmethod
    def deserialize(cls, data, memo):
        return cls(data)

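# Illustrative sketch (not part of the generated parser): LarkOptions merges a
# caller-supplied dict with _defaults and exposes the result as attributes;
# unknown keys raise a ValueError.
#
#     opts = LarkOptions({'parser': 'lalr', 'start': 'expr'})
#     opts.parser    # 'lalr'
#     opts.lexer     # 'auto' (default)
#     # LarkOptions({'no_such_option': 1})  -> ValueError: Unknown options: ...
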
class Profiler:
    def __init__(self):
        self.total_time = defaultdict(float)
        self.cur_section = '__init__'
        self.last_enter_time = time.time()

    def enter_section(self, name):
        cur_time = time.time()
        self.total_time[self.cur_section] += cur_time - self.last_enter_time
        self.last_enter_time = cur_time
        self.cur_section = name

    def make_wrapper(self, name, f):
        def wrapper(*args, **kwargs):
            last_section = self.cur_section
            self.enter_section(name)
            try:
                return f(*args, **kwargs)
            finally:
                self.enter_section(last_section)

        return wrapper

class Lark(Serialize):
    def __init__(self, grammar, **options):
        """
            grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax)
            options : a dictionary controlling various aspects of Lark.
        """
        self.options = LarkOptions(options)

        # Some, but not all file-like objects have a 'name' attribute
        try:
            self.source = grammar.name
        except AttributeError:
            self.source = '<string>'

        # Drain file-like objects to get their contents
        try:
            read = grammar.read
        except AttributeError:
            pass
        else:
            grammar = read()

        assert isinstance(grammar, STRING_TYPE)

        if self.options.cache_grammar:
            raise NotImplementedError("Not available yet")

        assert not self.options.profile, "Feature temporarily disabled"
        # self.profiler = Profiler() if self.options.profile else None

        if self.options.lexer == 'auto':
            if self.options.parser == 'lalr':
                self.options.lexer = 'contextual'
            elif self.options.parser == 'earley':
                self.options.lexer = 'dynamic'
            elif self.options.parser == 'cyk':
                self.options.lexer = 'standard'
            else:
                assert False, self.options.parser
        lexer = self.options.lexer
        assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)

        if self.options.ambiguity == 'auto':
            if self.options.parser == 'earley':
                self.options.ambiguity = 'resolve'
        else:
            disambig_parsers = ['earley', 'cyk']
            assert self.options.parser in disambig_parsers, (
                'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)

        if self.options.priority == 'auto':
            if self.options.parser in ('earley', 'cyk', ):
                self.options.priority = 'normal'
            elif self.options.parser in ('lalr', ):
                self.options.priority = None
        elif self.options.priority in ('invert', 'normal'):
            assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time"
        assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
        assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
        assert self.options.ambiguity in ('resolve', 'explicit', 'auto', )

        # Parse the grammar file and compose the grammars (TODO)
        self.grammar = load_grammar(grammar, self.source)

        # Compile the EBNF grammar into BNF
        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile()

        # If the user asked to invert the priorities, negate them all here.
        # This replaces the old 'resolve__antiscore_sum' option.
        if self.options.priority == 'invert':
            for rule in self.rules:
                if rule.options and rule.options.priority is not None:
                    rule.options.priority = -rule.options.priority
        # Else, if the user asked to disable priorities, strip them from the
        # rules. This allows the Earley parsers to skip an extra forest walk
        # for improved performance, if you don't need them (or didn't specify any).
        elif self.options.priority == None:
            for rule in self.rules:
                if rule.options and rule.options.priority is not None:
                    rule.options.priority = None

        self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)

        if self.options.parser:
            self.parser = self._build_parser()
        elif lexer:
            self.lexer = self._build_lexer()

    if __init__.__doc__:
        __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC
    __serialize_fields__ = 'parser', 'rules', 'options'

    def _build_lexer(self):
        return TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)

    def _prepare_callbacks(self):
        self.parser_class = get_frontend(self.options.parser, self.options.lexer)
        self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
        self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)

    def _build_parser(self):
        self._prepare_callbacks()
        parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
        return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
    @classmethod
    def deserialize(cls, data, namespace, memo, transformer=None, postlex=None):
        # Reconstruct a Lark instance from serialized data (parse tables and rules)
        # without re-running grammar analysis.
        if memo:
            memo = SerializeMemoizer.deserialize(memo, namespace, {})
        inst = cls.__new__(cls)
        options = dict(data['options'])
        options['transformer'] = transformer
        options['postlex'] = postlex
        inst.options = LarkOptions.deserialize(options, memo)
        inst.rules = [Rule.deserialize(r, memo) for r in data['rules']]
        inst._prepare_callbacks()
        inst.parser = inst.parser_class.deserialize(data['parser'], memo, inst._callbacks, inst.options.postlex)
        return inst
    @classmethod
    def open(cls, grammar_filename, rel_to=None, **options):
        """Create an instance of Lark with the grammar given by its filename

        If rel_to is provided, the function will find the grammar filename in relation to it.

        Example:

            >>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr")
            Lark(...)

        """
        if rel_to:
            basepath = os.path.dirname(rel_to)
            grammar_filename = os.path.join(basepath, grammar_filename)
        with open(grammar_filename, encoding='utf8') as f:
            return cls(f, **options)
    def __repr__(self):
        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)

    def lex(self, text):
        "Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'"
        if not hasattr(self, 'lexer'):
            self.lexer = self._build_lexer()
        stream = self.lexer.lex(text)
        if self.options.postlex:
            return self.options.postlex.process(stream)
        return stream

    def parse(self, text):
        "Parse the given text, according to the options provided. Returns a tree, unless specified otherwise."
        return self.parser.parse(text)
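# Illustrative sketch (not part of the generated parser): parse() raises
# UnexpectedInput subclasses on bad input, and get_context() formats a short
# excerpt around the error position. The helper name _parse_or_report is
# hypothetical.
def _parse_or_report(lark_instance, text):
    try:
        return lark_instance.parse(text)
    except UnexpectedInput as e:
        # get_context() returns the offending line with a caret under the error column.
        raise ValueError('Parse failed:\n' + e.get_context(text))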
  1410. DATA = (
  1411. {'rules': [{'@': 27}, {'@': 31}, {'@': 26}, {'@': 13}, {'@': 24}, {'@': 18}, {'@': 16}, {'@': 23}, {'@': 21}, {'@': 17}, {'@': 28}, {'@': 30}, {'@': 25}, {'@': 29}, {'@': 20}, {'@': 22}, {'@': 15}, {'@': 19}, {'@': 12}, {'@': 14}], 'parser': {'parser': {'tokens': {0: 'COMMA', 1: 'RBRACE', 2: u'pair', 3: u'ESCAPED_STRING', 4: u'string', 5: 'COLON', 6: 'RSQB', 7: '$END', 8: 'LBRACE', 9: u'FALSE', 10: u'object', 11: u'SIGNED_NUMBER', 12: u'value', 13: 'LSQB', 14: u'NULL', 15: u'TRUE', 16: u'array', 17: '__anon_star_1', 18: '__anon_star_0', 19: 'start'}, 'states': {0: {0: (0, 1), 1: (0, 32)}, 1: {2: (0, 5), 3: (0, 21), 4: (0, 3)}, 2: {0: (1, {'@': 12}), 1: (1, {'@': 12})}, 3: {5: (0, 13)}, 4: {0: (1, {'@': 13}), 1: (1, {'@': 13}), 6: (1, {'@': 13}), 7: (1, {'@': 13})}, 5: {0: (1, {'@': 14}), 1: (1, {'@': 14})}, 6: {0: (1, {'@': 15}), 6: (1, {'@': 15})}, 7: {0: (1, {'@': 16}), 1: (1, {'@': 16}), 6: (1, {'@': 16}), 7: (1, {'@': 16})}, 8: {3: (0, 21), 4: (0, 4), 8: (0, 34), 9: (0, 7), 10: (0, 33), 11: (0, 25), 12: (0, 12), 13: (0, 14), 14: (0, 24), 15: (0, 11), 16: (0, 27)}, 9: {0: (1, {'@': 17}), 1: (1, {'@': 17}), 6: (1, {'@': 17}), 7: (1, {'@': 17})}, 10: {0: (0, 22), 17: (0, 0), 1: (0, 26)}, 11: {0: (1, {'@': 18}), 1: (1, {'@': 18}), 6: (1, {'@': 18}), 7: (1, {'@': 18})}, 12: {0: (1, {'@': 19}), 6: (1, {'@': 19})}, 13: {3: (0, 21), 4: (0, 4), 8: (0, 34), 9: (0, 7), 10: (0, 33), 11: (0, 25), 12: (0, 15), 13: (0, 14), 14: (0, 24), 15: (0, 11), 16: (0, 27)}, 14: {3: (0, 21), 4: (0, 4), 6: (0, 30), 8: (0, 34), 9: (0, 7), 10: (0, 33), 11: (0, 25), 12: (0, 23), 13: (0, 14), 14: (0, 24), 15: (0, 11), 16: (0, 27)}, 15: {0: (1, {'@': 20}), 1: (1, {'@': 20})}, 16: {0: (1, {'@': 21}), 1: (1, {'@': 21}), 6: (1, {'@': 21}), 7: (1, {'@': 21})}, 17: {3: (0, 21), 4: (0, 4), 8: (0, 34), 9: (0, 7), 10: (0, 33), 11: (0, 25), 12: (0, 6), 13: (0, 14), 14: (0, 24), 15: (0, 11), 16: (0, 27)}, 18: {}, 19: {7: (0, 18)}, 20: {0: (0, 8), 6: (0, 16)}, 21: {0: (1, {'@': 22}), 1: (1, {'@': 22}), 5: (1, {'@': 22}), 6: (1, {'@': 22}), 7: (1, {'@': 22})}, 22: {2: (0, 2), 3: (0, 21), 4: (0, 3)}, 23: {0: (0, 17), 18: (0, 20), 6: (0, 9)}, 24: {0: (1, {'@': 23}), 1: (1, {'@': 23}), 6: (1, {'@': 23}), 7: (1, {'@': 23})}, 25: {0: (1, {'@': 24}), 1: (1, {'@': 24}), 6: (1, {'@': 24}), 7: (1, {'@': 24})}, 26: {0: (1, {'@': 25}), 1: (1, {'@': 25}), 6: (1, {'@': 25}), 7: (1, {'@': 25})}, 27: {0: (1, {'@': 26}), 1: (1, {'@': 26}), 6: (1, {'@': 26}), 7: (1, {'@': 26})}, 28: {3: (0, 21), 4: (0, 4), 8: (0, 34), 9: (0, 7), 10: (0, 33), 11: (0, 25), 12: (0, 29), 13: (0, 14), 14: (0, 24), 15: (0, 11), 16: (0, 27), 19: (0, 19)}, 29: {7: (1, {'@': 27})}, 30: {0: (1, {'@': 28}), 1: (1, {'@': 28}), 6: (1, {'@': 28}), 7: (1, {'@': 28})}, 31: {0: (1, {'@': 29}), 1: (1, {'@': 29}), 6: (1, {'@': 29}), 7: (1, {'@': 29})}, 32: {0: (1, {'@': 30}), 1: (1, {'@': 30}), 6: (1, {'@': 30}), 7: (1, {'@': 30})}, 33: {0: (1, {'@': 31}), 1: (1, {'@': 31}), 6: (1, {'@': 31}), 7: (1, {'@': 31})}, 34: {1: (0, 31), 2: (0, 10), 3: (0, 21), 4: (0, 3)}}, 'end_state': 18, 'start_state': 28}, '__type__': 'LALR_TraditionalLexer', 'lexer': {'ignore_types': [u'WS'], 'terminals': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], '__type__': 'TraditionalLexer', 'newline_types': [u'WS']}}, '__type__': 'Lark', 'options': {'profile': False, 'transformer': None, 'lexer': 'standard', 'lexer_callbacks': {}, 'postlex': None, 'parser': 'lalr', 'cache_grammar': False, 'tree_class': None, 'priority': None, 
'start': 'start', 'keep_all_tokens': False, 'ambiguity': 'auto', 'debug': False, 'propagate_positions': False, 'maybe_placeholders': False}}
  1412. )
  1413. MEMO = (
  1414. {0: {'priority': 1, 'pattern': {'__type__': 'PatternRE', 'flags': [], 'value': u'(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+)'}, '__type__': 'TerminalDef', 'name': u'SIGNED_NUMBER'}, 1: {'priority': 1, 'pattern': {'__type__': 'PatternRE', 'flags': [], 'value': u'\\".*?(?<!\\\\)(\\\\\\\\)*?\\"'}, '__type__': 'TerminalDef', 'name': u'ESCAPED_STRING'}, 2: {'priority': 1, 'pattern': {'__type__': 'PatternRE', 'flags': [], 'value': u'(?:[ \t\x0c\r\n])+'}, '__type__': 'TerminalDef', 'name': u'WS'}, 3: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'false'}, '__type__': 'TerminalDef', 'name': u'FALSE'}, 4: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'null'}, '__type__': 'TerminalDef', 'name': u'NULL'}, 5: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'true'}, '__type__': 'TerminalDef', 'name': u'TRUE'}, 6: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u':'}, '__type__': 'TerminalDef', 'name': 'COLON'}, 7: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u','}, '__type__': 'TerminalDef', 'name': 'COMMA'}, 8: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'{'}, '__type__': 'TerminalDef', 'name': 'LBRACE'}, 9: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'['}, '__type__': 'TerminalDef', 'name': 'LSQB'}, 10: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u'}'}, '__type__': 'TerminalDef', 'name': 'RBRACE'}, 11: {'priority': 1, 'pattern': {'__type__': 'PatternStr', 'flags': [], 'value': u']'}, '__type__': 'TerminalDef', 'name': 'RSQB'}, 12: {'origin': {'__type__': 'NonTerminal', 'name': '__anon_star_1'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'pair'}], 'options': None, 'alias': None, 'order': 1}, 13: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'string'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 0}, 14: {'origin': {'__type__': 'NonTerminal', 'name': '__anon_star_1'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': '__anon_star_1'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'pair'}], 'options': None, 'alias': None, 'order': 2}, 15: {'origin': {'__type__': 'NonTerminal', 'name': '__anon_star_0'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'value'}], 'options': None, 'alias': None, 'order': 1}, 16: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': u'FALSE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'false', 'order': 0}, 17: {'origin': {'__type__': 'NonTerminal', 'name': u'array'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LSQB'}, {'__type__': 'NonTerminal', 'name': u'value'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RSQB'}], 'options': {'priority': None, 
'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 2}, 18: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': u'TRUE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'true', 'order': 0}, 19: {'origin': {'__type__': 'NonTerminal', 'name': '__anon_star_0'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': '__anon_star_0'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'COMMA'}, {'__type__': 'NonTerminal', 'name': u'value'}], 'options': None, 'alias': None, 'order': 2}, 20: {'origin': {'__type__': 'NonTerminal', 'name': u'pair'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'string'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'COLON'}, {'__type__': 'NonTerminal', 'name': u'value'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 2}, 21: {'origin': {'__type__': 'NonTerminal', 'name': u'array'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LSQB'}, {'__type__': 'NonTerminal', 'name': u'value'}, {'__type__': 'NonTerminal', 'name': '__anon_star_0'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RSQB'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 3}, 22: {'origin': {'__type__': 'NonTerminal', 'name': u'string'}, '__type__': 'Rule', 'expansion': [{'filter_out': False, '__type__': 'Terminal', 'name': u'ESCAPED_STRING'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 0}, 23: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': u'NULL'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'null', 'order': 0}, 24: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'filter_out': False, '__type__': 'Terminal', 'name': u'SIGNED_NUMBER'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': u'number', 'order': 0}, 25: {'origin': {'__type__': 'NonTerminal', 'name': u'object'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LBRACE'}, {'__type__': 'NonTerminal', 'name': u'pair'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RBRACE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 2}, 26: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'array'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 0}, 27: {'origin': {'__type__': 'NonTerminal', 'name': u'start'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'value'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 
'expand1': True}, 'alias': None, 'order': 0}, 28: {'origin': {'__type__': 'NonTerminal', 'name': u'array'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LSQB'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RSQB'}], 'options': {'priority': None, 'empty_indices': [False, True, False], 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 2}, 29: {'origin': {'__type__': 'NonTerminal', 'name': u'object'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LBRACE'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RBRACE'}], 'options': {'priority': None, 'empty_indices': [False, True, False], 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 2}, 30: {'origin': {'__type__': 'NonTerminal', 'name': u'object'}, '__type__': 'Rule', 'expansion': [{'filter_out': True, '__type__': 'Terminal', 'name': 'LBRACE'}, {'__type__': 'NonTerminal', 'name': u'pair'}, {'__type__': 'NonTerminal', 'name': '__anon_star_1'}, {'filter_out': True, '__type__': 'Terminal', 'name': 'RBRACE'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': False}, 'alias': None, 'order': 3}, 31: {'origin': {'__type__': 'NonTerminal', 'name': u'value'}, '__type__': 'Rule', 'expansion': [{'__type__': 'NonTerminal', 'name': u'object'}], 'options': {'priority': None, 'empty_indices': (), 'keep_all_tokens': False, '__type__': 'RuleOptions', 'expand1': True}, 'alias': None, 'order': 0}}
  1415. )
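# Illustrative note (not part of the generated parser): DATA above is the
# serialized Lark instance, i.e. the LALR parse table ('states', 'start_state',
# 'end_state'), the lexer configuration and the option values, while MEMO holds
# the TerminalDef and Rule objects that DATA references through {'@': n}
# placeholders. A minimal sketch of peeking at the bundled grammar; the helper
# name _bundled_terminals is hypothetical.
def _bundled_terminals():
    # TerminalDef entries carry a 'name' and 'pattern'; Rule entries carry an 'origin'.
    return sorted(v['name'] for v in MEMO.values() if v.get('__type__') == 'TerminalDef')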
# Action tags used by the serialized parse-table entries above: (0, ...) is a
# shift action, (1, ...) is a reduce action.
Shift = 0
Reduce = 1


def Lark_StandAlone(transformer=None, postlex=None):
    namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
    return Lark.deserialize(DATA, namespace, MEMO, transformer=transformer, postlex=postlex)
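# Illustrative usage sketch (not emitted by the generator): the tables bundled
# in DATA/MEMO above were built from a JSON grammar (rules such as 'object',
# 'pair' and 'array', terminals such as ESCAPED_STRING and SIGNED_NUMBER), so
# the standalone parser can be exercised on a small JSON document. The sample
# input below is hypothetical.
if __name__ == '__main__':
    parser = Lark_StandAlone()
    tree = parser.parse('{"answer": [42, true, null, "text"]}')
    print(tree.pretty())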