This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

219 lines
7.2 KiB

  1. from __future__ import absolute_import
  2. import os
  3. from .utils import STRING_TYPE, inline_args
  4. from .load_grammar import load_grammar
  5. from .tree import Tree, Transformer
  6. from .lexer import Lexer
  7. from .grammar_analysis import GrammarAnalyzer, is_terminal
  8. from . import parser, earley
  9. class LarkOptions(object):
  10. """Specifies the options for Lark
  11. """
  12. OPTIONS_DOC = """
  13. parser - Which parser engine to use ("earley" or "lalr". Default: "earley")
  14. Note: Both will use Lark's lexer.
  15. transformer - Applies the transformer to every parse tree
  16. debug - Affects verbosity (default: False)
  17. only_lex - Don't build a parser. Useful for debugging (default: False)
  18. keep_all_tokens - Don't automagically remove "punctuation" tokens (default: True)
  19. cache_grammar - Cache the Lark grammar (Default: False)
  20. postlex - Lexer post-processing (Default: None)
  21. """
  22. __doc__ += OPTIONS_DOC
  23. def __init__(self, options_dict):
  24. o = dict(options_dict)
  25. self.debug = bool(o.pop('debug', False))
  26. self.only_lex = bool(o.pop('only_lex', False))
  27. self.keep_all_tokens = bool(o.pop('keep_all_tokens', False))
  28. self.tree_class = o.pop('tree_class', Tree)
  29. self.cache_grammar = o.pop('cache_grammar', False)
  30. self.postlex = o.pop('postlex', None)
  31. self.parser = o.pop('parser', 'earley')
  32. self.transformer = o.pop('transformer', None)
  33. assert self.parser in ENGINE_DICT
  34. if self.parser == 'earley' and self.transformer:
  35. raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm. Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
  36. if self.keep_all_tokens:
  37. raise NotImplementedError("Not implemented yet!")
  38. if o:
  39. raise ValueError("Unknown options: %s" % o.keys())
  40. class Callback(object):
  41. pass
  42. class RuleTreeToText(Transformer):
  43. def expansions(self, x):
  44. return x
  45. def expansion(self, symbols):
  46. return [sym.value for sym in symbols], None
  47. def alias(self, ((expansion, _alias), alias)):
  48. assert _alias is None, (alias, expansion, '-', _alias)
  49. return expansion, alias.value
  50. def create_rule_handler(expansion, usermethod):
  51. to_include = [(i, sym.startswith('_')) for i, sym in enumerate(expansion)
  52. if not (is_terminal(sym) and sym.startswith('_'))]
  53. def _build_ast(match):
  54. children = []
  55. for i, to_expand in to_include:
  56. if to_expand:
  57. children += match[i].children
  58. else:
  59. children.append(match[i])
  60. return usermethod(children)
  61. return _build_ast
  62. def create_expand1_tree_builder_function(tree_builder):
  63. def f(children):
  64. if len(children) == 1:
  65. return children[0]
  66. else:
  67. return tree_builder(children)
  68. return f
  69. class LALR:
  70. def build_parser(self, rules, callback):
  71. ga = GrammarAnalyzer(rules)
  72. ga.analyze()
  73. return parser.Parser(ga, callback)
  74. class Earley:
  75. @staticmethod
  76. def _process_expansion(x):
  77. return [{'literal': s} if is_terminal(s) else s for s in x]
  78. def build_parser(self, rules, callback):
  79. rules = [{'name':n, 'symbols': self._process_expansion(x), 'postprocess':getattr(callback, a)} for n,x,a in rules]
  80. return EarleyParser(earley.Parser(rules, 'start'))
  81. class EarleyParser:
  82. def __init__(self, parser):
  83. self.parser = parser
  84. def parse(self, text):
  85. res = self.parser.parse(text)
  86. assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
  87. return res[0]
# Maps the 'parser' option value to its engine class.
ENGINE_DICT = { 'lalr': LALR, 'earley': Earley }
class Lark:
    """Main entry point: compiles a grammar into a lexer and (optionally) a parser."""

    def __init__(self, grammar, **options):
        """
        grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax)
        options : a dictionary controlling various aspects of Lark.
        """
        self.options = LarkOptions(options)

        # Some, but not all file-like objects have a 'name' attribute
        try:
            source = grammar.name
        except AttributeError:
            source = '<string>'
            # No file name available: derive a cache name from the grammar's hash.
            cache_file = "larkcache_%s" % str(hash(grammar)%(2**32))
        else:
            cache_file = "larkcache_%s" % os.path.basename(source)

        # Drain file-like objects to get their contents
        try:
            read = grammar.read
        except AttributeError:
            pass
        else:
            grammar = read()

        assert isinstance(grammar, STRING_TYPE)

        if self.options.cache_grammar:
            # NOTE(review): cache_file is computed above but never used —
            # presumably reserved for this unimplemented caching feature.
            raise NotImplementedError("Not available yet")

        # load_grammar yields token specs and a rule-name -> tree mapping.
        self.tokens, self.rules = load_grammar(grammar)

        self.lexer = self._build_lexer()
        if not self.options.only_lex:
            self.parser_engine = ENGINE_DICT[self.options.parser]()
            self.parser = self._build_parser()

    def _build_lexer(self):
        """Build the Lexer from self.tokens; tokens flagged 'ignore' are skipped in output."""
        ignore_tokens = []
        tokens = []
        for name, value, flags in self.tokens:
            if 'ignore' in flags:
                ignore_tokens.append(name)
            tokens.append((name, value))
        return Lexer(tokens, {}, ignore=ignore_tokens)

    def _build_parser(self):
        """Turn self.rules into (origin, expansion, alias) triples with callbacks,
        then hand them to the selected parser engine.
        """
        transformer = self.options.transformer
        callback = Callback()
        rules = []
        rule_tree_to_text = RuleTreeToText()
        for origin, tree in self.rules.items():
            for expansion, alias in rule_tree_to_text.transform(tree):
                # '_rule' means inline-expand; such rules cannot also be aliased.
                if alias and origin.startswith('_'):
                    raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases" % origin)

                # '?rule' means collapse single-child matches to the child.
                expand1 = origin.startswith('?')
                _origin = origin.lstrip('?*')
                if alias:
                    alias = alias.lstrip('*')
                # Unique per-expansion callback name on the Callback namespace.
                _alias = 'autoalias_%s_%s' % (_origin, '_'.join(expansion))

                try:
                    # Prefer the user transformer's method, if one exists.
                    f = transformer._get_func(alias or _origin)
                    # f = getattr(transformer, alias or _origin)
                except AttributeError:
                    # No user method: build a plain tree node instead.
                    if alias:
                        f = self._create_tree_builder_function(alias)
                    else:
                        f = self._create_tree_builder_function(_origin)
                    # NOTE(review): source indentation was lost; expand1 is
                    # applied here to any auto-built tree (aliased or not) —
                    # confirm against upstream history.
                    if expand1:
                        f = create_expand1_tree_builder_function(f)

                alias_handler = create_rule_handler(expansion, f)

                assert not hasattr(callback, _alias)
                setattr(callback, _alias, alias_handler)

                rules.append((_origin, expansion, _alias))

        return self.parser_engine.build_parser(rules, callback)

    # Append the option reference to the constructor's docstring.
    __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC

    def _create_tree_builder_function(self, name):
        """Return a callback that wraps children in self.options.tree_class named *name*."""
        tree_class = self.options.tree_class
        def f(children):
            return tree_class(name, children)
        return f

    def lex(self, text):
        """Tokenize *text*, applying the postlex processor when configured."""
        stream = self.lexer.lex(text)
        if self.options.postlex:
            return self.options.postlex.process(stream)
        else:
            return stream

    def parse(self, text):
        """Lex then parse *text*; only valid when only_lex is off."""
        assert not self.options.only_lex
        l = list(self.lex(text))
        return self.parser.parse(l)