This repository contains the code used to mirror other repositories, as well as the code that is being mirrored.

from __future__ import absolute_import

import os

from .utils import STRING_TYPE, inline_args
from .load_grammar import load_grammar
from .tree import Tree, Transformer
from .common import GrammarError
from .lexer import Lexer
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import ENGINE_DICT

class LarkOptions(object):
    """Specifies the options for Lark
    """
    OPTIONS_DOC = """
        parser - Which parser engine to use ("earley" or "lalr". Default: "earley")
                 Note: Both will use Lark's lexer.
        transformer - Applies the transformer to every parse tree
        debug - Affects verbosity (default: False)
        only_lex - Don't build a parser. Useful for debugging (default: False)
        keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
        cache_grammar - Cache the Lark grammar (Default: False)
        postlex - Lexer post-processing (Default: None)
        start - The start symbol (Default: start)
        profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False)
    """
    __doc__ += OPTIONS_DOC
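
    # These options arrive as keyword arguments to Lark's constructor below, for
    # example Lark(grammar, parser='lalr', profile=True), and are consumed here by
    # popping each recognized key from a copy of the dict; anything left over is
    # reported as an unknown option.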

    def __init__(self, options_dict):
        o = dict(options_dict)

        self.debug = bool(o.pop('debug', False))
        self.only_lex = bool(o.pop('only_lex', False))
        self.keep_all_tokens = bool(o.pop('keep_all_tokens', False))
        self.tree_class = o.pop('tree_class', Tree)
        self.cache_grammar = o.pop('cache_grammar', False)
        self.postlex = o.pop('postlex', None)
        self.parser = o.pop('parser', 'earley')
        self.transformer = o.pop('transformer', None)
        self.start = o.pop('start', 'start')
        self.profile = o.pop('profile', False)

        assert self.parser in ENGINE_DICT
        if self.parser == 'earley' and self.transformer:
            raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm. Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
        if self.keep_all_tokens:
            raise NotImplementedError("Not implemented yet!")

        if o:
            raise ValueError("Unknown options: %s" % o.keys())


import time
from collections import defaultdict

class Profiler:
    def __init__(self):
        self.total_time = defaultdict(float)
        self.cur_section = '__init__'
        self.last_enter_time = time.time()

    def enter_section(self, name):
        cur_time = time.time()
        self.total_time[self.cur_section] += cur_time - self.last_enter_time
        self.last_enter_time = cur_time
        self.cur_section = name

    def make_wrapper(self, name, f):
        def _f(*args, **kwargs):
            last_section = self.cur_section
            self.enter_section(name)
            try:
                return f(*args, **kwargs)
            finally:
                self.enter_section(last_section)

        return _f
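
# Profiler usage sketch (illustrative): with profile=True, Lark creates a Profiler
# and charges wall-clock time to named sections ('lex', 'parse', 'transformer',
# 'outside_lark', ...). Results can be read afterwards, e.g.:
#
#   lark = Lark(grammar_text, profile=True)   # grammar_text: hypothetical placeholder
#   lark.parse(input_text)
#   print(dict(lark.profiler.total_time))     # seconds spent per section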


class Lark:
    def __init__(self, grammar, **options):
        """
            grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax)
            options : a dictionary controlling various aspects of Lark.
        """
        self.options = LarkOptions(options)

        # Some, but not all file-like objects have a 'name' attribute
        try:
            source = grammar.name
        except AttributeError:
            source = '<string>'
            cache_file = "larkcache_%s" % str(hash(grammar)%(2**32))
        else:
            cache_file = "larkcache_%s" % os.path.basename(source)

        # Drain file-like objects to get their contents
        try:
            read = grammar.read
        except AttributeError:
            pass
        else:
            grammar = read()

        assert isinstance(grammar, STRING_TYPE)

        if self.options.cache_grammar:
            raise NotImplementedError("Not available yet")

        self.profiler = Profiler() if self.options.profile else None

        self.tokens, self.rules = load_grammar(grammar)

        self.lexer = self._build_lexer()
        if not self.options.only_lex:
            self.parser_engine = ENGINE_DICT[self.options.parser]()
            self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
            self.parser = self._build_parser()

        if self.profiler:
            self.profiler.enter_section('outside_lark')
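
    # Token flags come from the grammar: an 'ignore' flag marks a token the lexer
    # should skip, and an ('unless', {matched_string: token_type}) flag retypes a
    # token whose text is an exact key in that mapping, via the callback built by
    # _create_unless_callback below.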

    def _create_unless_callback(self, strs):
        def f(t):
            if t in strs:
                t.type = strs[t]
            return t
        return f

    def _build_lexer(self):
        ignore_tokens = []
        tokens = []
        callbacks = {}
        for name, value, flags in self.tokens:
            for flag in flags:
                if flag == 'ignore':
                    ignore_tokens.append(name)
                elif isinstance(flag, tuple) and flag[0] == 'unless':
                    _, strs = flag
                    callbacks[name] = self._create_unless_callback(strs)
                else:
                    raise GrammarError("No such flag: %s" % flag)
            tokens.append((name, value))
        return Lexer(tokens, callbacks, ignore=ignore_tokens)

    def _build_parser(self):
        rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
        if self.profiler:
            for f in dir(callback):
                if not f.startswith('__'):
                    setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))
        return self.parser_engine.build_parser(rules, callback, self.options.start)

    __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC

    def lex(self, text):
        stream = self.lexer.lex(text)
        if self.options.postlex:
            return self.options.postlex.process(stream)
        else:
            return stream

    def parse(self, text):
        assert not self.options.only_lex

        if self.profiler:
            self.profiler.enter_section('lex')
            l = list(self.lex(text))
            self.profiler.enter_section('parse')
            try:
                return self.parser.parse(l)
            finally:
                self.profiler.enter_section('outside_lark')
        else:
            l = list(self.lex(text))
            return self.parser.parse(l)
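

A minimal usage sketch of the API above. This is illustrative only: my_grammar_text and some_input are hypothetical placeholders, and the EBNF dialect accepted by load_grammar is defined elsewhere in the package, not in this file.

from lark import Lark   # assumes the package re-exports Lark at the top level

parser = Lark(my_grammar_text, parser='lalr')    # grammar as a string or an open file object
tree = parser.parse(some_input)                  # a parse tree built with options.tree_class (Tree by default)

lexer_only = Lark(my_grammar_text, only_lex=True)
tokens = list(lexer_only.lex(some_input))        # lexing only; no parser is built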