This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You can't select more than 25 topics. Topics must start with a letter or number, can include hyphens ('-'), and can be up to 35 characters long.

204 lines
7.6 KiB

  1. from __future__ import absolute_import
  2. import os
  3. import time
  4. from collections import defaultdict
  5. from .utils import STRING_TYPE
  6. from .load_grammar import load_grammar
  7. from .tree import Tree
  8. from .common import LexerConf, ParserConf
  9. from .lexer import Lexer
  10. from .parse_tree_builder import ParseTreeBuilder
  11. from .parser_frontends import get_frontend
  12. class LarkOptions(object):
  13. """Specifies the options for Lark
  14. """
  15. OPTIONS_DOC = """
  16. parser - Decides which parser engine to use, "earley" or "lalr". (Default: "earley")
  17. Note: "lalr" requires a lexer
  18. lexer - Decides whether or not to use a lexer stage
  19. None: Don't use a lexer (scanless, only works with parser="earley")
  20. "standard": Use a standard lexer
  21. "contextual": Stronger lexer (only works with parser="lalr")
  22. "auto" (default): Choose for me based on grammar and parser
  23. ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
  24. "resolve": The parser will automatically choose the simplest derivation
  25. (it chooses consistently: greedy for tokens, non-greedy for rules)
  26. "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
  27. transformer - Applies the transformer to every parse tree
  28. debug - Affects verbosity (default: False)
  29. keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
  30. cache_grammar - Cache the Lark grammar (Default: False)
  31. postlex - Lexer post-processing (Default: None)
  32. start - The start symbol (Default: start)
  33. profile - Measure run-time usage in Lark. Read results from the profiler proprety (Default: False)
  34. propagate_positions - Experimental. Don't use yet.
  35. """
  36. __doc__ += OPTIONS_DOC
  37. def __init__(self, options_dict):
  38. o = dict(options_dict)
  39. self.debug = bool(o.pop('debug', False))
  40. self.keep_all_tokens = bool(o.pop('keep_all_tokens', False))
  41. self.tree_class = o.pop('tree_class', Tree)
  42. self.cache_grammar = o.pop('cache_grammar', False)
  43. self.postlex = o.pop('postlex', None)
  44. self.parser = o.pop('parser', 'earley')
  45. self.lexer = o.pop('lexer', 'auto')
  46. self.transformer = o.pop('transformer', None)
  47. self.start = o.pop('start', 'start')
  48. self.profile = o.pop('profile', False)
  49. self.ambiguity = o.pop('ambiguity', 'auto')
  50. self.propagate_positions = o.pop('propagate_positions', False)
  51. assert self.parser in ('earley', 'lalr', None)
  52. if self.parser == 'earley' and self.transformer:
  53. raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm.'
  54. 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
  55. if o:
  56. raise ValueError("Unknown options: %s" % o.keys())
  57. class Profiler:
  58. def __init__(self):
  59. self.total_time = defaultdict(float)
  60. self.cur_section = '__init__'
  61. self.last_enter_time = time.time()
  62. def enter_section(self, name):
  63. cur_time = time.time()
  64. self.total_time[self.cur_section] += cur_time - self.last_enter_time
  65. self.last_enter_time = cur_time
  66. self.cur_section = name
  67. def make_wrapper(self, name, f):
  68. def wrapper(*args, **kwargs):
  69. last_section = self.cur_section
  70. self.enter_section(name)
  71. try:
  72. return f(*args, **kwargs)
  73. finally:
  74. self.enter_section(last_section)
  75. return wrapper
class Lark:
    def __init__(self, grammar, **options):
        """
        grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax)
        options : a dictionary controlling various aspects of Lark.
        """
        self.options = LarkOptions(options)

        # Some, but not all file-like objects have a 'name' attribute
        try:
            source = grammar.name
        except AttributeError:
            source = '<string>'
            cache_file = "larkcache_%s" % str(hash(grammar)%(2**32))
        else:
            cache_file = "larkcache_%s" % os.path.basename(source)
        # NOTE(review): cache_file is computed in both branches but never used —
        # grammar caching is not implemented yet (cache_grammar raises below).

        # Drain file-like objects to get their contents
        try:
            read = grammar.read
        except AttributeError:
            pass
        else:
            grammar = read()

        assert isinstance(grammar, STRING_TYPE)

        if self.options.cache_grammar:
            raise NotImplementedError("Not available yet")

        assert not self.options.profile, "Feature temporarily disabled"
        self.profiler = Profiler() if self.options.profile else None

        # Resolve lexer='auto' from the chosen parser: LALR needs a standard
        # lexer, Earley gets the dynamic one. Any other parser value is a bug.
        if self.options.lexer == 'auto':
            if self.options.parser == 'lalr':
                self.options.lexer = 'standard'
            elif self.options.parser == 'earley':
                self.options.lexer = 'dynamic'
            else:
                assert False, self.options.parser
        lexer = self.options.lexer
        assert lexer in ('standard', 'contextual', 'dynamic', None)

        # Resolve ambiguity='auto'; a non-auto ambiguity setting is only
        # meaningful with the Earley parser.
        if self.options.ambiguity == 'auto':
            if self.options.parser == 'earley':
                self.options.ambiguity = 'resolve'
        else:
            assert self.options.parser == 'earley'
        assert self.options.ambiguity in ('resolve', 'explicit', 'auto')

        # Parse the grammar file and compose the grammars (TODO)
        self.grammar = load_grammar(grammar, source)

        # Compile the EBNF grammar into BNF
        tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer), start=self.options.start)

        self.ignore_tokens = self.grammar.extra['ignore']

        self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)

        # With a parser engine selected, build the full parser (which owns its
        # own lexing); otherwise, with only a lexer requested, build just that.
        if self.options.parser:
            self.parser = self._build_parser()
        elif lexer:
            self.lexer = self._build_lexer()

        if self.profiler: self.profiler.enter_section('outside_lark')

    # Append the options reference to the constructor's docstring so
    # help(Lark) documents every accepted option.
    __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC

    def _build_lexer(self):
        """Build a standalone Lexer from the stored lexer configuration."""
        return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore)

    def _build_parser(self):
        """Build the parser frontend selected by the options.

        Also creates the parse-tree builder; when profiling is enabled, every
        public callback method is wrapped so its time is billed to the
        'transformer' section.
        """
        self.parser_class = get_frontend(self.options.parser, self.options.lexer)

        self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
        rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
        if self.profiler:
            # Wrap every non-dunder attribute of the callback object.
            for f in dir(callback):
                if not (f.startswith('__') and f.endswith('__')):
                    setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))

        parser_conf = ParserConf(rules, callback, self.options.start)

        return self.parser_class(self.lexer_conf, parser_conf, options=self.options)

    def lex(self, text):
        """Tokenize *text*, applying the postlex processor when one is configured.

        Lazily builds the standalone lexer on first use (e.g. when a parser
        was built instead during __init__).
        """
        if not hasattr(self, 'lexer'):
            self.lexer = self._build_lexer()
        stream = self.lexer.lex(text)
        if self.options.postlex:
            return self.options.postlex.process(stream)
        else:
            return stream

    def parse(self, text):
        """Parse *text* and return the resulting parse tree."""
        return self.parser.parse(text)
        # Commented-out profiling variant of parse(), kept for reference:
        # if self.profiler:
        #     self.profiler.enter_section('lex')
        #     l = list(self.lex(text))
        #     self.profiler.enter_section('parse')
        #     try:
        #         return self.parser.parse(l)
        #     finally:
        #         self.profiler.enter_section('outside_lark')
        # else:
        #     l = list(self.lex(text))
        #     return self.parser.parse(l)