This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topic names must start with a letter or a number, may contain dashes ('-'), and can be up to 35 characters long.

233 lines
9.0 KiB

  1. from __future__ import absolute_import
  2. import os
  3. import time
  4. from collections import defaultdict
  5. from io import open
  6. from .utils import STRING_TYPE
  7. from .load_grammar import load_grammar
  8. from .tree import Tree
  9. from .common import LexerConf, ParserConf
  10. from .lexer import Lexer
  11. from .parse_tree_builder import ParseTreeBuilder
  12. from .parser_frontends import get_frontend
  13. class LarkOptions(object):
  14. """Specifies the options for Lark
  15. """
  16. OPTIONS_DOC = """
  17. parser - Decides which parser engine to use, "earley" or "lalr". (Default: "earley")
  18. Note: "lalr" requires a lexer
  19. lexer - Decides whether or not to use a lexer stage
  20. None: Don't use a lexer (scanless, only works with parser="earley")
  21. "standard": Use a standard lexer
  22. "contextual": Stronger lexer (only works with parser="lalr")
  23. "auto" (default): Choose for me based on grammar and parser
  24. ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
  25. "resolve": The parser will automatically choose the simplest derivation
  26. (it chooses consistently: greedy for tokens, non-greedy for rules)
  27. "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
  28. transformer - Applies the transformer to every parse tree
  29. debug - Affects verbosity (default: False)
  30. keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
  31. cache_grammar - Cache the Lark grammar (Default: False)
  32. postlex - Lexer post-processing (Requires standard lexer. Default: None)
  33. start - The start symbol (Default: start)
  34. profile - Measure run-time usage in Lark. Read results from the profiler proprety (Default: False)
  35. propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches.
  36. lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
  37. """
  38. __doc__ += OPTIONS_DOC
  39. def __init__(self, options_dict):
  40. o = dict(options_dict)
  41. self.debug = bool(o.pop('debug', False))
  42. self.keep_all_tokens = bool(o.pop('keep_all_tokens', False))
  43. self.tree_class = o.pop('tree_class', Tree)
  44. self.cache_grammar = o.pop('cache_grammar', False)
  45. self.postlex = o.pop('postlex', None)
  46. self.parser = o.pop('parser', 'earley')
  47. self.lexer = o.pop('lexer', 'auto')
  48. self.transformer = o.pop('transformer', None)
  49. self.start = o.pop('start', 'start')
  50. self.profile = o.pop('profile', False)
  51. self.ambiguity = o.pop('ambiguity', 'auto')
  52. self.propagate_positions = o.pop('propagate_positions', False)
  53. self.earley__predict_all = o.pop('earley__predict_all', False)
  54. self.lexer_callbacks = o.pop('lexer_callbacks', {})
  55. assert self.parser in ('earley', 'lalr', 'cyk', None)
  56. if self.parser == 'earley' and self.transformer:
  57. raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.'
  58. 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
  59. if o:
  60. raise ValueError("Unknown options: %s" % o.keys())
  61. class Profiler:
  62. def __init__(self):
  63. self.total_time = defaultdict(float)
  64. self.cur_section = '__init__'
  65. self.last_enter_time = time.time()
  66. def enter_section(self, name):
  67. cur_time = time.time()
  68. self.total_time[self.cur_section] += cur_time - self.last_enter_time
  69. self.last_enter_time = cur_time
  70. self.cur_section = name
  71. def make_wrapper(self, name, f):
  72. def wrapper(*args, **kwargs):
  73. last_section = self.cur_section
  74. self.enter_section(name)
  75. try:
  76. return f(*args, **kwargs)
  77. finally:
  78. self.enter_section(last_section)
  79. return wrapper
class Lark:
    """Main interface: loads a grammar and builds the lexer/parser for it."""

    def __init__(self, grammar, **options):
        """
        grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax)
        options : a dictionary controlling various aspects of Lark.
        """
        self.options = LarkOptions(options)

        # Some, but not all file-like objects have a 'name' attribute
        try:
            self.source = grammar.name
        except AttributeError:
            self.source = '<string>'
            # NOTE(review): cache_file is computed here but never used below —
            # cache_grammar is not implemented yet (raises NotImplementedError).
            cache_file = "larkcache_%s" % str(hash(grammar)%(2**32))
        else:
            cache_file = "larkcache_%s" % os.path.basename(self.source)

        # Drain file-like objects to get their contents
        try:
            read = grammar.read
        except AttributeError:
            pass
        else:
            grammar = read()

        # From here on, `grammar` must be the grammar text itself.
        assert isinstance(grammar, STRING_TYPE)

        if self.options.cache_grammar:
            raise NotImplementedError("Not available yet")

        assert not self.options.profile, "Feature temporarily disabled"
        self.profiler = Profiler() if self.options.profile else None

        # Resolve lexer='auto' based on the chosen parser engine.
        if self.options.lexer == 'auto':
            if self.options.parser == 'lalr':
                self.options.lexer = 'standard'
            elif self.options.parser == 'earley':
                self.options.lexer = 'dynamic'
            elif self.options.parser == 'cyk':
                self.options.lexer = 'standard'
            else:
                assert False, self.options.parser
        lexer = self.options.lexer
        assert lexer in ('standard', 'contextual', 'dynamic', None)

        # Resolve ambiguity='auto'; only earley/cyk support disambiguation.
        if self.options.ambiguity == 'auto':
            if self.options.parser == 'earley':
                self.options.ambiguity = 'resolve'
        else:
            disambig_parsers = ['earley', 'cyk']
            assert self.options.parser in disambig_parsers, (
                'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
        assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum')

        # Parse the grammar file and compose the grammars (TODO)
        self.grammar = load_grammar(grammar, self.source)

        # Compile the EBNF grammar into BNF
        tokens, self.rules, self.ignore_tokens = self.grammar.compile(lexer=bool(lexer), start=self.options.start)

        self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)

        # A parser frontend builds its own lexer internally; only build a
        # standalone lexer when no parser engine was requested.
        if self.options.parser:
            self.parser = self._build_parser()
        elif lexer:
            self.lexer = self._build_lexer()

        if self.profiler: self.profiler.enter_section('outside_lark')

    # Executed at class-definition time: append the options reference to
    # the constructor's docstring.
    __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC

    def _build_lexer(self):
        # Standalone lexer (used when no parser engine is configured).
        return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)

    def _build_parser(self):
        # Choose the frontend for the configured parser/lexer combination,
        # then wire the parse-tree builder's callbacks into it.
        self.parser_class = get_frontend(self.options.parser, self.options.lexer)

        self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr')
        callback = self._parse_tree_builder.create_callback(self.options.transformer)
        if self.profiler:
            # Wrap every public callback so its run time is charged to the
            # 'transformer' profiling section.
            for f in dir(callback):
                if not (f.startswith('__') and f.endswith('__')):
                    setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))

        parser_conf = ParserConf(self.rules, callback, self.options.start)

        return self.parser_class(self.lexer_conf, parser_conf, options=self.options)

    @classmethod
    def open(cls, grammar_filename, rel_to=None, **options):
        """Create an instance of Lark with the grammar given by its filename

        If rel_to is provided, the function will find the grammar filename in relation to it.

        Example:

            >>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr")
            Lark(...)

        """
        if rel_to:
            basepath = os.path.dirname(rel_to)
            grammar_filename = os.path.join(basepath, grammar_filename)
        with open(grammar_filename) as f:
            return cls(f, **options)

    def __repr__(self):
        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)

    def lex(self, text):
        """Tokenize *text*, applying the postlex processor if one is configured."""
        if not hasattr(self, 'lexer'):
            # Lazily build a standalone lexer (e.g. when a parser frontend
            # was built instead, or parser=None without a lexer).
            self.lexer = self._build_lexer()
        stream = self.lexer.lex(text)
        if self.options.postlex:
            return self.options.postlex.process(stream)
        else:
            return stream

    def parse(self, text):
        """Parse *text* and return the result (a tree, unless a transformer changed it)."""
        return self.parser.parse(text)

        # if self.profiler:
        #     self.profiler.enter_section('lex')
        #     l = list(self.lex(text))
        #     self.profiler.enter_section('parse')
        #     try:
        #         return self.parser.parse(l)
        #     finally:
        #         self.profiler.enter_section('outside_lark')
        # else:
        #     l = list(self.lex(text))
        #     return self.parser.parse(l)