from __future__ import absolute_import

import os
import time
from collections import defaultdict
from io import open

from .utils import STRING_TYPE, Serialize
from .load_grammar import load_grammar
from .tree import Tree
from .common import LexerConf, ParserConf
from .lexer import Lexer, TraditionalLexer
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import get_frontend
from .grammar import Rule

###{standalone
class LarkOptions(Serialize):
    """Specifies the options for Lark
    """
    OPTIONS_DOC = """
        parser - Decides which parser engine to use, "earley" or "lalr". (Default: "earley")
                 Note: "lalr" requires a lexer

        lexer - Decides whether or not to use a lexer stage
            "standard": Use a standard lexer
            "contextual": Stronger lexer (only works with parser="lalr")
            "dynamic": Flexible and powerful (only with parser="earley")
            "dynamic_complete": Same as dynamic, but tries *every* possible
                                variation of tokenizing. (only with parser="earley")
            "auto" (default): Choose for me based on grammar and parser

        ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
            "resolve": The parser will automatically choose the simplest derivation
                       (it chooses consistently: greedy for tokens, non-greedy for rules)
            "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).

        transformer - Applies the transformer to every parse tree
        debug - Affects verbosity (Default: False)
        keep_all_tokens - Don't automagically remove "punctuation" tokens (Default: False)
        cache_grammar - Cache the Lark grammar (Default: False)
        postlex - Lexer post-processing (Default: None). Only works with the standard and contextual lexers.
        start - The start symbol (Default: "start")
        profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False)
        priority - How priorities should be evaluated - "auto", None, "normal", "invert" (Default: "auto")
        propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches.
        lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
        maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None
    """
    if __doc__:
        __doc__ += OPTIONS_DOC
    _defaults = {
        'debug': False,
        'keep_all_tokens': False,
        'tree_class': None,
        'cache_grammar': False,
        'postlex': None,
        'parser': 'earley',
        'lexer': 'auto',
        'transformer': None,
        'start': 'start',
        'profile': False,
        'priority': 'auto',
        'ambiguity': 'auto',
        'propagate_positions': False,
        'lexer_callbacks': {},
        'maybe_placeholders': False,
    }

    def __init__(self, options_dict):
        o = dict(options_dict)

        options = {}
        for name, default in self._defaults.items():
            if name in o:
                value = o.pop(name)
                if isinstance(default, bool):
                    value = bool(value)
            else:
                value = default

            options[name] = value

        self.__dict__['options'] = options

        assert self.parser in ('earley', 'lalr', 'cyk', None)

        if self.parser == 'earley' and self.transformer:
            raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm. '
                             'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')

        if o:
            raise ValueError("Unknown options: %s" % o.keys())

    def __getattr__(self, name):
        return self.options[name]

    def __setattr__(self, name, value):
        assert name in self.options
        self.options[name] = value

    def serialize(self, memo):
        return self.options

    @classmethod
    def deserialize(cls, data, memo):
        return cls(data)
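
# Illustrative sketch (added commentary, not part of the original file):
# LarkOptions is normally constructed for you from the keyword arguments
# passed to Lark(). Assuming the Lark class defined below, these two forms
# describe the same configuration:
#
#     Lark('start: "a"+', parser='lalr', keep_all_tokens=True)
#     LarkOptions({'parser': 'lalr', 'keep_all_tokens': True})   # built internally
#
# Unrecognized keys raise ValueError("Unknown options: ..."), and options
# whose defaults are booleans coerce the given value with bool().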
class Profiler:
    def __init__(self):
        self.total_time = defaultdict(float)
        self.cur_section = '__init__'
        self.last_enter_time = time.time()

    def enter_section(self, name):
        cur_time = time.time()
        self.total_time[self.cur_section] += cur_time - self.last_enter_time
        self.last_enter_time = cur_time
        self.cur_section = name

    def make_wrapper(self, name, f):
        def wrapper(*args, **kwargs):
            last_section = self.cur_section
            self.enter_section(name)
            try:
                return f(*args, **kwargs)
            finally:
                self.enter_section(last_section)

        return wrapper
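
# Illustrative sketch (added commentary, not part of the original file):
# Profiler accumulates wall-clock time per named section, and make_wrapper
# restores the previous section when the wrapped call returns, so nested
# wrapped calls attribute time to the innermost active section. For example
# (some_parse_fn is a hypothetical callable):
#
#     profiler = Profiler()
#     timed = profiler.make_wrapper('parse', some_parse_fn)
#     timed("input text")
#     print(profiler.total_time['parse'])   # seconds spent in the 'parse' section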
class Lark(Serialize):
    def __init__(self, grammar, **options):
        """
            grammar : a string or file-object containing the grammar spec (using Lark's EBNF syntax)
            options : a dictionary controlling various aspects of Lark.
        """
        self.options = LarkOptions(options)

        # Some, but not all, file-like objects have a 'name' attribute
        try:
            self.source = grammar.name
        except AttributeError:
            self.source = '<string>'

        # Drain file-like objects to get their contents
        try:
            read = grammar.read
        except AttributeError:
            pass
        else:
            grammar = read()

        assert isinstance(grammar, STRING_TYPE)

        if self.options.cache_grammar:
            raise NotImplementedError("Not available yet")

        assert not self.options.profile, "Feature temporarily disabled"
        # self.profiler = Profiler() if self.options.profile else None

        if self.options.lexer == 'auto':
            if self.options.parser == 'lalr':
                self.options.lexer = 'contextual'
            elif self.options.parser == 'earley':
                self.options.lexer = 'dynamic'
            elif self.options.parser == 'cyk':
                self.options.lexer = 'standard'
            else:
                assert False, self.options.parser
        lexer = self.options.lexer
        assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)

        if self.options.ambiguity == 'auto':
            if self.options.parser == 'earley':
                self.options.ambiguity = 'resolve'
        else:
            disambig_parsers = ['earley', 'cyk']
            assert self.options.parser in disambig_parsers, (
                'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)

        if self.options.priority == 'auto':
            if self.options.parser in ('earley', 'cyk'):
                self.options.priority = 'normal'
            elif self.options.parser == 'lalr':
                self.options.priority = None
        elif self.options.priority in ('invert', 'normal'):
            assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time"

        assert self.options.priority in ('auto', None, 'normal', 'invert'), \
            'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
        assert self.options.ambiguity not in ('resolve__antiscore_sum',), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
        assert self.options.ambiguity in ('resolve', 'explicit', 'auto')

        # Parse the grammar file and compose the grammars (TODO)
        self.grammar = load_grammar(grammar, self.source)

        # Compile the EBNF grammar into BNF
        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile()

        # If the user asked to invert the priorities, negate them all here.
        # This replaces the old 'resolve__antiscore_sum' option.
        if self.options.priority == 'invert':
            for rule in self.rules:
                if rule.options and rule.options.priority is not None:
                    rule.options.priority = -rule.options.priority
        # Else, if the user asked to disable priorities, strip them from the
        # rules. This allows the Earley parsers to skip an extra forest walk
        # for improved performance, if you don't need them (or didn't specify any).
        elif self.options.priority is None:
            for rule in self.rules:
                if rule.options and rule.options.priority is not None:
                    rule.options.priority = None

        self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)

        if self.options.parser:
            self.parser = self._build_parser()
        elif lexer:
            self.lexer = self._build_lexer()

    if __init__.__doc__:
        __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC
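
    # Illustrative sketch (added commentary, not part of the original file):
    # the 'auto' resolution logic in __init__ above yields, for example:
    #
    #     Lark(grammar, parser='lalr')    # -> lexer='contextual', priority=None
    #     Lark(grammar, parser='earley')  # -> lexer='dynamic', ambiguity='resolve', priority='normal'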
    __serialize_fields__ = 'parser', 'rules', 'options'

    def _build_lexer(self):
        return TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)

    def _prepare_callbacks(self):
        self.parser_class = get_frontend(self.options.parser, self.options.lexer)
        self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', self.options.maybe_placeholders)
        self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)

    def _build_parser(self):
        self._prepare_callbacks()
        parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
        return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
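
    # Note (added commentary, not part of the original file): get_frontend()
    # (imported from .parser_frontends) pairs the chosen parser algorithm with
    # the chosen lexer, so combinations such as ('lalr', 'contextual') and
    # ('earley', 'dynamic') each resolve to a distinct frontend class;
    # _build_parser() then instantiates that class with the lexer and parser
    # configurations built above.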
    @classmethod
    def deserialize(cls, data, memo):
        inst = cls.__new__(cls)
        inst.options = LarkOptions.deserialize(data['options'], memo)
        inst.rules = [Rule.deserialize(r, memo) for r in data['rules']]
        inst._prepare_callbacks()
        inst.parser = inst.parser_class.deserialize(data['parser'], memo, inst._callbacks)
        return inst
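
    # Illustrative sketch (added commentary, not part of the original file):
    # deserialize() restores a Lark instance from previously serialized data
    # without re-analyzing the grammar. Roughly (exact memo handling depends
    # on the Serialize machinery in .utils, so treat this as a sketch, not
    # the API):
    #
    #     memo = {}
    #     data = parser.serialize(memo)        # yields 'parser', 'rules', 'options' fields
    #     restored = Lark.deserialize(data, memo)
    #     restored.parse("some text")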
    @classmethod
    def open(cls, grammar_filename, rel_to=None, **options):
        """Create an instance of Lark with the grammar given by its filename.

        If rel_to is provided, the function will find the grammar filename in relation to it.

        Example:
            >>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr")
            Lark(...)
        """
        if rel_to:
            basepath = os.path.dirname(rel_to)
            grammar_filename = os.path.join(basepath, grammar_filename)
        with open(grammar_filename, encoding='utf8') as f:
            return cls(f, **options)
    def __repr__(self):
        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)

    def lex(self, text):
        "Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'"
        if not hasattr(self, 'lexer'):
            self.lexer = self._build_lexer()
        stream = self.lexer.lex(text)
        if self.options.postlex:
            return self.options.postlex.process(stream)
        return stream

    def parse(self, text):
        "Parse the given text, according to the options provided. Returns a tree, unless specified otherwise."
        return self.parser.parse(text)

###}
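
# Usage sketch (illustrative, not part of the original file): a minimal
# end-to-end example, assuming this module is importable as `lark`:
#
#     from lark import Lark
#
#     greeting = Lark('''
#         start: WORD "," WORD "!"
#         %import common.WORD
#         %ignore " "
#     ''', parser='lalr')
#
#     tree = greeting.parse("Hello, World!")
#     print(tree.pretty())   # prints the rule/token structure of the parse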