This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

185 rader
6.6 KiB

  1. from __future__ import absolute_import
  2. import os
  3. import time
  4. from collections import defaultdict
  5. from .utils import STRING_TYPE
  6. from .load_grammar import load_grammar
  7. from .tree import Tree
  8. from .common import GrammarError, LexerConf, ParserConf
  9. from .lexer import Lexer
  10. from .parse_tree_builder import ParseTreeBuilder
  11. from .parser_frontends import get_frontend
  class LarkOptions(object):
      """Specifies the options for Lark

      Takes the dictionary of keyword options, pops every recognised key
      into an attribute of the same name (falling back to its default), and
      rejects unsupported combinations and unknown keys.
      """
      OPTIONS_DOC = """
          parser - Which parser engine to use ("earley" or "lalr". Default: "earley")
                   Note: "lalr" requires a lexer
          lexer - Whether or not to use a lexer stage
              None: Don't use a lexer
              "standard": Use a standard lexer
              "contextual": Stronger lexer (only works with parser="lalr")
              "auto" (default): Choose for me based on grammar and parser
          transformer - Applies the transformer to every parse tree
          debug - Affects verbosity (default: False)
          only_lex - Don't build a parser. Useful for debugging (default: False)
          keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
          cache_grammar - Cache the Lark grammar (Default: False)
          postlex - Lexer post-processing (Default: None)
          start - The start symbol (Default: start)
          profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False)
      """
      # Docstrings are stripped under ``python -OO``; only extend the class
      # docstring when there is one, so the module still imports in that mode.
      if __doc__:
          __doc__ += OPTIONS_DOC

      def __init__(self, options_dict):
          """Populate the option attributes from ``options_dict``.

          options_dict : mapping of option name to value; it is copied, so
                         the caller's mapping is left untouched.

          Raises ValueError if a transformer is combined with the "earley"
          parser or if unknown option names are present, and
          NotImplementedError if ``keep_all_tokens`` is requested.
          """
          # Work on a copy: every recognised key is popped, so whatever is
          # left at the end is an unknown option.
          o = dict(options_dict)

          self.debug = bool(o.pop('debug', False))
          self.only_lex = bool(o.pop('only_lex', False))
          self.keep_all_tokens = bool(o.pop('keep_all_tokens', False))
          self.tree_class = o.pop('tree_class', Tree)
          self.cache_grammar = o.pop('cache_grammar', False)
          self.postlex = o.pop('postlex', None)
          self.parser = o.pop('parser', 'earley')
          self.lexer = o.pop('lexer', 'auto')
          self.transformer = o.pop('transformer', None)
          self.start = o.pop('start', 'start')
          self.profile = o.pop('profile', False)

          # assert self.parser in ENGINE_DICT
          if self.parser == 'earley' and self.transformer:
              raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm. '
                               'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
          if self.keep_all_tokens:
              raise NotImplementedError("Not implemented yet!")

          if o:
              # list(...) keeps the message identical on Python 2 and 3
              # (a bare keys() view renders as "dict_keys([...])" on 3).
              raise ValueError("Unknown options: %s" % list(o.keys()))
  class Profiler:
      """Accumulates wall-clock time per named section.

      Exactly one section is current at any moment; switching sections
      charges the time elapsed since the previous switch to the section
      being left. Totals are kept in ``total_time`` (section name -> seconds).
      """

      def __init__(self):
          self.total_time = defaultdict(float)
          self.cur_section = '__init__'
          self.last_enter_time = time.time()

      def enter_section(self, name):
          """Charge the elapsed time to the current section, then switch to ``name``."""
          now = time.time()
          elapsed = now - self.last_enter_time
          self.total_time[self.cur_section] += elapsed
          self.cur_section = name
          self.last_enter_time = now

      def make_wrapper(self, name, f):
          """Return a callable that runs ``f`` inside section ``name``.

          The section that was current when the wrapper was called is
          restored afterwards, even if ``f`` raises.
          """
          def wrapper(*args, **kwargs):
              previous = self.cur_section
              self.enter_section(name)
              try:
                  result = f(*args, **kwargs)
              finally:
                  self.enter_section(previous)
              return result
          return wrapper
  class Lark:
      """Loads and compiles a grammar, then builds a parser (or, when no
      parser engine is selected, a stand-alone lexer) according to the options.
      """

      def __init__(self, grammar, **options):
          """
              grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax)
              options : a dictionary controlling various aspects of Lark.
          """
          self.options = LarkOptions(options)

          # Drain file-like objects to get their contents.
          # (No cache file name is derived from the input any more: grammar
          # caching is rejected below, and deriving it from ``grammar.name``
          # broke on file objects whose name is an integer descriptor.)
          try:
              read = grammar.read
          except AttributeError:
              pass
          else:
              grammar = read()

          assert isinstance(grammar, STRING_TYPE)

          if self.options.cache_grammar or self.options.keep_all_tokens:
              raise NotImplementedError("Not available yet")

          assert not self.options.profile, "Feature temporarily disabled"
          self.profiler = Profiler() if self.options.profile else None

          # Resolve the 'auto' lexer: both supported engines currently get
          # the standard lexer; any other parser value leaves it untouched.
          lexer = self.options.lexer
          if lexer == 'auto' and self.options.parser in ('lalr', 'earley'):
              lexer = 'standard'
          self.options.lexer = lexer

          self.grammar = load_grammar(grammar)
          tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer))
          # NOTE(review): this reads ``self.grammar.extra`` rather than the
          # ``grammar_extra`` value returned by compile() just above --
          # confirm the two are meant to be the same mapping.
          self.ignore_tokens = self.grammar.extra['ignore']

          self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)

          # A parser is built whenever a parser engine is set; a stand-alone
          # lexer is only built when there is no parser engine.
          if self.options.parser:
              self.parser = self._build_parser()
          elif lexer:
              self.lexer = self._build_lexer()

          if self.profiler:
              self.profiler.enter_section('outside_lark')

      # Docstrings are stripped under ``python -OO``; only extend the
      # docstring when there is one, so the module still imports in that mode.
      if __init__.__doc__:
          __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC

      def _build_lexer(self):
          """Create a Lexer from the configured tokens and ignore list."""
          return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore)

      def _build_parser(self):
          """Create the parser front end for the selected parser/lexer pair.

          Stores the front-end class and the tree builder on the instance and
          returns the constructed parser.
          """
          self.parser_class = get_frontend(self.options.parser, self.options.lexer)
          self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
          rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
          if self.profiler:
              # Route every non-dunder attribute of the callback object
              # through the profiler's 'transformer' section.
              for attr in dir(callback):
                  if not (attr.startswith('__') and attr.endswith('__')):
                      setattr(callback, attr, self.profiler.make_wrapper('transformer', getattr(callback, attr)))
          parser_conf = ParserConf(rules, callback, self.options.start)

          return self.parser_class(self.lexer_conf, parser_conf)

      def lex(self, text):
          """Tokenize ``text`` with the stand-alone lexer.

          The token stream is passed through ``postlex.process`` when a
          postlex option is set. Requires that ``self.lexer`` was built,
          which __init__ only does when no parser engine is selected.
          """
          stream = self.lexer.lex(text)
          if self.options.postlex:
              return self.options.postlex.process(stream)
          return stream

      def parse(self, text):
          """Parse ``text`` with the built parser and return its result."""
          assert not self.options.only_lex
          # NOTE: per-phase profiling ('lex' / 'parse' / 'outside_lark'
          # sections) is not performed here; the 'profile' option is
          # currently rejected in __init__.
          return self.parser.parse(text)