This repo contains the code used to mirror other repos, as well as the code being mirrored.


from __future__ import absolute_import

import os
import time
from collections import defaultdict

from .utils import STRING_TYPE
from .load_grammar import load_grammar
from .tree import Tree
from .common import GrammarError, LexerConf, ParserConf
from .lexer import Lexer
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import ENGINE_DICT


class LarkOptions(object):
    """Specifies the options for Lark
    """
    OPTIONS_DOC = """
        parser - Which parser engine to use ("earley" or "lalr". Default: "earley")
                 Note: Both will use Lark's lexer.
        transformer - Applies the transformer to every parse tree
        debug - Affects verbosity (default: False)
        only_lex - Don't build a parser. Useful for debugging (default: False)
        keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
        cache_grammar - Cache the Lark grammar (Default: False)
        postlex - Lexer post-processing (Default: None)
        start - The start symbol (Default: start)
        profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False)
    """
    __doc__ += OPTIONS_DOC

    def __init__(self, options_dict):
        o = dict(options_dict)

        self.debug = bool(o.pop('debug', False))
        self.only_lex = bool(o.pop('only_lex', False))
        self.keep_all_tokens = bool(o.pop('keep_all_tokens', False))
        self.tree_class = o.pop('tree_class', Tree)
        self.cache_grammar = o.pop('cache_grammar', False)
        self.postlex = o.pop('postlex', None)
        self.parser = o.pop('parser', 'earley')
        self.transformer = o.pop('transformer', None)
        self.start = o.pop('start', 'start')
        self.profile = o.pop('profile', False)

        assert self.parser in ENGINE_DICT
        if self.parser == 'earley' and self.transformer:
            raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm. '
                             'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
        if self.keep_all_tokens:
            raise NotImplementedError("Not implemented yet!")

        if o:
            raise ValueError("Unknown options: %s" % o.keys())


class Profiler:
    # Tracks time spent per named section; used to attribute run time to the
    # lexer, parser and transformer when the 'profile' option is enabled.
    def __init__(self):
        self.total_time = defaultdict(float)
        self.cur_section = '__init__'
        self.last_enter_time = time.time()

    def enter_section(self, name):
        cur_time = time.time()
        self.total_time[self.cur_section] += cur_time - self.last_enter_time
        self.last_enter_time = cur_time
        self.cur_section = name

    def make_wrapper(self, name, f):
        # Wraps f so that time spent inside it is charged to the given section.
        def wrapper(*args, **kwargs):
            last_section = self.cur_section
            self.enter_section(name)
            try:
                return f(*args, **kwargs)
            finally:
                self.enter_section(last_section)

        return wrapper


class Lark:
    def __init__(self, grammar, **options):
        """
            grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax)
            options : a dictionary controlling various aspects of Lark.
        """
        self.options = LarkOptions(options)

        # Some, but not all file-like objects have a 'name' attribute
        try:
            source = grammar.name
        except AttributeError:
            source = '<string>'
            cache_file = "larkcache_%s" % str(hash(grammar)%(2**32))
        else:
            cache_file = "larkcache_%s" % os.path.basename(source)

        # Drain file-like objects to get their contents
        try:
            read = grammar.read
        except AttributeError:
            pass
        else:
            grammar = read()

        assert isinstance(grammar, STRING_TYPE)

        if self.options.cache_grammar or self.options.keep_all_tokens:
            raise NotImplementedError("Not available yet")

        assert not self.options.profile, "Feature temporarily disabled"
        self.profiler = Profiler() if self.options.profile else None

        tokens, self.rules = load_grammar(grammar)

        self.ignore_tokens = []
        for tokendef, flags in tokens:
            for flag in flags:
                if flag == 'ignore':
                    self.ignore_tokens.append(tokendef.name)
                else:
                    raise GrammarError("No such flag: %s" % flag)

        self.lexer_conf = LexerConf([t[0] for t in tokens], self.ignore_tokens, self.options.postlex)

        if not self.options.only_lex:
            self.parser = self._build_parser()
        else:
            self.lexer = self._build_lexer()

        if self.profiler: self.profiler.enter_section('outside_lark')

    __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC

    def _build_lexer(self):
        return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore)

    def _build_parser(self):
        self.parser_class = ENGINE_DICT[self.options.parser]
        self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
        rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
        if self.profiler:
            for f in dir(callback):
                if not (f.startswith('__') and f.endswith('__')):
                    setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))
        parser_conf = ParserConf(rules, callback, self.options.start)
        return self.parser_class(self.lexer_conf, parser_conf)

    def lex(self, text):
        stream = self.lexer.lex(text)
        if self.options.postlex:
            return self.options.postlex.process(stream)
        else:
            return stream

    def parse(self, text):
        assert not self.options.only_lex
        return self.parser.parse(text)
        # if self.profiler:
        #     self.profiler.enter_section('lex')
        #     l = list(self.lex(text))
        #     self.profiler.enter_section('parse')
        #     try:
        #         return self.parser.parse(l)
        #     finally:
        #         self.profiler.enter_section('outside_lark')
        # else:
        #     l = list(self.lex(text))
        #     return self.parser.parse(l)