This repo contains the code used to mirror other repos, as well as the code being mirrored.

from .exceptions import ConfigurationError, GrammarError, assert_config
from .utils import get_regexp_width, Serialize
from .parsers.grammar_analysis import GrammarAnalyzer
from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef
from .parsers import earley, xearley, cyk
from .parsers.lalr_parser import LALR_Parser
from .tree import Tree
from .common import LexerConf, ParserConf

try:
    import regex
except ImportError:
    regex = None
import re
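# Note: the "###{standalone ... ###}" markers below delimit the region that
# Lark's standalone-parser generator copies into its generated module.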
###{standalone

def _wrap_lexer(lexer_class):
    future_interface = getattr(lexer_class, '__future_interface__', False)
    if future_interface:
        return lexer_class
    else:
        class CustomLexerWrapper(Lexer):
            def __init__(self, lexer_conf):
                self.lexer = lexer_class(lexer_conf)
            def lex(self, lexer_state, parser_state):
                return self.lexer.lex(lexer_state.text)
        return CustomLexerWrapper
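# Illustrative sketch (hypothetical class, not part of this module): a custom
# lexer that opts into the newer interface declares __future_interface__ = True
# and implements lex(lexer_state, parser_state) itself, e.g.:
#
#     class MyLexer(Lexer):
#         __future_interface__ = True
#         def lex(self, lexer_state, parser_state):
#             ...
#
# Legacy custom lexers that only implement lex(text) are adapted by
# CustomLexerWrapper above.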

class MakeParsingFrontend:
    def __init__(self, parser_type, lexer_type):
        self.parser_type = parser_type
        self.lexer_type = lexer_type

    def __call__(self, lexer_conf, parser_conf, options):
        assert isinstance(lexer_conf, LexerConf)
        assert isinstance(parser_conf, ParserConf)
        parser_conf.parser_type = self.parser_type
        lexer_conf.lexer_type = self.lexer_type
        return ParsingFrontend(lexer_conf, parser_conf, options)

    def deserialize(self, data, memo, lexer_conf, callbacks, options):
        parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
        parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
        parser_conf.callbacks = callbacks
        return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
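# deserialize() is the path taken when a serialized (e.g. cached) LALR parser
# is reloaded instead of being rebuilt from the grammar.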

class ParsingFrontend(Serialize):
    __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser', 'options'

    def __init__(self, lexer_conf, parser_conf, options, parser=None):
        self.parser_conf = parser_conf
        self.lexer_conf = lexer_conf
        self.options = options

        # Set up parser
        if parser:  # From cache
            self.parser = parser
        else:
            create_parser = {
                'lalr': create_lalr_parser,
                'earley': create_earley_parser,
                'cyk': CYK_FrontEnd,
            }[parser_conf.parser_type]
            self.parser = create_parser(lexer_conf, parser_conf, options)

        # Set up lexer
        lexer_type = lexer_conf.lexer_type
        self.skip_lexer = False
        if lexer_type in ('dynamic', 'dynamic_complete'):
            # The dynamic Earley lexers do their own matching inside the parser
            assert lexer_conf.postlex is None
            self.skip_lexer = True
            return

        try:
            create_lexer = {
                'standard': create_traditional_lexer,
                'contextual': create_contextual_lexer,
            }[lexer_type]
        except KeyError:
            # Not a built-in lexer name, so it must be a custom lexer class
            assert issubclass(lexer_type, Lexer), lexer_type
            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
        else:
            self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex)

        if lexer_conf.postlex:
            self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)

    def _verify_start(self, start=None):
        if start is None:
            start_decls = self.parser_conf.start
            if len(start_decls) > 1:
                raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
            start ,= start_decls  # unpack the single declared start rule
        elif start not in self.parser_conf.start:
            raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
        return start

    def parse(self, text, start=None, on_error=None):
        chosen_start = self._verify_start(start)
        stream = text if self.skip_lexer else LexerThread(self.lexer, text)
        kw = {} if on_error is None else {'on_error': on_error}
        return self.parser.parse(stream, chosen_start, **kw)

    def parse_interactive(self, text=None, start=None):
        chosen_start = self._verify_start(start)
        if self.parser_conf.parser_type != 'lalr':
            raise ConfigurationError("parse_interactive() currently only works with parser='lalr'")
        stream = text if self.skip_lexer else LexerThread(self.lexer, text)
        return self.parser.parse_interactive(stream, chosen_start)
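# parse_interactive() hands back an interactive parser object that the caller
# advances token by token (LALR only, as checked above).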

def get_frontend(parser, lexer):
    assert_config(parser, ('lalr', 'earley', 'cyk'))
    if not isinstance(lexer, type):  # not a custom lexer?
        expected = {
            'lalr': ('standard', 'contextual'),
            'earley': ('standard', 'dynamic', 'dynamic_complete'),
            'cyk': ('standard',),
        }[parser]
        assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser)
    return MakeParsingFrontend(parser, lexer)
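# For example, get_frontend('lalr', 'contextual') validates the combination and
# returns a MakeParsingFrontend factory; Lark later calls it with the finished
# LexerConf and ParserConf to obtain a ParsingFrontend.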

def _get_lexer_callbacks(transformer, terminals):
    result = {}
    for terminal in terminals:
        callback = getattr(transformer, terminal.name, None)
        if callback is not None:
            result[terminal.name] = callback
    return result

class PostLexConnector:
    def __init__(self, lexer, postlexer):
        self.lexer = lexer
        self.postlexer = postlexer

    def make_lexer_state(self, text):
        return self.lexer.make_lexer_state(text)

    def lex(self, lexer_state, parser_state):
        i = self.lexer.lex(lexer_state, parser_state)
        return self.postlexer.process(i)
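# A postlexer is expected to provide process(token_stream) and an always_accept
# attribute; lark.indenter.Indenter is one implementation of this interface.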

def create_traditional_lexer(lexer_conf, parser, postlex):
    return TraditionalLexer(lexer_conf)

def create_contextual_lexer(lexer_conf, parser, postlex):
    states = {idx: list(t.keys()) for idx, t in parser._parse_table.states.items()}
    always_accept = postlex.always_accept if postlex else ()
    return ContextualLexer(lexer_conf, states, always_accept=always_accept)

def create_lalr_parser(lexer_conf, parser_conf, options=None):
    debug = options.debug if options else False
    return LALR_Parser(parser_conf, debug=debug)

# Placeholders; the real definitions below are not part of the standalone build.
create_earley_parser = NotImplemented
CYK_FrontEnd = NotImplemented
###}

class EarleyRegexpMatcher:
    def __init__(self, lexer_conf):
        self.regexps = {}
        for t in lexer_conf.terminals:
            if t.priority != 1:
                raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
            regexp = t.pattern.to_regexp()
            try:
                width = get_regexp_width(regexp)[0]
            except ValueError:
                raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
            else:
                if width == 0:
                    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
            if lexer_conf.use_bytes:
                regexp = regexp.encode('utf-8')

            self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags)

    def match(self, term, text, index=0):
        return self.regexps[term.name].match(text, index)

def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw):
    earley_matcher = EarleyRegexpMatcher(lexer_conf)
    return xearley.Parser(parser_conf, earley_matcher.match, ignore=lexer_conf.ignore, **kw)

def _match_earley_basic(term, token):
    return term.name == token.type

def create_earley_parser__basic(lexer_conf, parser_conf, options, **kw):
    return earley.Parser(parser_conf, _match_earley_basic, **kw)

def create_earley_parser(lexer_conf, parser_conf, options):
    resolve_ambiguity = options.ambiguity == 'resolve'
    debug = options.debug if options else False
    tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None

    extra = {}
    if lexer_conf.lexer_type == 'dynamic':
        f = create_earley_parser__dynamic
    elif lexer_conf.lexer_type == 'dynamic_complete':
        extra['complete_lex'] = True
        f = create_earley_parser__dynamic
    else:
        f = create_earley_parser__basic

    return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra)

class CYK_FrontEnd:
    def __init__(self, lexer_conf, parser_conf, options=None):
        self._analysis = GrammarAnalyzer(parser_conf)
        self.parser = cyk.Parser(parser_conf.rules)
        self.callbacks = parser_conf.callbacks

    def parse(self, lexer_thread, start):
        tokens = list(lexer_thread.lex(None))
        tree = self.parser.parse(tokens, start)
        return self._transform(tree)

    def _transform(self, tree):
        subtrees = list(tree.iter_subtrees())
        for subtree in subtrees:
            subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]
        return self._apply_callback(tree)

    def _apply_callback(self, tree):
        return self.callbacks[tree.rule](tree.children)
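
None of these frontends is meant to be constructed by hand; they are wired up through the public Lark API. A minimal sketch of how this module gets exercised (assumes the lark package is installed; the grammar and input are made up for illustration):

from lark import Lark

# parser='lalr' + lexer='contextual' routes through create_lalr_parser and
# create_contextual_lexer above, via get_frontend and MakeParsingFrontend.
parser = Lark(r"""
    start: WORD ("," WORD)*
    WORD: /[a-z]+/
    %ignore " "
""", parser='lalr', lexer='contextual')

print(parser.parse("hello, world").pretty())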