This repo contains code to mirror other repos, as well as the code being mirrored.

241 lines · 8.8 KiB

from .exceptions import ConfigurationError, GrammarError, assert_config
from .utils import get_regexp_width, Serialize
from .parsers.grammar_analysis import GrammarAnalyzer
from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef
from .parsers import earley, xearley, cyk
from .parsers.lalr_parser import LALR_Parser
from .tree import Tree
from .common import LexerConf, ParserConf

# `regex` is an optional third-party dependency; fall back to None so the
# rest of the module can check for its availability.
try:
    import regex
except ImportError:
    regex = None
import re

# Code between ###{standalone and ###} is inlined into parsers generated by
# Lark's standalone tool.
###{standalone

def _wrap_lexer(lexer_class):
    future_interface = getattr(lexer_class, '__future_interface__', False)
    if future_interface:
        return lexer_class
    else:
        # Old-style custom lexers take the raw text; adapt them to the
        # state-based interface that the parsers expect.
        class CustomLexerWrapper(Lexer):
            def __init__(self, lexer_conf):
                self.lexer = lexer_class(lexer_conf)
            def lex(self, lexer_state, parser_state):
                return self.lexer.lex(lexer_state.text)
        return CustomLexerWrapper
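
# A minimal sketch of the kind of old-style custom lexer that _wrap_lexer
# adapts (the class name and the WORD terminal are hypothetical, not part
# of Lark):
#
#   class MyLexer:
#       def __init__(self, lexer_conf):
#           self.conf = lexer_conf
#       def lex(self, text):                  # old interface: raw text in
#           for word in text.split():
#               yield Token('WORD', word)
#
# Passing lexer=MyLexer to Lark routes the class through get_frontend()
# below, and _wrap_lexer supplies the (lexer_state, parser_state) shim.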

class MakeParsingFrontend:
    def __init__(self, parser_type, lexer_type):
        self.parser_type = parser_type
        self.lexer_type = lexer_type

    def __call__(self, lexer_conf, parser_conf, options):
        assert isinstance(lexer_conf, LexerConf)
        assert isinstance(parser_conf, ParserConf)
        parser_conf.parser_type = self.parser_type
        lexer_conf.lexer_type = self.lexer_type
        return ParsingFrontend(lexer_conf, parser_conf, options)

    @classmethod
    def deserialize(cls, data, memo, lexer_conf, callbacks, options):
        # Deserialization always rebuilds a LALR parser: LALR is the only
        # parser type that gets serialized to a cache.
        parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
        parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
        parser_conf.callbacks = callbacks
        return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)

class ParsingFrontend(Serialize):
    __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser', 'options'

    def __init__(self, lexer_conf, parser_conf, options, parser=None):
        self.parser_conf = parser_conf
        self.lexer_conf = lexer_conf
        self.options = options

        # Set up the parser
        if parser:  # From cache
            self.parser = parser
        else:
            create_parser = {
                'lalr': create_lalr_parser,
                'earley': create_earley_parser,
                'cyk': CYK_FrontEnd,
            }[parser_conf.parser_type]
            self.parser = create_parser(lexer_conf, parser_conf, options)

        # Set up the lexer
        lexer_type = lexer_conf.lexer_type
        self.skip_lexer = False
        if lexer_type in ('dynamic', 'dynamic_complete'):
            # The dynamic Earley modes match terminals during parsing,
            # so the raw text is handed straight to the parser.
            assert lexer_conf.postlex is None
            self.skip_lexer = True
            return

        try:
            create_lexer = {
                'standard': create_traditional_lexer,
                'contextual': create_contextual_lexer,
            }[lexer_type]
        except KeyError:
            # Not a built-in lexer name, so it must be a custom lexer class.
            assert issubclass(lexer_type, Lexer), lexer_type
            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
        else:
            self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex)

        if lexer_conf.postlex:
            self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)

    def _verify_start(self, start=None):
        if start is None:
            start_decls = self.parser_conf.start
            if len(start_decls) > 1:
                raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
            start, = start_decls
        elif start not in self.parser_conf.start:
            raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
        return start

    def parse(self, text, start=None, on_error=None):
        chosen_start = self._verify_start(start)
        stream = text if self.skip_lexer else LexerThread(self.lexer, text)
        kw = {} if on_error is None else {'on_error': on_error}
        return self.parser.parse(stream, chosen_start, **kw)

    def parse_interactive(self, text=None, start=None):
        chosen_start = self._verify_start(start)
        if self.parser_conf.parser_type != 'lalr':
            raise ConfigurationError("parse_interactive() currently only works with parser='lalr'")
        stream = text if self.skip_lexer else LexerThread(self.lexer, text)
        return self.parser.parse_interactive(stream, chosen_start)
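
# Interactive-parsing sketch (assumed caller; the method names are those of
# Lark's InteractiveParser, whose exact API varies slightly across versions):
#
#   ip = frontend.parse_interactive("1 + 2")
#   ip.exhaust_lexer()          # feed every remaining token to the parser
#   tree = ip.resume_parse()    # finish and return the parse result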

def get_frontend(parser, lexer):
    assert_config(parser, ('lalr', 'earley', 'cyk'))
    if not isinstance(lexer, type):     # not a custom lexer class
        expected = {
            'lalr': ('standard', 'contextual'),
            'earley': ('standard', 'dynamic', 'dynamic_complete'),
            'cyk': ('standard', ),
        }[parser]
        assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser)
    return MakeParsingFrontend(parser, lexer)
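
# Usage sketch (assumed caller; this is roughly what Lark's constructor does):
#
#   make_frontend = get_frontend('lalr', 'contextual')
#   frontend = make_frontend(lexer_conf, parser_conf, options)
#   tree = frontend.parse("some input")
#
# where lexer_conf and parser_conf are the LexerConf/ParserConf instances
# built from the grammar, and options holds Lark's parser options.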

def _get_lexer_callbacks(transformer, terminals):
    # Collect transformer methods named after terminals, to be called on
    # their tokens during lexing.
    result = {}
    for terminal in terminals:
        callback = getattr(transformer, terminal.name, None)
        if callback is not None:
            result[terminal.name] = callback
    return result


class PostLexConnector:
    """Wraps a lexer so that its token stream is filtered through a
    postlexer (e.g. Lark's Indenter)."""
    def __init__(self, lexer, postlexer):
        self.lexer = lexer
        self.postlexer = postlexer

    def make_lexer_state(self, text):
        return self.lexer.make_lexer_state(text)

    def lex(self, lexer_state, parser_state):
        i = self.lexer.lex(lexer_state, parser_state)
        return self.postlexer.process(i)
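
# A minimal postlexer sketch (interface inferred from the calls above: a
# process() method over the token stream, plus an always_accept attribute;
# the DropComments class and COMMENT terminal are hypothetical):
#
#   class DropComments:
#       always_accept = ()      # terminal names the contextual lexer must always match
#       def process(self, stream):
#           for tok in stream:
#               if tok.type != 'COMMENT':
#                   yield tok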

def create_traditional_lexer(lexer_conf, parser, postlex):
    return TraditionalLexer(lexer_conf)

def create_contextual_lexer(lexer_conf, parser, postlex):
    # Restrict each lexer state to the terminals acceptable in the
    # corresponding LALR parser state.
    states = {idx: list(t.keys()) for idx, t in parser._parse_table.states.items()}
    always_accept = postlex.always_accept if postlex else ()
    return ContextualLexer(lexer_conf, states, always_accept=always_accept)

def create_lalr_parser(lexer_conf, parser_conf, options=None):
    debug = options.debug if options else False
    return LALR_Parser(parser_conf, debug=debug)

# Placeholders for the standalone build; the real definitions follow the
# ###} marker below and rebind these names when the full module is imported.
create_earley_parser = NotImplemented
CYK_FrontEnd = NotImplemented
###}

class EarleyRegexpMatcher:
    """Matches terminals directly against the input text, for the
    scannerless ('dynamic') Earley lexer modes."""
    def __init__(self, lexer_conf):
        self.regexps = {}
        for t in lexer_conf.terminals:
            if t.priority != 1:
                raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
            regexp = t.pattern.to_regexp()
            try:
                width = get_regexp_width(regexp)[0]
            except ValueError:
                raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
            else:
                if width == 0:
                    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
            if lexer_conf.use_bytes:
                regexp = regexp.encode('utf-8')

            self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags)

    def match(self, term, text, index=0):
        return self.regexps[term.name].match(text, index)
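
# Usage sketch (NUMBER is a hypothetical terminal defined as /\d+/):
#
#   m = matcher.match(term, "foo 42", index=4)   # term.name == 'NUMBER'
#   m.group(0)                                   # -> '42'
#
# i.e. match() anchors the terminal's compiled pattern at `index`, just like
# re.compile(r"\d+").match("foo 42", 4).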

def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw):
    earley_matcher = EarleyRegexpMatcher(lexer_conf)
    return xearley.Parser(parser_conf, earley_matcher.match, ignore=lexer_conf.ignore, **kw)

def _match_earley_basic(term, token):
    return term.name == token.type

def create_earley_parser__basic(lexer_conf, parser_conf, options, **kw):
    return earley.Parser(parser_conf, _match_earley_basic, **kw)

def create_earley_parser(lexer_conf, parser_conf, options):
    resolve_ambiguity = options.ambiguity == 'resolve'
    debug = options.debug if options else False
    # In 'forest' mode the shared packed parse forest is returned as-is,
    # so no tree class is needed.
    tree_class = (options.tree_class or Tree) if options.ambiguity != 'forest' else None

    extra = {}
    if lexer_conf.lexer_type == 'dynamic':
        f = create_earley_parser__dynamic
    elif lexer_conf.lexer_type == 'dynamic_complete':
        extra['complete_lex'] = True
        f = create_earley_parser__dynamic
    else:
        f = create_earley_parser__basic

    return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra)

class CYK_FrontEnd:
    def __init__(self, lexer_conf, parser_conf, options=None):
        self._analysis = GrammarAnalyzer(parser_conf)
        self.parser = cyk.Parser(parser_conf.rules)
        self.callbacks = parser_conf.callbacks

    def parse(self, lexer_thread, start):
        tokens = list(lexer_thread.lex(None))
        tree = self.parser.parse(tokens, start)
        return self._transform(tree)

    def _transform(self, tree):
        # Apply the rule callbacks bottom-up, replacing each subtree with
        # the result of its callback.
        subtrees = list(tree.iter_subtrees())
        for subtree in subtrees:
            subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]
        return self._apply_callback(tree)

    def _apply_callback(self, tree):
        return self.callbacks[tree.rule](tree.children)
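
# End-to-end sketch (assumed usage; this mirrors how Lark wires the module
# together, with a made-up two-rule grammar):
#
#   from lark import Lark
#   parser = Lark(r'''
#       start: WORD+
#       WORD: /\w+/
#       %ignore " "
#   ''', parser='earley', lexer='dynamic')
#   print(parser.parse("hello world").pretty())
#
# Lark(...) calls get_frontend('earley', 'dynamic'), the resulting
# ParsingFrontend sets skip_lexer=True, and parse() feeds the raw text to
# the dynamic Earley parser built by create_earley_parser above.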