This repo contains the code used to mirror other repos, as well as the code being mirrored.

import re
import sre_parse

from .lexer import Lexer, ContextualLexer, Token

from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Terminal_Token
from .parsers import lalr_parser, old_earley, nearley, earley
from .tree import Transformer
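
# This module glues lexing strategies to the available parser engines.  Each
# frontend class below pairs one lexer setup with one parser; get_frontend()
# at the bottom selects a frontend class from the (parser, lexer) options.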

class WithLexer:
    def __init__(self, lexer_conf):
        self.lexer_conf = lexer_conf
        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)

    def lex(self, text):
        stream = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            return self.lexer_conf.postlex.process(stream)
        else:
            return stream
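
# --- Illustrative sketch, not part of the original module.  The only
# --- post-lexer interface assumed is the one this file itself relies on:
# --- a .process(stream) method and an .always_accept attribute (used by
# --- LALR_ContextualLexer below).  The token type name is hypothetical.
class FilterIndents:
    """Hypothetical post-lexer that drops '_INDENT' tokens from the stream."""
    always_accept = ()

    def process(self, stream):
        for tok in stream:
            if tok.type != '_INDENT':
                yield tok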

class LALR(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        self.parser_conf = parser_conf
        self.parser = lalr_parser.Parser(parser_conf)

    def parse(self, text):
        tokens = list(self.lex(text))
        return self.parser.parse(tokens)

class LALR_ContextualLexer:
    def __init__(self, lexer_conf, parser_conf):
        self.lexer_conf = lexer_conf
        self.parser_conf = parser_conf

        self.parser = lalr_parser.Parser(parser_conf)

        # Map each LALR state index to the token types that state can accept,
        # so the contextual lexer only tries to match tokens the parser can
        # actually use at the current point in the parse.
        d = {idx: t.keys() for idx, t in self.parser.analysis.states_idx.items()}
        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore,
                                     always_accept=always_accept)

    def parse(self, text):
        tokens = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            tokens = self.lexer_conf.postlex.process(tokens)
        return self.parser.parse(tokens, self.lexer.set_parser_state)

class Nearley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [{'name': n,
                  'symbols': self._prepare_expansion(x),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = nearley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        return [(sym, None) if is_terminal(sym) else sym for sym in expansion]

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]

class OldEarley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [(n, self._prepare_expansion(x), a) for n, x, a in parser_conf.rules]

        self.parser = old_earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))

    def _prepare_expansion(self, expansion):
        return [(sym,) if is_terminal(sym) else sym for sym in expansion]

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]

def tokenize_text(text):
    # Turn raw text into a stream of single-character CHAR tokens, tracking
    # line and column so positions survive scanless (lexer-less) parsing.
    new_text = []
    line = 1
    col_start_pos = 0
    for i, ch in enumerate(text):
        if '\n' in ch:
            line += ch.count('\n')
            col_start_pos = i + ch.rindex('\n')
        new_text.append(Token('CHAR', ch, line=line, column=i - col_start_pos))
    return new_text
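
# Worked example of the loop above: tokenize_text("a\nb") returns
#   [Token('CHAR', 'a',  line=1, column=0),
#    Token('CHAR', '\n', line=2, column=0),
#    Token('CHAR', 'b',  line=2, column=1)]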

class OldEarley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [(n, list(self._prepare_expansion(x)), a) for n, x, a in parser_conf.rules]

        self.parser = old_earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].pattern.to_regexp()
                # Scanless mode feeds the parser one character at a time, so
                # every terminal's regexp must match exactly one character.
                width = sre_parse.parse(regexp).getwidth()
                if width != (1, 1):
                    raise GrammarError('Scanless parsing (lexer=None) requires all tokens to have a width of 1 (terminal %s: %s is %s)' % (sym, regexp, width))
                yield (re.compile(regexp).match, regexp)
            else:
                yield sym

    def parse(self, text):
        new_text = tokenize_text(text)
        res = self.parser.parse(new_text)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]

class Earley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [(n, list(self._prepare_expansion(x)), a) for n, x, a in parser_conf.rules]

        self.parser = earley.Parser(rules, parser_conf.start, parser_conf.callback)

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].pattern.to_regexp()
                width = sre_parse.parse(regexp).getwidth()
                if width != (1, 1):
                    raise GrammarError('Scanless parsing (lexer=None) requires all tokens to have a width of 1 (terminal %s: %s is %s)' % (sym, regexp, width))
                yield Terminal_Regexp(regexp)
            else:
                yield sym

    def parse(self, text):
        new_text = tokenize_text(text)
        return self.parser.parse(new_text)

class Earley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [(n, self._prepare_expansion(x), a) for n, x, a in parser_conf.rules]

        self.parser = earley.Parser(rules, parser_conf.start, parser_conf.callback)

    def _prepare_expansion(self, expansion):
        return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion]

    def parse(self, text):
        tokens = list(self.lex(text))
        return self.parser.parse(tokens)

def get_frontend(parser, lexer):
    if parser == 'lalr':
        if lexer is None:
            raise ValueError('The LALR parser requires use of a lexer')
        elif lexer == 'standard':
            return LALR
        elif lexer == 'contextual':
            return LALR_ContextualLexer
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'earley':
        if lexer is None:
            return Earley_NoLex
        elif lexer == 'standard':
            return Earley
        elif lexer == 'contextual':
            raise ValueError('The Earley parser does not support the contextual lexer')
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    else:
        raise ValueError('Unknown parser: %s' % parser)
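
A minimal usage sketch (assuming the package's public Lark class routes its parser/lexer options through get_frontend, as the option names above suggest; the grammar is illustrative):

    from lark import Lark

    # 'lalr' + 'contextual' selects the LALR_ContextualLexer frontend above
    parser = Lark('''
        start: AB+
        AB: "ab"
    ''', parser='lalr', lexer='contextual')

    tree = parser.parse('ababab')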