This repo contains code for mirroring other repos, as well as the code being mirrored.

import re
import sre_parse

from .lexer import Lexer, ContextualLexer, Token
from .common import is_terminal, GrammarError, ParserConf
from .parsers import lalr_parser, earley, nearley
from .parsers.grammar_analysis import Rule
from .tree import Transformer

class WithLexer:
    """Mixin for frontends that tokenize with a standard (context-free) lexer."""
    def __init__(self, lexer_conf):
        self.lexer_conf = lexer_conf
        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)

    def lex(self, text):
        stream = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            # Give the post-lexer a chance to filter or inject tokens
            # before the parser sees the stream.
            return self.lexer_conf.postlex.process(stream)
        else:
            return stream
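
The postlex hook above is the only extension point WithLexer exposes: a processor with a process() method and an always_accept attribute (both of which the code below relies on). A minimal sketch of such a processor; the class and token names are hypothetical, not part of this module:

class NewlineFilter:
    # Token types the contextual lexer must keep for us even in parser
    # states that don't expect them (see LALR_ContextualLexer below).
    always_accept = ('NEWLINE',)

    def process(self, stream):
        # Drop newline tokens so the grammar doesn't have to mention them.
        for tok in stream:
            if tok != 'NEWLINE':  # toy stream: tokens are plain strings
                yield tok

assert list(NewlineFilter().process(['a', 'NEWLINE', 'b'])) == ['a', 'b']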

class LALR(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)
        self.parser_conf = parser_conf
        self.parser = lalr_parser.Parser(parser_conf)

    def parse(self, text):
        # Tokenize the whole input up front, then hand the list to the parser.
        tokens = list(self.lex(text))
        return self.parser.parse(tokens)

class LALR_ContextualLexer:
    def __init__(self, lexer_conf, parser_conf):
        self.lexer_conf = lexer_conf
        self.parser_conf = parser_conf
        self.parser = lalr_parser.Parser(parser_conf)

        # Map each LALR state to the token types it can accept (the keys of
        # its action table), so the lexer only tries to match tokens that are
        # valid where the parser currently stands.
        d = {idx: t.keys() for idx, t in self.parser.analysis.states_idx.items()}
        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore,
                                     always_accept=lexer_conf.postlex.always_accept
                                                   if lexer_conf.postlex else ())

    def parse(self, text):
        tokens = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            tokens = self.lexer_conf.postlex.process(tokens)
        # The parser reports each state change back to the lexer, which
        # switches its active token set accordingly.
        return self.parser.parse(tokens, self.lexer.set_parser_state)
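
The comprehension that builds d is the heart of the contextual lexer. A self-contained illustration of the same idea, using a hypothetical two-state action table:

# Hypothetical LALR action tables: state index -> {token type: action}.
states_idx = {
    0: {'NAME': 'shift', 'NUMBER': 'shift'},
    1: {'PLUS': 'shift', '$END': 'reduce'},
}

# The same comprehension as above: in state 1 the lexer won't even try to
# match NAME or NUMBER, which resolves many token ambiguities for free.
accepts = {idx: t.keys() for idx, t in states_idx.items()}
assert set(accepts[1]) == {'PLUS', '$END'}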

class Nearley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [{'name': n,
                  'symbols': self._prepare_expansion(x),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = nearley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        # Terminals are tagged as a (sym, None) pair; rule names pass through.
        return [(sym, None) if is_terminal(sym) else sym for sym in expansion]

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]

class Earley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [(n, self._prepare_expansion(x), a)
                 for n, x, a in parser_conf.rules]

        self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))

    def _prepare_expansion(self, expansion):
        # Terminals are wrapped in a 1-tuple so the parser can tell them
        # apart from rule names.
        return [(sym,) if is_terminal(sym) else sym for sym in expansion]

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]
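
Both lexing frontends rely on the same tagging trick in _prepare_expansion. A standalone sketch, assuming (as lark's common.is_terminal conventionally does) that terminal names are written in uppercase:

def is_terminal(sym):
    # Assumption for this sketch: terminals are uppercase, rules lowercase.
    return sym.isupper()

def prepare_expansion(expansion):
    # Same shape as Earley._prepare_expansion above: wrap terminals in a tuple.
    return [(sym,) if is_terminal(sym) else sym for sym in expansion]

assert prepare_expansion(['expr', 'PLUS', 'term']) == ['expr', ('PLUS',), 'term']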

class Nearley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [{'name': n,
                  'symbols': list(self._prepare_expansion(x)),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = nearley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].to_regexp()
                width = sre_parse.parse(regexp).getwidth()
                if width != (1, 1):
                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
                yield sym, re.compile(regexp)
            else:
                yield sym

    def parse(self, text):
        # Scannerless: the raw text is fed to the parser character by character.
        res = self.parser.parse(text)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]
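
The width check above is what restricts scannerless parsing to single-character terminals. sre_parse (which this module already imports) reports the (min, max) length a pattern can match:

import sre_parse

assert sre_parse.parse('[a-z]').getwidth() == (1, 1)  # OK: always one character
assert sre_parse.parse('ab*').getwidth() != (1, 1)    # would raise GrammarError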

class Earley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
        # Rules that define tokens are renamed so they can be recognized (and
        # their matched characters re-joined) after parsing.
        self.tokens_to_convert = {name: '__token_' + name for name, tree, _ in parser_conf.rules if is_terminal(name)}
        rules = []
        for name, exp, alias in parser_conf.rules:
            name = self.tokens_to_convert.get(name, name)
            exp = [self.tokens_to_convert.get(x, x) for x in exp]
            rules.append((name, exp, alias))

        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [(n, list(self._prepare_expansion(x)), a) for n, x, a in rules]

        self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].pattern.to_regexp()
                width = sre_parse.parse(regexp).getwidth()
                if width != (1, 1):
                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
                yield (re.compile(regexp).match,)
            else:
                yield sym

    def parse(self, text):
        res = self.parser.parse(text)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        res = res[0]

        class RestoreTokens(Transformer):
            pass

        # Each converted token rule matched one character at a time; re-join
        # the characters into a single string.
        for t in self.tokens_to_convert:
            setattr(RestoreTokens, t, ''.join)

        res = RestoreTokens().transform(res)
        return res
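
The RestoreTokens trick works because a character-level parse leaves each converted token rule holding a list of single characters, and str.join glues them back together. A toy version (a plain class here, standing in for the Transformer subclass above):

tokens_to_convert = {'NAME': '__token_NAME'}

class RestoreTokens:
    pass

for t in tokens_to_convert:
    # ''.join is a builtin bound method, so it is not re-bound to the
    # instance on attribute lookup -- it receives only the children list.
    setattr(RestoreTokens, t, ''.join)

assert RestoreTokens().NAME(['f', 'o', 'o']) == 'foo'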

def get_frontend(parser, lexer):
    if parser == 'lalr':
        if lexer is None:
            raise ValueError('The LALR parser requires use of a lexer')
        elif lexer == 'standard':
            return LALR
        elif lexer == 'contextual':
            return LALR_ContextualLexer
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'earley':
        if lexer is None:
            return Earley_NoLex
        elif lexer == 'standard':
            return Earley
        elif lexer == 'contextual':
            raise ValueError('The Earley parser does not support the contextual lexer')
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    else:
        raise ValueError('Unknown parser: %s' % parser)
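
Finally, a quick demonstration of the dispatcher, using the parser/lexer option strings it recognizes:

assert get_frontend('lalr', 'contextual') is LALR_ContextualLexer
assert get_frontend('earley', None) is Earley_NoLex

try:
    get_frontend('lalr', None)
except ValueError as e:
    print(e)  # The LALR parser requires use of a lexer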