This repo contains code to mirror other repos, as well as the code being mirrored.

153 lines
5.1 KiB

import re
import sre_parse

from .lexer import Lexer, ContextualLexer, Token
from .common import is_terminal, GrammarError, ParserConf
from .parsers import lalr_parser, earley, nearley
from .parsers.grammar_analysis import Rule
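
# Base class for frontends that tokenize with the standard lexer before
# parsing. An optional postlex stage, if configured, transforms the token
# stream after lexing.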
class WithLexer:
    def __init__(self, lexer_conf):
        self.lexer_conf = lexer_conf
        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)

    def lex(self, text):
        stream = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            return self.lexer_conf.postlex.process(stream)
        else:
            return stream

class LALR(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        self.parser_conf = parser_conf
        self.parser = lalr_parser.Parser(parser_conf)

    def parse(self, text):
        tokens = list(self.lex(text))
        return self.parser.parse(tokens)
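
# Like LALR, but with a contextual lexer: for each parser state, only the
# tokens acceptable in that state (taken from the LALR automaton's state
# table) are matched. The lexer is kept in sync with the parser through
# set_parser_state.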
class LALR_ContextualLexer:
    def __init__(self, lexer_conf, parser_conf):
        self.lexer_conf = lexer_conf
        self.parser_conf = parser_conf
        self.parser = lalr_parser.Parser(parser_conf)

        d = {idx: t.keys() for idx, t in self.parser.analysis.states_idx.items()}
        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore,
                                     always_accept=lexer_conf.postlex.always_accept
                                                   if lexer_conf.postlex else ())

    def parse(self, text):
        tokens = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            tokens = self.lexer_conf.postlex.process(tokens)
        return self.parser.parse(tokens, self.lexer.set_parser_state)
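
# Earley-family frontends that reuse the standard lexer via WithLexer.
# _prepare_expansion wraps terminal names in tuples so the underlying parser
# can tell terminals apart from rule names.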
class Nearley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [{'name': n,
                  'symbols': self._prepare_expansion(x),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = nearley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        return [(sym, None) if is_terminal(sym) else sym for sym in expansion]

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]

class Earley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [(n, self._prepare_expansion(x), a)
                 for n, x, a in parser_conf.rules]

        self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))

    def _prepare_expansion(self, expansion):
        return [(sym,) if is_terminal(sym) else sym for sym in expansion]

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]
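
# Scannerless ("NoLex") variants: the parser consumes the raw text character
# by character instead of a token stream, so every terminal's regexp must
# match exactly one character; hence the (1, 1) width check.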
class Nearley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [{'name': n,
                  'symbols': list(self._prepare_expansion(x)),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = nearley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].to_regexp()
                width = sre_parse.parse(regexp).getwidth()
                if width != (1, 1):
                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
                yield sym, re.compile(regexp)
            else:
                yield sym

    def parse(self, text):
        res = self.parser.parse(text)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]

class Earley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [(n, list(self._prepare_expansion(x)), a)
                 for n, x, a in parser_conf.rules]

        self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].to_regexp()
                width = sre_parse.parse(regexp).getwidth()
                if width != (1, 1):
                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
                yield (re.compile(regexp).match,)
            else:
                yield sym

    def parse(self, text):
        res = self.parser.parse(text)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]
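
# Maps engine names to frontend classes. Note that Nearley and Nearley_NoLex
# are defined above but not registered here.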
ENGINE_DICT = {
    'lalr': LALR,
    'earley': Earley,
    'earley_nolex': Earley_NoLex,
    'lalr_contextual_lexer': LALR_ContextualLexer,
}
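
For orientation, here is a minimal sketch of how a caller might select a frontend from ENGINE_DICT and run it. It assumes `lexer_conf` and `parser_conf` objects have already been built elsewhere (e.g. by the grammar loader); `build_parser` is a hypothetical helper name, not part of this file.

def build_parser(engine_name, lexer_conf, parser_conf):
    # Hypothetical helper: select a frontend class by name and instantiate it.
    try:
        engine_class = ENGINE_DICT[engine_name]
    except KeyError:
        raise GrammarError('Unknown engine: %r (expected one of %s)'
                           % (engine_name, sorted(ENGINE_DICT)))
    return engine_class(lexer_conf, parser_conf)

# parser = build_parser('lalr', my_lexer_conf, my_parser_conf)
# tree = parser.parse('some input text')  # every frontend exposes parse(text)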