import re
import sre_parse

from .lexer import Lexer, ContextualLexer
from .parsers.lalr_analysis import GrammarAnalyzer
from .common import is_terminal, GrammarError
from .parsers import lalr_parser, earley


class WithLexer:
    """Base for frontends that run a standard lexer (plus an optional post-lexer)."""
    def __init__(self, lexer_conf):
        self.lexer_conf = lexer_conf
        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)

    def lex(self, text):
        stream = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            return self.lexer_conf.postlex.process(stream)
        else:
            return stream


class LALR(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        self.parser_conf = parser_conf
        analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
        analyzer.analyze()
        self.parser = lalr_parser.Parser(analyzer, parser_conf.callback)

    def parse(self, text):
        tokens = list(self.lex(text))
        return self.parser.parse(tokens)


class LALR_ContextualLexer:
    def __init__(self, lexer_conf, parser_conf):
        self.lexer_conf = lexer_conf
        self.parser_conf = parser_conf

        self.analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
        self.analyzer.analyze()

        # Map each parser state to the tokens it can accept, so the lexer
        # only tries to match tokens that are valid in the current state.
        d = {idx: t.keys() for idx, t in self.analyzer.states_idx.items()}
        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore,
                                     always_accept=lexer_conf.postlex.always_accept
                                                   if lexer_conf.postlex else ())

    def parse(self, text):
        parser = lalr_parser.Parser(self.analyzer, self.parser_conf.callback)
        tokens = self.lexer.lex(text, parser)  # the lexer tracks the parser's state
        if self.lexer_conf.postlex:
            tokens = self.lexer_conf.postlex.process(tokens)
        return parser.parse(tokens, True)


class Earley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [{'name': n,
                  'symbols': list(self._prepare_expansion(x)),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = earley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                yield sym, None
            else:
                yield sym

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]


class Earley_NoLex:
    """Earley frontend that parses the raw text directly, matching terminals
    character by character instead of running a lexer first."""
    def __init__(self, lexer_conf, parser_conf):
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [{'name': n,
                  'symbols': list(self._prepare_expansion(x)),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = earley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].to_regexp()
                width = sre_parse.parse(regexp).getwidth()
                if width != (1, 1):
                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
                yield sym, re.compile(regexp)
            else:
                yield sym

    def parse(self, text):
        res = self.parser.parse(text)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]


ENGINE_DICT = {'lalr': LALR, 'earley': Earley, 'earley_nolex': Earley_NoLex,
               'lalr_contextual_lexer': LALR_ContextualLexer}
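
For reference, a minimal sketch of how a frontend might be selected and driven through ENGINE_DICT. The make_parser helper below is hypothetical (it is not part of this module), and the lexer_conf/parser_conf objects are assumed to carry the attributes this file actually reads: tokens, ignore and postlex on the lexer config; rules, start and callback on the parser config.

    # Hypothetical helper: pick a frontend class by name and construct it
    # with both configuration objects.
    def make_parser(engine_name, lexer_conf, parser_conf):
        try:
            engine_class = ENGINE_DICT[engine_name]
        except KeyError:
            raise ValueError('Unknown engine %r (expected one of %s)'
                             % (engine_name, sorted(ENGINE_DICT)))
        return engine_class(lexer_conf, parser_conf)

    # Usage (assuming configs built elsewhere):
    # parser = make_parser('lalr', my_lexer_conf, my_parser_conf)
    # tree = parser.parse('input text to parse')

All four frontends expose the same parse(text) entry point, which is what makes a name-to-class table like ENGINE_DICT sufficient for dispatch.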