import re
import sre_parse

from .lexer import Lexer, ContextualLexer
from .parsers.lalr_analysis import GrammarAnalyzer
from .common import is_terminal, GrammarError
from .parsers import lalr_parser, earley

class WithLexer:
    """Base class for engines that run a standard lexer before parsing."""

    def __init__(self, lexer_conf):
        self.lexer_conf = lexer_conf
        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)

    def lex(self, text):
        stream = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            return self.lexer_conf.postlex.process(stream)
        else:
            return stream

class LALR(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
        analyzer.analyze()
        self.parser = lalr_parser.Parser(analyzer, parser_conf.callback)

    def parse(self, text):
        tokens = list(self.lex(text))
        return self.parser.parse(tokens)

class LALR_ContextualLexer:
    def __init__(self, lexer_conf, parser_conf):
        self.lexer_conf = lexer_conf
        self.parser_conf = parser_conf

        self.analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
        self.analyzer.analyze()

        # Map each parser state to the tokens it can accept
        states = {idx: t.keys() for idx, t in self.analyzer.states_idx.items()}
        self.lexer = ContextualLexer(lexer_conf.tokens, states,
                                     ignore=lexer_conf.ignore,
                                     always_accept=lexer_conf.postlex.always_accept
                                                   if lexer_conf.postlex else ())

    def parse(self, text):
        parser = lalr_parser.Parser(self.analyzer, self.parser_conf.callback)
        tokens = self.lexer.lex(text, parser)
        if self.lexer_conf.postlex:
            tokens = self.lexer_conf.postlex.process(tokens)
        return parser.parse(tokens, True)
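
# Note: unlike the standalone Lexer, the ContextualLexer consults the parser's
# current LALR state (via the `states` mapping built above) and only tries to
# match the tokens acceptable in that state, plus any tokens the postlexer
# declares through `always_accept`. This lets it resolve tokenization
# ambiguities that a context-free lexer cannot.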

class Earley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [{'name': n,
                  'symbols': list(self._prepare_expansion(x)),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = earley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                yield sym, None
            else:
                yield sym

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]

class Earley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [{'name': n,
                  'symbols': list(self._prepare_expansion(x)),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = earley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].to_regexp()
                width = sre_parse.parse(regexp).getwidth()
                if width != (1, 1):
                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
                yield sym, re.compile(regexp)
            else:
                yield sym

    def parse(self, text):
        res = self.parser.parse(text)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]
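
# Note: Earley_NoLex skips lexing entirely and feeds the raw text to the
# Earley parser character by character, which is why _prepare_expansion
# rejects any terminal whose regexp can match anything other than exactly
# one character.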

ENGINE_DICT = {'lalr': LALR, 'earley': Earley, 'earley_nolex': Earley_NoLex,
               'lalr_contextual_lexer': LALR_ContextualLexer}
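
# Usage sketch (illustrative only; in practice the `lexer_conf` and
# `parser_conf` objects are built by the grammar loader, not by hand):
#
#   engine_class = ENGINE_DICT['lalr']
#   engine = engine_class(lexer_conf, parser_conf)
#   tree = engine.parse('some input text')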