This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

81 lines
2.5 KiB

  1. import re
  2. from .lexer import Lexer
  3. from .parsers.lalr_analysis import GrammarAnalyzer
  4. from .common import is_terminal
  5. from .parsers import lalr_parser, earley
  6. class WithLexer:
  7. def __init__(self, lexer_conf):
  8. self.lexer_conf = lexer_conf
  9. self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)
  10. def lex(self, text):
  11. stream = self.lexer.lex(text)
  12. if self.lexer_conf.postlex:
  13. return self.lexer_conf.postlex.process(stream)
  14. else:
  15. return stream
  16. class LALR(WithLexer):
  17. def __init__(self, lexer_conf, parser_conf):
  18. WithLexer.__init__(self, lexer_conf)
  19. analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
  20. analyzer.analyze()
  21. self.parser = lalr_parser.Parser(analyzer, parser_conf.callback)
  22. def parse(self, text):
  23. tokens = list(self.lex(text))
  24. return self.parser.parse(tokens)
  25. class Earley(WithLexer):
  26. def __init__(self, lexer_conf, parser_conf):
  27. WithLexer.__init__(self, lexer_conf)
  28. rules = [{'name':n,
  29. 'symbols': list(self._prepare_expansion(x)),
  30. 'postprocess': getattr(parser_conf.callback, a)}
  31. for n,x,a in parser_conf.rules]
  32. self.parser = earley.Parser(rules, parser_conf.start)
  33. def _prepare_expansion(self, expansion):
  34. for sym in expansion:
  35. if is_terminal(sym):
  36. yield sym, None
  37. else:
  38. yield sym
  39. def parse(self, text):
  40. tokens = list(self.lex(text))
  41. res = self.parser.parse(tokens)
  42. assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
  43. return res[0]
  44. class Earley2:
  45. def __init__(self, lexer_conf, parser_conf):
  46. self.token_by_name = {t.name:t for t in lexer_conf.tokens}
  47. rules = [{'name':n,
  48. 'symbols': list(self._prepare_expansion(x)),
  49. 'postprocess': getattr(parser_conf.callback, a)}
  50. for n,x,a in parser_conf.rules]
  51. self.parser = earley.Parser(rules, parser_conf.start)
  52. def _prepare_expansion(self, expansion):
  53. for sym in expansion:
  54. if is_terminal(sym):
  55. yield sym, re.compile(self.token_by_name[sym].to_regexp())
  56. else:
  57. yield sym
  58. def parse(self, text):
  59. res = self.parser.parse(text)
  60. assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
  61. return res[0]
  62. ENGINE_DICT = { 'lalr': LALR, 'earley': Earley }