This repository contains code for mirroring other repositories, as well as the code being mirrored.


import re
import sre_parse

from .lexer import Lexer, ContextualLexer, Token
from .common import is_terminal, GrammarError, ParserConf
from .parsers import lalr_parser, earley, nearley
from .parsers.grammar_analysis import Rule
from .tree import Transformer


class WithLexer:
    # Base class for frontends that tokenize the input with a standard lexer
    def __init__(self, lexer_conf):
        self.lexer_conf = lexer_conf
        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)

    def lex(self, text):
        stream = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            # An optional post-lexing stage (e.g. indentation tracking) may rewrite the stream
            return self.lexer_conf.postlex.process(stream)
        else:
            return stream


class LALR(WithLexer):
    # LALR(1) parsing over the standard lexer's token stream
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        self.parser_conf = parser_conf
        self.parser = lalr_parser.Parser(parser_conf)

    def parse(self, text):
        tokens = list(self.lex(text))
        return self.parser.parse(tokens)
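
# A hypothetical usage sketch (not part of the original module): LexerConf and
# ParserConf instances are normally assembled by the library from a grammar,
# so lexer_conf/parser_conf below are assumed rather than constructed here.
#
#     frontend = LALR(lexer_conf, parser_conf)
#     tree = frontend.parse('1 + 2')
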
class LALR_ContextualLexer:
    # LALR(1) parsing with a contextual lexer: in each parser state, the lexer
    # only attempts the tokens that the parser can currently accept
    def __init__(self, lexer_conf, parser_conf):
        self.lexer_conf = lexer_conf
        self.parser_conf = parser_conf

        self.parser = lalr_parser.Parser(parser_conf)

        # Map each parser state to the token names acceptable in that state
        d = {idx: t.keys() for idx, t in self.parser.analysis.states_idx.items()}
        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore,
                                     always_accept=lexer_conf.postlex.always_accept
                                                   if lexer_conf.postlex else ())

    def parse(self, text):
        tokens = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            tokens = self.lexer_conf.postlex.process(tokens)
        # The lexer is kept in sync with the parser via set_parser_state
        return self.parser.parse(tokens, self.lexer.set_parser_state)


class Nearley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [{'name': n,
                  'symbols': self._prepare_expansion(x),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = nearley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        return [(sym, None) if is_terminal(sym) else sym for sym in expansion]

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]


class Earley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)

        rules = [(n, self._prepare_expansion(x), a)
                 for n, x, a in parser_conf.rules]

        self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))

    def _prepare_expansion(self, expansion):
        return [(sym,) if is_terminal(sym) else sym for sym in expansion]

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]
class Nearley_NoLex:
    # Scannerless Nearley: parses the raw character stream directly, matching
    # terminals with their compiled regexps instead of running a lexer first
    def __init__(self, lexer_conf, parser_conf):
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [{'name': n,
                  'symbols': list(self._prepare_expansion(x)),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]

        self.parser = nearley.Parser(rules, parser_conf.start)

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].to_regexp()
                width = sre_parse.parse(regexp).getwidth()
                if width != (1, 1):
                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
                yield sym, re.compile(regexp)
            else:
                yield sym

    def parse(self, text):
        res = self.parser.parse(text)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        return res[0]


class Earley_NoLex:
    # Scannerless Earley: terminals are matched character by character and
    # re-assembled into token strings after parsing
    def __init__(self, lexer_conf, parser_conf):
        # Rules that stand in for terminals get renamed, so that parse() can
        # find their subtrees and join them back into strings
        self.tokens_to_convert = {name: '__token_' + name for name, tree, _ in parser_conf.rules if is_terminal(name)}
        rules = []
        for name, exp, alias in parser_conf.rules:
            name = self.tokens_to_convert.get(name, name)
            exp = [self.tokens_to_convert.get(x, x) for x in exp]
            rules.append((name, exp, alias))

        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [(n, list(self._prepare_expansion(x)), a) for n, x, a in rules]

        self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))

    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].pattern.to_regexp()
                width = sre_parse.parse(regexp).getwidth()
                if width != (1, 1):
                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
                yield (re.compile(regexp).match,)
            else:
                yield sym

    def parse(self, text):
        res = self.parser.parse(text)
        assert len(res) == 1, 'Ambiguous parse! Not handled yet'
        res = res[0]

        class RestoreTokens(Transformer):
            pass

        for t in self.tokens_to_convert:
            setattr(RestoreTokens, t, ''.join)

        res = RestoreTokens().transform(res)
        return res
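
# Illustration (assumed REPL session, stdlib only): sre_parse.getwidth()
# reports the (min, max) number of characters a regexp can match, which is
# what the width check in the _NoLex classes above relies on.
#
#     >>> import sre_parse
#     >>> sre_parse.parse('[0-9]').getwidth()
#     (1, 1)
#     >>> sre_parse.parse('ab').getwidth()   # min width 2 -> rejected above
#     (2, 2)
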
def get_frontend(parser, lexer):
    # Choose a frontend class for the requested parser/lexer combination
    if parser == 'lalr':
        if lexer is None:
            raise ValueError('The LALR parser requires use of a lexer')
        elif lexer == 'standard':
            return LALR
        elif lexer == 'contextual':
            return LALR_ContextualLexer
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'earley':
        if lexer is None:
            return Earley_NoLex
        elif lexer == 'standard':
            return Earley
        elif lexer == 'contextual':
            raise ValueError('The Earley parser does not support the contextual lexer')
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    else:
        raise ValueError('Unknown parser: %s' % parser)
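
A minimal usage sketch, assuming this module is importable as lark.parser_frontends (its relative imports suggest it lives inside the lark package); the lexer_conf/parser_conf objects are placeholders for what the library normally builds from a grammar:

    from lark.parser_frontends import get_frontend, LALR_ContextualLexer

    frontend_class = get_frontend('lalr', 'contextual')
    assert frontend_class is LALR_ContextualLexer
    # parser = frontend_class(lexer_conf, parser_conf)  # conf objects assumed
    # tree = parser.parse(input_text)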