This repo contains the code used to mirror other repos, as well as the code being mirrored.

import re
import sre_parse

from .lexer import Lexer, ContextualLexer, Token
from .common import is_terminal, GrammarError, ParserConf
from .parsers import lalr_parser, earley, xearley, resolve_ambig
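
# WithLexer is a small mixin used by the frontends below: it owns the lexer
# configuration and exposes a single lex() entry point, optionally piping
# the token stream through a user-supplied post-lexer (postlex).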
class WithLexer:
    def init_traditional_lexer(self, lexer_conf):
        self.lexer_conf = lexer_conf
        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)

    def init_contextual_lexer(self, lexer_conf, parser_conf):
        self.lexer_conf = lexer_conf
        d = {idx: t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)

    def lex(self, text):
        stream = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            return self.lexer_conf.postlex.process(stream)
        else:
            return stream
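
# The two LALR(1) frontends differ only in their lexer: LALR uses the
# traditional lexer, while LALR_ContextualLexer restricts lexing to the
# tokens acceptable in the parser's current state (the `d` table built in
# init_contextual_lexer above).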
class LALR(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.parser = lalr_parser.Parser(parser_conf)
        self.init_traditional_lexer(lexer_conf)

    def parse(self, text):
        token_stream = self.lex(text)
        return self.parser.parse(token_stream)

class LALR_ContextualLexer(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.parser = lalr_parser.Parser(parser_conf)
        self.init_contextual_lexer(lexer_conf, parser_conf)

    def parse(self, text):
        token_stream = self.lex(text)
        return self.parser.parse(token_stream, self.lexer.set_parser_state)
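
# A minimal usage sketch. Hedged: lexer_conf and parser_conf are normally
# constructed by the library's grammar loader rather than built by hand, and
# the input string is hypothetical.
#
#     frontend = LALR(lexer_conf, parser_conf)
#     tree = frontend.parse('1 + 2 * 3')
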
def get_ambiguity_resolver(options):
    if not options or options.ambiguity == 'resolve':
        return resolve_ambig.standard_resolve_ambig
    elif options.ambiguity == 'resolve__antiscore_sum':
        return resolve_ambig.antiscore_sum_resolve_ambig
    elif options.ambiguity == 'explicit':
        return None
    raise ValueError(options)
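
# Scanless mode has no lexer, so the raw text is fed to the Earley parser one
# character at a time; tokenize_text keeps the line/column bookkeeping that a
# real lexer would normally provide.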
def tokenize_text(text):
    line = 1
    col_start_pos = 0
    for i, ch in enumerate(text):
        if '\n' in ch:
            line += ch.count('\n')
            col_start_pos = i + ch.rindex('\n')
        yield Token('CHAR', ch, line=line, column=i - col_start_pos)
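
# Earley_NoLex is the scanless Earley frontend: terminals are matched as
# regexps directly against the text, which is why _prepare_match rejects any
# terminal whose match width is not exactly 1.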
class Earley_NoLex:
    def __init__(self, lexer_conf, parser_conf, options=None):
        self._prepare_match(lexer_conf)

        self.parser = earley.Parser(parser_conf, self.match,
                                    resolve_ambiguity=get_ambiguity_resolver(options))

    def match(self, term, text, index=0):
        return self.regexps[term].match(text, index)

    def _prepare_match(self, lexer_conf):
        self.regexps = {}
        for t in lexer_conf.tokens:
            regexp = t.pattern.to_regexp()
            width = sre_parse.parse(regexp).getwidth()
            if width != (1, 1):
                # `t.name` here fixes a NameError in the original, which
                # referenced an undefined variable `sym`.
                raise GrammarError('Scanless parsing (lexer=None) requires all tokens to have a width of 1 (terminal %s: %s is %s)' % (t.name, regexp, width))
            self.regexps[t.name] = re.compile(regexp)

    def parse(self, text):
        token_stream = tokenize_text(text)
        return self.parser.parse(token_stream)
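
# Earley is the token-based Earley frontend: it reuses the traditional lexer
# via WithLexer, so match() only needs to compare a terminal name against the
# token's type.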
class Earley(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.init_traditional_lexer(lexer_conf)

        self.parser = earley.Parser(parser_conf, self.match,
                                    resolve_ambiguity=get_ambiguity_resolver(options))

    def match(self, term, token):
        return term == token.type

    def parse(self, text):
        tokens = self.lex(text)
        return self.parser.parse(tokens)
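
# XEarley drives the Earley parser with a dynamic lexer: terminals are
# matched as regexps at arbitrary positions in the raw text, so tokens of any
# (non-zero) width are allowed.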
class XEarley:
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        self._prepare_match(lexer_conf)

        self.parser = xearley.Parser(parser_conf,
                                     self.match,
                                     resolve_ambiguity=get_ambiguity_resolver(options),
                                     ignore=lexer_conf.ignore,
                                     predict_all=options.earley__predict_all
                                     )

    def match(self, term, text, index=0):
        return self.regexps[term].match(text, index)

    def _prepare_match(self, lexer_conf):
        self.regexps = {}
        for t in lexer_conf.tokens:
            regexp = t.pattern.to_regexp()
            # getwidth() returns a (min, max) tuple, which is always truthy, so
            # the original bare assert was a no-op; check the minimum width
            # instead, since a zero-width terminal can never advance the
            # dynamic lexer.
            assert sre_parse.parse(regexp).getwidth()[0], regexp
            self.regexps[t.name] = re.compile(regexp)

    def parse(self, text):
        return self.parser.parse(text)
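
# get_frontend maps the user-facing (parser, lexer) option strings to one of
# the frontend classes above, rejecting combinations that don't exist.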
def get_frontend(parser, lexer):
    if parser == 'lalr':
        if lexer is None:
            raise ValueError('The LALR parser requires use of a lexer')
        elif lexer == 'standard':
            return LALR
        elif lexer == 'contextual':
            return LALR_ContextualLexer
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'earley':
        if lexer is None:
            return Earley_NoLex
        elif lexer == 'standard':
            return Earley
        elif lexer == 'dynamic':
            return XEarley
        elif lexer == 'contextual':
            raise ValueError('The Earley parser does not support the contextual lexer')
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    else:
        raise ValueError('Unknown parser: %s' % parser)
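
# A minimal dispatch sketch. Hedged: the conf objects are normally built by
# the library's grammar loader, and only the option strings checked above are
# guaranteed to exist.
#
#     frontend_cls = get_frontend('earley', 'dynamic')   # -> XEarley
#     frontend = frontend_cls(lexer_conf, parser_conf, options)
#     tree = frontend.parse('some input')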