This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

144 lines
4.7 KiB

  1. from ..common import ParseError, UnexpectedToken, is_terminal
  2. from .grammar_analysis import GrammarAnalyzer
  3. # is_terminal = callable
  4. class Item:
  5. def __init__(self, rule, ptr, start, data):
  6. self.rule = rule
  7. self.ptr = ptr
  8. self.start = start
  9. self.data = data
  10. @property
  11. def expect(self):
  12. return self.rule.expansion[self.ptr]
  13. @property
  14. def is_complete(self):
  15. return self.ptr == len(self.rule.expansion)
  16. def advance(self, data):
  17. return Item(self.rule, self.ptr+1, self.start, self.data + [data])
  18. def __eq__(self, other):
  19. return self.start == other.start and self.ptr == other.ptr and self.rule == other.rule
  20. def __hash__(self):
  21. return hash((self.rule, self.ptr, self.start))
  22. class Parser:
  23. def __init__(self, parser_conf):
  24. self.analysis = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
  25. self.start = parser_conf.start
  26. self.postprocess = {}
  27. self.predictions = {}
  28. for rule in self.analysis.rules:
  29. if rule.origin != '$root': # XXX kinda ugly
  30. self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
  31. self.predictions[rule.origin] = [(x.rule, x.index) for x in self.analysis.expand_rule(rule.origin)]
  32. def parse(self, stream):
  33. # Define parser functions
  34. def predict(symbol, i):
  35. assert not is_terminal(symbol), symbol
  36. return {Item(rule, index, i, []) for rule, index in self.predictions[symbol]}
  37. def complete(item, table):
  38. #item.data = (item.rule_ptr.rule, item.data)
  39. item.data = self.postprocess[item.rule](item.data)
  40. return {old_item.advance(item.data) for old_item in table[item.start]
  41. if not old_item.is_complete and old_item.expect == item.rule.origin}
  42. def process_column(i, term):
  43. assert i == len(table)-1
  44. cur_set = table[i]
  45. next_set = set()
  46. to_process = cur_set
  47. while to_process:
  48. new_items = set()
  49. for item in to_process:
  50. if item.is_complete:
  51. new_items |= complete(item, table)
  52. else:
  53. if is_terminal(item.expect):
  54. # scan
  55. match = item.expect[0](term) if callable(item.expect[0]) else item.expect[0] == term
  56. if match:
  57. next_set.add(item.advance(stream[i]))
  58. else:
  59. if item.ptr: # part of an already predicted batch
  60. new_items |= predict(item.expect, i)
  61. to_process = new_items - cur_set # TODO: is this precaution necessary?
  62. cur_set |= to_process
  63. if not next_set and term != '$end':
  64. expect = filter(is_terminal, [x.expect for x in cur_set if not x.is_complete])
  65. raise UnexpectedToken(term, expect, stream, i)
  66. table.append(next_set)
  67. # Main loop starts
  68. table = [predict(self.start, 0)]
  69. for i, char in enumerate(stream):
  70. process_column(i, char.type)
  71. process_column(len(stream), '$end')
  72. # Parse ended. Now build a parse tree
  73. solutions = [n.data for n in table[len(stream)]
  74. if n.is_complete and n.rule.origin==self.start and n.start==0]
  75. if not solutions:
  76. raise ParseError('Incomplete parse: Could not find a solution to input')
  77. return solutions
  78. #return map(self.reduce_solution, solutions)
  79. def reduce_solution(self, solution):
  80. rule, children = solution
  81. children = [self.reduce_solution(c) if isinstance(c, tuple) else c for c in children]
  82. return self.postprocess[rule](children)
  83. from ..common import ParserConf
  84. # A = 'A'.__eq__
  85. # rules = [
  86. # ('a', ['a', A], None),
  87. # ('a', ['a', A, 'a'], None),
  88. # ('a', ['a', A, A, 'a'], None),
  89. # ('a', [A], None),
  90. # ]
  91. # p = Parser(ParserConf(rules, None, 'a'))
  92. # for x in p.parse('AAAA'):
  93. # print '->'
  94. # print x.pretty()
  95. # import re
  96. # NUM = re.compile('[0-9]').match
  97. # ADD = re.compile('[+-]').match
  98. # MUL = re.compile('[*/]').match
  99. # rules = [
  100. # ('sum', ['sum', ADD, 'product'], None),
  101. # ('sum', ['product'], None),
  102. # ('product', ['product', MUL, 'factor'], None),
  103. # ('product', ['factor'], None),
  104. # ('factor', ['('.__eq__, 'sum', ')'.__eq__], None),
  105. # ('factor', ['number'], None),
  106. # ('number', [NUM, 'number'], None),
  107. # ('number', [NUM], None),
  108. # ]
  109. # p = Parser(ParserConf(rules, None, 'sum'))
  110. # # print p.parse('NALNMNANR')
  111. # print p.parse('1+(2*3-4)')[0].pretty()