from ..common import ParseError, UnexpectedToken, is_terminal
from lalr_analysis import GrammarAnalyzer
from ..tree import Tree
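
# An Earley item: a rule pointer (dotted rule), the column where the item
# started, and the children collected so far for that rule.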
class Item:
    def __init__(self, rule_ptr, start, data):
        self.rule_ptr = rule_ptr
        self.start = start
        self.data = data

    @property
    def expect(self):
        return self.rule_ptr.next

    @property
    def is_complete(self):
        return self.rule_ptr.is_satisfied

    @property
    def name(self):
        return self.rule_ptr.rule.origin

    def advance(self, data):
        return Item(self.rule_ptr.advance(self.expect), self.start, self.data + [data])

    def __eq__(self, other):
        return self.rule_ptr == other.rule_ptr and self.start == other.start

    def __hash__(self):
        return hash((self.rule_ptr, self.start))

    def __repr__(self):
        return '%s (%s)' % (self.rule_ptr, self.start)
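

# Earley parser: builds a table with one column of items per input position,
# driven by the predictor/scanner/completer steps defined inside parse().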
class Parser:
    def __init__(self, rules, start):
        self.analyzer = GrammarAnalyzer(rules, start)
        self.start = start
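
    # Parse a stream of terminals; returns a list of parse trees (there may be
    # more than one when the grammar is ambiguous for this input).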
    def parse(self, stream):
        # Define parser functions
        def predict(symbol, i):
            assert not is_terminal(symbol), symbol
            return {Item(rp, i, []) for rp in self.analyzer.expand_rule(symbol)}

        def scan(item, inp):
            if item.expect == inp:  # TODO Do a smarter match, i.e. regexp
                return {item.advance(inp)}
            else:
                return set()
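
        # Completer: when an item is finished, wrap its children in a Tree and
        # advance every item in its start column that was waiting on this rule.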
        def complete(item, table):
            name = item.name
            item.data = Tree(name, item.data)
            return {old_item.advance(item.data) for old_item in table[item.start]
                    if not old_item.is_complete and old_item.expect == name}
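
        # Process one input position: run predict/scan/complete to a fixpoint
        # over the current column, filling the next column via the scanner.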
        def process_column(i, char):
            cur_set = table[-1]
            next_set = set()
            table.append(next_set)

            to_process = cur_set
            while to_process:
                new_items = set()
                for item in to_process:
                    if item.is_complete:
                        new_items |= complete(item, table)
                    else:
                        if is_terminal(item.expect):
                            next_set |= scan(item, char)
                        else:
                            new_items |= predict(item.expect, i)

                to_process = new_items - cur_set
                cur_set |= to_process

            if not next_set and char != '$end':
                expect = filter(is_terminal, [i.expect for i in cur_set if not i.is_complete])
                raise UnexpectedToken(char, expect, stream, i)
        # Main loop starts
        table = [predict(self.start, 0)]

        for i, char in enumerate(stream):
            process_column(i, char)

        process_column(len(stream), '$end')

        # Parse ended. Now build a parse tree
        solutions = [n.data for n in table[len(stream)]
                     if n.is_complete and n.name == self.start and n.start == 0]

        if not solutions:
            raise ParseError('Incomplete parse: Could not find a solution to input')

        return solutions
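

# Example usage (kept commented out): rules are (origin, expansion) pairs and
# the input is a string of single-character terminals.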
# rules = [
#     ('a', ['a', 'A']),
#     ('a', ['a', 'A', 'a']),
#     ('a', ['a', 'A', 'A', 'a']),
#     ('a', ['A']),
# ]

# p = Parser(rules, 'a')
# for x in p.parse('AAAA'):
#     print '->'
#     print x.pretty()

# rules = [
#     ('sum', ['sum', "A", 'product']),
#     ('sum', ['product']),
#     ('product', ['product', "M", 'factor']),
#     ('product', ['factor']),
#     ('factor', ['L', 'sum', 'R']),
#     ('factor', ['number']),
#     ('number', ['N', 'number']),
#     ('number', ['N']),
# ]

# p = Parser(rules, 'sum')
# print p.parse('NALNMNANR')