This repo contains the code used to mirror other repos, as well as the code being mirrored.

  1. "This module implements an Earley Parser"
  2. # The algorithm keeps track of each state set, using a corresponding Column instance.
  3. # Column keeps track of new items using NewsList instances.
  4. #
  5. # Author: Erez Shinan (2017)
  6. # Email : erezshin@gmail.com
  7. from ..common import ParseError, UnexpectedToken, is_terminal
  8. from .grammar_analysis import GrammarAnalyzer
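
# A sentinel fed to the parser after the real input, so the last column gets
# its predict/complete pass without attempting another scan.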
class EndToken:
    type = '$end'

END_TOKEN = EndToken()
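
# An Earley item: a rule with a dot position (ptr), the column in which it
# started (start), and the partial parse data collected so far.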
class Item(object):
    def __init__(self, rule, ptr, start, data):
        self.rule = rule
        self.ptr = ptr
        self.start = start
        self.data = data

    @property
    def expect(self):
        return self.rule.expansion[self.ptr]

    @property
    def is_complete(self):
        return self.ptr == len(self.rule.expansion)

    def advance(self, data):
        return Item(self.rule, self.ptr+1, self.start, self.data + [data])

    def __eq__(self, other):
        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule

    def __hash__(self):
        return hash((self.rule, self.ptr, id(self.start)))

    def __repr__(self):
        before = map(str, self.rule.expansion[:self.ptr])
        after = map(str, self.rule.expansion[self.ptr:])
        return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after))
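
# get_news() returns only the items appended since the previous call, so the
# parser's fixpoint loop sees each item exactly once.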
class NewsList(list):
    "Keeps track of newly added items (append-only)"
    def __init__(self, initial=None):
        list.__init__(self, initial or [])
        self.last_iter = 0

    def get_news(self):
        i = self.last_iter
        self.last_iter = len(self)
        return self[i:]
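
# Items are grouped by their next action: scan a terminal, predict a
# nonterminal, or reduce (complete) a finished rule.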
class Column:
    "An entry in the table, aka Earley Chart"
    def __init__(self):
        self.to_reduce = NewsList()
        self.to_predict = NewsList()
        self.to_scan = NewsList()
        self.item_count = 0

        self.added = set()

    def add(self, items):
        """Sort items into scan/predict/reduce newslists

        Makes sure only unique items are added.
        """
        added = self.added
        for item in items:
            if item.is_complete:
                # (We must allow repetition of empty rules)
                # if item.rule.expansion:

                    # This is an important test to avoid infinite loops,
                    # for example for the rule:
                    #    a: a | "b"
                    # If we can detect these cases statically, we can remove
                    # this test and gain a tiny performance boost.
                    #
                    # if item in added:
                    #     continue
                    # added.add(item)

                self.to_reduce.append(item)
            else:
                if is_terminal(item.expect):
                    self.to_scan.append(item)
                else:
                    if item in added:
                        continue
                    added.add(item)
                    self.to_predict.append(item)

            self.item_count += 1    # Only count if actually added

    def __nonzero__(self):
        return bool(self.item_count)

    __bool__ = __nonzero__    # Python 3 looks for __bool__, not __nonzero__
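
# For each nonterminal, the Parser precomputes the rules that prediction
# should insert, and maps every rule to the callback that builds its node
# in the parse tree.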
class Parser:
    def __init__(self, parser_conf):
        self.analysis = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
        self.start = parser_conf.start

        self.postprocess = {}
        self.predictions = {}
        for rule in self.analysis.rules:
            if rule.origin != '$root':   # XXX kinda ugly
                a = rule.alias
                self.postprocess[rule] = a if callable(a) else getattr(parser_conf.callback, a)
                self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
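
    # Note: parse() returns a list of solutions; with an ambiguous grammar,
    # more than one complete parse may survive to the last column.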
    def parse(self, stream, start=None):
        # Define parser functions
        start = start or self.start
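
        # Prediction: seed an item for every rule of the expected nonterminal.
        # (The parameter i is the current Column, used as the new items' start.)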
        def predict(nonterm, i):
            assert not is_terminal(nonterm), nonterm
            return [Item(rule, 0, i, []) for rule in self.predictions[nonterm]]
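
        # Completion: build the finished item's tree node via its rule's
        # callback, then advance every item in its start column that was
        # expecting this nonterminal.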
        def complete(item):
            name = item.rule.origin
            item.data = self.postprocess[item.rule](item.data)
            return [i.advance(item.data) for i in item.start.to_predict if i.expect == name]
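
        # One step of the main loop: run predict/complete over column i until
        # no new items appear, then scan the current token into the next column.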
        def process_column(i, token, cur_set):
            next_set = Column()

            while True:
                to_predict = {x.expect for x in cur_set.to_predict.get_news()
                              if x.ptr}  # if not part of an already predicted batch
                to_reduce = cur_set.to_reduce.get_news()
                if not (to_predict or to_reduce):
                    break

                for nonterm in to_predict:
                    cur_set.add( predict(nonterm, cur_set) )
                for item in to_reduce:
                    cur_set.add( complete(item) )

            if token is not END_TOKEN:
                for item in cur_set.to_scan.get_news():
                    match = item.expect[0](token) if callable(item.expect[0]) else item.expect[0] == token.type
                    if match:
                        next_set.add([item.advance(stream[i])])

            if not next_set and token is not END_TOKEN:
                expect = {i.expect[-1] for i in cur_set.to_scan}
                raise UnexpectedToken(token, expect, stream, i)

            return cur_set, next_set
        # Main loop starts
        column0 = Column()
        column0.add(predict(start, column0))

        cur_set = column0
        for i, char in enumerate(stream):
            _, cur_set = process_column(i, char, cur_set)

        last_set, _ = process_column(len(stream), END_TOKEN, cur_set)

        # Parse ended. Now build a parse tree
        solutions = [n.data for n in last_set.to_reduce
                     if n.rule.origin == start and n.start is column0]

        if not solutions:
            raise ParseError('Incomplete parse: Could not find a solution to input')

        return solutions
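
For context, this module is not meant to be called directly; applications reach it through lark's public API, where Earley is the default algorithm. A minimal usage sketch, assuming an installed lark package (the toy grammar is illustrative, not taken from this repo):

from lark import Lark

# parser='earley' selects the algorithm implemented by the module above.
grammar = """
    start: greeting NAME
    greeting: "hello" | "hi"
    NAME: /[a-z]+/
    %ignore " "
"""

parser = Lark(grammar, parser='earley')
tree = parser.parse("hello world")
print(tree.pretty())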