This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

76 lines
3.2 KiB

  1. "This module implements an Earley Parser"
  2. # The parser uses a parse-forest to keep track of derivations and ambiguities.
  3. # When the parse ends successfully, a disambiguation stage resolves all ambiguity
  4. # (right now ambiguity resolution is not developed beyond the needs of lark)
  5. # Afterwards the parse tree is reduced (transformed) according to user callbacks.
  6. # I use the no-recursion version of Transformer, because the tree might be
  7. # deeper than Python's recursion limit (a bit absurd, but that's life)
  8. #
  9. # The algorithm keeps track of each state set, using a corresponding Column instance.
  10. # Column keeps track of new items using NewsList instances.
  11. #
  12. # Author: Erez Shinan (2017)
  13. # Email : erezshin@gmail.com
  14. from ..grammar import NonTerminal, Terminal
  15. class Item(object):
  16. "An Earley Item, the atom of the algorithm."
  17. __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
  18. def __init__(self, rule, ptr, start):
  19. self.is_complete = len(rule.expansion) == ptr
  20. self.rule = rule # rule
  21. self.ptr = ptr # ptr
  22. self.start = start # j
  23. self.node = None # w
  24. if self.is_complete:
  25. self.s = rule.origin
  26. self.expect = None
  27. self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
  28. else:
  29. self.s = (rule, ptr)
  30. self.expect = rule.expansion[ptr]
  31. self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
  32. self._hash = hash((self.s, self.start))
  33. def advance(self):
  34. return Item(self.rule, self.ptr + 1, self.start)
  35. def __eq__(self, other):
  36. return self is other or (self.s == other.s and self.start == other.start)
  37. def __hash__(self):
  38. return self._hash
  39. def __repr__(self):
  40. before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
  41. after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
  42. symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after))
  43. return '%s (%d)' % (symbol, self.start)
  44. class TransitiveItem(Item):
  45. __slots__ = ('recognized', 'reduction', 'column', 'next_titem')
  46. def __init__(self, recognized, trule, originator, start):
  47. super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start)
  48. self.recognized = recognized
  49. self.reduction = originator
  50. self.column = start
  51. self.next_titem = None
  52. self._hash = hash((self.s, self.start, self.recognized))
  53. def __eq__(self, other):
  54. if not isinstance(other, TransitiveItem):
  55. return False
  56. return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.recognized == other.recognized)
  57. def __hash__(self):
  58. return self._hash
  59. def __repr__(self):
  60. before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
  61. after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
  62. return '{} : {} -> {}* {} ({}, {})'.format(self.recognized.name, self.rule.origin.name, ' '.join(before), ' '.join(after), self.column, self.start)