This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

81 lines
2.6 KiB

  1. "This module implements an Earley Parser"
  2. # The parser uses a parse-forest to keep track of derivations and ambiguities.
  3. # When the parse ends successfully, a disambiguation stage resolves all ambiguity
  4. # (right now ambiguity resolution is not developed beyond the needs of lark)
  5. # Afterwards the parse tree is reduced (transformed) according to user callbacks.
  6. # I use the no-recursion version of Transformer, because the tree might be
  7. # deeper than Python's recursion limit (a bit absurd, but that's life)
  8. #
  9. # The algorithm keeps track of each state set, using a corresponding Column instance.
  10. # Column keeps track of new items using NewsList instances.
  11. #
  12. # Author: Erez Shinan (2017)
  13. # Email : erezshin@gmail.com
  14. ## for recursive repr
  15. from ..tree import Tree
  16. class Derivation(Tree):
  17. def __init__(self, rule, children = None):
  18. Tree.__init__(self, 'drv', children if children is not None else [])
  19. self.meta.rule = rule
  20. self._hash = None
  21. def __repr__(self, indent = 0):
  22. return 'Derivation(%s, %s, %s)' % (self.data, self.rule.origin, '...')
  23. def __hash__(self):
  24. if self._hash is None:
  25. self._hash = Tree.__hash__(self)
  26. return self._hash
  27. class Item(object):
  28. "An Earley Item, the atom of the algorithm."
  29. __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'node', '_hash')
  30. def __init__(self, rule, ptr, start):
  31. self.is_complete = len(rule.expansion) == ptr
  32. self.rule = rule # rule
  33. self.ptr = ptr # ptr
  34. self.start = start # j
  35. self.node = None # w
  36. if self.is_complete:
  37. self.s = rule.origin
  38. self.expect = None
  39. else:
  40. self.s = (rule, ptr)
  41. self.expect = rule.expansion[ptr]
  42. self._hash = hash((self.s, self.start.i))
  43. def advance(self):
  44. return self.__class__(self.rule, self.ptr + 1, self.start)
  45. def __eq__(self, other):
  46. return self is other or (self.s == other.s and self.start.i == other.start.i)
  47. def __hash__(self):
  48. return self._hash
  49. def __repr__(self):
  50. return '%s (%d)' % (self.s if self.is_complete else self.rule.origin, self.start.i)
  51. class Column:
  52. "An entry in the table, aka Earley Chart. Contains lists of items."
  53. def __init__(self, i, FIRST):
  54. self.i = i
  55. self.items = set()
  56. self.FIRST = FIRST
  57. def add(self, item):
  58. """Sort items into scan/predict/reduce newslists
  59. Makes sure only unique items are added.
  60. """
  61. self.items.add(item)
  62. def __bool__(self):
  63. return bool(self.items)
  64. __nonzero__ = __bool__ # Py2 backwards-compatibility