This repo contains code to mirror other repos, as well as the code being mirrored.
  1. "This module implements an experimental Earley Parser with a dynamic lexer"
  2. # The parser uses a parse-forest to keep track of derivations and ambiguations.
  3. # When the parse ends successfully, a disambiguation stage resolves all ambiguity
  4. # (right now ambiguity resolution is not developed beyond the needs of lark)
  5. # Afterwards the parse tree is reduced (transformed) according to user callbacks.
  6. # I use the no-recursion version of Transformer and Visitor, because the tree might be
  7. # deeper than Python's recursion limit (a bit absurd, but that's life)
  8. #
  9. # The algorithm keeps track of each state set, using a corresponding Column instance.
  10. # Column keeps track of new items using NewsList instances.
  11. #
  12. # Instead of running a lexer beforehand, or using a costy char-by-char method, this parser
  13. # uses regular expressions by necessity, achieving high-performance while maintaining all of
  14. # Earley's power in parsing any CFG.
  15. #
  16. #
  17. # Author: Erez Shinan (2017)
  18. # Email : erezshin@gmail.com
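#
# A hypothetical illustration (not from this module): with an ambiguous rule
# such as  expr: expr "+" expr | NAME,  the input "a+b+c" has two derivations,
# so the parse ends with a root of Tree('_ambig', [<tree for (a+b)+c>, <tree
# for a+(b+c)>]), which the resolve_ambiguity callback, if provided, collapses
# to a single tree.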

from collections import defaultdict

from ..common import ParseError, UnexpectedToken, Terminal
from ..lexer import Token
from ..tree import Tree
from .grammar_analysis import GrammarAnalyzer

from .earley import ApplyCallbacks, Item, Column


class Parser:
    def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, ignore=()):
        self.analysis = GrammarAnalyzer(rules, start_symbol)
        self.start_symbol = start_symbol
        self.resolve_ambiguity = resolve_ambiguity
        self.ignore = list(ignore)

        self.postprocess = {}
        self.predictions = {}
        for rule in self.analysis.rules:
            if rule.origin != '$root':  # XXX kinda ugly
                a = rule.alias
                self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
                self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
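
        # postprocess maps each rule to the user callback that reduces it after
        # the parse; predictions caches, per nonterminal, the rules it can
        # expand to, so predict() below is a plain dict lookup.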

    def parse(self, stream, start_symbol=None):
        # Define parser functions
        start_symbol = start_symbol or self.start_symbol
        delayed_matches = defaultdict(list)

        text_line = 1
        text_column = 0

        def predict(nonterm, column):
            assert not isinstance(nonterm, Terminal), nonterm
            return [Item(rule, 0, column, None) for rule in self.predictions[nonterm]]

        def complete(item):
            name = item.rule.origin
            return [i.advance(item.tree) for i in item.start.to_predict if i.expect == name]
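
        # predict seeds fresh items (dot at position 0) for each rule of the
        # expected nonterminal, and complete advances every item in a finished
        # item's start column that was waiting for that rule's name.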

        def predict_and_complete(column):
            while True:
                to_predict = {x.expect for x in column.to_predict.get_news()
                              if x.ptr}  # if not part of an already predicted batch
                to_reduce = column.to_reduce.get_news()
                if not (to_predict or to_reduce):
                    break

                for nonterm in to_predict:
                    column.add(predict(nonterm, column))
                for item in to_reduce:
                    new_items = list(complete(item))
                    for new_item in new_items:
                        if new_item.similar(item):
                            raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
                    column.add(new_items)
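
        # predict_and_complete drives the two steps above to a fixpoint:
        # get_news() hands back only the items added since the previous pass,
        # and the loop stops once a pass produces nothing new.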

        def scan(i, token, column):
            for x in self.ignore:
                m = x.match(stream, i)
                if m:
                    # An ignored pattern matches here: skip this character,
                    # keeping the current state set.
                    return column

            to_scan = column.to_scan.get_news()

            for item in to_scan:
                m = item.expect.match(stream, i)
                if m:
                    t = Token(item.expect.name, m.group(0), i, text_line, text_column)
                    delayed_matches[m.end()].append(item.advance(t))

                    # Also try shorter prefixes of the same lexeme, so a greedy
                    # long match cannot shadow a shorter valid token; m.end() is
                    # relative to s here, so offset it by i to get a stream position.
                    s = m.group(0)
                    for j in range(1, len(s)):
                        m = item.expect.match(s[:-j])
                        if m:
                            t = Token(item.expect.name, m.group(0), i, text_line, text_column)
                            delayed_matches[i + m.end()].append(item.advance(t))

            next_set = Column(i+1)
            next_set.add(delayed_matches[i+1])
            del delayed_matches[i+1]  # No longer needed, so unburden memory

            return next_set
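
        # Tokens longer than one character are parked in delayed_matches under
        # their absolute end position; each call to scan() drains the bucket
        # for position i+1, which is how multi-character tokens re-enter the
        # chart as scanning advances.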

        # Main loop starts
        column0 = Column(0)
        column0.add(predict(start_symbol, column0))

        column = column0
        for i, token in enumerate(stream):
            predict_and_complete(column)

            column = scan(i, token, column)

            if token == '\n':
                text_line += 1
                text_column = 0
            else:
                text_column += 1

        predict_and_complete(column)

        # Parse ended. Now build a parse tree
        solutions = [n.tree for n in column.to_reduce
                     if n.rule.origin == start_symbol and n.start is column0]

        if not solutions:
            raise ParseError('Incomplete parse: Could not find a solution to input')
        elif len(solutions) == 1:
            tree = solutions[0]
        else:
            tree = Tree('_ambig', solutions)

        if self.resolve_ambiguity:
            tree = self.resolve_ambiguity(tree)

        return ApplyCallbacks(self.postprocess).transform(tree)
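
The shorter-prefix trick in scan() is easiest to see in isolation. The following
standalone sketch (not part of this module; the terminal table and the helper
name candidate_tokens are invented for illustration) shows how a dynamic lexer
can offer every viable match length at a position instead of committing to the
longest one:

import re

def candidate_tokens(terminals, stream, i):
    """Yield (name, lexeme, end_position) for every terminal match at position
    i, including shorter prefixes of the longest match -- mirroring how scan()
    parks alternatives in delayed_matches keyed by their end position."""
    for name, pattern in terminals.items():
        m = pattern.match(stream, i)
        if not m:
            continue
        yield name, m.group(0), m.end()
        # Offer each shorter prefix the same terminal accepts, so a greedy
        # long match cannot shadow a shorter valid token.
        s = m.group(0)
        for j in range(1, len(s)):
            m2 = pattern.match(s[:-j])
            if m2:
                yield name, m2.group(0), i + m2.end()

terminals = {'NUMBER': re.compile(r'\d+'), 'PLUS': re.compile(r'\+')}
print(list(candidate_tokens(terminals, '12+3', 0)))
# -> [('NUMBER', '12', 2), ('NUMBER', '1', 1)]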