| @@ -66,7 +66,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput): | |||||
| if allowed: | if allowed: | ||||
| message += '\nExpecting: %s\n' % allowed | message += '\nExpecting: %s\n' % allowed | ||||
| super(UnexpectedCharacters, self).__init__(message.encode('utf-8')) | |||||
| super(UnexpectedCharacters, self).__init__(message) | |||||
| @@ -84,7 +84,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||||
| "Expected one of: \n\t* %s\n" | "Expected one of: \n\t* %s\n" | ||||
| % (token, self.line, self.column, '\n\t* '.join(self.expected))) | % (token, self.line, self.column, '\n\t* '.join(self.expected))) | ||||
| super(UnexpectedToken, self).__init__(message.encode('utf-8')) | |||||
| super(UnexpectedToken, self).__init__(message) | |||||
| class VisitError(Exception): | class VisitError(Exception): | ||||
| pass | pass | ||||
| @@ -42,12 +42,8 @@ class LarkOptions(object): | |||||
| cache_grammar - Cache the Lark grammar (Default: False) | cache_grammar - Cache the Lark grammar (Default: False) | ||||
| postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers. | postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers. | ||||
| start - The start symbol (Default: start) | start - The start symbol (Default: start) | ||||
| <<<<<<< HEAD | |||||
| profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False) | profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False) | ||||
| priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto) | priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto) | ||||
| ======= | |||||
| profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False) | |||||
| >>>>>>> master | |||||
| propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. | propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. | ||||
| lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. | lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. | ||||
| maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None | maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None | ||||
| @@ -76,9 +72,9 @@ class LarkOptions(object): | |||||
| assert self.parser in ('earley', 'lalr', 'cyk', None) | assert self.parser in ('earley', 'lalr', 'cyk', None) | ||||
| if self.ambiguity == 'explicit' and self.transformer: | |||||
| raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm for explicit ambiguity.' | |||||
| 'Please use your transformer on the resulting Forest, or use a different algorithm (i.e. LALR)') | |||||
| if self.parser == 'earley' and self.transformer: | |||||
| raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.' | |||||
| 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') | |||||
| if o: | if o: | ||||
| raise ValueError("Unknown options: %s" % o.keys()) | raise ValueError("Unknown options: %s" % o.keys()) | ||||
| @@ -160,14 +156,16 @@ class Lark: | |||||
| disambig_parsers = ['earley', 'cyk'] | disambig_parsers = ['earley', 'cyk'] | ||||
| assert self.options.parser in disambig_parsers, ( | assert self.options.parser in disambig_parsers, ( | ||||
| 'Only %s supports disambiguation right now') % ', '.join(disambig_parsers) | 'Only %s supports disambiguation right now') % ', '.join(disambig_parsers) | ||||
| assert self.options.priority in ('auto', 'none', 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority) | |||||
| if self.options.priority == 'auto': | if self.options.priority == 'auto': | ||||
| if self.options.parser in ('earley', 'cyk', ): | if self.options.parser in ('earley', 'cyk', ): | ||||
| self.options.priority = 'normal' | self.options.priority = 'normal' | ||||
| elif self.options.parser in ('lalr', ): | elif self.options.parser in ('lalr', ): | ||||
| self.options.priority = 'none' | |||||
| if self.options.priority in ('invert', 'normal'): | |||||
| self.options.priority = None | |||||
| elif self.options.priority in ('invert', 'normal'): | |||||
| assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time" | assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time" | ||||
| assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority) | |||||
| assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' | assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' | ||||
| assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) | assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) | ||||
| @@ -186,7 +184,7 @@ class Lark: | |||||
| # Else, if the user asked to disable priorities, strip them from the | # Else, if the user asked to disable priorities, strip them from the | ||||
| # rules. This allows the Earley parsers to skip an extra forest walk | # rules. This allows the Earley parsers to skip an extra forest walk | ||||
| # for improved performance, if you don't need them (or didn't specify any). | # for improved performance, if you don't need them (or didn't specify any). | ||||
| elif self.options.priority == 'none': | |||||
| elif self.options.priority == None: | |||||
| for rule in self.rules: | for rule in self.rules: | ||||
| if rule.options and rule.options.priority is not None: | if rule.options and rule.options.priority is not None: | ||||
| rule.options.priority = None | rule.options.priority = None | ||||
| @@ -100,17 +100,10 @@ class Reconstructor: | |||||
| for origin, rule_aliases in aliases.items(): | for origin, rule_aliases in aliases.items(): | ||||
| for alias in rule_aliases: | for alias in rule_aliases: | ||||
| <<<<<<< HEAD | |||||
| yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)])) | yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)])) | ||||
| yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin])) | yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin])) | ||||
| ======= | |||||
| yield Rule(origin, [Terminal(alias)], MakeMatchTree(origin.name, [NonTerminal(alias)])) | |||||
| yield Rule(origin, [Terminal(origin.name)], MakeMatchTree(origin.name, [origin])) | |||||
| >>>>>>> master | |||||
| def _match(self, term, token): | def _match(self, term, token): | ||||
| @@ -49,6 +49,7 @@ class TestParsers(unittest.TestCase): | |||||
| self.assertRaises(GrammarError, Lark, g, parser='lalr') | self.assertRaises(GrammarError, Lark, g, parser='lalr') | ||||
| # TODO: should it? shouldn't it? | |||||
| # l = Lark(g, parser='earley', lexer='dynamic') | # l = Lark(g, parser='earley', lexer='dynamic') | ||||
| # self.assertRaises(ParseError, l.parse, 'a') | # self.assertRaises(ParseError, l.parse, 'a') | ||||
| @@ -192,8 +193,10 @@ def _make_full_earley_test(LEXER): | |||||
| @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser") | @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser") | ||||
| def test_earley3(self): | def test_earley3(self): | ||||
| "Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)" | |||||
| """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result) | |||||
| By default, `+` should imitate regexp greedy-matching | |||||
| """ | |||||
| grammar = """ | grammar = """ | ||||
| start: A A | start: A A | ||||
| A: "a"+ | A: "a"+ | ||||
| @@ -201,7 +204,7 @@ def _make_full_earley_test(LEXER): | |||||
| l = Lark(grammar, parser='earley', lexer=LEXER) | l = Lark(grammar, parser='earley', lexer=LEXER) | ||||
| res = l.parse("aaa") | res = l.parse("aaa") | ||||
| self.assertEqual(res.children, ['a', 'aa']) | |||||
| self.assertEqual(res.children, ['aa', 'a']) | |||||
| def test_earley4(self): | def test_earley4(self): | ||||
| grammar = """ | grammar = """ | ||||
| @@ -211,7 +214,6 @@ def _make_full_earley_test(LEXER): | |||||
| l = Lark(grammar, parser='earley', lexer=LEXER) | l = Lark(grammar, parser='earley', lexer=LEXER) | ||||
| res = l.parse("aaa") | res = l.parse("aaa") | ||||
| # print(res.pretty()) | |||||
| self.assertEqual(res.children, ['aaa']) | self.assertEqual(res.children, ['aaa']) | ||||
| def test_earley_repeating_empty(self): | def test_earley_repeating_empty(self): | ||||
| @@ -242,7 +244,6 @@ def _make_full_earley_test(LEXER): | |||||
| parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit') | parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit') | ||||
| ambig_tree = parser.parse('ab') | ambig_tree = parser.parse('ab') | ||||
| # print(ambig_tree.pretty()) | |||||
| self.assertEqual( ambig_tree.data, '_ambig') | self.assertEqual( ambig_tree.data, '_ambig') | ||||
| self.assertEqual( len(ambig_tree.children), 2) | self.assertEqual( len(ambig_tree.children), 2) | ||||
| @@ -258,8 +259,6 @@ def _make_full_earley_test(LEXER): | |||||
| """ | """ | ||||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | ||||
| ambig_tree = l.parse('cde') | ambig_tree = l.parse('cde') | ||||
| # print(ambig_tree.pretty()) | |||||
| # tree = ApplyCallbacks(l.parser.parser.postprocess).transform(ambig_tree) | |||||
| assert ambig_tree.data == '_ambig', ambig_tree | assert ambig_tree.data == '_ambig', ambig_tree | ||||
| assert len(ambig_tree.children) == 2 | assert len(ambig_tree.children) == 2 | ||||
| @@ -304,7 +303,6 @@ def _make_full_earley_test(LEXER): | |||||
| """ | """ | ||||
| parser = Lark(grammar, ambiguity='explicit', lexer=LEXER) | parser = Lark(grammar, ambiguity='explicit', lexer=LEXER) | ||||
| tree = parser.parse('fruit flies like bananas') | tree = parser.parse('fruit flies like bananas') | ||||
| # tree = ApplyCallbacks(parser.parser.parser.postprocess).transform(ambig_tree) | |||||
| expected = Tree('_ambig', [ | expected = Tree('_ambig', [ | ||||
| Tree('comparative', [ | Tree('comparative', [ | ||||
| @@ -319,9 +317,6 @@ def _make_full_earley_test(LEXER): | |||||
| ]) | ]) | ||||
| ]) | ]) | ||||
| # print res.pretty() | |||||
| # print expected.pretty() | |||||
| # self.assertEqual(tree, expected) | # self.assertEqual(tree, expected) | ||||
| self.assertEqual(tree.data, expected.data) | self.assertEqual(tree.data, expected.data) | ||||
| self.assertEqual(set(tree.children), set(expected.children)) | self.assertEqual(set(tree.children), set(expected.children)) | ||||
| @@ -338,7 +333,6 @@ def _make_full_earley_test(LEXER): | |||||
| parser = _Lark(grammar, start='start', ambiguity='explicit') | parser = _Lark(grammar, start='start', ambiguity='explicit') | ||||
| tree = parser.parse(text) | tree = parser.parse(text) | ||||
| # print(tree.pretty()) | |||||
| self.assertEqual(tree.data, '_ambig') | self.assertEqual(tree.data, '_ambig') | ||||
| combinations = {tuple(str(s) for s in t.children) for t in tree.children} | combinations = {tuple(str(s) for s in t.children) for t in tree.children} | ||||
| @@ -1085,7 +1079,6 @@ def _make_parser_test(LEXER, PARSER): | |||||
| l = Lark(grammar, priority="invert") | l = Lark(grammar, priority="invert") | ||||
| res = l.parse('abba') | res = l.parse('abba') | ||||
| # print(res.pretty()) | |||||
| self.assertEqual(''.join(child.data for child in res.children), 'indirection') | self.assertEqual(''.join(child.data for child in res.children), 'indirection') | ||||
| grammar = """ | grammar = """ | ||||