From 688c453456459841ec4eaac30fecf58130753c1e Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 12 Jan 2019 16:56:58 +0200 Subject: [PATCH] Corrections --- lark/exceptions.py | 4 ++-- lark/lark.py | 20 +++++++++----------- lark/reconstruct.py | 7 ------- tests/test_parser.py | 17 +++++------------ 4 files changed, 16 insertions(+), 32 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index a6516ce..9a4e9a0 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -66,7 +66,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput): if allowed: message += '\nExpecting: %s\n' % allowed - super(UnexpectedCharacters, self).__init__(message.encode('utf-8')) + super(UnexpectedCharacters, self).__init__(message) @@ -84,7 +84,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): "Expected one of: \n\t* %s\n" % (token, self.line, self.column, '\n\t* '.join(self.expected))) - super(UnexpectedToken, self).__init__(message.encode('utf-8')) + super(UnexpectedToken, self).__init__(message) class VisitError(Exception): pass diff --git a/lark/lark.py b/lark/lark.py index f796330..8d34b21 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -42,12 +42,8 @@ class LarkOptions(object): cache_grammar - Cache the Lark grammar (Default: False) postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers. start - The start symbol (Default: start) -<<<<<<< HEAD profile - Measure run-time usage in Lark. Read results from the profiler proprety (Default: False) priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto) -======= - profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False) ->>>>>>> master propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. maybe_placeholders - Experimental feature. 
Instead of omitting optional rules (i.e. rule?), replace them with None @@ -76,9 +72,9 @@ class LarkOptions(object): assert self.parser in ('earley', 'lalr', 'cyk', None) - if self.ambiguity == 'explicit' and self.transformer: - raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm for explicit ambiguity.' - 'Please use your transformer on the resulting Forest, or use a different algorithm (i.e. LALR)') + if self.parser == 'earley' and self.transformer: + raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.' + 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') if o: raise ValueError("Unknown options: %s" % o.keys()) @@ -160,14 +156,16 @@ class Lark: disambig_parsers = ['earley', 'cyk'] assert self.options.parser in disambig_parsers, ( 'Only %s supports disambiguation right now') % ', '.join(disambig_parsers) - assert self.options.priority in ('auto', 'none', 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority) + if self.options.priority == 'auto': if self.options.parser in ('earley', 'cyk', ): self.options.priority = 'normal' elif self.options.parser in ('lalr', ): - self.options.priority = 'none' - if self.options.priority in ('invert', 'normal'): + self.options.priority = None + elif self.options.priority in ('invert', 'normal'): assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time" + + assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. 
options are auto, none, normal, invert.'.format(self.options.priority) assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) @@ -186,7 +184,7 @@ class Lark: # Else, if the user asked to disable priorities, strip them from the # rules. This allows the Earley parsers to skip an extra forest walk # for improved performance, if you don't need them (or didn't specify any). - elif self.options.priority == 'none': + elif self.options.priority == None: for rule in self.rules: if rule.options and rule.options.priority is not None: rule.options.priority = None diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 515c05d..1ab679e 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -100,17 +100,10 @@ class Reconstructor: for origin, rule_aliases in aliases.items(): for alias in rule_aliases: -<<<<<<< HEAD yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)])) yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin])) -======= - yield Rule(origin, [Terminal(alias)], MakeMatchTree(origin.name, [NonTerminal(alias)])) - - yield Rule(origin, [Terminal(origin.name)], MakeMatchTree(origin.name, [origin])) - ->>>>>>> master def _match(self, term, token): diff --git a/tests/test_parser.py b/tests/test_parser.py index 657dda0..4e8ae52 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -49,6 +49,7 @@ class TestParsers(unittest.TestCase): self.assertRaises(GrammarError, Lark, g, parser='lalr') + # TODO: should it? shouldn't it? 
# l = Lark(g, parser='earley', lexer='dynamic') # self.assertRaises(ParseError, l.parse, 'a') @@ -192,8 +193,10 @@ def _make_full_earley_test(LEXER): @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser") def test_earley3(self): - "Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)" + """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result) + By default, `+` should imitate regexp greedy-matching + """ grammar = """ start: A A A: "a"+ @@ -201,7 +204,7 @@ def _make_full_earley_test(LEXER): l = Lark(grammar, parser='earley', lexer=LEXER) res = l.parse("aaa") - self.assertEqual(res.children, ['a', 'aa']) + self.assertEqual(res.children, ['aa', 'a']) def test_earley4(self): grammar = """ @@ -211,7 +214,6 @@ def _make_full_earley_test(LEXER): l = Lark(grammar, parser='earley', lexer=LEXER) res = l.parse("aaa") -# print(res.pretty()) self.assertEqual(res.children, ['aaa']) def test_earley_repeating_empty(self): @@ -242,7 +244,6 @@ def _make_full_earley_test(LEXER): parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit') ambig_tree = parser.parse('ab') - # print(ambig_tree.pretty()) self.assertEqual( ambig_tree.data, '_ambig') self.assertEqual( len(ambig_tree.children), 2) @@ -258,8 +259,6 @@ def _make_full_earley_test(LEXER): """ l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) ambig_tree = l.parse('cde') - # print(ambig_tree.pretty()) -# tree = ApplyCallbacks(l.parser.parser.postprocess).transform(ambig_tree) assert ambig_tree.data == '_ambig', ambig_tree assert len(ambig_tree.children) == 2 @@ -304,7 +303,6 @@ def _make_full_earley_test(LEXER): """ parser = Lark(grammar, ambiguity='explicit', lexer=LEXER) tree = parser.parse('fruit flies like bananas') -# tree = ApplyCallbacks(parser.parser.parser.postprocess).transform(ambig_tree) expected = Tree('_ambig', [ Tree('comparative', [ @@ -319,9 +317,6 @@ def 
_make_full_earley_test(LEXER): ]) ]) - # print res.pretty() - # print expected.pretty() - # self.assertEqual(tree, expected) self.assertEqual(tree.data, expected.data) self.assertEqual(set(tree.children), set(expected.children)) @@ -338,7 +333,6 @@ def _make_full_earley_test(LEXER): parser = _Lark(grammar, start='start', ambiguity='explicit') tree = parser.parse(text) -# print(tree.pretty()) self.assertEqual(tree.data, '_ambig') combinations = {tuple(str(s) for s in t.children) for t in tree.children} @@ -1085,7 +1079,6 @@ def _make_parser_test(LEXER, PARSER): l = Lark(grammar, priority="invert") res = l.parse('abba') -# print(res.pretty()) self.assertEqual(''.join(child.data for child in res.children), 'indirection') grammar = """