From 4e96b96bb55fee49a249e9e4417df03d83676177 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 12 Sep 2021 20:41:45 +0100 Subject: [PATCH 1/4] Various backwards incompatible fixes for v1.0 --- lark/exceptions.py | 11 +++++++---- lark/lark.py | 9 +++------ lark/load_grammar.py | 4 +--- lark/visitors.py | 2 +- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 662d55a..deea929 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -74,7 +74,11 @@ class UnexpectedInput(LarkError): after = text[pos:end].split(b'\n', 1)[0] return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") - def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> Optional[T]: + def match_examples(self, parse_fn: 'Callable[[str], Tree]', + examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], + token_type_match_fallback: bool=False, + use_accepts: bool=True + ) -> Optional[T]: """Allows you to detect what's wrong in the input text by matching against example errors. @@ -89,8 +93,7 @@ class UnexpectedInput(LarkError): Parameters: parse_fn: parse function (usually ``lark_instance.parse``) examples: dictionary of ``{'example_string': value}``. - use_accepts: Recommended to call this with ``use_accepts=True``. - The default is ``False`` for backwards compatibility. + use_accepts: Recommended to keep this as ``use_accepts=True``. """ assert self.state is not None, "Not supported for this exception" @@ -106,7 +109,7 @@ class UnexpectedInput(LarkError): parse_fn(malformed) except UnexpectedInput as ut: if ut.state == self.state: - if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts: + if use_accepts and hasattr(self, 'accepts') and hasattr(ut, 'accepts') and ut.accepts != self.accepts: logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % (self.state, self.accepts, ut.accepts, i, j)) continue diff --git a/lark/lark.py b/lark/lark.py index 78ed2ea..8c6fcd3 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -82,9 +82,8 @@ class LarkOptions(Serialize): Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. maybe_placeholders When ``True``, the ``[]`` operator returns ``None`` when not matched. - When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. - (default= ``False``. Recommended to set to ``True``) + (default= ``True``) cache Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. @@ -164,7 +163,7 @@ class LarkOptions(Serialize): 'regex': False, 'propagate_positions': False, 'lexer_callbacks': {}, - 'maybe_placeholders': False, + 'maybe_placeholders': True, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False, @@ -304,7 +303,7 @@ class Lark(Serialize): if self.options.cache is not True: raise ConfigurationError("cache argument must be bool or str") # Python2.7 doesn't support * syntax in tuples - cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % ((cache_md5,) + sys.version_info[:2]) + cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % (cache_md5, *sys.version_info[:2]) if FS.exists(cache_fn): logger.debug('Loading grammar from cache: %s', cache_fn) @@ -368,7 +367,6 @@ class Lark(Serialize): if self.options.priority not in _VALID_PRIORITY_OPTIONS: raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS)) - assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) @@ -387,7 +385,6 @@ class Lark(Serialize): self._terminals_dict = {t.name: t for t in self.terminals} # If the user asked to invert the priorities, negate them all here. - # This replaces the old 'resolve__antiscore_sum' option. if self.options.priority == 'invert': for rule in self.rules: if rule.options.priority is not None: diff --git a/lark/load_grammar.py b/lark/load_grammar.py index c0503e6..4d73e61 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -6,7 +6,6 @@ from collections import namedtuple from copy import copy, deepcopy import pkgutil from ast import literal_eval -from numbers import Integral from contextlib import suppress from typing import List, Tuple, Union, Callable, Dict, Optional @@ -1067,8 +1066,7 @@ class GrammarBuilder: if self._is_term(name): if options is None: options = 1 - # if we don't use Integral here, we run into python2.7/python3 problems with long vs int - elif not isinstance(options, Integral): + elif not isinstance(options, int): raise GrammarError("Terminal require a single int as 'options' (e.g. priority), got %s" % (type(options),)) else: if options is None: diff --git a/lark/visitors.py b/lark/visitors.py index 2c7309f..60923b3 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -385,7 +385,7 @@ def _vargs_inline(f, _data, children, _meta): def _vargs_meta_inline(f, _data, children, meta): return f(meta, *children) def _vargs_meta(f, _data, children, meta): - return f(children, meta) # TODO swap these for consistency? Backwards incompatible! + return f(meta, children) def _vargs_tree(f, data, children, meta): return f(Tree(data, children, meta)) From 5eecb7f5c91df83ae055722d229c6dd2744731d2 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 12 Sep 2021 20:45:07 +0100 Subject: [PATCH 2/4] Remove old comment --- lark/lark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/lark.py b/lark/lark.py index 8c6fcd3..380c32a 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -302,7 +302,7 @@ class Lark(Serialize): else: if self.options.cache is not True: raise ConfigurationError("cache argument must be bool or str") - # Python2.7 doesn't support * syntax in tuples + cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % (cache_md5, *sys.version_info[:2]) if FS.exists(cache_fn): From 5eb348481b4579c9bfcd03f4b7e05f406b3f7adc Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 12 Sep 2021 20:55:59 +0100 Subject: [PATCH 3/4] Added CHANGELOG.md --- lark/CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 lark/CHANGELOG.md diff --git a/lark/CHANGELOG.md b/lark/CHANGELOG.md new file mode 100644 index 0000000..9fdb072 --- /dev/null +++ b/lark/CHANGELOG.md @@ -0,0 +1,6 @@ +v1.0 + +- `maybe_placeholders` is now True by default + +- `use_accepts` in `UnexpectedInput.match_examples()` is now True by default + From 19b2aa934fc69ce1229c85887f9e6c07a12b1272 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 12 Sep 2021 21:02:00 +0100 Subject: [PATCH 4/4] Fix tests for PR --- tests/test_parser.py | 4 ++-- tests/test_reconstructor.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index ebc6152..dab69f7 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -208,11 +208,11 @@ class TestParsers(unittest.TestCase): @v_args(meta=True) class T1(Transformer): - def a(self, children, meta): + def a(self, meta, children): assert not children return meta.line - def start(self, children, meta): + def start(self, meta, children): return children @v_args(meta=True, inline=True) diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py index e2f2dbe..4df1cb9 100644 --- a/tests/test_reconstructor.py +++ b/tests/test_reconstructor.py @@ -183,8 +183,8 @@ class TestReconstructor(TestCase): keyword x += y """ - l1 = Lark(g1, parser='lalr') - l2 = Lark(g2, parser='lalr') + l1 = Lark(g1, parser='lalr', maybe_placeholders=False) + l2 = Lark(g2, parser='lalr', maybe_placeholders=False) r = Reconstructor(l2) tree = l1.parse(code)