import_paths->sources, source->source_path, various implementation changes

5 years ago · a50fc10773
--- a/lark-stubs/lark.pyi
+++ b/lark-stubs/lark.pyi
@@ -32,19 +32,19 @@ class LarkOptions:
    cache: Union[bool, str]
    g_regex_flags: int
    use_bytes: bool
    import_sources: List[Union[str, Callable[[str, str], str]]]
    source: Optional[str]
    import_paths: List[Union[str, Callable[[Optional[str], str], Tuple[str, str]]]]
    source_path: Optional[str]


 class FromPackageLoader:
    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...
    
    def __call__(self, base_paths: List[str], grammar_path: str) -> Tuple[str, str]: ...
    def __call__(self, base_paths: str, grammar_path: str) -> Tuple[str, str]: ...


 class Lark:
    source: str
    grammar_source: str
    source_path: str
    source_code: str
    options: LarkOptions
    lexer: Lexer
    terminals: List[TerminalDef]
@@ -68,8 +68,8 @@ class Lark:
        cache: Union[bool, str] = False,
        g_regex_flags: int = ...,
        use_bytes: bool = False,
        import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ...,
        source: Optional[str],
        import_paths: List[Union[str, Callable[[Optional[str], str], Tuple[str, str]]]] = ...,
        source_path: Optional[str],
    ):
        ...

--- a/lark/lark.py
+++ b/lark/lark.py
@@ -2,7 +2,7 @@ from __future__ import absolute_import

 import sys, os, pickle, hashlib
 from io import open

 from warnings import warn

 from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
 from .load_grammar import load_grammar, FromPackageLoader
@@ -90,10 +90,10 @@ class LarkOptions(Serialize):
            Accept an input of type ``bytes`` instead of ``str`` (Python 3 only).
    edit_terminals
            A callback for editing the terminals before parse.
    import_sources
    import_paths
            A List of either paths or loader functions to specify from where grammars are imported 
    source
            Override the source of from where the grammar was loaded. Usefull for relative imports and unconventional grammar loading
    source_path
            Override the path from which the grammar was loaded. Useful for relative imports and unconventional grammar loading

    **=== End Options ===**
    """
@@ -119,8 +119,8 @@ class LarkOptions(Serialize):
        'edit_terminals': None,
        'g_regex_flags': 0,
        'use_bytes': False,
        'import_sources': [],
        'source': None,
        'import_paths': [],
        'source_path': None,
    }

    def __init__(self, options_dict):
@@ -196,13 +196,13 @@ class Lark(Serialize):
            re_module = re

        # Some, but not all file-like objects have a 'name' attribute
        if self.options.source is None:
        if self.options.source_path is None:
            try:
                self.source = grammar.name
                self.source_path = grammar.name
            except AttributeError:
                self.source = '<string>'
                self.source_path = '<string>'
        else:
            self.source = self.options.source
            self.source_path = self.options.source_path

        # Drain file-like objects to get their contents
        try:
@@ -213,7 +213,7 @@ class Lark(Serialize):
            grammar = read()

        assert isinstance(grammar, STRING_TYPE)
        self.grammar_source = grammar
        self.source_code = grammar
        if self.options.use_bytes:
            if not isascii(grammar):
                raise ValueError("Grammar must be ascii only, when use_bytes=True")
@@ -276,7 +276,7 @@ class Lark(Serialize):
        assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )

        # Parse the grammar file and compose the grammars (TODO)
        self.grammar = load_grammar(grammar, self.source, re_module, self.options.import_sources)
        self.grammar = load_grammar(grammar, self.source_path, re_module, self.options.import_paths)

        # Compile the EBNF grammar into BNF
        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
@@ -374,7 +374,7 @@ class Lark(Serialize):
        self.options = LarkOptions.deserialize(options, memo)
        re_module = regex if self.options.regex else re
        self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
        self.source = '<deserialized>'
        self.source_path = '<deserialized>'
        self._prepare_callbacks()
        self.parser = self.parser_class.deserialize(
            data['parser'],
@@ -416,9 +416,7 @@ class Lark(Serialize):
        """Create an instance of Lark with the grammar loaded from within the package `package`.
        This allows grammar loading from zipapps.
        
        Will also create a `FromPackageLoader` instance and add it to the `import_sources` to simplify importing
        
        ``search_paths`` is passed to `FromPackageLoader`
        Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`
        
        Example:
            
@@ -426,15 +424,15 @@ class Lark(Serialize):
        """
        package = FromPackageLoader(package, search_paths)
        full_path, text = package([], grammar_path)
        options.setdefault('source', full_path)
        if 'import_sources' in options:
            options['import_sources'].append(package)
        options.setdefault('source_path', full_path)
        if 'import_paths' in options:
            options['import_paths'].append(package)
        else:
            options['import_sources'] = [package]
            options['import_paths'] = [package]
        return cls(text, **options)

    def __repr__(self):
        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)
        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)


    def lex(self, text):
@@ -481,6 +479,15 @@ class Lark(Serialize):
                        # Prevent infinite loop
                        raise e2
                    e = e2
    
    @property
    def source(self):
        warn("Lark.source attribute has been renamed to Lark.source_path", DeprecationWarning)
        return self.source_path
    
    @source.setter
    def source(self, value):
        self.source_path = value


 ###}
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -666,50 +666,61 @@ class FromPackageLoader(object):
    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_paths, grammar_path):
        if len(base_paths) == 0:
    def __call__(self, base_path, grammar_path):
        if base_path is None:
            to_try = self.search_paths
        else:
            assert len(base_paths) == 1
            if not base_paths[0].startswith('<%s:' % (self.pkg_name,)):
            # Check whether or not the importing grammar was loaded by this module.
            if not base_path.startswith('<%s:' % (self.pkg_name,)): 
                # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
                raise IOError()
            base_path = base_paths[0].partition(':')[2]
            if base_path and base_path[0] == '/':
                base_path = base_path[1:]
            # Separate the path and the pkg_name and throw away the slash. `pkgutil.get_data` doesn't like it. (see below)
            base_path = base_path.partition(':')[2].lstrip('/')
            to_try = [base_path]
        for path in to_try:
            full_path = os.path.join(path, grammar_path)
            text = None
            with suppress(IOError):
            try:
                text = pkgutil.get_data(self.pkg_name, full_path)
            if text is None:
            except IOError:
                continue
            return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
            else:
                # Custom format `<{pkg_name}:/{full_path}>`
                # These are the arguments to `pkgutil.get_data(pkg_name, full_path)`
                # Required since we cannot easily provide an actual file path for all package data (e.g. from inside a zip)
                
                # The additional slash after the `:` is to allow `os.path.split` to work on this without accidentally
                # throwing away the `pkg_name` (as it otherwise would inside of `GrammarLoader.load_grammar` when relative
                # imports are resolved).
                # Without the slash, `"<lark:common.lark>"` would turn into `""`, losing the package information.
                # With the slash, `"<lark:/common.lark>"` turns into `"<lark:"` without the slash, but
                # `"<lark:/grammars/common.lark>"` into `"<lark:/grammars"`, so we have to strip it away when we look at the path (see above)

                return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
        raise IOError()

 stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)


 _imported_grammars = {}
 def import_grammar(grammar_path, re_, base_paths=[], import_sources=[]):
 def import_grammar(grammar_path, re_, base_path=None, import_paths=[]):
    if grammar_path not in _imported_grammars:
        # import_sources take priority over base_paths since they should handle relative imports and ignore everthing else.
        # import_paths take priority over base_path since they should handle relative imports and ignore everything else.
        # Question: should the stdlib_loader really be pushed to the end?
        import_paths = import_sources + base_paths + [stdlib_loader] 
        for source in import_paths:
            text = None
            with suppress(IOError):
        to_try = import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader] 
        for source in to_try:
            try:
                if callable(source):
                    joined_path, text = source(base_paths, grammar_path)
                    joined_path, text = source(base_path, grammar_path)
                else:
                    joined_path = os.path.join(source, grammar_path)
                    with open(joined_path, encoding='utf8') as f:
                        text = f.read()
            if text is not None:
                # Don't load the grammar from within the suppress statement. Otherwise the underlying error message will be swallowed 
            except IOError:
                continue
            else:
                # Don't load the grammar from within the try statement. Otherwise the underlying error message will be swallowed 
                # and the wrong file will be reported as missing
                grammar = load_grammar(text, joined_path, re_, import_sources) 
                grammar = load_grammar(text, joined_path, re_, import_paths) 
                _imported_grammars[grammar_path] = grammar
                break
        else:
@@ -868,7 +879,7 @@ class GrammarLoader:
        self.canonize_tree = CanonizeTree()
        self.re_module = re_module

    def load_grammar(self, grammar_text, grammar_name='<?>', import_sources=[]):
    def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
        "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."

        try:
@@ -922,7 +933,7 @@ class GrammarLoader:
                    aliases = {name: arg1 or name}  # Aliases if exist

                if path_node.data == 'import_lib':  # Import from library
                    base_paths = []
                    base_path = None
                else:  # Relative import
                    if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
                        try:
@@ -932,16 +943,16 @@ class GrammarLoader:
                    else:
                        base_file = grammar_name  # Import relative to grammar file path if external grammar file
                    if base_file:
                        base_paths = [os.path.split(base_file)[0]]
                        base_path = os.path.split(base_file)[0]
                    else:
                        base_paths = [os.path.abspath(os.path.curdir)]
                        base_path = os.path.abspath(os.path.curdir)

                try:
                    import_base_paths, import_aliases = imports[dotted_path]
                    assert base_paths == import_base_paths, 'Inconsistent base_paths for %s.' % '.'.join(dotted_path)
                    import_base_path, import_aliases = imports[dotted_path]
                    assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
                    import_aliases.update(aliases)
                except KeyError:
                    imports[dotted_path] = base_paths, aliases
                    imports[dotted_path] = base_path, aliases

            elif stmt.data == 'declare':
                for t in stmt.children:
@@ -950,9 +961,9 @@ class GrammarLoader:
                assert False, stmt

        # import grammars
        for dotted_path, (base_paths, aliases) in imports.items():
        for dotted_path, (base_path, aliases) in imports.items():
            grammar_path = os.path.join(*dotted_path) + EXT
            g = import_grammar(grammar_path, self.re_module, base_paths=base_paths, import_sources=import_sources)
            g = import_grammar(grammar_path, self.re_module, base_path=base_path, import_paths=import_paths)
            new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)

            term_defs += new_td
@@ -1032,5 +1043,5 @@ class GrammarLoader:



 def load_grammar(grammar, source, re_, import_sources):
    return GrammarLoader(re_).load_grammar(grammar, source, import_sources)
 def load_grammar(grammar, source, re_, import_paths):
    return GrammarLoader(re_).load_grammar(grammar, source, import_paths)
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1792,7 +1792,7 @@ def _make_parser_test(LEXER, PARSER):
            %import ab.startab
            """

            p = _Lark(grammar, import_sources=[custom_loader])
            p = _Lark(grammar, import_paths=[custom_loader])
            self.assertEqual(p.parse('ab'),
                             Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))

@@ -1801,7 +1801,7 @@ def _make_parser_test(LEXER, PARSER):

            %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
            """
            p = _Lark(grammar, import_sources=[custom_loader])
            p = _Lark(grammar, import_paths=[custom_loader])
            x = p.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
            
@@ -1810,7 +1810,7 @@ def _make_parser_test(LEXER, PARSER):
            %import .test_relative_import (start, WS)
            %ignore WS
            """
            p = _Lark(grammar, import_sources=[custom_loader2])
            p = _Lark(grammar, import_paths=[custom_loader2])
            x = p.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])