From 53b3e12bba9359aa6ac4cd5ab973fac21428148f Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sat, 26 Sep 2020 21:04:54 +0200 Subject: [PATCH] Added `import_sources` --- lark-stubs/lark.pyi | 4 +++- lark/lark.py | 5 ++++- lark/load_grammar.py | 46 +++++++++++++++++++++++++++++--------------- tests/test_parser.py | 18 +++++++++++++++++ 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi index c39ae3d..deb8849 100644 --- a/lark-stubs/lark.pyi +++ b/lark-stubs/lark.pyi @@ -2,7 +2,7 @@ from typing import ( TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional, - Literal, Protocol, + Literal, Protocol, Tuple, ) from .visitors import Transformer from .lexer import Token, Lexer, TerminalDef @@ -32,6 +32,7 @@ class LarkOptions: cache: Union[bool, str] g_regex_flags: int use_bytes: bool + import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] class Lark: @@ -60,6 +61,7 @@ class Lark: cache: Union[bool, str] = False, g_regex_flags: int = ..., use_bytes: bool = False, + import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ..., ): ... diff --git a/lark/lark.py b/lark/lark.py index 8799610..9877b00 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -90,6 +90,8 @@ class LarkOptions: Accept an input of type ``bytes`` instead of ``str`` (Python 3 only). edit_terminals A callback for editing the terminals before parse. 
+ import_sources + A list of either paths or loader functions specifying where grammars are imported from **=== End Options ===** """ @@ -115,6 +117,7 @@ class LarkOptions: 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False, + 'import_sources': [], } def __init__(self, options_dict): @@ -267,7 +270,7 @@ class Lark(Serialize): assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', ) # Parse the grammar file and compose the grammars (TODO) - self.grammar = load_grammar(grammar, self.source, re_module) + self.grammar = load_grammar(grammar, self.source, re_module, self.options.import_sources) # Compile the EBNF grammar into BNF self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index cd36e4b..bb5f71a 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -20,7 +20,7 @@ from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transfo inline_args = v_args(inline=True) __path__ = os.path.dirname(__file__) -IMPORT_PATHS = [os.path.join(__path__, 'grammars')] +IMPORT_PATHS = ['grammars'] EXT = '.lark' @@ -648,19 +648,35 @@ class Grammar: return terminals, compiled_rules, self.ignore +def stdlib_loader(base_paths, grammar_path): + import pkgutil + for path in IMPORT_PATHS: + text = pkgutil.get_data('lark', path + '/' + grammar_path) + if text is None: + continue + return '', text.decode() + raise FileNotFoundError() + _imported_grammars = {} -def import_grammar(grammar_path, re_, base_paths=[]): +def import_grammar(grammar_path, re_, base_paths=(), import_sources=()): if grammar_path not in _imported_grammars: - import_paths = base_paths + IMPORT_PATHS - for import_path in import_paths: - with suppress(IOError): - joined_path = os.path.join(import_path, grammar_path) - with open(joined_path, encoding='utf8') as f: - text = f.read() - grammar = load_grammar(text, joined_path, re_) - 
_imported_grammars[grammar_path] = grammar - break + import_paths = import_sources + base_paths + [stdlib_loader] + for source in import_paths: + if isinstance(source, str): + with suppress(IOError): + joined_path = os.path.join(source, grammar_path) + with open(joined_path, encoding='utf8') as f: + text = f.read() + grammar = load_grammar(text, joined_path, re_, import_sources) + _imported_grammars[grammar_path] = grammar + break + else: + with suppress(IOError): + joined_path, text = source(base_paths, grammar_path) + grammar = load_grammar(text, joined_path, re_, import_sources) + _imported_grammars[grammar_path] = grammar + break else: open(grammar_path, encoding='utf8') assert False @@ -817,7 +833,7 @@ class GrammarLoader: self.canonize_tree = CanonizeTree() self.re_module = re_module - def load_grammar(self, grammar_text, grammar_name=''): + def load_grammar(self, grammar_text, grammar_name='', import_sources=[]): "Parse grammar_text, verify, and create Grammar object. Display nice messages on error." 
try: @@ -901,7 +917,7 @@ class GrammarLoader: # import grammars for dotted_path, (base_paths, aliases) in imports.items(): grammar_path = os.path.join(*dotted_path) + EXT - g = import_grammar(grammar_path, self.re_module, base_paths=base_paths) + g = import_grammar(grammar_path, self.re_module, base_paths=base_paths, import_sources=import_sources) new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) term_defs += new_td @@ -981,5 +997,5 @@ class GrammarLoader: -def load_grammar(grammar, source, re_): - return GrammarLoader(re_).load_grammar(grammar, source) +def load_grammar(grammar, source, re_, import_sources): + return GrammarLoader(re_).load_grammar(grammar, source, import_sources) diff --git a/tests/test_parser.py b/tests/test_parser.py index 83336c5..6779f64 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1782,6 +1782,24 @@ def _make_parser_test(LEXER, PARSER): """ self.assertRaises(IOError, _Lark, grammar) + def test_import_custom_sources(self): + def custom_loader(base_paths, grammar_path): + import pkgutil + text = pkgutil.get_data('tests', 'grammars/' + grammar_path) + if text is None: + raise FileNotFoundError() + return '', text.decode() + + grammar = """ + start: startab + + %import ab.startab + """ + + p = _Lark(grammar, import_sources=[custom_loader]) + self.assertEqual(p.parse('ab'), + Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])])) + @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") def test_earley_prioritization(self): "Tests effect of priority on result"