
Merge pull request #816 from lark-parser/MegaIng-grammar_builder

Erez Shinan committed 5 years ago (via GitHub)
commit f7442d3be6
11 changed files with 600 additions and 373 deletions
1. docs/grammar.md (+20, -1)
2. examples/advanced/extend_python.py (+46, -0)
3. lark-stubs/__init__.pyi (+1, -0)
4. lark-stubs/grammar.pyi (+9, -0)
5. lark-stubs/lark.pyi (+3, -1)
6. lark-stubs/load_grammar.pyi (+28, -0)
7. lark/grammars/common.lark (+1, -1)
8. lark/lark.py (+44, -39)
9. lark/load_grammar.py (+319, -268)
10. tests/test_grammar.py (+129, -5)
11. tests/test_parser.py (+0, -58)

docs/grammar.md (+20, -1)

@@ -291,7 +291,7 @@ Declare a terminal without defining it. Useful for plugins.


### %override


Override a rule, affecting all the rules that refer to it.
Override a rule or terminal, affecting all references to it, even in imported grammars.


Useful for implementing an inheritance pattern when importing grammars.


@@ -302,3 +302,22 @@ Useful for implementing an inheritance pattern when importing grammars.
// Add hex support to my_grammar
%override number: NUMBER | /0x\w+/
```

### %extend

Extend the definition of a rule or terminal by adding a new alternative to what it can match, as if it had been appended with `|`.

Useful for splitting the definition of a complex rule with many different options across multiple files.

Can also be used to implement a plugin system where a core grammar is extended by others.


**Example:**
```perl
%import my_grammar (start, NUMBER)

// Add hex support to my_grammar
%extend NUMBER: /0x\w+/
```

For both `%extend` and `%override`, there is no requirement for the rule or terminal to come from another file, but that is probably the most common use case.
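As a quick illustration of the prose above, here is a minimal, self-contained sketch (not part of this commit) that extends a rule defined in the same grammar; it assumes a Lark version that includes this pull request:

```python
from lark import Lark

# Minimal sketch: %extend adds an alternative to a rule defined in the same
# grammar, exactly as if it had been written with an extra `|` branch.
grammar = r"""
start: value+

value: "a"
%extend value: "b"   // now equivalent to:  value: "a" | "b"

%ignore " "
"""

parser = Lark(grammar)
print(parser.parse("a b a b").pretty())
```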

examples/advanced/extend_python.py (+46, -0)

@@ -0,0 +1,46 @@
"""
Extend the Python Grammar
==============================

This example demonstrates how to use the `%extend` statement,
to add new syntax to the example Python grammar.

"""

from lark.lark import Lark
from python_parser import PythonIndenter

GRAMMAR = r"""
%import .python3 (compound_stmt, single_input, file_input, eval_input, test, suite, _NEWLINE, _INDENT, _DEDENT, COMMENT)

%extend compound_stmt: match_stmt

match_stmt: "match" test ":" cases
cases: _NEWLINE _INDENT case+ _DEDENT

case: "case" test ":" suite // test is not quite correct.

%ignore /[\t \f]+/ // WS
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
%ignore COMMENT
"""

parser = Lark(GRAMMAR, parser='lalr', start=['single_input', 'file_input', 'eval_input'], postlex=PythonIndenter())

tree = parser.parse(r"""
def name(n):
    match n:
        case 1:
            print("one")
        case 2:
            print("two")
        case _:
            print("number is too big")

""", start='file_input')

# Remove the 'python3__' prefix that was added to the implicitly imported rules.
for t in tree.iter_subtrees():
    t.data = t.data.rsplit('__', 1)[-1]

print(tree.pretty())

lark-stubs/__init__.pyi (+1, -0)

@@ -4,6 +4,7 @@ from .tree import *
from .visitors import *
from .exceptions import *
from .lexer import *
from .load_grammar import *
from .lark import *
from logging import Logger as _Logger




lark-stubs/grammar.pyi (+9, -0)

@@ -0,0 +1,9 @@
from typing import Optional, Tuple


class RuleOptions:
    keep_all_tokens: bool
    expand1: bool
    priority: int
    template_source: Optional[str]
    empty_indices: Tuple[bool, ...]
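In the new API, `RuleOptions` instances appear as the last element of each entry in `Grammar.rule_defs` (see `load_grammar.pyi` below). A rough sketch of how they can be inspected, assuming a Lark version that includes this pull request:

```python
from lark.load_grammar import GrammarBuilder

# Build a one-rule grammar; the leading '!' sets keep_all_tokens on the rule.
builder = GrammarBuilder()
builder.load_grammar('!start: "a" "b"\n')
grammar = builder.build()

# Each rule_defs entry is (name, params, expansion_tree, RuleOptions).
(name, params, tree, options), = grammar.rule_defs
print(name, options.keep_all_tokens)   # expected: start True
```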

lark-stubs/lark.pyi (+3, -1)

@@ -8,6 +8,7 @@ from .visitors import Transformer
from .lexer import Token, Lexer, TerminalDef
from .tree import Tree
from .exceptions import UnexpectedInput
from .load_grammar import Grammar


_T = TypeVar('_T')


@@ -54,13 +55,14 @@ class FromPackageLoader:
class Lark:
source_path: str
source_grammar: str
grammar: Grammar
options: LarkOptions
lexer: Lexer
terminals: List[TerminalDef]


def __init__(
self,
grammar: Union[str, IO[str]],
grammar: Union[Grammar, str, IO[str]],
*,
start: Union[None, str, List[str]] = "start",
parser: Literal["earley", "lalr", "cyk"] = "auto",


lark-stubs/load_grammar.pyi (+28, -0)

@@ -0,0 +1,28 @@
from typing import List, Tuple, Union, Callable, Dict, Optional

from lark import Tree
from lark.grammar import RuleOptions


class Grammar:
    rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
    term_defs: List[Tuple[str, Tuple[Tree, int]]]
    ignore: List[str]


class GrammarBuilder:
    global_keep_all_tokens: bool
    import_paths: List[Union[str, Callable]]

    def __init__(self, global_keep_all_tokens=..., import_paths=...): ...

    def load_grammar(self, grammar_text: str, grammar_name: str = ..., mangle: Callable[[str], str] = None): ...

    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str],
                  base_mangle: Callable[[str], str] = None): ...

    def get_mangle(self, prefix: str, aliases: Dict[str, str], base_mangle: Callable[[str], str] = None): ...

    def check(self): ...

    def build(self) -> Grammar: ...
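The stub above mirrors the `GrammarBuilder` API introduced in `lark/load_grammar.py` further down. A rough usage sketch, assuming a Lark version that includes this pull request (exact runtime signatures may differ slightly from the stub):

```python
from lark import Lark
from lark.load_grammar import GrammarBuilder

builder = GrammarBuilder()
builder.load_grammar(r"""
start: WORD+
WORD: /\w+/
%ignore " "
""")
grammar = builder.build()   # -> load_grammar.Grammar

# The Grammar object exposes the collected definitions ...
print([name for name, *_ in grammar.rule_defs])   # rule names, e.g. ['start']
print([name for name, _ in grammar.term_defs])    # terminal names, incl. the %ignore terminal

# ... and can be handed directly to Lark (see the lark.pyi change above).
parser = Lark(grammar)
print(parser.parse("hello world").children)
```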

lark/grammars/common.lark (+1, -1)

@@ -55,5 +55,5 @@ NEWLINE: (CR? LF)+
// Comments
SH_COMMENT: /#[^\n]*/
CPP_COMMENT: /\/\/[^\n]*/
C_COMMENT: "/*" /.*?/s "*/"
C_COMMENT: "/*" /(.|\n)*?/ "*/"
SQL_COMMENT: /--[^\n]*/
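For context on the `C_COMMENT` change: the two bodies are equivalent ways of matching across newlines, `.` under the `s` (DOTALL) flag versus an explicit `(.|\n)` alternative. A quick illustrative check using Python's `re` module (an approximation of the idea, not Lark's own matching code):

```python
import re

# Illustrative only: both patterns lazily match a multi-line C comment.
text = "/* a\n   multi-line\n   comment */ int x;"

dotall      = re.match(r"/\*.*?\*/", text, re.S)   # like  "/*" /.*?/s "*/"
alternative = re.match(r"/\*(.|\n)*?\*/", text)    # like  "/*" /(.|\n)*?/ "*/"

assert dotall.group(0) == alternative.group(0)
print(dotall.group(0))
```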

lark/lark.py (+44, -39)

@@ -7,7 +7,7 @@ import tempfile
from warnings import warn


from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar, FromPackageLoader
from .load_grammar import load_grammar, FromPackageLoader, Grammar
from .tree import Tree
from .common import LexerConf, ParserConf


@@ -234,42 +234,50 @@ class Lark(Serialize):
else:
grammar = read()


assert isinstance(grammar, STRING_TYPE)
self.source_grammar = grammar
if self.options.use_bytes:
if not isascii(grammar):
raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
raise ConfigurationError("`use_bytes=True` may have issues on python2."
"Use `use_bytes='force'` to use it at your own risk.")

cache_fn = None
if self.options.cache:
if self.options.parser != 'lalr':
raise ConfigurationError("cache only works with parser='lalr' for now")
if isinstance(self.options.cache, STRING_TYPE):
cache_fn = self.options.cache
else:
if self.options.cache is not True:
raise ConfigurationError("cache argument must be bool or str")
unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals')
from . import __version__
options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
s = grammar + options_str + __version__
md5 = hashlib.md5(s.encode()).hexdigest()
cache_fn = tempfile.gettempdir() + '/.lark_cache_%s.tmp' % md5

if FS.exists(cache_fn):
logger.debug('Loading grammar from cache: %s', cache_fn)
# Remove options that aren't relevant for loading from cache
for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
del options[name]
with FS.open(cache_fn, 'rb') as f:
try:
self._load(f, **options)
except Exception:
raise RuntimeError("Failed to load Lark from cache: %r. Try to delete the file and run again." % cache_fn)
return
if isinstance(grammar, STRING_TYPE):
self.source_grammar = grammar
if self.options.use_bytes:
if not isascii(grammar):
raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
raise ConfigurationError("`use_bytes=True` may have issues on python2."
"Use `use_bytes='force'` to use it at your own risk.")
if self.options.cache:
if self.options.parser != 'lalr':
raise ConfigurationError("cache only works with parser='lalr' for now")
if isinstance(self.options.cache, STRING_TYPE):
cache_fn = self.options.cache
else:
if self.options.cache is not True:
raise ConfigurationError("cache argument must be bool or str")
unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals')
from . import __version__
options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
s = grammar + options_str + __version__
md5 = hashlib.md5(s.encode()).hexdigest()
cache_fn = tempfile.gettempdir() + '/.lark_cache_%s.tmp' % md5
if FS.exists(cache_fn):
logger.debug('Loading grammar from cache: %s', cache_fn)
# Remove options that aren't relevant for loading from cache
for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
del options[name]
with FS.open(cache_fn, 'rb') as f:
try:
self._load(f, **options)
except Exception:
raise RuntimeError("Failed to load Lark from cache: %r. Try to delete the file and run again." % cache_fn)
return


# Parse the grammar file and compose the grammars
self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
else:
assert isinstance(grammar, Grammar)
self.grammar = grammar


if self.options.lexer == 'auto':
if self.options.parser == 'lalr':
@@ -301,9 +309,6 @@ class Lark(Serialize):
if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))


# Parse the grammar file and compose the grammars
self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)

if self.options.postlex is not None:
terminals_to_keep = set(self.options.postlex.always_accept)
else:
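The net effect of the restructured constructor is that `Lark` now also accepts an already-built `Grammar` object, so a grammar can be loaded once and reused. A hedged sketch of that usage, assuming a Lark version that includes this pull request:

```python
from lark import Lark
from lark.load_grammar import load_grammar

text = r"""
start: NAME "=" NAME
NAME: /\w+/
%ignore " "
"""

# Build the Grammar object once ...
grammar = load_grammar(text, "<example>", import_paths=[], global_keep_all_tokens=False)

# ... then reuse it for differently configured parsers without re-parsing the text.
earley_parser = Lark(grammar)
lalr_parser = Lark(grammar, parser="lalr")

print(earley_parser.parse("a = b").pretty())
print(lalr_parser.parse("a = b").pretty())
```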


lark/load_grammar.py (+319, -268)

@@ -6,6 +6,7 @@ from copy import copy, deepcopy
from io import open
import pkgutil
from ast import literal_eval
from numbers import Integral


from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start
from .lexer import Token, TerminalDef, PatternStr, PatternRE
@@ -95,6 +96,7 @@ TERMINALS = {
'_IGNORE': r'%ignore',
'_OVERRIDE': r'%override',
'_DECLARE': r'%declare',
'_EXTEND': r'%extend',
'_IMPORT': r'%import',
'NUMBER': r'[+-]?\d+',
}
@@ -102,7 +104,7 @@ TERMINALS = {
RULES = {
'start': ['_list'],
'_list': ['_item', '_list _item'],
'_item': ['rule', 'term', 'statement', '_NL'],
'_item': ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'],


'rule': ['RULE template_params _COLON expansions _NL',
'RULE template_params _DOT NUMBER _COLON expansions _NL'],
@@ -149,8 +151,10 @@ RULES = {


'term': ['TERMINAL _COLON expansions _NL',
'TERMINAL _DOT NUMBER _COLON expansions _NL'],
'statement': ['ignore', 'import', 'declare', 'override_rule'],
'override_rule': ['_OVERRIDE rule'],
'override': ['_OVERRIDE rule',
'_OVERRIDE term'],
'extend': ['_EXTEND rule',
'_EXTEND term'],
'ignore': ['_IGNORE expansions _NL'],
'declare': ['_DECLARE _declare_args _NL'],
'import': ['_IMPORT _import_path _NL',
@@ -298,15 +302,6 @@ class RuleTreeToText(Transformer):
return expansion, alias.value




@inline_args
class CanonizeTree(Transformer_InPlace):
def tokenmods(self, *args):
if len(args) == 1:
return list(args)
tokenmods, value = args
return tokenmods + [value]


class PrepareAnonTerminals(Transformer_InPlace):
"""Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""


@@ -546,10 +541,6 @@ class PrepareSymbols(Transformer_InPlace):
assert False




def _choice_of_rules(rules):
return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules])


def nr_deepcopy_tree(t):
"""Deepcopy tree `t` without recursion"""
return Transformer_NonRecursive(False).transform(t)
@@ -736,69 +727,14 @@ class FromPackageLoader(object):


stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)


_imported_grammars = {}



def import_from_grammar_into_namespace(grammar, namespace, aliases):
"""Returns all rules and terminals of grammar, prepended
with a 'namespace' prefix, except for those which are aliased.
"""


imported_terms = dict(grammar.term_defs)
imported_rules = {n:(n,p,deepcopy(t),o) for n,p,t,o in grammar.rule_defs}

term_defs = []
rule_defs = []

def rule_dependencies(symbol):
if symbol.type != 'RULE':
return []
try:
_, params, tree,_ = imported_rules[symbol]
except KeyError:
raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace))
return _find_used_symbols(tree) - set(params)

def get_namespace_name(name, params):
if params is not None:
try:
return params[name]
except KeyError:
pass
try:
return aliases[name].value
except KeyError:
if name[0] == '_':
return '_%s__%s' % (namespace, name[1:])
return '%s__%s' % (namespace, name)

to_import = list(bfs(aliases, rule_dependencies))
for symbol in to_import:
if symbol.type == 'TERMINAL':
term_defs.append([get_namespace_name(symbol, None), imported_terms[symbol]])
else:
assert symbol.type == 'RULE'
_, params, tree, options = imported_rules[symbol]
params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s') % (namespace, p) for p in params}
for t in tree.iter_subtrees():
for i, c in enumerate(t.children):
if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
t.children[i] = Token(c.type, get_namespace_name(c, params_map))
params = [params_map[p] for p in params] # We can not rely on ordered dictionaries
rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options))

return term_defs, rule_defs


def resolve_term_references(term_defs):
def resolve_term_references(term_dict):
# TODO Solve with transitive closure (maybe)


term_dict = {k:t for k, (t,_p) in term_defs}
assert len(term_dict) == len(term_defs), "Same name defined twice?"

while True:
changed = False
for name, (token_tree, _p) in term_defs:
for name, token_tree in term_dict.items():
if token_tree is None: # Terminal added through %declare
continue
for exp in token_tree.find_data('value'):
@@ -859,8 +795,25 @@ def _find_used_symbols(tree):
for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}




class GrammarLoader:
ERRORS = [
def _get_parser():
try:
return _get_parser.cache
except AttributeError:
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
for r, _p, xs, o in rules for i, x in enumerate(xs)]
callback = ParseTreeBuilder(rules, ST).create_callback()
import re
lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT'])
parser_conf = ParserConf(rules, callback, ['start'])
lexer_conf.lexer_type = 'standard'
parser_conf.parser_type = 'lalr'
_get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, {})
return _get_parser.cache

GRAMMAR_ERRORS = [
('Unclosed parenthesis', ['a: (\n']),
('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
@@ -874,120 +827,202 @@ class GrammarLoader:
('%ignore expects a value', ['%ignore %import\n']),
]


def __init__(self, global_keep_all_tokens):
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
def _parse_grammar(text, name, start='start'):
try:
tree = _get_parser().parse(text + '\n', start)
except UnexpectedCharacters as e:
context = e.get_context(text)
raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
(e.line, e.column, name, context))
except UnexpectedToken as e:
context = e.get_context(text)
error = e.match_examples(_get_parser().parse, GRAMMAR_ERRORS, use_accepts=True)
if error:
raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
elif 'STRING' in e.expected:
raise GrammarError("Expecting a value at line %s column %s\n\n%s" % (e.line, e.column, context))
raise

return PrepareGrammar().transform(tree)


def _get_mangle(prefix, aliases, base_mangle=None):
def mangle(s):
if s in aliases:
s = aliases[s]
else:
if s[0] == '_':
s = '_%s__%s' % (prefix, s[1:])
else:
s = '%s__%s' % (prefix, s)
if base_mangle is not None:
s = base_mangle(s)
return s
return mangle

def _mangle_exp(exp, mangle):
if mangle is None:
return exp
exp = deepcopy(exp) # TODO: is this needed
for t in exp.iter_subtrees():
for i, c in enumerate(t.children):
if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
t.children[i] = Token(c.type, mangle(c.value))
return exp



class GrammarBuilder:
def __init__(self, global_keep_all_tokens=False, import_paths=None):
self.global_keep_all_tokens = global_keep_all_tokens
self.import_paths = import_paths or []

self._definitions = {}
self._ignore_names = []

def _is_term(self, name):
# Imported terminals are of the form `Path__to__Grammar__file__TERMINAL_NAME`
# Only the last part is the actual name, and the rest might contain mixed case
return name.rpartition('__')[-1].isupper()

def _grammar_error(self, msg, *names):
args = {}
for i, name in enumerate(names, start=1):
postfix = '' if i == 1 else str(i)
args['name' + postfix] = name
args['type' + postfix] = lowercase_type = ("rule", "terminal")[self._is_term(name)]
args['Type' + postfix] = lowercase_type.title()
raise GrammarError(msg.format(**args))

def _check_options(self, name, options):
if self._is_term(name):
if options is None:
options = 1
# if we don't use Integral here, we run into python2.7/python3 problems with long vs int
elif not isinstance(options, Integral):
raise GrammarError("Terminals require a single int as 'options' (e.g. priority), got %s" % (type(options),))
else:
if options is None:
options = RuleOptions()
elif not isinstance(options, RuleOptions):
raise GrammarError("Rules require a RuleOptions instance as 'options'")
if self.global_keep_all_tokens:
options.keep_all_tokens = True
return options


def _define(self, name, exp, params=(), options=None, override=False):
if name in self._definitions:
if not override:
self._grammar_error("{Type} '{name}' defined more than once", name)
elif override:
self._grammar_error("Cannot override a nonexisting {type} {name}", name)

if name.startswith('__'):
self._grammar_error('Names starting with double-underscore are reserved (Error at {name})', name)

self._definitions[name] = (params, exp, self._check_options(name, options))

def _extend(self, name, exp, params=(), options=None):
if name not in self._definitions:
self._grammar_error("Can't extend {type} {name} as it wasn't defined before", name)
if tuple(params) != tuple(self._definitions[name][0]):
self._grammar_error("Cannot extend {type} with different parameters: {name}", name)
# TODO: think about what to do with 'options'
base = self._definitions[name][1]

while len(base.children) == 2:
assert isinstance(base.children[0], Tree) and base.children[0].data == 'expansions', base
base = base.children[0]
base.children.insert(0, exp)

def _ignore(self, exp_or_name):
if isinstance(exp_or_name, str):
self._ignore_names.append(exp_or_name)
else:
assert isinstance(exp_or_name, Tree)
t = exp_or_name
if t.data == 'expansions' and len(t.children) == 1:
t2 ,= t.children
if t2.data=='expansion' and len(t2.children) == 1:
item ,= t2.children
if item.data == 'value':
item ,= item.children
if isinstance(item, Token) and item.type == 'TERMINAL':
self._ignore_names.append(item.value)
return


rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
for r, _p, xs, o in rules for i, x in enumerate(xs)]
callback = ParseTreeBuilder(rules, ST).create_callback()
import re
lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT'])
parser_conf = ParserConf(rules, callback, ['start'])
lexer_conf.lexer_type = 'standard'
parser_conf.parser_type = 'lalr'
self.parser = ParsingFrontend(lexer_conf, parser_conf, {})
name = '__IGNORE_%d'% len(self._ignore_names)
self._ignore_names.append(name)
self._definitions[name] = ((), t, 1)


self.canonize_tree = CanonizeTree()
self.global_keep_all_tokens = global_keep_all_tokens
def _declare(self, *names):
for name in names:
self._define(name, None)


def import_grammar(self, grammar_path, base_path=None, import_paths=[]):
if grammar_path not in _imported_grammars:
# import_paths take priority over base_path since they should handle relative imports and ignore everything else.
to_try = import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
for source in to_try:
def _unpack_import(self, stmt, grammar_name):
if len(stmt.children) > 1:
path_node, arg1 = stmt.children
else:
path_node, = stmt.children
arg1 = None

if isinstance(arg1, Tree): # Multi import
dotted_path = tuple(path_node.children)
names = arg1.children
aliases = dict(zip(names, names)) # Can't have aliased multi import, so all aliases will be the same as names
else: # Single import
dotted_path = tuple(path_node.children[:-1])
if not dotted_path:
name ,= path_node.children
raise GrammarError("Nothing was imported from grammar `%s`" % name)
name = path_node.children[-1] # Get name from dotted path
aliases = {name.value: (arg1 or name).value} # Aliases if exist

if path_node.data == 'import_lib': # Import from library
base_path = None
else: # Relative import
if grammar_name == '<string>': # Import relative to script file path if grammar is coded in script
try:
if callable(source):
joined_path, text = source(base_path, grammar_path)
else:
joined_path = os.path.join(source, grammar_path)
with open(joined_path, encoding='utf8') as f:
text = f.read()
except IOError:
continue
else:
grammar = self.load_grammar(text, joined_path, import_paths)
_imported_grammars[grammar_path] = grammar
break
base_file = os.path.abspath(sys.modules['__main__'].__file__)
except AttributeError:
base_file = None
else:
# Search failed. Make Python throw a nice error.
open(grammar_path, encoding='utf8')
assert False

return _imported_grammars[grammar_path]

def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
"""Parse grammar_text, verify, and create Grammar object. Display nice messages on error."""

try:
tree = self.canonize_tree.transform(self.parser.parse(grammar_text+'\n'))
except UnexpectedCharacters as e:
context = e.get_context(grammar_text)
raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
(e.line, e.column, grammar_name, context))
except UnexpectedToken as e:
context = e.get_context(grammar_text)
error = e.match_examples(self.parser.parse, self.ERRORS, use_accepts=True)
if error:
raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
elif 'STRING' in e.expected:
raise GrammarError("Expecting a value at line %s column %s\n\n%s" % (e.line, e.column, context))
raise

tree = PrepareGrammar().transform(tree)

# Extract grammar items
defs = classify(tree.children, lambda c: c.data, lambda c: c.children)
term_defs = defs.pop('term', [])
rule_defs = defs.pop('rule', [])
statements = defs.pop('statement', [])
assert not defs

term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs]
term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs]
rule_defs = [options_from_rule(*x) for x in rule_defs]

# Execute statements
ignore, imports = [], {}
overriding_rules = []
for (stmt,) in statements:
if stmt.data == 'ignore':
t ,= stmt.children
ignore.append(t)
elif stmt.data == 'import':
if len(stmt.children) > 1:
path_node, arg1 = stmt.children
base_file = grammar_name # Import relative to grammar file path if external grammar file
if base_file:
if isinstance(base_file, PackageResource):
base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
else:
path_node ,= stmt.children
arg1 = None

if isinstance(arg1, Tree): # Multi import
dotted_path = tuple(path_node.children)
names = arg1.children
aliases = dict(zip(names, names)) # Can't have aliased multi import, so all aliases will be the same as names
else: # Single import
dotted_path = tuple(path_node.children[:-1])
name = path_node.children[-1] # Get name from dotted path
aliases = {name: arg1 or name} # Aliases if exist

if path_node.data == 'import_lib': # Import from library
base_path = None
else: # Relative import
if grammar_name == '<string>': # Import relative to script file path if grammar is coded in script
try:
base_file = os.path.abspath(sys.modules['__main__'].__file__)
except AttributeError:
base_file = None
else:
base_file = grammar_name # Import relative to grammar file path if external grammar file
if base_file:
if isinstance(base_file, PackageResource):
base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
else:
base_path = os.path.split(base_file)[0]
else:
base_path = os.path.abspath(os.path.curdir)
base_path = os.path.split(base_file)[0]
else:
base_path = os.path.abspath(os.path.curdir)

return dotted_path, base_path, aliases

def _unpack_definition(self, tree, mangle):
if tree.data == 'rule':
name, params, exp, opts = options_from_rule(*tree.children)
else:
name = tree.children[0].value
params = () # TODO terminal templates
opts = int(tree.children[1]) if len(tree.children) == 3 else 1 # priority
exp = tree.children[-1]


if mangle is not None:
params = tuple(mangle(p) for p in params)
name = mangle(name)

exp = _mangle_exp(exp, mangle)
return name, exp, params, opts


def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None, dotted_path=None):
tree = _parse_grammar(grammar_text, grammar_name)

imports = {}
for stmt in tree.children:
if stmt.data == 'import':
dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
try:
import_base_path, import_aliases = imports[dotted_path]
assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
@@ -995,110 +1030,126 @@ class GrammarLoader:
except KeyError:
imports[dotted_path] = base_path, aliases


elif stmt.data == 'declare':
for t in stmt.children:
term_defs.append([t.value, (None, None)])
elif stmt.data == 'override_rule':
for dotted_path, (base_path, aliases) in imports.items():
self.do_import(dotted_path, base_path, aliases, mangle)

for stmt in tree.children:
if stmt.data in ('term', 'rule'):
self._define(*self._unpack_definition(stmt, mangle))
elif stmt.data == 'override':
r ,= stmt.children
self._define(*self._unpack_definition(r, mangle), override=True)
elif stmt.data == 'extend':
r ,= stmt.children
overriding_rules.append(options_from_rule(*r.children))
self._extend(*self._unpack_definition(r, mangle))
elif stmt.data == 'ignore':
# if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
if mangle is None:
self._ignore(*stmt.children)
elif stmt.data == 'declare':
names = [t.value for t in stmt.children]
if mangle is None:
self._declare(*names)
else:
self._declare(*map(mangle, names))
elif stmt.data == 'import':
pass
else:
assert False, stmt


# import grammars
for dotted_path, (base_path, aliases) in imports.items():
grammar_path = os.path.join(*dotted_path) + EXT
g = self.import_grammar(grammar_path, base_path=base_path, import_paths=import_paths)
new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)

term_defs += new_td
rule_defs += new_rd

# replace rules by overridding rules, according to name
for r in overriding_rules:
name = r[0]
# remove overridden rule from rule_defs
overridden, rule_defs = classify_bool(rule_defs, lambda r: r[0] == name) # FIXME inefficient
if not overridden:
raise GrammarError("Cannot override a nonexisting rule: %s" % name)
rule_defs.append(r)

## Handle terminals

# Verify correctness 1
for name, _ in term_defs:
if name.startswith('__'):
raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)

# Handle ignore tokens
# XXX A slightly hacky solution. Recognition of %ignore TERMINAL as separate comes from the lexer's
# inability to handle duplicate terminals (two names, one value)
ignore_names = []
for t in ignore:
if t.data=='expansions' and len(t.children) == 1:
t2 ,= t.children
if t2.data=='expansion' and len(t2.children) == 1:
item ,= t2.children
if item.data == 'value':
item ,= item.children
if isinstance(item, Token) and item.type == 'TERMINAL':
ignore_names.append(item.value)
continue


name = '__IGNORE_%d'% len(ignore_names)
ignore_names.append(name)
term_defs.append((name, (t, 1)))
term_defs = { name: exp
for name, (_params, exp, _options) in self._definitions.items()
if self._is_term(name)
}
resolve_term_references(term_defs)


# Verify correctness 2
terminal_names = set()
for name, _ in term_defs:
if name in terminal_names:
raise GrammarError("Terminal '%s' defined more than once" % name)
terminal_names.add(name)


if set(ignore_names) > terminal_names:
raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(ignore_names) - terminal_names))
def _remove_unused(self, used):
def rule_dependencies(symbol):
if self._is_term(symbol):
return []
params, tree,_ = self._definitions[symbol]
return _find_used_symbols(tree) - set(params)


resolve_term_references(term_defs)
_used = set(bfs(used, rule_dependencies))
self._definitions = {k: v for k, v in self._definitions.items() if k in _used}


## Handle rules


rule_names = {}
for name, params, _x, option in rule_defs:
# We can't just simply not throw away the tokens later, we need option.keep_all_tokens to correctly generate maybe_placeholders
if self.global_keep_all_tokens:
option.keep_all_tokens = True
def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
assert dotted_path
mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
grammar_path = os.path.join(*dotted_path) + EXT
to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
for source in to_try:
try:
if callable(source):
joined_path, text = source(base_path, grammar_path)
else:
joined_path = os.path.join(source, grammar_path)
with open(joined_path, encoding='utf8') as f:
text = f.read()
except IOError:
continue
else:
gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths)
gb.load_grammar(text, joined_path, mangle, dotted_path)
gb._remove_unused(map(mangle, aliases))
for name in gb._definitions:
if name in self._definitions:
raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path))

self._definitions.update(**gb._definitions)
break
else:
# Search failed. Make Python throw a nice error.
open(grammar_path, encoding='utf8')
assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)


if name.startswith('__'):
raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
if name in rule_names:
raise GrammarError("Rule '%s' defined more than once" % name)
rule_names[name] = len(params)


for name, params , expansions, _o in rule_defs:
def validate(self):
for name, (params, exp, _options) in self._definitions.items():
for i, p in enumerate(params):
if p in rule_names:
if p in self._definitions:
raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
if p in params[:i]:
raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))
for temp in expansions.find_data('template_usage'):

if exp is None: # Remaining checks don't apply to abstract rules/terminals
continue

for temp in exp.find_data('template_usage'):
sym = temp.children[0]
args = temp.children[1:]
if sym not in params:
if sym not in rule_names:
raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name))
if len(args) != rule_names[sym]:
raise GrammarError("Wrong number of template arguments used for %s "
"(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name))
for sym in _find_used_symbols(expansions):
if sym.type == 'TERMINAL':
if sym not in terminal_names:
raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name))
else:
if sym not in rule_names and sym not in params:
raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name))

return Grammar(rule_defs, term_defs, ignore_names)

if sym not in self._definitions:
self._grammar_error("Template '%s' used but not defined (in {type} {name})" % sym, name)
if len(args) != len(self._definitions[sym][0]):
expected, actual = len(self._definitions[sym][0]), len(args)
self._grammar_error("Wrong number of template arguments used for {name} "
"(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name)

for sym in _find_used_symbols(exp):
if sym not in self._definitions and sym not in params:
self._grammar_error("{Type} '{name}' used but not defined (in {type2} {name2})", sym, name)

if not set(self._definitions).issuperset(self._ignore_names):
raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))

def build(self):
self.validate()
rule_defs = []
term_defs = []
for name, (params, exp, options) in self._definitions.items():
if self._is_term(name):
assert len(params) == 0
term_defs.append((name, (exp, options)))
else:
rule_defs.append((name, params, exp, options))
# resolve_term_references(term_defs)
return Grammar(rule_defs, term_defs, self._ignore_names)


def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths)
builder = GrammarBuilder(global_keep_all_tokens, import_paths)
builder.load_grammar(grammar, source)
return builder.build()
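To make the import machinery above easier to follow, here is a standalone sketch (an illustration, not code from the commit) of the name-mangling convention applied by `_get_mangle` and relied on by `_is_term`; it also explains why the `extend_python.py` example strips `python3__` prefixes from the resulting tree:

```python
# Standalone sketch of the namespacing convention GrammarBuilder.do_import
# applies to imported symbols: `prefix__name`, keeping a leading underscore
# in front of the whole mangled name for names that start with `_`, while
# aliased names are left as-is.
def mangle(prefix, aliases, name):
    if name in aliases:
        return aliases[name]
    if name.startswith('_'):
        return '_%s__%s' % (prefix, name[1:])
    return '%s__%s' % (prefix, name)

assert mangle('python3', {}, 'compound_stmt') == 'python3__compound_stmt'
assert mangle('python3', {}, '_NEWLINE') == '_python3__NEWLINE'
assert mangle('ab', {'startab': 'startab'}, 'startab') == 'startab'

# This convention is also why _is_term only inspects the part after the last
# `__`: an imported terminal such as `python3__COMMENT` is still uppercase there.
assert 'python3__COMMENT'.rpartition('__')[-1].isupper()
```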

tests/test_grammar.py (+129, -5)

@@ -3,8 +3,9 @@ from __future__ import absolute_import
import sys
from unittest import TestCase, main


from lark import Lark
from lark.load_grammar import GrammarLoader, GrammarError
from lark import Lark, Token, Tree
from lark.load_grammar import GrammarError, GRAMMAR_ERRORS
from lark.load_grammar import FromPackageLoader




class TestGrammar(TestCase):
@@ -12,7 +13,7 @@ class TestGrammar(TestCase):
pass


def test_errors(self):
for msg, examples in GrammarLoader.ERRORS:
for msg, examples in GRAMMAR_ERRORS:
for example in examples:
try:
p = Lark(example)
@@ -21,7 +22,7 @@ class TestGrammar(TestCase):
else:
assert False, "example did not raise an error"


def test_override(self):
def test_override_rule(self):
# Overrides the 'sep' template in existing grammar to add an optional terminating delimiter
# Thus extending it beyond its original capacity
p = Lark("""
@@ -29,12 +30,135 @@ class TestGrammar(TestCase):


%override sep{item, delim}: item (delim item)* delim?
%ignore " "
""")
""", source_path=__file__)


a = p.parse('[1, 2, 3]')
b = p.parse('[1, 2, 3, ]')
assert a == b


self.assertRaises(GrammarError, Lark, """
%import .test_templates_import (start, sep)

%override sep{item}: item (delim item)* delim?
""")

self.assertRaises(GrammarError, Lark, """
%override sep{item}: item (delim item)* delim?
""")

def test_override_terminal(self):
p = Lark("""

%import .grammars.ab (startab, A, B)

%override A: "c"
%override B: "d"
""", start='startab', source_path=__file__)

a = p.parse('cd')
self.assertEqual(a.children[0].children, [Token('A', 'c'), Token('B', 'd')])

def test_extend_rule(self):
p = Lark("""
%import .grammars.ab (startab, A, B, expr)

%extend expr: B A
""", start='startab', source_path=__file__)
a = p.parse('abab')
self.assertEqual(a.children[0].children, ['a', Tree('expr', ['b', 'a']), 'b'])

self.assertRaises(GrammarError, Lark, """
%extend expr: B A
""")

def test_extend_term(self):
p = Lark("""
%import .grammars.ab (startab, A, B, expr)

%extend A: "c"
""", start='startab', source_path=__file__)
a = p.parse('acbb')
self.assertEqual(a.children[0].children, ['a', Tree('expr', ['c', 'b']), 'b'])

def test_extend_twice(self):
p = Lark("""
start: x+

x: "a"
%extend x: "b"
%extend x: "c"
""")

assert p.parse("abccbba") == p.parse("cbabbbb")

def test_undefined_ignore(self):
g = """!start: "A"

%ignore B
"""
self.assertRaises( GrammarError, Lark, g)

g = """!start: "A"

%ignore start
"""
self.assertRaises( GrammarError, Lark, g)

def test_alias_in_terminal(self):
g = """start: TERM
TERM: "a" -> alias
"""
self.assertRaises( GrammarError, Lark, g)

def test_undefined_rule(self):
self.assertRaises(GrammarError, Lark, """start: a""")

def test_undefined_term(self):
self.assertRaises(GrammarError, Lark, """start: A""")

def test_token_multiline_only_works_with_x_flag(self):
g = r"""start: ABC
ABC: / a b c
d
e f
/i
"""
self.assertRaises( GrammarError, Lark, g)

def test_import_custom_sources(self):
custom_loader = FromPackageLoader('tests', ('grammars', ))

grammar = """
start: startab

%import ab.startab
"""

p = Lark(grammar, import_paths=[custom_loader])
self.assertEqual(p.parse('ab'),
Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))

def test_import_custom_sources2(self):
custom_loader = FromPackageLoader('tests', ('grammars', ))

grammar = """
start: rule_to_import

%import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
"""
p = Lark(grammar, import_paths=[custom_loader])
x = p.parse('N')
self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

def test_import_custom_sources3(self):
custom_loader2 = FromPackageLoader('tests')
grammar = """
%import .test_relative_import (start, WS)
%ignore WS
"""
p = Lark(grammar, import_paths=[custom_loader2], source_path=__file__) # import relative to current file
x = p.parse('12 capybaras')
self.assertEqual(x.children, ['12', 'capybaras'])




if __name__ == '__main__':


tests/test_parser.py (+0, -58)

@@ -11,7 +11,6 @@ from copy import copy, deepcopy
from lark.utils import Py36, isascii


from lark import Token
from lark.load_grammar import FromPackageLoader


try:
from cStringIO import StringIO as cStringIO
@@ -1380,12 +1379,6 @@ def _make_parser_test(LEXER, PARSER):
# A: "a" """)
# self.assertRaises(LexError, g.parse, 'aab')


def test_undefined_rule(self):
self.assertRaises(GrammarError, _Lark, """start: a""")

def test_undefined_token(self):
self.assertRaises(GrammarError, _Lark, """start: A""")

def test_rule_collision(self):
g = _Lark("""start: "a"+ "b"
| "a"+ """)
@@ -1619,15 +1612,6 @@ def _make_parser_test(LEXER, PARSER):
x = g.parse('abcdef')
self.assertEqual(x.children, ['abcdef'])


def test_token_multiline_only_works_with_x_flag(self):
g = r"""start: ABC
ABC: / a b c
d
e f
/i
"""
self.assertRaises( GrammarError, _Lark, g)

@unittest.skipIf(PARSER == 'cyk', "No empty rules")
def test_twice_empty(self):
g = """!start: ("A"?)?
@@ -1639,18 +1623,6 @@ def _make_parser_test(LEXER, PARSER):
tree = l.parse('')
self.assertEqual(tree.children, [])


def test_undefined_ignore(self):
g = """!start: "A"

%ignore B
"""
self.assertRaises( GrammarError, _Lark, g)

def test_alias_in_terminal(self):
g = """start: TERM
TERM: "a" -> alias
"""
self.assertRaises( GrammarError, _Lark, g)


def test_line_and_column(self):
g = r"""!start: "A" bc "D"
@@ -1950,36 +1922,6 @@ def _make_parser_test(LEXER, PARSER):
parser = _Lark(grammar, postlex=CustomIndenter())
parser.parse("a\n b\n")


def test_import_custom_sources(self):
custom_loader = FromPackageLoader('tests', ('grammars', ))

grammar = """
start: startab

%import ab.startab
"""

p = _Lark(grammar, import_paths=[custom_loader])
self.assertEqual(p.parse('ab'),
Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))

grammar = """
start: rule_to_import

%import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
"""
p = _Lark(grammar, import_paths=[custom_loader])
x = p.parse('N')
self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

custom_loader2 = FromPackageLoader('tests')
grammar = """
%import .test_relative_import (start, WS)
%ignore WS
"""
p = _Lark(grammar, import_paths=[custom_loader2], source_path=__file__) # import relative to current file
x = p.parse('12 capybaras')
self.assertEqual(x.children, ['12', 'capybaras'])


@unittest.skipIf(PARSER == 'cyk', "Doesn't work for CYK")
def test_prioritization(self):

