diff --git a/examples/advanced/create_ast.py b/examples/advanced/create_ast.py new file mode 100644 index 0000000..e20b7e6 --- /dev/null +++ b/examples/advanced/create_ast.py @@ -0,0 +1,105 @@ +""" + This example demonstrates how to transform a parse-tree into an AST using `lark.ast_utils`. + + This example only works with Python 3. +""" + +import sys +from typing import List +from dataclasses import dataclass + +from lark import Lark, ast_utils, Transformer, v_args + +this_module = sys.modules[__name__] + + +# +# Define AST +# +class _Ast(ast_utils.Ast): + pass + +class _Statement(_Ast): + pass + +@dataclass +class Value(_Ast): + value: object + +@dataclass +class Name(_Ast): + name: str + +@dataclass +class CodeBlock(_Ast, ast_utils.AsList): + statements: List[_Statement] + +@dataclass +class If(_Statement): + cond: Value + then: CodeBlock + +@dataclass +class SetVar(_Statement): + name: str + value: Value + +@dataclass +class Print(_Statement): + value: Value + + +class ToAst(Transformer): + def STRING(self, s): + # Remove quotation marks + return s[1:-1] + + def DEC_NUMBER(self, n): + return int(n) + + @v_args(inline=True) + def start(self, x): + return x + +# +# Define Parser +# + +parser = Lark(""" + start: code_block + + code_block: statement+ + + ?statement: if | set_var | print + + if: "if" value "{" code_block "}" + set_var: NAME "=" value ";" + print: "print" value ";" + + value: name | STRING | DEC_NUMBER + name: NAME + + %import python (NAME, STRING, DEC_NUMBER) + %import common.WS + %ignore WS + """, + parser="lalr", +) + +transformer = ast_utils.create_transformer(this_module, ToAst()) + +def parse(text): + return transformer.transform(parser.parse(text)) + +# +# Test +# + +if __name__ == '__main__': + print(parse(""" + a = 1; + if a { + print "a is 1"; + a = 2; + } + """)) diff --git a/lark-stubs/ast_utils.pyi b/lark-stubs/ast_utils.pyi new file mode 100644 index 0000000..28246cf --- /dev/null +++ b/lark-stubs/ast_utils.pyi @@ -0,0 +1,17 @@ +import 
types +from typing import Optional + +from .visitors import Transformer + +class Ast(object): + pass + +class AsList(object): + pass + + +def create_transformer( + ast_module: types.ModuleType, + transformer: Optional[Transformer]=None +) -> Transformer: + ... \ No newline at end of file diff --git a/lark-stubs/grammar.pyi b/lark-stubs/grammar.pyi index 379d7a9..3a3d806 100644 --- a/lark-stubs/grammar.pyi +++ b/lark-stubs/grammar.pyi @@ -6,4 +6,9 @@ class RuleOptions: expand1: bool priority: int template_source: Optional[str] - empty_indices: Tuple[bool, ...] \ No newline at end of file + empty_indices: Tuple[bool, ...] + + +class Symbol: + name: str + is_term: bool diff --git a/lark-stubs/indenter.pyi b/lark-stubs/indenter.pyi index f0cf8a2..3a7aa97 100644 --- a/lark-stubs/indenter.pyi +++ b/lark-stubs/indenter.pyi @@ -3,9 +3,10 @@ from typing import Tuple, List, Iterator, Optional from abc import ABC, abstractmethod from .lexer import Token +from .lark import PostLex -class Indenter(ABC): +class Indenter(PostLex, ABC): paren_level: Optional[int] indent_level: Optional[List[int]] @@ -15,13 +16,6 @@ class Indenter(ABC): def handle_NL(self, token: Token) -> Iterator[Token]: ... - def process(self, stream: Iterator[Token]) -> Iterator[Token]: - ... - - @property - def always_accept(self) -> Tuple[str]: - ... 
- @property @abstractmethod def NL_type(self) -> str: diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi index 9246938..fba567b 100644 --- a/lark-stubs/lark.pyi +++ b/lark-stubs/lark.pyi @@ -65,7 +65,7 @@ class Lark: grammar: Union[Grammar, str, IO[str]], *, start: Union[None, str, List[str]] = "start", - parser: Literal["earley", "lalr", "cyk"] = "auto", + parser: Literal["earley", "lalr", "cyk", "auto"] = "auto", lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto", transformer: Optional[Transformer] = None, postlex: Optional[PostLex] = None, diff --git a/lark-stubs/load_grammar.pyi b/lark-stubs/load_grammar.pyi index 0521e33..7202fa8 100644 --- a/lark-stubs/load_grammar.pyi +++ b/lark-stubs/load_grammar.pyi @@ -1,8 +1,8 @@ from typing import List, Tuple, Union, Callable, Dict, Optional -from lark import Tree -from lark.grammar import RuleOptions -from lark.exceptions import UnexpectedInput +from .tree import Tree +from .grammar import RuleOptions +from .exceptions import UnexpectedInput class Grammar: diff --git a/lark-stubs/reconstruct.pyi b/lark-stubs/reconstruct.pyi index 2824fa7..9826428 100644 --- a/lark-stubs/reconstruct.pyi +++ b/lark-stubs/reconstruct.pyi @@ -11,8 +11,7 @@ from .lexer import TerminalDef class WriteTokensTransformer(Transformer_InPlace): - def __init__(self, tokens: Dict[str, TerminalDef], Dict[str, Callable[[Symbol], str]] = ...): - ... + def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ... 
class MatchTree(Tree): diff --git a/lark/ast_utils.py b/lark/ast_utils.py new file mode 100644 index 0000000..27b00a3 --- /dev/null +++ b/lark/ast_utils.py @@ -0,0 +1,51 @@ +""" + Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree +""" + +import inspect, re + +from lark import Transformer, v_args + +class Ast(object): + """Abstract class + + Subclasses will be collected by `create_transformer()` + """ + pass + +class AsList(object): + """Abstract class + + Subclasses will be instantiated with the parse results as a single list, instead of as arguments. + """ + +def camel_to_snake(name): + return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower() + +inline = v_args(inline=True) + +def create_transformer(ast_module, transformer=None): + """Collects `Ast` subclasses from the module, and creates a Lark transformer + + For each class, a matching rule is added to the transformer; + CamelCase class names are converted into snake_case rule names, + e.g. "CodeBlock" -> "code_block". + + Parameters: + ast_module - A Python module containing all the subclasses of `ast_utils.Ast` + Classes starting with an underscore (`_`) will be skipped. + transformer (Optional[Transformer]) - An initial transformer. Its attributes may be overwritten. + """ + t = transformer or Transformer() + + for name, obj in inspect.getmembers(ast_module): + if not name.startswith('_') and inspect.isclass(obj): + if issubclass(obj, Ast): + if not issubclass(obj, AsList): + obj = inline(obj).__get__(t) + + setattr(t, camel_to_snake(name), obj) + + return t \ No newline at end of file diff --git a/lark/indenter.py b/lark/indenter.py index 7663be7..7e1263d 100644 --- a/lark/indenter.py +++ b/lark/indenter.py @@ -1,13 +1,14 @@ "Provides Indentation services for languages with indentation similar to Python" from .exceptions import LarkError +from .lark import PostLex from .lexer import Token ###{standalone class DedentError(LarkError): pass -class Indenter: +class Indenter(PostLex): def __init__(self): self.paren_level = None self.indent_level = None diff --git a/lark/lark.py b/lark/lark.py index 23063d4..ba98d16 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -1,4 +1,6 @@ from __future__ import absolute_import + + from lark.exceptions import ConfigurationError, assert_config import sys, os, pickle, hashlib @@ -6,7 +8,7 @@ from
io import open import tempfile from warnings import warn -from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger +from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod from .load_grammar import load_grammar, FromPackageLoader, Grammar from .tree import Tree from .common import LexerConf, ParserConf @@ -191,6 +193,14 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') +class PostLex(ABC): + @abstractmethod + def process(self, stream): + return stream + + always_accept = () + + class Lark(Serialize): """Main interface for the library. @@ -288,7 +298,12 @@ class Lark(Serialize): if self.options.parser == 'lalr': self.options.lexer = 'contextual' elif self.options.parser == 'earley': - self.options.lexer = 'dynamic' + if self.options.postlex is not None: + logger.info("postlex can't be used with the dynamic lexer, so we use standard instead. " + "Consider using lalr with contextual instead of earley") + self.options.lexer = 'standard' + else: + self.options.lexer = 'dynamic' elif self.options.parser == 'cyk': self.options.lexer = 'standard' else: @@ -298,6 +313,8 @@ class Lark(Serialize): assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance else: assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete')) + if self.options.postlex is not None and 'dynamic' in lexer: + raise ConfigurationError("Can't use postlex with a dynamic lexer. 
"Use standard or contextual instead") if self.options.ambiguity == 'auto': if self.options.parser == 'earley': diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 51fb297..d98805c 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -72,6 +72,7 @@ class ParsingFrontend(Serialize): lexer_type = lexer_conf.lexer_type self.skip_lexer = False if lexer_type in ('dynamic', 'dynamic_complete'): + assert lexer_conf.postlex is None self.skip_lexer = True return diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py index 424dd95..038e181 100644 --- a/lark/tools/standalone.py +++ b/lark/tools/standalone.py @@ -56,7 +56,6 @@ EXTRACT_STANDALONE_FILES = [ 'utils.py', 'tree.py', 'visitors.py', - 'indenter.py', 'grammar.py', 'lexer.py', 'common.py', @@ -65,6 +64,7 @@ EXTRACT_STANDALONE_FILES = [ 'parsers/lalr_analysis.py', 'parser_frontends.py', 'lark.py', + 'indenter.py', ] def extract_sections(lines): diff --git a/lark/utils.py b/lark/utils.py index 023b118..f3bd957 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -12,6 +12,15 @@ logger.addHandler(logging.StreamHandler()) # By default, we should not output any log messages logger.setLevel(logging.CRITICAL) +if sys.version_info[0]>2: + from abc import ABC, abstractmethod +else: + from abc import ABCMeta, abstractmethod + class ABC(object): # Provide Python27 compatibility + __slots__ = () + __metaclass__ = ABCMeta + + Py36 = (sys.version_info[:2] >= (3, 6)) NO_VALUE = object()