@@ -0,0 +1,105 @@ | |||
""" | |||
This example demonstrates how to transform a parse-tree into an AST using `lark.ast_utils`. | |||
This example only works with Python 3. | |||
""" | |||
import sys | |||
from typing import List | |||
from dataclasses import dataclass | |||
from lark import Lark, ast_utils, Transformer, v_args | |||
this_module = sys.modules[__name__] | |||
# | |||
# Define AST | |||
# | |||
class _Ast(ast_utils.Ast):
    # Shared base for every AST node in this example.  The leading
    # underscore keeps it from being registered by create_transformer(),
    # which skips names starting with `_`.
    pass
class _Statement(_Ast):
    # Base for statement nodes (If, SetVar, Print); skipped by
    # create_transformer() because of the leading underscore.
    pass
@dataclass
class Value(_Ast):
    # Wraps a literal or a Name; produced by the `value` grammar rule.
    value: object
@dataclass
class Name(_Ast):
    # An identifier; produced by the `name` grammar rule (a NAME token).
    name: str
@dataclass
class CodeBlock(_Ast, ast_utils.AsList):
    # AsList: the constructor receives all matched statements as a single
    # list instead of as separate arguments.
    statements: List[_Statement]
@dataclass
class If(_Statement):
    # Matches: "if" value "{" code_block "}"
    cond: Value
    then: CodeBlock
@dataclass
class SetVar(_Statement):
    # Matches: NAME "=" value ";"
    name: str
    value: Value
@dataclass
class Print(_Statement):
    # Matches: "print" value ";"
    value: Value
class ToAst(Transformer):
    """Converts terminal tokens to plain Python values and unwraps `start`."""

    def STRING(self, token):
        # Drop the surrounding quotation marks.
        return token[1:-1]

    def DEC_NUMBER(self, token):
        # Decimal literal -> Python int.
        return int(token)

    @v_args(inline=True)
    def start(self, tree):
        # `start` has exactly one child; return it directly.
        return tree
# | |||
# Define Parser | |||
# | |||
# LALR parser for the toy language.  Rule names are deliberately the
# snake_case of the AST class names above (e.g. code_block -> CodeBlock),
# so create_transformer() can match them up.
parser = Lark("""
    start: code_block

    code_block: statement+

    ?statement: if | set_var | print

    if: "if" value "{" code_block "}"
    set_var: NAME "=" value ";"
    print: "print" value ";"

    value: name | STRING | DEC_NUMBER
    name: NAME

    %import python (NAME, STRING, DEC_NUMBER)
    %import common.WS
    %ignore WS
    """,
    parser="lalr",
)
# Transformer that maps each grammar rule to the matching AST dataclass
# defined in this module, with ToAst handling the terminals.
transformer = ast_utils.create_transformer(this_module, ToAst())

def parse(text):
    """Parse `text` and return the transformed AST."""
    return transformer.transform(parser.parse(text))
# | |||
# Test | |||
# | |||
if __name__ == '__main__':
    # Quick demo: parse a small program and print the resulting AST.
    print(parse("""
        a = 1;
        if a {
            print "a is 1";
            a = 2;
        }
    """))
@@ -0,0 +1,17 @@ | |||
import types | |||
from typing import Optional | |||
from .visitors import Transformer | |||
class Ast(object):
    # Marker base class; subclasses are collected by create_transformer().
    pass
class AsList(object):
    # Marker: subclasses receive the parse results as one list argument.
    pass
def create_transformer(
    ast_module: types.ModuleType,
    transformer: Optional[Transformer]=None
) -> Transformer:
    # Stub: collects `Ast` subclasses from `ast_module` and registers them
    # on a new (or the given) Transformer under snake_case rule names.
    ...
@@ -6,4 +6,9 @@ class RuleOptions: | |||
expand1: bool | |||
priority: int | |||
template_source: Optional[str] | |||
empty_indices: Tuple[bool, ...] | |||
empty_indices: Tuple[bool, ...] | |||
class Symbol:
    # The symbol's identifier (terminal or rule name).
    name: str
    # NOTE(review): presumably True for terminals, False for rules —
    # confirm against the implementation in lark/grammar.py.
    is_term: bool
@@ -3,9 +3,10 @@ | |||
from typing import Tuple, List, Iterator, Optional | |||
from abc import ABC, abstractmethod | |||
from .lexer import Token | |||
from .lark import PostLex | |||
class Indenter(ABC): | |||
class Indenter(PostLex, ABC): | |||
paren_level: Optional[int] | |||
indent_level: Optional[List[int]] | |||
@@ -15,13 +16,6 @@ class Indenter(ABC): | |||
def handle_NL(self, token: Token) -> Iterator[Token]: | |||
... | |||
def process(self, stream: Iterator[Token]) -> Iterator[Token]: | |||
... | |||
@property | |||
def always_accept(self) -> Tuple[str]: | |||
... | |||
@property | |||
@abstractmethod | |||
def NL_type(self) -> str: | |||
@@ -65,7 +65,7 @@ class Lark: | |||
grammar: Union[Grammar, str, IO[str]], | |||
*, | |||
start: Union[None, str, List[str]] = "start", | |||
parser: Literal["earley", "lalr", "cyk"] = "auto", | |||
parser: Literal["earley", "lalr", "cyk", "auto"] = "auto", | |||
lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto", | |||
transformer: Optional[Transformer] = None, | |||
postlex: Optional[PostLex] = None, | |||
@@ -1,8 +1,8 @@ | |||
from typing import List, Tuple, Union, Callable, Dict, Optional | |||
from lark import Tree | |||
from lark.grammar import RuleOptions | |||
from lark.exceptions import UnexpectedInput | |||
from .tree import Tree | |||
from .grammar import RuleOptions | |||
from .exceptions import UnexpectedInput | |||
class Grammar: | |||
@@ -11,8 +11,7 @@ from .lexer import TerminalDef | |||
class WriteTokensTransformer(Transformer_InPlace): | |||
def __init__(self, tokens: Dict[str, TerminalDef], Dict[str, Callable[[Symbol], str]] = ...): | |||
... | |||
def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ... | |||
class MatchTree(Tree): | |||
@@ -0,0 +1,51 @@ | |||
""" | |||
Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree | |||
""" | |||
import inspect, re | |||
from lark import Transformer, v_args | |||
class Ast(object):
    """Abstract marker class.

    Subclasses will be collected by `create_transformer()`.
    """
    pass
class AsList(object):
    """Abstract marker class.

    Subclasses will be instantiated with the parse results as a single
    list, instead of as arguments.
    """
def camel_to_snake(name):
    """Convert a CamelCase identifier to snake_case, e.g. "CodeBlock" -> "code_block"."""
    # Insert an underscore before every capital that is not at the start,
    # then lowercase the whole string.
    underscored = re.sub(r'(?<!^)(?=[A-Z])', '_', name)
    return underscored.lower()
def _call(func, _data, children, _meta):
    """v_args wrapper: apply `func` to the children, discarding data/meta."""
    args = tuple(children)
    return func(*args)
# Decorator that makes a rule callback receive its children as inline args.
inline = v_args(wrapper=_call)
def create_transformer(ast_module, transformer=None):
    """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.

    For each class, we create a corresponding rule in the transformer, with a matching name.
    CamelCase names will be converted into snake_case. Example: "CodeBlock" -> "code_block".

    Parameters:
        ast_module - A Python module containing all the subclasses of `ast_utils.Ast`
            Classes starting with an underscore (`_`) will be skipped.
        transformer (Optional[Transformer]) - An initial transformer. Its attributes may be overwritten.
    """
    res = transformer or Transformer()
    for name, member in inspect.getmembers(ast_module):
        # Only public classes that opt in via the Ast marker are registered.
        if name.startswith('_') or not inspect.isclass(member):
            continue
        if not issubclass(member, Ast):
            continue
        callback = member
        if not issubclass(member, AsList):
            # Wrap the constructor so children are passed as inline
            # arguments, then bind the wrapper to the transformer instance.
            callback = inline(member).__get__(res)
        setattr(res, camel_to_snake(name), callback)
    return res
@@ -1,13 +1,14 @@ | |||
"Provides Indentation services for languages with indentation similar to Python" | |||
from .exceptions import LarkError | |||
from .lark import PostLex | |||
from .lexer import Token | |||
###{standalone | |||
class DedentError(LarkError): | |||
pass | |||
class Indenter: | |||
class Indenter(PostLex): | |||
    def __init__(self):
        # Both fields start as None; presumably they are (re)initialised
        # when a token stream is processed — the rest of the class is
        # outside this view, so confirm against indenter.py.
        self.paren_level = None
        self.indent_level = None
@@ -1,4 +1,6 @@ | |||
from __future__ import absolute_import | |||
from lark.exceptions import ConfigurationError, assert_config | |||
import sys, os, pickle, hashlib | |||
@@ -6,7 +8,7 @@ from io import open | |||
import tempfile | |||
from warnings import warn | |||
from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger | |||
from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod | |||
from .load_grammar import load_grammar, FromPackageLoader, Grammar | |||
from .tree import Tree | |||
from .common import LexerConf, ParserConf | |||
@@ -191,6 +193,14 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) | |||
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') | |||
class PostLex(ABC):
    """Abstract interface for post-lexing processors plugged into Lark."""

    # Terminal names this post-lexer should always receive; none by default.
    always_accept = ()

    @abstractmethod
    def process(self, stream):
        """Transform the token stream; the base body passes it through."""
        return stream
class Lark(Serialize): | |||
"""Main interface for the library. | |||
@@ -288,7 +298,12 @@ class Lark(Serialize): | |||
if self.options.parser == 'lalr': | |||
self.options.lexer = 'contextual' | |||
elif self.options.parser == 'earley': | |||
self.options.lexer = 'dynamic' | |||
if self.options.postlex is not None: | |||
logger.info("postlex can't be used with the dynamic lexer, so we use standard instead. " | |||
"Consider using lalr with contextual instead of earley") | |||
self.options.lexer = 'standard' | |||
else: | |||
self.options.lexer = 'dynamic' | |||
elif self.options.parser == 'cyk': | |||
self.options.lexer = 'standard' | |||
else: | |||
@@ -298,6 +313,8 @@ class Lark(Serialize): | |||
assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance | |||
else: | |||
assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete')) | |||
if self.options.postlex is not None and 'dynamic' in lexer: | |||
raise ConfigurationError("Can't use postlex with a dynamic lexer. Use standard or contextual instead") | |||
if self.options.ambiguity == 'auto': | |||
if self.options.parser == 'earley': | |||
@@ -72,6 +72,7 @@ class ParsingFrontend(Serialize): | |||
lexer_type = lexer_conf.lexer_type | |||
self.skip_lexer = False | |||
if lexer_type in ('dynamic', 'dynamic_complete'): | |||
assert lexer_conf.postlex is None | |||
self.skip_lexer = True | |||
return | |||
@@ -56,7 +56,6 @@ EXTRACT_STANDALONE_FILES = [ | |||
'utils.py', | |||
'tree.py', | |||
'visitors.py', | |||
'indenter.py', | |||
'grammar.py', | |||
'lexer.py', | |||
'common.py', | |||
@@ -65,6 +64,7 @@ EXTRACT_STANDALONE_FILES = [ | |||
'parsers/lalr_analysis.py', | |||
'parser_frontends.py', | |||
'lark.py', | |||
'indenter.py', | |||
] | |||
def extract_sections(lines): | |||
@@ -12,6 +12,15 @@ logger.addHandler(logging.StreamHandler()) | |||
# By default, we should not output any log messages | |||
logger.setLevel(logging.CRITICAL) | |||
# Import ABC/abstractmethod from the stdlib.  Python 2's `abc` module has
# no ABC helper class, so synthesize one from ABCMeta.
if sys.version_info[0] > 2:
    from abc import ABC, abstractmethod
else:
    from abc import ABCMeta, abstractmethod

    class ABC(object):  # Provide Python 2.7 compatibility
        __slots__ = ()
        # Bug fix: this was misspelled `__metclass__`, so ABCMeta was never
        # applied and @abstractmethod was not enforced on Python 2.
        __metaclass__ = ABCMeta
# True when running on Python 3.6 or newer.
Py36 = (sys.version_info[:2] >= (3, 6))

# Unique sentinel object for "no value supplied"; presumably used where
# None is a legitimate value — confirm at the call sites.
NO_VALUE = object()