@@ -0,0 +1,105 @@ | |||||
""" | |||||
This example demonstrates how to transform a parse-tree into an AST using `lark.ast_utils`. | |||||
This example only works with Python 3. | |||||
""" | |||||
import sys | |||||
from typing import List | |||||
from dataclasses import dataclass | |||||
from lark import Lark, ast_utils, Transformer, v_args | |||||
this_module = sys.modules[__name__] | |||||
# | |||||
# Define AST | |||||
# | |||||
class _Ast(ast_utils.Ast):
    """Common base of every AST node in this example.

    The leading underscore keeps `ast_utils.create_transformer()` from
    registering this class as a rule callback.
    """
class _Statement(_Ast):
    """Base class of the statement nodes (`If`, `SetVar`, `Print`).

    Underscore-prefixed, so it is not turned into a rule callback itself.
    """
@dataclass
class Value(_Ast):
    """AST node built for the `value` rule of the grammar."""
    # Per the `value` rule this is either a `Name` node, or the str / int
    # returned by `ToAst.STRING` / `ToAst.DEC_NUMBER`.
    value: object
@dataclass
class Name(_Ast):
    """AST node built for the `name` rule (a single NAME token)."""
    # The identifier text.
    name: str
@dataclass
class CodeBlock(_Ast, ast_utils.AsList):
    """AST node built for the `code_block` rule.

    Inheriting from `ast_utils.AsList` makes the transformer pass all the
    children as one list instead of as separate arguments.
    """
    # The statements of the block, in source order.
    statements: List[_Statement]
@dataclass
class If(_Statement):
    """AST node built for the `if` rule: `"if" value "{" code_block "}"`."""
    # The condition being tested.
    cond: Value
    # The block between the braces.
    then: CodeBlock
@dataclass
class SetVar(_Statement):
    """AST node built for the `set_var` rule: `NAME "=" value ";"`."""
    # The variable being assigned (the NAME token, not a `Name` node).
    name: str
    # The value assigned to it.
    value: Value
@dataclass
class Print(_Statement):
    """AST node built for the `print` rule: `"print" value ";"`."""
    # The value to print.
    value: Value
class ToAst(Transformer):
    """Hand-written callbacks for the terminals and for the `start` rule.

    The callbacks for the AST classes are added on top of this transformer
    by `ast_utils.create_transformer()`.
    """

    def STRING(self, s):
        """Return the string token without its first and last character."""
        # Remove quotation marks
        unquoted = s[1:-1]
        return unquoted

    def DEC_NUMBER(self, n):
        """Convert a decimal-number token into an `int`."""
        number = int(n)
        return number

    @v_args(inline=True)
    def start(self, x):
        """Unwrap `start`: its only child is the result of the parse."""
        return x
# | |||||
# Define Parser | |||||
# | |||||
# LALR parser for the toy language. The rule names (`code_block`, `if`,
# `set_var`, `print`, `value`, `name`) are the snake_case forms of the AST
# class names above, which is how the rules get matched to the classes.
# NAME, STRING and DEC_NUMBER are imported from the `python` grammar;
# whitespace is ignored.
parser = Lark("""
start: code_block
code_block: statement+
?statement: if | set_var | print
if: "if" value "{" code_block "}"
set_var: NAME "=" value ";"
print: "print" value ";"
value: name | STRING | DEC_NUMBER
name: NAME
%import python (NAME, STRING, DEC_NUMBER)
%import common.WS
%ignore WS
""",
    parser="lalr",
)
# Extend a `ToAst` instance with one callback per AST class of this module,
# so a single transformer handles both the tokens and the AST construction.
transformer = ast_utils.create_transformer(this_module, ToAst())


def parse(text):
    """Parse `text` and return the result of transforming its parse tree."""
    tree = parser.parse(text)
    return transformer.transform(tree)
# | |||||
# Test | |||||
# | |||||
if __name__ == '__main__':
    # Small sample program exercising assignment, `if` and `print`.
    sample_program = """
a = 1;
if a {
print "a is 1";
a = 2;
}
"""
    print(parse(sample_program))
@@ -0,0 +1,17 @@ | |||||
import types | |||||
from typing import Optional | |||||
from .visitors import Transformer | |||||
# Stub of the base class that AST node classes derive from.
class Ast(object):
    ...
# Stub of the mixin for AST classes that take their children as one list.
class AsList(object):
    ...
# Stub: builds a Transformer from the `Ast` subclasses found in `ast_module`,
# optionally extending the transformer passed in.
def create_transformer(ast_module: types.ModuleType, transformer: Optional[Transformer] = None) -> Transformer: ...
@@ -6,4 +6,9 @@ class RuleOptions: | |||||
expand1: bool | expand1: bool | ||||
priority: int | priority: int | ||||
template_source: Optional[str] | template_source: Optional[str] | ||||
empty_indices: Tuple[bool, ...] | |||||
empty_indices: Tuple[bool, ...] | |||||
# Stub for a grammar symbol.
# NOTE(review): field meanings below are inferred from their names only;
# confirm against the implementation in lark/grammar.py.
class Symbol:
    # The symbol's name.
    name: str
    # Presumably True for terminals and False for rules -- TODO confirm.
    is_term: bool
@@ -3,9 +3,10 @@ | |||||
from typing import Tuple, List, Iterator, Optional | from typing import Tuple, List, Iterator, Optional | ||||
from abc import ABC, abstractmethod | from abc import ABC, abstractmethod | ||||
from .lexer import Token | from .lexer import Token | ||||
from .lark import PostLex | |||||
class Indenter(ABC): | |||||
class Indenter(PostLex, ABC): | |||||
paren_level: Optional[int] | paren_level: Optional[int] | ||||
indent_level: Optional[List[int]] | indent_level: Optional[List[int]] | ||||
@@ -15,13 +16,6 @@ class Indenter(ABC): | |||||
def handle_NL(self, token: Token) -> Iterator[Token]: | def handle_NL(self, token: Token) -> Iterator[Token]: | ||||
... | ... | ||||
def process(self, stream: Iterator[Token]) -> Iterator[Token]: | |||||
... | |||||
@property | |||||
def always_accept(self) -> Tuple[str]: | |||||
... | |||||
@property | @property | ||||
@abstractmethod | @abstractmethod | ||||
def NL_type(self) -> str: | def NL_type(self) -> str: | ||||
@@ -65,7 +65,7 @@ class Lark: | |||||
grammar: Union[Grammar, str, IO[str]], | grammar: Union[Grammar, str, IO[str]], | ||||
*, | *, | ||||
start: Union[None, str, List[str]] = "start", | start: Union[None, str, List[str]] = "start", | ||||
parser: Literal["earley", "lalr", "cyk"] = "auto", | |||||
parser: Literal["earley", "lalr", "cyk", "auto"] = "auto", | |||||
lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto", | lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto", | ||||
transformer: Optional[Transformer] = None, | transformer: Optional[Transformer] = None, | ||||
postlex: Optional[PostLex] = None, | postlex: Optional[PostLex] = None, | ||||
@@ -1,8 +1,8 @@ | |||||
from typing import List, Tuple, Union, Callable, Dict, Optional | from typing import List, Tuple, Union, Callable, Dict, Optional | ||||
from lark import Tree | |||||
from lark.grammar import RuleOptions | |||||
from lark.exceptions import UnexpectedInput | |||||
from .tree import Tree | |||||
from .grammar import RuleOptions | |||||
from .exceptions import UnexpectedInput | |||||
class Grammar: | class Grammar: | ||||
@@ -11,8 +11,7 @@ from .lexer import TerminalDef | |||||
class WriteTokensTransformer(Transformer_InPlace):
    # Single, valid signature: the earlier variant declared its second
    # parameter without a name (a syntax error) and was then repeated with
    # the name `term_subs`, so only the named form is kept.
    def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ...
class MatchTree(Tree): | class MatchTree(Tree): | ||||
@@ -0,0 +1,51 @@ | |||||
""" | |||||
Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree | |||||
""" | |||||
import inspect, re | |||||
from lark import Transformer, v_args | |||||
class Ast(object):
    """Marker base class for AST nodes.

    `create_transformer()` collects the subclasses of this class from a
    module and registers each one as a rule callback.
    """
class AsList(object):
    """Marker mixin for AST classes that take their children as one list.

    Subclasses are instantiated with the parse results as a single list
    argument, instead of one positional argument per child.
    """
def camel_to_snake(name):
    """Convert a CamelCase name into snake_case, e.g. "CodeBlock" -> "code_block"."""
    # Insert an underscore at every position that is followed by an uppercase
    # letter, except at the very start of the string, then lowercase it all.
    with_underscores = re.sub(r'(?<!^)(?=[A-Z])', '_', name)
    return with_underscores.lower()
def _call(func, _data, children, _meta):
    """Call `func` with `children` spread as positional arguments.

    Used as the `wrapper` of `v_args`; the rule data and meta arguments
    are accepted but ignored.
    """
    result = func(*children)
    return result
# Decorator that makes a callback receive its children as positional arguments.
inline = v_args(wrapper=_call)


def create_transformer(ast_module, transformer=None):
    """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.

    For each class, we create a corresponding rule in the transformer, with a matching name.
    CamelCase names will be converted into snake_case. Example: "CodeBlock" -> "code_block".

    Classes that also inherit from `AsList` are registered as-is, so they are
    called with the children as one list; all other classes are wrapped so
    that the children are passed as separate arguments.

    Parameters:
        ast_module - A Python module containing all the subclasses of `ast_utils.Ast`
                     Classes starting with an underscore (`_`) will be skipped.
        transformer (Optional[Transformer]) - An initial transformer. Its attributes may be overwritten.

    Returns:
        The transformer that was passed in (or a new `Transformer`), with the callbacks set on it.
    """
    t = transformer or Transformer()

    for name, obj in inspect.getmembers(ast_module):
        # Only public classes are candidates
        if name.startswith('_') or not inspect.isclass(obj):
            continue
        if not issubclass(obj, Ast):
            continue

        if issubclass(obj, AsList):
            callback = obj
        else:
            # Bind the wrapped class to the transformer, like a method
            callback = inline(obj).__get__(t)

        setattr(t, camel_to_snake(name), callback)

    return t
@@ -1,13 +1,14 @@ | |||||
"Provides Indentation services for languages with indentation similar to Python" | "Provides Indentation services for languages with indentation similar to Python" | ||||
from .exceptions import LarkError | from .exceptions import LarkError | ||||
from .lark import PostLex | |||||
from .lexer import Token | from .lexer import Token | ||||
###{standalone | ###{standalone | ||||
class DedentError(LarkError): | class DedentError(LarkError): | ||||
pass | pass | ||||
class Indenter: | |||||
class Indenter(PostLex): | |||||
def __init__(self): | def __init__(self): | ||||
self.paren_level = None | self.paren_level = None | ||||
self.indent_level = None | self.indent_level = None | ||||
@@ -1,4 +1,6 @@ | |||||
from __future__ import absolute_import | from __future__ import absolute_import | ||||
from lark.exceptions import ConfigurationError, assert_config | from lark.exceptions import ConfigurationError, assert_config | ||||
import sys, os, pickle, hashlib | import sys, os, pickle, hashlib | ||||
@@ -6,7 +8,7 @@ from io import open | |||||
import tempfile | import tempfile | ||||
from warnings import warn | from warnings import warn | ||||
from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger | |||||
from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod | |||||
from .load_grammar import load_grammar, FromPackageLoader, Grammar | from .load_grammar import load_grammar, FromPackageLoader, Grammar | ||||
from .tree import Tree | from .tree import Tree | ||||
from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
@@ -191,6 +193,14 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) | |||||
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') | _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') | ||||
class PostLex(ABC):
    """Abstract base class for post-lexer stages.

    Subclasses must implement `process`, which receives the token stream
    and returns the stream to use in its place.
    """

    # Empty by default.
    # NOTE(review): presumably the terminal names that must always be accepted -- confirm.
    always_accept = ()

    @abstractmethod
    def process(self, stream):
        """Return the stream unchanged; subclasses may call this as a pass-through."""
        return stream
class Lark(Serialize): | class Lark(Serialize): | ||||
"""Main interface for the library. | """Main interface for the library. | ||||
@@ -288,7 +298,12 @@ class Lark(Serialize): | |||||
if self.options.parser == 'lalr': | if self.options.parser == 'lalr': | ||||
self.options.lexer = 'contextual' | self.options.lexer = 'contextual' | ||||
elif self.options.parser == 'earley': | elif self.options.parser == 'earley': | ||||
self.options.lexer = 'dynamic' | |||||
if self.options.postlex is not None: | |||||
logger.info("postlex can't be used with the dynamic lexer, so we use standard instead. " | |||||
"Consider using lalr with contextual instead of earley") | |||||
self.options.lexer = 'standard' | |||||
else: | |||||
self.options.lexer = 'dynamic' | |||||
elif self.options.parser == 'cyk': | elif self.options.parser == 'cyk': | ||||
self.options.lexer = 'standard' | self.options.lexer = 'standard' | ||||
else: | else: | ||||
@@ -298,6 +313,8 @@ class Lark(Serialize): | |||||
assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance | assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance | ||||
else: | else: | ||||
assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete')) | assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete')) | ||||
if self.options.postlex is not None and 'dynamic' in lexer: | |||||
raise ConfigurationError("Can't use postlex with a dynamic lexer. Use standard or contextual instead") | |||||
if self.options.ambiguity == 'auto': | if self.options.ambiguity == 'auto': | ||||
if self.options.parser == 'earley': | if self.options.parser == 'earley': | ||||
@@ -72,6 +72,7 @@ class ParsingFrontend(Serialize): | |||||
lexer_type = lexer_conf.lexer_type | lexer_type = lexer_conf.lexer_type | ||||
self.skip_lexer = False | self.skip_lexer = False | ||||
if lexer_type in ('dynamic', 'dynamic_complete'): | if lexer_type in ('dynamic', 'dynamic_complete'): | ||||
assert lexer_conf.postlex is None | |||||
self.skip_lexer = True | self.skip_lexer = True | ||||
return | return | ||||
@@ -56,7 +56,6 @@ EXTRACT_STANDALONE_FILES = [ | |||||
'utils.py', | 'utils.py', | ||||
'tree.py', | 'tree.py', | ||||
'visitors.py', | 'visitors.py', | ||||
'indenter.py', | |||||
'grammar.py', | 'grammar.py', | ||||
'lexer.py', | 'lexer.py', | ||||
'common.py', | 'common.py', | ||||
@@ -65,6 +64,7 @@ EXTRACT_STANDALONE_FILES = [ | |||||
'parsers/lalr_analysis.py', | 'parsers/lalr_analysis.py', | ||||
'parser_frontends.py', | 'parser_frontends.py', | ||||
'lark.py', | 'lark.py', | ||||
'indenter.py', | |||||
] | ] | ||||
def extract_sections(lines): | def extract_sections(lines): | ||||
@@ -12,6 +12,15 @@ logger.addHandler(logging.StreamHandler()) | |||||
# By default, we should not output any log messages | # By default, we should not output any log messages | ||||
logger.setLevel(logging.CRITICAL) | logger.setLevel(logging.CRITICAL) | ||||
# Provide `ABC` and `abstractmethod` on both Python 2 and Python 3.
if sys.version_info[0] > 2:
    from abc import ABC, abstractmethod
else:
    from abc import ABCMeta, abstractmethod

    class ABC(object):  # Provide Python27 compatibility
        __slots__ = ()
        # Python 2 only honours the exact name `__metaclass__`; a misspelled
        # attribute is silently ignored, which would leave this class without
        # ABCMeta and `@abstractmethod` unenforced.
        __metaclass__ = ABCMeta
Py36 = (sys.version_info[:2] >= (3, 6)) | Py36 = (sys.version_info[:2] >= (3, 6)) | ||||
NO_VALUE = object() | NO_VALUE = object() | ||||