Преглед изворни кода

Merge remote-tracking branch 'origin/master'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh пре 3 година
родитељ
комит
ed6fc3cd1f
13 измењених фајлова са 218 додато и 19 уклоњено
  1. +105
    -0
      examples/advanced/create_ast.py
  2. +17
    -0
      lark-stubs/ast_utils.pyi
  3. +6
    -1
      lark-stubs/grammar.pyi
  4. +2
    -8
      lark-stubs/indenter.pyi
  5. +1
    -1
      lark-stubs/lark.pyi
  6. +3
    -3
      lark-stubs/load_grammar.pyi
  7. +1
    -2
      lark-stubs/reconstruct.pyi
  8. +51
    -0
      lark/ast_utils.py
  9. +2
    -1
      lark/indenter.py
  10. +19
    -2
      lark/lark.py
  11. +1
    -0
      lark/parser_frontends.py
  12. +1
    -1
      lark/tools/standalone.py
  13. +9
    -0
      lark/utils.py

+ 105
- 0
examples/advanced/create_ast.py Прегледај датотеку

@@ -0,0 +1,105 @@
"""
This example demonstrates how to transform a parse-tree into an AST using `lark.ast_utils`.

This example only works with Python 3.
"""

import sys
from typing import List
from dataclasses import dataclass

from lark import Lark, ast_utils, Transformer, v_args

this_module = sys.modules[__name__]


#
# Define AST
#
class _Ast(ast_utils.Ast):
    """Common base class for every AST node defined in this example.

    The leading underscore matters: `ast_utils.create_transformer()` skips
    names that start with `_`, so this class is not registered as a rule
    callback itself.
    """

class _Statement(_Ast):
    """Base class for statement nodes (`If`, `SetVar`, `Print`).

    Underscore-prefixed, so it is skipped when rule callbacks are collected.
    """

@dataclass
class Value(_Ast):
    """AST node for the grammar's `value` rule."""

    # Whatever the `value` rule matched: a `Name` node, or the converted
    # result of a STRING / DEC_NUMBER terminal.
    value: object

@dataclass
class Name(_Ast):
    """AST node for the grammar's `name` rule (a single NAME token)."""

    # The identifier text.
    name: str

@dataclass
class CodeBlock(_Ast, ast_utils.AsList):
    """AST node for the grammar's `code_block` rule.

    Mixing in `ast_utils.AsList` means the rule's children arrive as one list
    argument rather than being spread over several positional arguments.
    """

    # All statements of the block.
    statements: List[_Statement]

@dataclass
class If(_Statement):
    """AST node for the grammar's `if` rule: `"if" value "{" code_block "}"`."""

    # Condition being tested.
    cond: Value
    # Block attached to the condition.
    then: CodeBlock

@dataclass
class SetVar(_Statement):
    """AST node for the grammar's `set_var` rule: `NAME "=" value ";"`."""

    # Variable name on the left-hand side (the NAME token).
    name: str
    # Value on the right-hand side.
    value: Value

@dataclass
class Print(_Statement):
    """AST node for the grammar's `print` rule: `"print" value ";"`."""

    # Value that follows the `print` keyword.
    value: Value


class ToAst(Transformer):
    """Hand-written callbacks for the parts of the tree that are not AST classes.

    Handles the STRING and DEC_NUMBER terminals and the `start` rule.
    """

    def STRING(self, s):
        """Return the token text without its first and last character (the quotes)."""
        unquoted = s[1:-1]
        return unquoted

    def DEC_NUMBER(self, n):
        """Convert the token text to an `int`."""
        number = int(n)
        return number

    @v_args(inline=True)
    def start(self, x):
        """Unwrap `start`: hand back its single child unchanged."""
        return x

#
# Define Parser
#

# Grammar of the toy language, parsed with the LALR algorithm.
# A program (`start`) is one code block; a code block is one or more
# statements (`if`, `set_var` or `print`).  NAME, STRING and DEC_NUMBER are
# imported from the `python` grammar; whitespace is ignored.
parser = Lark("""
start: code_block

code_block: statement+

?statement: if | set_var | print

if: "if" value "{" code_block "}"
set_var: NAME "=" value ";"
print: "print" value ";"

value: name | STRING | DEC_NUMBER
name: NAME

%import python (NAME, STRING, DEC_NUMBER)
%import common.WS
%ignore WS
""",
parser="lalr",
)

# Start from the hand-written ToAst callbacks and let ast_utils add callbacks
# for the AST classes defined in this module.
transformer = ast_utils.create_transformer(this_module, ToAst())

def parse(text):
    """Parse `text` with the module-level parser and return the transformed result."""
    tree = parser.parse(text)
    return transformer.transform(tree)

#
# Test
#

if __name__ == '__main__':
    # Demo: parse a small program and print the resulting AST.
    sample_program = """
a = 1;
if a {
print "a is 1";
a = 2;
}
"""
    print(parse(sample_program))

+ 17
- 0
lark-stubs/ast_utils.pyi Прегледај датотеку

@@ -0,0 +1,17 @@
import types
from typing import Optional

from .visitors import Transformer

# Stub for the marker base class of AST nodes.
class Ast(object):
    ...

# Stub for the marker mix-in that requests children as a single list.
class AsList(object):
    ...


# Stub: takes the module holding the Ast subclasses and an optional initial
# transformer; returns a Transformer.
def create_transformer(
    ast_module: types.ModuleType,
    transformer: Optional[Transformer] = None,
) -> Transformer: ...

+ 6
- 1
lark-stubs/grammar.pyi Прегледај датотеку

@@ -6,4 +6,9 @@ class RuleOptions:
expand1: bool
priority: int
template_source: Optional[str]
empty_indices: Tuple[bool, ...]
empty_indices: Tuple[bool, ...]


# Stub for a grammar symbol; only the attribute types are declared here.
class Symbol:
    name: str
    is_term: bool

+ 2
- 8
lark-stubs/indenter.pyi Прегледај датотеку

@@ -3,9 +3,10 @@
from typing import Tuple, List, Iterator, Optional
from abc import ABC, abstractmethod
from .lexer import Token
from .lark import PostLex


class Indenter(ABC):
class Indenter(PostLex, ABC):
paren_level: Optional[int]
indent_level: Optional[List[int]]

@@ -15,13 +16,6 @@ class Indenter(ABC):
def handle_NL(self, token: Token) -> Iterator[Token]:
...

def process(self, stream: Iterator[Token]) -> Iterator[Token]:
...

@property
def always_accept(self) -> Tuple[str]:
...

@property
@abstractmethod
def NL_type(self) -> str:


+ 1
- 1
lark-stubs/lark.pyi Прегледај датотеку

@@ -65,7 +65,7 @@ class Lark:
grammar: Union[Grammar, str, IO[str]],
*,
start: Union[None, str, List[str]] = "start",
parser: Literal["earley", "lalr", "cyk"] = "auto",
parser: Literal["earley", "lalr", "cyk", "auto"] = "auto",
lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto",
transformer: Optional[Transformer] = None,
postlex: Optional[PostLex] = None,


+ 3
- 3
lark-stubs/load_grammar.pyi Прегледај датотеку

@@ -1,8 +1,8 @@
from typing import List, Tuple, Union, Callable, Dict, Optional

from lark import Tree
from lark.grammar import RuleOptions
from lark.exceptions import UnexpectedInput
from .tree import Tree
from .grammar import RuleOptions
from .exceptions import UnexpectedInput


class Grammar:


+ 1
- 2
lark-stubs/reconstruct.pyi Прегледај датотеку

@@ -11,8 +11,7 @@ from .lexer import TerminalDef

class WriteTokensTransformer(Transformer_InPlace):

def __init__(self, tokens: Dict[str, TerminalDef], Dict[str, Callable[[Symbol], str]] = ...):
...
def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ...


class MatchTree(Tree):


+ 51
- 0
lark/ast_utils.py Прегледај датотеку

@@ -0,0 +1,51 @@
"""
Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree
"""

import inspect, re

from lark import Transformer, v_args

class Ast(object):
    """Marker base class for AST node types.

    `create_transformer()` collects the subclasses of this class and turns
    them into transformer callbacks.
    """

class AsList(object):
    """Marker mix-in for AST node types.

    A subclass is instantiated with the parse results as one single list
    argument, instead of receiving them as separate positional arguments.
    """

def camel_to_snake(name):
    """Convert a CamelCase name to snake_case, e.g. "CodeBlock" -> "code_block".

    An underscore is inserted before every ASCII capital letter except one at
    the very start of the string, then the whole result is lower-cased.
    """
    # Zero-width match: a position followed by A-Z that is not the string start.
    with_underscores = re.sub(r'(?<!^)(?=[A-Z])', '_', name)
    return with_underscores.lower()

def _call(func, _data, children, _meta):
    """Call `func` with `children` spread as positional arguments.

    `_data` and `_meta` are accepted but ignored.
    """
    positional = tuple(children)
    return func(*positional)

inline = v_args(wrapper=_call)

def create_transformer(ast_module, transformer=None):
    """Build a Lark transformer whose callbacks construct the AST classes of a module.

    Every public class in `ast_module` that subclasses `Ast` is attached to the
    transformer under the snake_case form of its name
    (example: "CodeBlock" -> "code_block").

    Parameters:
        ast_module - A Python module containing all the subclasses of `ast_utils.Ast`.
                     Classes starting with an underscore (`_`) will be skipped.
        transformer (Optional[Transformer]) - An initial transformer. Its attributes may be overwritten.

    Returns:
        The transformer that was passed in (or a fresh `Transformer`), with the
        callbacks set on it.
    """
    t = transformer or Transformer()

    for name, obj in inspect.getmembers(ast_module):
        # Only public classes are candidates.
        if name.startswith('_') or not inspect.isclass(obj):
            continue
        if not issubclass(obj, Ast):
            continue

        if issubclass(obj, AsList):
            # AsList classes take the children as one list: use the class as-is.
            callback = obj
        else:
            # Otherwise wrap the class so children are passed as separate
            # arguments, and bind the wrapper to the transformer instance.
            callback = inline(obj).__get__(t)

        setattr(t, camel_to_snake(name), callback)

    return t

+ 2
- 1
lark/indenter.py Прегледај датотеку

@@ -1,13 +1,14 @@
"Provides Indentation services for languages with indentation similar to Python"

from .exceptions import LarkError
from .lark import PostLex
from .lexer import Token

###{standalone
class DedentError(LarkError):
pass

class Indenter:
class Indenter(PostLex):
def __init__(self):
self.paren_level = None
self.indent_level = None


+ 19
- 2
lark/lark.py Прегледај датотеку

@@ -1,4 +1,6 @@
from __future__ import absolute_import


from lark.exceptions import ConfigurationError, assert_config

import sys, os, pickle, hashlib
@@ -6,7 +8,7 @@ from io import open
import tempfile
from warnings import warn

from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod
from .load_grammar import load_grammar, FromPackageLoader, Grammar
from .tree import Tree
from .common import LexerConf, ParserConf
@@ -191,6 +193,14 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')


class PostLex(ABC):
    """Abstract base class for post-lexer objects.

    Subclasses must implement `process`; the default body returns the stream
    it was given, unchanged.
    """

    # Empty by default.  NOTE(review): presumably terminal names the lexer
    # should always accept -- confirm against the lexer code.
    always_accept = ()

    @abstractmethod
    def process(self, stream):
        """Return the stream to use in place of `stream`."""
        return stream


class Lark(Serialize):
"""Main interface for the library.

@@ -288,7 +298,12 @@ class Lark(Serialize):
if self.options.parser == 'lalr':
self.options.lexer = 'contextual'
elif self.options.parser == 'earley':
self.options.lexer = 'dynamic'
if self.options.postlex is not None:
logger.info("postlex can't be used with the dynamic lexer, so we use standard instead. "
"Consider using lalr with contextual instead of earley")
self.options.lexer = 'standard'
else:
self.options.lexer = 'dynamic'
elif self.options.parser == 'cyk':
self.options.lexer = 'standard'
else:
@@ -298,6 +313,8 @@ class Lark(Serialize):
assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance
else:
assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete'))
if self.options.postlex is not None and 'dynamic' in lexer:
raise ConfigurationError("Can't use postlex with a dynamic lexer. Use standard or contextual instead")

if self.options.ambiguity == 'auto':
if self.options.parser == 'earley':


+ 1
- 0
lark/parser_frontends.py Прегледај датотеку

@@ -72,6 +72,7 @@ class ParsingFrontend(Serialize):
lexer_type = lexer_conf.lexer_type
self.skip_lexer = False
if lexer_type in ('dynamic', 'dynamic_complete'):
assert lexer_conf.postlex is None
self.skip_lexer = True
return



+ 1
- 1
lark/tools/standalone.py Прегледај датотеку

@@ -56,7 +56,6 @@ EXTRACT_STANDALONE_FILES = [
'utils.py',
'tree.py',
'visitors.py',
'indenter.py',
'grammar.py',
'lexer.py',
'common.py',
@@ -65,6 +64,7 @@ EXTRACT_STANDALONE_FILES = [
'parsers/lalr_analysis.py',
'parser_frontends.py',
'lark.py',
'indenter.py',
]

def extract_sections(lines):


+ 9
- 0
lark/utils.py Прегледај датотеку

@@ -12,6 +12,15 @@ logger.addHandler(logging.StreamHandler())
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)

# Expose `ABC` and `abstractmethod` on both Python 2 and Python 3.
if sys.version_info[0] > 2:
    from abc import ABC, abstractmethod
else:
    from abc import ABCMeta, abstractmethod

    class ABC(object):  # Provide Python27 compatibility
        """Python 2 stand-in for `abc.ABC`."""
        __slots__ = ()
        # Python 2 only looks at the exact attribute name `__metaclass__`.
        # The previous spelling (`__metclass__`) was silently ignored, so
        # ABCMeta was never applied and @abstractmethod was not enforced.
        __metaclass__ = ABCMeta


Py36 = (sys.version_info[:2] >= (3, 6))

NO_VALUE = object()


Loading…
Откажи
Сачувај