Browse Source

Merge branch 'master' into bytes-support

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
MegaIng 4 years ago
committed by GitHub
parent
commit
01094af15b
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 441 additions and 204 deletions
  1. +1
    -1
      docs/recipes.md
  2. +23
    -0
      docs/visitors.md
  3. +362
    -181
      examples/standalone/json_parser.py
  4. +1
    -0
      lark-stubs/lark.pyi
  5. +1
    -1
      lark-stubs/reconstruct.pyi
  6. +20
    -16
      lark/load_grammar.py
  7. +8
    -3
      lark/parser_frontends.py
  8. +8
    -1
      lark/reconstruct.py
  9. +1
    -1
      lark/visitors.py
  10. +16
    -0
      tests/test_cache.py

+ 1
- 1
docs/recipes.md View File

@@ -139,7 +139,7 @@ If your tree nodes aren't unique (if there is a shared Tree instance), the asser

```python
class Parent(Visitor):
def visit(self, tree):
def __default__(self, tree):
for subtree in tree.children:
if isinstance(subtree, Tree):
assert not hasattr(subtree, 'parent')


+ 23
- 0
docs/visitors.md View File

@@ -28,6 +28,29 @@ There are two classes that implement the visitor interface:

* Visitor_Recursive - Visit every node using recursion. Slightly faster.

### Interpreter

The interpreter walks the tree starting at the root (top-down).

For each node, it calls the method corresponding to its `data` attribute.

Unlike Transformer and Visitor, the Interpreter doesn't automatically visit its sub-branches.
The user has to explicitly call `visit`, `visit_children`, or use the `@visit_children_decor` decorator.
This allows the user to implement branching and loops.

**Example:**
```python
class IncreaseSomeOfTheNumbers(Interpreter):
def number(self, tree):
tree.children[0] += 1

def skip(self, tree):
# skip this subtree. don't change any number node inside it.
pass

IncreaseSomeOfTheNumbers().visit(parse_tree)
```

### Transformers

Transformers visit each node of the tree, and run the appropriate method on it according to the node's data.


+ 362
- 181
examples/standalone/json_parser.py
File diff suppressed because it is too large
View File


+ 1
- 0
lark-stubs/lark.pyi View File

@@ -57,6 +57,7 @@ class Lark:
propagate_positions: bool = False,
maybe_placeholders: bool = False,
lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
cache: Union[bool, str] = False,
g_regex_flags: int = ...,
use_bytes: bool = False,
):


+ 1
- 1
lark-stubs/reconstruct.pyi View File

@@ -30,7 +30,7 @@ class MakeMatchTree:

class Reconstructor:

def __init__(self, parser: Lark):
def __init__(self, parser: Lark, term_subs: Dict[str, str] = ...):
...

def reconstruct(self, tree: Tree) -> str:


+ 20
- 16
lark/load_grammar.py View File

@@ -432,6 +432,20 @@ class PrepareLiterals(Transformer_InPlace):
return ST('pattern', [PatternRE(regexp)])


def _make_joined_pattern(regexp, flags_set):
# Python 3.6 introduced a new syntax for flags that allows us to restrict the scope
# of flags to a specific regexp group. We are already using it in `lexer.Pattern._get_flags`.
# However, for prior Python versions, we still need to use global flags, so we have to make sure
# that there are no flag collisions when we merge several terminals.
flags = ()
if not Py36:
if len(flags_set) > 1:
raise GrammarError("Lark doesn't support joining terminals with conflicting flags in python <3.6!")
elif len(flags_set) == 1:
flags ,= flags_set

return PatternRE(regexp, flags)

class TerminalTreeToPattern(Transformer):
def pattern(self, ps):
p ,= ps
@@ -441,26 +455,16 @@ class TerminalTreeToPattern(Transformer):
assert items
if len(items) == 1:
return items[0]
# In Python 3.6, a new syntax for flags was introduced. We are already using it in `lexer.Pattern._get_flags`
# It allows us to activate flags just in a specific part, like in this case for a specific terminal.
# The `to_regexp` method already does this, so we don't have to continue to pass around the flags.
if not Py36:
if len({i.flags for i in items}) > 1:
raise GrammarError("Lark doesn't support joining terminals with conflicting flags in python <3.6!")
return PatternRE(''.join(i.to_regexp() for i in items), items[0].flags if items else ())
else:
return PatternRE(''.join(i.to_regexp() for i in items), ())

pattern = ''.join(i.to_regexp() for i in items)
return _make_joined_pattern(pattern, {i.flags for i in items})

def expansions(self, exps):
if len(exps) == 1:
return exps[0]
# See `expansion`
if not Py36:
if len({i.flags for i in exps}) > 1:
raise GrammarError("Lark doesn't support joining terminals with conflicting flags!")
return PatternRE('(?:%s)' % ('|'.join(i.to_regexp() for i in exps)), exps[0].flags)
else:
return PatternRE('(?:%s)' % ('|'.join(i.to_regexp() for i in exps)), ())

pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
return _make_joined_pattern(pattern, {i.flags for i in exps})

def expr(self, args):
inner, op = args[:2]


+ 8
- 3
lark/parser_frontends.py View File

@@ -1,5 +1,3 @@
from functools import partial

from .utils import get_regexp_width, Serialize
from .parsers.grammar_analysis import GrammarAnalyzer
from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token
@@ -20,7 +18,14 @@ def get_frontend(parser, lexer):
elif lexer == 'contextual':
return LALR_ContextualLexer
elif issubclass(lexer, Lexer):
return partial(LALR_CustomLexer, lexer)
class LALR_CustomLexerWrapper(LALR_CustomLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
super(LALR_CustomLexerWrapper, self).__init__(
lexer, lexer_conf, parser_conf, options=options)
def init_lexer(self):
self.lexer = lexer(self.lexer_conf)

return LALR_CustomLexerWrapper
else:
raise ValueError('Unknown lexer: %s' % lexer)
elif parser=='earley':


+ 8
- 1
lark/reconstruct.py View File

@@ -87,9 +87,16 @@ def best_from_group(seq, group_key, cmp_key):
return list(d.values())

class Reconstructor:
def __init__(self, parser, term_subs={}):
"""
A Reconstructor that will, given a full parse Tree, generate source code.
Pass `term_subs`, a dictionary of [Terminal name as str] to [output text as str]
to say what discarded Terminals should be written as.
"""
def __init__(self, parser, term_subs=None):
# XXX TODO calling compile twice returns different results!
assert parser.options.maybe_placeholders == False
if term_subs is None:
term_subs = {}
tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)

self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs)


+ 1
- 1
lark/visitors.py View File

@@ -267,7 +267,7 @@ class Interpreter(_Decoratable):
Calls its methods (provided by user via inheritance) according to tree.data

Unlike Transformer and Visitor, the Interpreter doesn't automatically visit its sub-branches.
The user has to explicitly call visit_children, or use the @visit_children_decor
The user has to explicitly call visit, visit_children, or use the @visit_children_decor
"""

def visit(self, tree):


+ 16
- 0
tests/test_cache.py View File

@@ -4,6 +4,7 @@ import sys
from unittest import TestCase, main

from lark import Lark, Tree
from lark.lexer import Lexer, Token
import lark.lark as lark_module

try:
@@ -38,6 +39,15 @@ class MockFS:
return name in self.files


class CustomLexer(Lexer):
def __init__(self, lexer_conf):
pass

def lex(self, data):
for obj in data:
yield Token('A', obj)


class TestCache(TestCase):
def setUp(self):
pass
@@ -70,6 +80,12 @@ class TestCache(TestCase):
parser = Lark(g, parser='lalr', cache=True)
assert parser.parse('a') == Tree('start', [])

# Test with custom lexer
mock_fs.files = {}
parser = Lark(g, parser='lalr', lexer=CustomLexer, cache=True)
parser = Lark(g, parser='lalr', lexer=CustomLexer, cache=True)
assert len(mock_fs.files) == 1
assert parser.parse('a') == Tree('start', [])
finally:
lark_module.FS = fs



Loading…
Cancel
Save