Browse Source

propagate_positions & maybe_placeholders are now true by default, updated docs, tests & examples accordingly (Issue #449, #451)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.0
Erez Shinan 5 years ago
parent
commit
84f08a452f
8 changed files with 24 additions and 27 deletions
  1. +13
    -11
      docs/classes.md
  2. +2
    -2
      docs/grammar.md
  3. +1
    -1
      examples/custom_lexer.py
  4. +2
    -8
      examples/reconstruct_json.py
  5. +2
    -2
      lark/lark.py
  6. +1
    -0
      lark/reconstruct.py
  7. +1
    -1
      tests/test_parser.py
  8. +2
    -2
      tests/test_reconstructor.py

+ 13
- 11
docs/classes.md View File

@@ -12,29 +12,31 @@ The Lark class is the main interface for the library. It's mostly a thin wrapper

The Lark class accepts a grammar string or file object, and keyword options:

* start - The symbol in the grammar that begins the parse (Default: `"start"`)
* **start** - A list of the rules in the grammar that begin the parse (Default: `["start"]`)

* parser - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`)
* **parser** - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`)

* lexer - Overrides default lexer.
* **lexer** - Overrides default lexer, depending on parser.

* transformer - Applies the transformer instead of building a parse tree (only allowed with parser="lalr")
* **transformer** - Applies the provided transformer instead of building a parse tree (only allowed with parser="lalr")

* postlex - Lexer post-processing (Default: None. only works when lexer is "standard" or "contextual")
* **postlex** - Lexer post-processing (Default: `None`. Only works when lexer is "standard" or "contextual")

* ambiguity (only relevant for earley and cyk)
* **ambiguity** (only relevant for earley and cyk)

* "explicit" - Return all derivations inside an "_ambig" data node.

* "resolve" - Let the parser choose the best derivation (greedy for tokens, non-greedy for rules; default)

* debug - Display warnings (such as Shift-Reduce warnings for LALR)
* **debug** - Display warnings (such as Shift-Reduce warnings for LALR)

* keep_all_tokens - Don't throw away any terminals from the tree (Default=False)
* **keep_all_tokens** - Don't throw away any terminals from the tree (Default=`False`)

* propagate_positions - Propagate line/column count to tree nodes (default=False)
* **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`True`)

* lexer_callbacks - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information.
* **maybe_placeholders** - The `[]` operator returns `None` when not matched. Setting this to `False` makes it behave like the `?` operator, and return no value at all, which may be a little faster (default=`True`)

* **lexer_callbacks** - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information.

#### parse(self, text)

@@ -50,7 +52,7 @@ The main tree class

* `data` - The name of the rule or alias
* `children` - List of matched sub-rules and terminals
* `meta` - Line & Column numbers, if using `propagate_positions`
* `meta` - Line & Column numbers (unless `propagate_positions` is disabled)

#### \_\_init\_\_(self, data, children)



+ 2
- 2
docs/grammar.md View File

@@ -147,7 +147,7 @@ Each item is one of:
* `TERMINAL`
* `"string literal"` or `/regexp literal/`
* `(item item ..)` - Group items
* `[item item ..]` - Maybe. Same as `(item item ..)?`
* `[item item ..]` - Maybe. Same as `(item item ..)?`, but generates `None` if there is no match
* `item?` - Zero or one instances of item ("maybe")
* `item*` - Zero or more instances of item
* `item+` - One or more instances of item
@@ -157,7 +157,7 @@ Each item is one of:
**Examples:**
```perl
hello_world: "hello" "world"
mul: [mul "*"] number //# Left-recursion is allowed!
mul: (mul "*")? number //# Left-recursion is allowed and encouraged!
expr: expr operator expr
| value //# Multi-line, belongs to expr



+ 1
- 1
examples/custom_lexer.py View File

@@ -29,7 +29,7 @@ parser = Lark("""
data_item: STR INT*

%declare STR INT
""", parser='lalr', lexer=TypeLexer)
""", parser='lalr', lexer=TypeLexer, propagate_positions=False)


class ParseToDict(Transformer):


+ 2
- 8
examples/reconstruct_json.py View File

@@ -25,15 +25,9 @@ test_json = '''

def test_earley():

json_parser = Lark(json_grammar)
json_parser = Lark(json_grammar, maybe_placeholders=False)
tree = json_parser.parse(test_json)

# print ('@@', tree.pretty())
# for x in tree.find_data('true'):
# x.data = 'false'
# # x.children[0].value = '"HAHA"'


new_json = Reconstructor(json_parser).reconstruct(tree)
print (new_json)
print (json.loads(new_json) == json.loads(test_json))
@@ -41,7 +35,7 @@ def test_earley():

def test_lalr():

json_parser = Lark(json_grammar, parser='lalr')
json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
tree = json_parser.parse(test_json)

new_json = Reconstructor(json_parser).reconstruct(tree)


+ 2
- 2
lark/lark.py View File

@@ -66,9 +66,9 @@ class LarkOptions(Serialize):
'profile': False,
'priority': 'auto',
'ambiguity': 'auto',
'propagate_positions': False,
'propagate_positions': True,
'lexer_callbacks': {},
'maybe_placeholders': False,
'maybe_placeholders': True,
'edit_terminals': None,
}



+ 1
- 0
lark/reconstruct.py View File

@@ -69,6 +69,7 @@ class MakeMatchTree:
class Reconstructor:
def __init__(self, parser):
# XXX TODO calling compile twice returns different results!
assert parser.options.maybe_placeholders == False
tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)

self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})


+ 1
- 1
tests/test_parser.py View File

@@ -963,7 +963,7 @@ def _make_parser_test(LEXER, PARSER):

@unittest.skipIf(PARSER == 'cyk', "No empty rules")
def test_twice_empty(self):
g = """!start: [["A"]]
g = """!start: ("A"?)?
"""
l = _Lark(g)
tree = l.parse('A')


+ 2
- 2
tests/test_reconstructor.py View File

@@ -16,7 +16,7 @@ def _remove_ws(s):
class TestReconstructor(TestCase):

def assert_reconstruct(self, grammar, code):
parser = Lark(grammar, parser='lalr')
parser = Lark(grammar, parser='lalr', maybe_placeholders=False)
tree = parser.parse(code)
new = Reconstructor(parser).reconstruct(tree)
self.assertEqual(_remove_ws(code), _remove_ws(new))
@@ -105,7 +105,7 @@ class TestReconstructor(TestCase):
%ignore WS
"""

json_parser = Lark(json_grammar, parser='lalr')
json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
tree = json_parser.parse(test_json)

new_json = Reconstructor(json_parser).reconstruct(tree)


Loading…
Cancel
Save