diff --git a/README.md b/README.md index 2d69420..d7b88e2 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ Lark is great at handling ambiguity. Here is the result of parsing the phrase "f ![fruitflies.png](examples/fruitflies.png) -[Read the code here](https://github.com/lark-parser/lark/tree/master/examples/fruitflies.py), and [more examples here](https://github.com/lark-parser/lark/tree/master/examples) +[Read the code here](https://github.com/lark-parser/lark/tree/master/examples/fruitflies.py), and see [more examples here](https://lark-parser.readthedocs.io/en/latest/examples/index.html). ## List of main features diff --git a/examples/advanced/conf_earley.py b/examples/advanced/conf_earley.py index b21c1ac..348ce3b 100644 --- a/examples/advanced/conf_earley.py +++ b/examples/advanced/conf_earley.py @@ -20,9 +20,10 @@ parser = Lark(r""" start: _NL? section+ section: "[" NAME "]" _NL item+ item: NAME "=" VALUE? _NL + + NAME: /\w/+ VALUE: /./+ - %import common.CNAME -> NAME %import common.NEWLINE -> _NL %import common.WS_INLINE %ignore WS_INLINE diff --git a/examples/advanced/conf_lalr.py b/examples/advanced/conf_lalr.py index 5ffd1d2..b0e164c 100644 --- a/examples/advanced/conf_lalr.py +++ b/examples/advanced/conf_lalr.py @@ -2,16 +2,18 @@ LALR’s contextual lexer ======================= -Demonstrates the power of LALR’s contextual lexer on a toy configuration language. +This example demonstrates the power of LALR's contextual lexer, +by parsing a toy configuration language. -The tokens NAME and VALUE match the same input. A standard lexer would arbitrarily -choose one over the other, which would lead to a (confusing) parse error. +The terminals `NAME` and `VALUE` overlap. They can match the same input. +A standard lexer would arbitrarily choose one over the other, based on priority, +which would lead to a (confusing) parse error. However, due to the unambiguous structure of the grammar, Lark's LALR(1) algorithm knows which one of them to expect at each point during the parse. The lexer then only matches the tokens that the parser expects. The result is a correct parse, something that is impossible with a regular lexer. -Another approach is to discard a lexer altogether and use the Earley algorithm. +Another approach is to use the Earley algorithm. It will handle more cases than the contextual lexer, but at the cost of performance. See examples/conf_earley.py for an example of that approach. """ @@ -21,9 +23,10 @@ parser = Lark(r""" start: _NL? section+ section: "[" NAME "]" _NL item+ item: NAME "=" VALUE? _NL + + NAME: /\w/+ VALUE: /./+ - %import common.CNAME -> NAME %import common.NEWLINE -> _NL %import common.WS_INLINE %ignore WS_INLINE diff --git a/examples/advanced/python_parser.py b/examples/advanced/python_parser.py index 7fbff2e..a37bade 100644 --- a/examples/advanced/python_parser.py +++ b/examples/advanced/python_parser.py @@ -57,12 +57,16 @@ def test_python_lib(): start = time.time() files = glob.glob(path+'/*.py') + total_kb = 0 for f in files: - print( f ) - chosen_parser.parse(_read(os.path.join(path, f)) + '\n') + r = _read(os.path.join(path, f)) + kb = len(r) / 1024 + print( '%s -\t%.1f kb' % (f, kb)) + chosen_parser.parse(r + '\n') + total_kb += kb end = time.time() - print( "test_python_lib (%d files), time: %s secs"%(len(files), end-start) ) + print( "test_python_lib (%d files, %.1f kb), time: %.2f secs"%(len(files), total_kb, end-start) ) def test_earley_equals_lalr(): path = _get_lib_path()