diff --git a/.gitignore b/.gitignore index 62b900c..b30399e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ tags .mypy_cache /dist /build -docs/_build \ No newline at end of file +docs/_build +docs/examples \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 887eeb2..5f874e9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -37,6 +37,7 @@ extensions = [ 'sphinx.ext.napoleon', 'sphinx.ext.coverage', 'recommonmark', + 'sphinx_gallery.gen_gallery' ] # Add any paths that contain templates here, relative to this directory. @@ -175,5 +176,9 @@ texinfo_documents = [ 'Miscellaneous'), ] +# -- Sphinx gallery config ------------------------------------------- - +sphinx_gallery_conf = { + 'examples_dirs': ['../examples'], + 'gallery_dirs': ['examples'], +} \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index ba2c241..c3163a1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -24,6 +24,7 @@ Welcome to Lark's documentation! how_to_use how_to_develop recipes + examples/index .. toctree:: diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..d75b0aa --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +# https://docs.readthedocs.io/en/stable/guides/specifying-dependencies.html#specifying-a-requirements-file +sphinx-gallery \ No newline at end of file diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index 8053ebd..0000000 --- a/examples/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Examples for Lark - -#### How to run the examples - -After cloning the repo, open the terminal into the root directory of the project, and run the following: - -```bash -[lark]$ python -m examples. -``` - -For example, the following will parse all the Python files in the standard library of your local installation: - -```bash -[lark]$ python -m examples.python_parser -``` - -### Beginners - -- [calc.py](calc.py) - A simple example of a REPL calculator -- [json\_parser.py](json_parser.py) - A simple JSON parser (comes with a tutorial, see docs) -- [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) -- [fruitflies.py](fruitflies.py) - A demonstration of ambiguity -- [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. -- [lark\_grammar.py](lark_grammar.py) + [lark.lark](lark.lark) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer) - -### Advanced - -- [error\_reporting\_lalr.py](error_reporting_lalr.py) - A demonstration of example-driven error reporting with the LALR parser -- [python\_parser.py](python_parser.py) - A fully-working Python 2 & 3 parser (but not production ready yet!) 
-- [python\_bytecode.py](python_bytecode.py) - A toy example showing how to compile Python directly to bytecode -- [conf\_lalr.py](conf_lalr.py) - Demonstrates the power of LALR's contextual lexer on a toy configuration language -- [conf\_earley.py](conf_earley.py) - Demonstrates the power of Earley's dynamic lexer on a toy configuration language -- [custom\_lexer.py](custom_lexer.py) - Demonstrates using a custom lexer to parse a non-textual stream of data -- [reconstruct\_json.py](reconstruct_json.py) - Demonstrates the experimental text-reconstruction feature diff --git a/examples/README.rst b/examples/README.rst new file mode 100644 index 0000000..f2b0125 --- /dev/null +++ b/examples/README.rst @@ -0,0 +1,21 @@ +Examples for Lark +================= + +**How to run the examples**: + +After cloning the repo, open the terminal into the root directory of the +project, and run the following: + +.. code:: bash + + [lark]$ python -m examples. + +For example, the following will parse all the Python files in the +standard library of your local installation: + +.. code:: bash + + [lark]$ python -m examples.python_parser + +Beginner Examples +~~~~~~~~~~~~~~~~~ diff --git a/examples/advanced/README.rst b/examples/advanced/README.rst new file mode 100644 index 0000000..9605486 --- /dev/null +++ b/examples/advanced/README.rst @@ -0,0 +1,2 @@ +Advanced Examples +~~~~~~~~~~~~~~~~~ diff --git a/examples/advanced/conf_earley.py b/examples/advanced/conf_earley.py new file mode 100644 index 0000000..b21c1ac --- /dev/null +++ b/examples/advanced/conf_earley.py @@ -0,0 +1,44 @@ +""" +Earley’s dynamic lexer +====================== + +Demonstrates the power of Earley’s dynamic lexer on a toy configuration language + +Using a lexer for configuration files is tricky, because values don't +have to be surrounded by delimiters. Using a standard lexer for this just won't work. + +In this example we use a dynamic lexer and let the Earley parser resolve the ambiguity. + +Another approach is to use the contextual lexer with LALR. It is less powerful than Earley, +but it can handle some ambiguity when lexing and it's much faster. +See examples/conf_lalr.py for an example of that approach. + +""" +from lark import Lark + +parser = Lark(r""" + start: _NL? section+ + section: "[" NAME "]" _NL item+ + item: NAME "=" VALUE? _NL + VALUE: /./+ + + %import common.CNAME -> NAME + %import common.NEWLINE -> _NL + %import common.WS_INLINE + %ignore WS_INLINE + """, parser="earley") + +def test(): + sample_conf = """ +[bla] + +a=Hello +this="that",4 +empty= +""" + + r = parser.parse(sample_conf) + print (r.pretty()) + +if __name__ == '__main__': + test() diff --git a/examples/advanced/conf_lalr.py b/examples/advanced/conf_lalr.py new file mode 100644 index 0000000..5ffd1d2 --- /dev/null +++ b/examples/advanced/conf_lalr.py @@ -0,0 +1,40 @@ +""" +LALR’s contextual lexer +======================= + +Demonstrates the power of LALR’s contextual lexer on a toy configuration language. + +The tokens NAME and VALUE match the same input. A standard lexer would arbitrarily +choose one over the other, which would lead to a (confusing) parse error. +However, due to the unambiguous structure of the grammar, Lark's LALR(1) algorithm knows +which one of them to expect at each point during the parse. +The lexer then only matches the tokens that the parser expects. +The result is a correct parse, something that is impossible with a regular lexer. + +Another approach is to discard a lexer altogether and use the Earley algorithm. 
+It will handle more cases than the contextual lexer, but at the cost of performance. +See examples/conf_earley.py for an example of that approach. +""" +from lark import Lark + +parser = Lark(r""" + start: _NL? section+ + section: "[" NAME "]" _NL item+ + item: NAME "=" VALUE? _NL + VALUE: /./+ + + %import common.CNAME -> NAME + %import common.NEWLINE -> _NL + %import common.WS_INLINE + %ignore WS_INLINE + """, parser="lalr") + + +sample_conf = """ +[bla] +a=Hello +this="that",4 +empty= +""" + +print(parser.parse(sample_conf).pretty()) diff --git a/examples/custom_lexer.py b/examples/advanced/custom_lexer.py similarity index 77% rename from examples/custom_lexer.py rename to examples/advanced/custom_lexer.py index 786bf4f..05a5eb5 100644 --- a/examples/custom_lexer.py +++ b/examples/advanced/custom_lexer.py @@ -1,13 +1,14 @@ -# -# This example demonstrates using Lark with a custom lexer. -# -# You can use a custom lexer to tokenize text when the lexers offered by Lark -# are too slow, or not flexible enough. -# -# You can also use it (as shown in this example) to tokenize streams of objects. -# +""" +Custom lexer +============ + +Demonstrates using a custom lexer to parse a non-textual stream of data. +You can use a custom lexer to tokenize text when the lexers offered by Lark +are too slow, or not flexible enough. + +You can also use it (as shown in this example) to tokenize streams of objects. +""" from lark import Lark, Transformer, v_args from lark.lexer import Lexer, Token diff --git a/examples/error_puppet.py b/examples/advanced/error_puppet.py similarity index 60% rename from examples/error_puppet.py rename to examples/advanced/error_puppet.py index 87d69e1..d3fca9d 100644 --- a/examples/error_puppet.py +++ b/examples/advanced/error_puppet.py @@ -1,11 +1,14 @@ -# -# This example demonstrates error handling using a parsing puppet in LALR -# -# When the parser encounters an UnexpectedToken exception, it creates a -# parsing puppet with the current parse-state, and lets you control how -# to proceed step-by-step. When you've achieved the correct parse-state, -# you can resume the run by returning True. -# +""" +Error handling with parsing puppet +================================== + +This example demonstrates error handling using a parsing puppet in LALR + +When the parser encounters an UnexpectedToken exception, it creates a +parsing puppet with the current parse-state, and lets you control how +to proceed step-by-step. When you've achieved the correct parse-state, +you can resume the run by returning True.
+""" from lark import UnexpectedToken, Token diff --git a/examples/error_reporting_lalr.py b/examples/advanced/error_reporting_lalr.py similarity index 93% rename from examples/error_reporting_lalr.py rename to examples/advanced/error_reporting_lalr.py index f038eda..deeeb5f 100644 --- a/examples/error_reporting_lalr.py +++ b/examples/advanced/error_reporting_lalr.py @@ -1,7 +1,10 @@ -# -# This demonstrates example-driven error reporting with the LALR parser -# +""" +Example Driver Error Reporting +============================== +A demonstration of example-driven error reporting with the LALR parser + +""" from lark import Lark, UnexpectedInput from .json_parser import json_grammar # Using the grammar from the json_parser example diff --git a/examples/python2.lark b/examples/advanced/python2.lark similarity index 100% rename from examples/python2.lark rename to examples/advanced/python2.lark diff --git a/examples/python3.lark b/examples/advanced/python3.lark similarity index 100% rename from examples/python3.lark rename to examples/advanced/python3.lark diff --git a/examples/python_bytecode.py b/examples/advanced/python_bytecode.py similarity index 85% rename from examples/python_bytecode.py rename to examples/advanced/python_bytecode.py index cbb8ccd..6165e82 100644 --- a/examples/python_bytecode.py +++ b/examples/advanced/python_bytecode.py @@ -1,12 +1,16 @@ -# -# This is a toy example that compiles Python directly to bytecode, without generating an AST. -# It currently only works for very very simple Python code. -# -# It requires the 'bytecode' library. You can get it using -# -# $ pip install bytecode -# +""" +Compile Python to Bytecode +========================== + +A toy example that compiles Python directly to bytecode, without generating an AST. +It currently only works for very very simple Python code. +It requires the 'bytecode' library. You can get it using +:: + + $ pip install bytecode + +""" from lark import Lark, Transformer, v_args from lark.indenter import Indenter diff --git a/examples/python_parser.py b/examples/advanced/python_parser.py similarity index 91% rename from examples/python_parser.py rename to examples/advanced/python_parser.py index 82bfcb9..5e4d664 100644 --- a/examples/python_parser.py +++ b/examples/advanced/python_parser.py @@ -1,7 +1,11 @@ -# -# This example demonstrates usage of the included Python grammars -# +""" +Real Python Parser +================== +A fully-working Python 2 & 3 parser (but not production ready yet!) 
+ +This example demonstrates usage of the included Python grammars +""" import sys import os, os.path from io import open diff --git a/examples/qscintilla_json.py b/examples/advanced/qscintilla_json.py similarity index 97% rename from examples/qscintilla_json.py rename to examples/advanced/qscintilla_json.py index 287981c..b876d4c 100644 --- a/examples/qscintilla_json.py +++ b/examples/advanced/qscintilla_json.py @@ -1,10 +1,14 @@ -# -# This example shows how to write a syntax-highlighted editor with Qt and Lark -# -# Requirements: -# -# PyQt5==5.10.1 -# QScintilla==2.10.4 +""" +Syntax Highlighting +=================== + +This example shows how to write a syntax-highlighted editor with Qt and Lark + +Requirements: + + PyQt5==5.10.1 + QScintilla==2.10.4 +""" import sys import textwrap diff --git a/examples/reconstruct_json.py b/examples/advanced/reconstruct_json.py similarity index 71% rename from examples/reconstruct_json.py rename to examples/advanced/reconstruct_json.py index 59c58b0..4506c3a 100644 --- a/examples/reconstruct_json.py +++ b/examples/advanced/reconstruct_json.py @@ -1,9 +1,13 @@ -# -# This example demonstrates an experimental feature: Text reconstruction -# The Reconstructor takes a parse tree (already filtered from punctuation, of course), -# and reconstructs it into correct text, that can be parsed correctly. -# It can be useful for creating "hooks" to alter data before handing it to other parsers. You can also use it to generate samples from scratch. -# +""" +Reconstruct a JSON +================== + +Demonstrates the experimental text-reconstruction feature + +The Reconstructor takes a parse tree (already filtered from punctuation, of course), +and reconstructs it into correct text, that can be parsed correctly. +It can be useful for creating "hooks" to alter data before handing it to other parsers. You can also use it to generate samples from scratch. +""" import json diff --git a/examples/template_lark.lark b/examples/advanced/template_lark.lark similarity index 100% rename from examples/template_lark.lark rename to examples/advanced/template_lark.lark diff --git a/examples/templates.py b/examples/advanced/templates.py similarity index 73% rename from examples/templates.py rename to examples/advanced/templates.py index 2acc6eb..2f28d90 100644 --- a/examples/templates.py +++ b/examples/advanced/templates.py @@ -1,7 +1,10 @@ -# -# This example shows how to use Lark's templates to achieve cleaner grammars -# +""" +Templates +========= + +This example shows how to use Lark's templates to achieve cleaner grammars +""" from lark import Lark grammar = r""" diff --git a/examples/calc.py b/examples/calc.py index c4470ef..cccee9e 100644 --- a/examples/calc.py +++ b/examples/calc.py @@ -1,7 +1,11 @@ -# -# This example shows how to write a basic calculator with variables. -# +""" +Basic calculator +================ + +A simple example of a REPL calculator. +This example shows how to write a basic calculator with variables. +""" from lark import Lark, Transformer, v_args diff --git a/examples/conf_earley.py b/examples/conf_earley.py deleted file mode 100644 index 13b6c8d..0000000 --- a/examples/conf_earley.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# This example demonstrates parsing using the dynamic-lexer earley frontend -# -# Using a lexer for configuration files is tricky, because values don't -# have to be surrounded by delimiters. Using a standard lexer for this just won't work. -# -# In this example we use a dynamic lexer and let the Earley parser resolve the ambiguity.
-# -# Another approach is to use the contextual lexer with LALR. It is less powerful than Earley, -# but it can handle some ambiguity when lexing and it's much faster. -# See examples/conf_lalr.py for an example of that approach. -# - - -from lark import Lark - -parser = Lark(r""" - start: _NL? section+ - section: "[" NAME "]" _NL item+ - item: NAME "=" VALUE? _NL - VALUE: /./+ - - %import common.CNAME -> NAME - %import common.NEWLINE -> _NL - %import common.WS_INLINE - %ignore WS_INLINE - """, parser="earley") - -def test(): - sample_conf = """ -[bla] - -a=Hello -this="that",4 -empty= -""" - - r = parser.parse(sample_conf) - print (r.pretty()) - -if __name__ == '__main__': - test() diff --git a/examples/conf_lalr.py b/examples/conf_lalr.py deleted file mode 100644 index 33d1dc0..0000000 --- a/examples/conf_lalr.py +++ /dev/null @@ -1,38 +0,0 @@ -# -# This example demonstrates the power of the contextual lexer, by parsing a config file. -# -# The tokens NAME and VALUE match the same input. A standard lexer would arbitrarily -# choose one over the other, which would lead to a (confusing) parse error. -# However, due to the unambiguous structure of the grammar, Lark's LALR(1) algorithm knows -# which one of them to expect at each point during the parse. -# The lexer then only matches the tokens that the parser expects. -# The result is a correct parse, something that is impossible with a regular lexer. -# -# Another approach is to discard a lexer altogether and use the Earley algorithm. -# It will handle more cases than the contextual lexer, but at the cost of performance. -# See examples/conf_earley.py for an example of that approach. -# - -from lark import Lark - -parser = Lark(r""" - start: _NL? section+ - section: "[" NAME "]" _NL item+ - item: NAME "=" VALUE? _NL - VALUE: /./+ - - %import common.CNAME -> NAME - %import common.NEWLINE -> _NL - %import common.WS_INLINE - %ignore WS_INLINE - """, parser="lalr") - - -sample_conf = """ -[bla] -a=Hello -this="that",4 -empty= -""" - -print(parser.parse(sample_conf).pretty()) diff --git a/examples/fruitflies.py b/examples/fruitflies.py index cb6b5cc..697b4ac 100644 --- a/examples/fruitflies.py +++ b/examples/fruitflies.py @@ -1,7 +1,12 @@ -# -# This example shows how to use get explicit ambiguity from Lark's Earley parser. -# +""" +Handling Ambiguity +================== + +A demonstration of ambiguity + +This example shows how to get explicit ambiguity from Lark's Earley parser. + +""" import sys from lark import Lark, tree diff --git a/examples/indented_tree.py b/examples/indented_tree.py index c31bb13..6cdaf37 100644 --- a/examples/indented_tree.py +++ b/examples/indented_tree.py @@ -1,13 +1,16 @@ -# -# This example demonstrates usage of the Indenter class. -# -# Since indentation is context-sensitive, a postlex stage is introduced to -# manufacture INDENT/DEDENT tokens. -# -# It is crucial for the indenter that the NL_type matches -# the spaces (and tabs) after the newline. -# +""" +Parsing Indentation +=================== + +A demonstration of parsing indentation (“whitespace significant” language) +and the usage of the Indenter class. +Since indentation is context-sensitive, a postlex stage is introduced to +manufacture INDENT/DEDENT tokens. + +It is crucial for the indenter that the NL_type matches +the spaces (and tabs) after the newline.
+""" from lark import Lark from lark.indenter import Indenter diff --git a/examples/json_parser.py b/examples/json_parser.py index 7aa7d0f..c3573f3 100644 --- a/examples/json_parser.py +++ b/examples/json_parser.py @@ -1,10 +1,10 @@ -# -# This example shows how to write a basic JSON parser -# -# The code is short and clear, and outperforms every other parser (that's written in Python). -# For an explanation, check out the JSON parser tutorial at /docs/json_tutorial.md -# +""" +Simple JSON Parser +================== +The code is short and clear, and outperforms every other parser (that's written in Python). +For an explanation, check out the JSON parser tutorial at /docs/json_tutorial.md +""" import sys from lark import Lark, Transformer, v_args diff --git a/examples/lark_grammar.py b/examples/lark_grammar.py index c7ace47..e8566fb 100644 --- a/examples/lark_grammar.py +++ b/examples/lark_grammar.py @@ -1,3 +1,9 @@ +""" +Lark Grammar +============ + +A reference implementation of the Lark grammar (using LALR(1)) +""" from lark import Lark parser = Lark(open('examples/lark.lark'), parser="lalr") diff --git a/examples/turtle_dsl.py b/examples/turtle_dsl.py index 775a98e..81a9cde 100644 --- a/examples/turtle_dsl.py +++ b/examples/turtle_dsl.py @@ -1,4 +1,9 @@ -# This example implements a LOGO-like toy language for Python's turtle, with interpreter. +""" +Turtle DSL +========== + +Implements a LOGO-like toy language for Python’s turtle, with interpreter. +""" try: input = raw_input # For Python2 compatibility