| @@ -0,0 +1,20 @@ | |||
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to sphinx-build's "make mode" (-M), so the set of
# available targets (html, latexpdf, clean, ...) is whatever Sphinx provides.

# You can set these variables from the command line, and also from the
# environment for the first two: "?=" only assigns when the variable is not
# already defined, matching the Windows command file, which honours a
# pre-set SPHINXBUILD.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SPHINXPROJ    = Lark
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "Makefile" is declared phony so the catch-all pattern rule below is never
# applied to the makefile itself.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
| @@ -2,6 +2,8 @@ | |||
| This page details the important classes in Lark. | |||
| **TODO** convert to sphinx autodoc! | |||
| ---- | |||
| ## lark.Lark | |||
| @@ -0,0 +1,177 @@ | |||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Lark documentation build configuration file, created by
# sphinx-quickstart on Sun Aug 16 13:09:41 2020.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))


# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.coverage',
    'recommonmark',  # lets Sphinx read the Markdown (.md) sources
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames, mapped to the parser that handles them.
# Both reStructuredText and Markdown sources are accepted.
source_suffix = {
    '.rst': 'restructuredtext',
    '.md': 'markdown',
}

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'Lark'
copyright = '2020, Erez Shinan'
author = 'Erez Shinan'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# NOTE(review): both values are intentionally left empty here; consider
# deriving them from the installed package version.
#
# The short X.Y version.
version = ''
# The full version, including alpha/beta/rc tags.
release = ''

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# An explicit language code is used instead of None: recent Sphinx releases
# warn about None and fall back to English anyway.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# NOTE(review): this block comes from the quickstart template, where it is
# described as required for the alabaster theme
# (http://alabaster.readthedocs.io/en/latest/installation.html#sidebars).
# html_theme above is 'sphinx_rtd_theme' -- confirm whether it is still needed.
html_sidebars = {
    '**': [
        'relations.html',  # needs 'show_related': True theme option to display
        'searchbox.html',
    ]
}


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'Larkdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'Lark.tex', 'Lark Documentation',
     author, 'manual'),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'lark', 'Lark Documentation',
     [author], 1)
]


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'Lark', 'Lark Documentation',
     author, 'Lark', 'A modern parsing library for Python.',
     'Miscellaneous'),
]
| @@ -1,4 +1,6 @@ | |||
| # Main Features | |||
| # Features | |||
| ## Main Features | |||
| - Earley parser, capable of parsing any context-free grammar | |||
| - Implements SPPF, for efficient parsing and storing of ambiguous grammars. | |||
| - LALR(1) parser, limited in power of expression, but very efficient in space and performance (O(n)). | |||
| @@ -18,10 +20,10 @@ | |||
| [Read more about the parsers](parsers.md) | |||
| # Extra features | |||
| ## Extra features | |||
| - Import rules and tokens from other Lark grammars, for code reuse and modularity. | |||
| - Import grammars from Nearley.js ([read more](/docs/nearley.md)) | |||
| - Import grammars from Nearley.js ([read more](nearley.md)) | |||
| - CYK parser | |||
| ### Experimental features | |||
| @@ -1,13 +1,5 @@ | |||
| # Grammar Reference | |||
| Table of contents: | |||
| 1. [Definitions](#defs) | |||
| 1. [Terminals](#terms) | |||
| 1. [Rules](#rules) | |||
| 1. [Directives](#dirs) | |||
| <a name="defs"></a> | |||
| ## Definitions | |||
| A **grammar** is a list of rules and terminals, that together define a language. | |||
| @@ -20,7 +12,7 @@ Each rule is a list of terminals and rules, whose location and nesting define th | |||
| A **parsing algorithm** is an algorithm that takes a grammar definition and a sequence of symbols (members of the alphabet), and matches the entirety of the sequence by searching for a structure that is allowed by the grammar. | |||
| ## General Syntax and notes | |||
| ### General Syntax and notes | |||
| Grammars in Lark are based on [EBNF](https://en.wikipedia.org/wiki/Extended_Backus–Naur_form) syntax, with several enhancements. | |||
| @@ -58,7 +50,6 @@ Lark begins the parse with the rule 'start', unless specified otherwise in the o | |||
| Names of rules are always in lowercase, while names of terminals are always in uppercase. This distinction has practical effects, for the shape of the generated parse-tree, and the automatic construction of the lexer (aka tokenizer, or scanner). | |||
| <a name="terms"></a> | |||
| ## Terminals | |||
| Terminals are used to match text into symbols. They can be defined as a combination of literals and other terminals. | |||
| @@ -190,7 +181,6 @@ _ambig | |||
| ``` | |||
| <a name="rules"></a> | |||
| ## Rules | |||
| **Syntax:** | |||
| @@ -22,11 +22,11 @@ Of course, some specific use-cases may deviate from this process. Feel free to s | |||
| Browse the [Examples](https://github.com/lark-parser/lark/tree/master/examples) to find a template that suits your purposes. | |||
| Read the tutorials to get a better understanding of how everything works. (links in the [main page](/)) | |||
| Read the tutorials to get a better understanding of how everything works. (links in the [main page](/index)) | |||
| Use the [Cheatsheet (PDF)](lark_cheatsheet.pdf) for quick reference. | |||
| Use the [Cheatsheet (PDF)](/_static/lark_cheatsheet.pdf) for quick reference. | |||
| Use the reference pages for more in-depth explanations. (links in the [main page](/)) | |||
| Use the reference pages for more in-depth explanations. (links in the [main page](/index)) | |||
| ## LALR usage | |||
| @@ -1,55 +0,0 @@ | |||
| # Lark | |||
| A modern parsing library for Python | |||
| ## Overview | |||
| Lark can parse any context-free grammar. | |||
| Lark provides: | |||
| - Advanced grammar language, based on EBNF | |||
| - Three parsing algorithms to choose from: Earley, LALR(1) and CYK | |||
| - Automatic tree construction, inferred from your grammar | |||
| - Fast unicode lexer with regexp support, and automatic line-counting | |||
| Lark's code is hosted on Github: [https://github.com/lark-parser/lark](https://github.com/lark-parser/lark) | |||
| ### Install | |||
| ```bash | |||
| $ pip install lark-parser | |||
| ``` | |||
| #### Syntax Highlighting | |||
| - [Sublime Text & TextMate](https://github.com/lark-parser/lark_syntax) | |||
| - [Visual Studio Code](https://github.com/lark-parser/vscode-lark) (Or install through the vscode plugin system) | |||
| - [Intellij & PyCharm](https://github.com/lark-parser/intellij-syntax-highlighting) | |||
| ----- | |||
| ## Documentation Index | |||
| * [Philosophy & Design Choices](philosophy.md) | |||
| * [Full List of Features](features.md) | |||
| * [Examples](https://github.com/lark-parser/lark/tree/master/examples) | |||
| * [Online IDE](https://lark-parser.github.io/lark/ide/app.html) | |||
| * Tutorials | |||
| * [How to write a DSL](http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/) - Implements a toy LOGO-like language with an interpreter | |||
| * [How to write a JSON parser](json_tutorial.md) - Teaches you how to use Lark | |||
| * Unofficial | |||
| * [Program Synthesis is Possible](https://www.cs.cornell.edu/~asampson/blog/minisynth.html) - Creates a DSL for Z3 | |||
| * Guides | |||
| * [How to use Lark](how_to_use.md) | |||
| * [How to develop Lark](how_to_develop.md) | |||
| * Reference | |||
| * [Grammar](grammar.md) | |||
| * [Tree Construction](tree_construction.md) | |||
| * [Visitors & Transformers](visitors.md) | |||
| * [Classes](classes.md) | |||
| * [Cheatsheet (PDF)](lark_cheatsheet.pdf) | |||
| * [Importing grammars from Nearley](nearley.md) | |||
| * Discussion | |||
| * [Gitter](https://gitter.im/lark-parser/Lobby) | |||
| * [Forum (Google Groups)](https://groups.google.com/forum/#!forum/lark-parser) | |||
| @@ -0,0 +1,64 @@ | |||
| .. Lark documentation master file, created by | |||
| sphinx-quickstart on Sun Aug 16 13:09:41 2020. | |||
| You can adapt this file completely to your liking, but it should at least | |||
| contain the root `toctree` directive. | |||
| Welcome to Lark's documentation! | |||
| ================================ | |||
| .. toctree:: | |||
| :maxdepth: 2 | |||
| :hidden: | |||
| philosophy | |||
| features | |||
| parsers | |||
| .. toctree:: | |||
| :maxdepth: 2 | |||
| :caption: Tutorials & Guides | |||
| :hidden: | |||
| json_tutorial | |||
| how_to_use | |||
| how_to_develop | |||
| nearley | |||
| recipes | |||
| .. toctree:: | |||
| :maxdepth: 2 | |||
| :caption: Reference | |||
| :hidden: | |||
| grammar | |||
| tree_construction | |||
| visitors | |||
| classes | |||
| Lark is a modern parsing library for Python. Lark can parse any context-free grammar. | |||
| Lark provides: | |||
| - Advanced grammar language, based on EBNF | |||
| - Three parsing algorithms to choose from: Earley, LALR(1) and CYK | |||
| - Automatic tree construction, inferred from your grammar | |||
| - Fast unicode lexer with regexp support, and automatic line-counting | |||
| **Install Lark**: | |||
| .. code:: bash | |||
| $ pip install lark-parser | |||
| **Syntax Highlighting**: | |||
| - `Sublime Text & TextMate`_ | |||
| - `Visual Studio Code`_ (Or install through the vscode plugin system) | |||
| - `Intellij & PyCharm`_ | |||
| .. _Sublime Text & TextMate: https://github.com/lark-parser/lark_syntax | |||
| .. _Visual Studio Code: https://github.com/lark-parser/vscode-lark | |||
| .. _Intellij & PyCharm: https://github.com/lark-parser/intellij-syntax-highlighting | |||
| @@ -1,7 +1,6 @@ | |||
| # Lark Tutorial - JSON parser | |||
| # JSON parser - Tutorial | |||
| Lark is a parser - a program that accepts a grammar and text, and produces a structured tree that represents that text. | |||
| In this tutorial we will write a JSON parser in Lark, and explore Lark's various features in the process. | |||
| It has 5 parts. | |||
| @@ -0,0 +1,36 @@ | |||
@ECHO OFF

REM Run from the directory that contains this script; undone by popd on exit.
pushd %~dp0

REM Command file for Sphinx documentation

REM Honour a SPHINXBUILD already set in the environment; otherwise default.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
set SPHINXPROJ=Lark

REM No target given: show the Sphinx help text.
if "%1" == "" goto help

REM Probe for the executable; errorlevel 9009 is treated as "command not found".
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	REM Restore the caller's directory before bailing out.
	popd
	exit /b 1
)

REM Forward the requested target to Sphinx "make mode". %O% is an optional
REM shortcut for extra options, mirroring $(O) in the Makefile.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
| @@ -1,7 +1,7 @@ | |||
| # Parsers | |||
| Lark implements the following parsing algorithms: Earley, LALR(1), and CYK | |||
| # Earley | |||
| ## Earley | |||
| An [Earley Parser](https://www.wikiwand.com/en/Earley_parser) is a chart parser capable of parsing any context-free grammar at O(n^3), and O(n^2) when the grammar is unambiguous. It can parse most LR grammars at O(n). Most programming languages are LR, and can be parsed in linear time. | |||
| @@ -30,7 +30,7 @@ Lark provides the following options to combat ambiguity: | |||
| **TODO: Add documentation on dynamic_complete** | |||
| # LALR(1) | |||
| ## LALR(1) | |||
| [LALR(1)](https://www.wikiwand.com/en/LALR_parser) is a very efficient, tried-and-tested parsing algorithm. It's incredibly fast and requires very little memory. It can parse most programming languages (For example: Python and Java). | |||
| @@ -42,7 +42,7 @@ The contextual lexer communicates with the parser, and uses the parser's lookahe | |||
| This is an improvement to LALR(1) that is unique to Lark. | |||
| # CYK Parser | |||
| ## CYK Parser | |||
| A [CYK parser](https://www.wikiwand.com/en/CYK_algorithm) can parse any context-free grammar at O(n^3*|G|). | |||
| @@ -4,7 +4,7 @@ Parsers are innately complicated and confusing. They're difficult to understand, | |||
| Lark's mission is to make the process of writing them as simple and abstract as possible, by following these design principles: | |||
| ### Design Principles | |||
| ## Design Principles | |||
| 1. Readability matters | |||
| @@ -23,7 +23,7 @@ In accordance with these principles, I arrived at the following design choices: | |||
| ----------- | |||
| # Design Choices | |||
| ## Design Choices | |||
| ### 1. Separation of code and grammar | |||
| @@ -1,4 +1,4 @@ | |||
| # Automatic Tree Construction - Reference | |||
| # Tree Construction Reference | |||
| Lark builds a tree automatically based on the structure of the grammar, where each rule that is matched becomes a branch (node) in the tree, and its children are its matches, in the order of matching. | |||
| @@ -13,7 +13,7 @@ If `maybe_placeholders=False` (the default), then `[]` behaves like `()?`. | |||
| If `maybe_placeholders=True`, then using `[item]` will return the item if it matched, or the value `None`, if it didn't. | |||
| ### Terminals | |||
| ## Terminals | |||
| Terminals are always values in the tree, never branches. | |||
| @@ -74,7 +74,7 @@ Lark will parse "((hello world))" as: | |||
| The brackets do not appear in the tree by design. The words appear because they are matched by a named terminal. | |||
| # Shaping the tree | |||
| ## Shaping the tree | |||
| Users can alter the automatic construction of the tree using a collection of grammar features. | |||
| @@ -1,4 +1,4 @@ | |||
| ## Transformers & Visitors | |||
| # Transformers & Visitors | |||
| Transformers & Visitors provide a convenient interface to process the parse-trees that Lark returns. | |||