@@ -0,0 +1,20 @@ | |||
# Minimal makefile for Sphinx documentation
#
# Every goal is forwarded to `sphinx-build -M <goal>` ("make mode"), so the
# available targets are whatever Sphinx itself offers; run `make help` to
# list them.

# You can set these variables from the command line. SPHINXOPTS and
# SPHINXBUILD are assigned with `?=`, so a value exported in the environment
# is honoured as well instead of being silently overwritten.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
# SPHINXPROJ is not referenced by the rules in this file.
SPHINXPROJ    = Lark
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "Makefile" is declared phony so that make never tries to rebuild this file
# through the catch-all pattern rule below.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -2,6 +2,8 @@ | |||
This page details the important classes in Lark. | |||
**TODO** convert to sphinx autodoc! | |||
---- | |||
## lark.Lark | |||
@@ -0,0 +1,177 @@ | |||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Lark documentation build configuration file, created by
# sphinx-quickstart on Sun Aug 16 13:09:41 2020.
#
# Sphinx executes this file with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))


# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.coverage',
    'recommonmark',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# Maps each source file suffix to the parser that handles it, so the existing
# Markdown pages are built next to the reStructuredText ones.
source_suffix = {
    '.rst': 'restructuredtext',
    '.md': 'markdown',
}

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'Lark'
copyright = '2020, Erez Shinan'
author = 'Erez Shinan'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# NOTE(review): both values are empty, so |version| and |release| render as
# nothing -- fill them in once the docs should show a version.
#
# The short X.Y version.
version = ''
# The full version, including alpha/beta/rc tags.
release = ''

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Must be a language code string: Sphinx 5+ warns about `language = None`
# and falls back to 'en', so the fallback is spelled out here.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# NOTE(review): sphinx-quickstart generated this block for the alabaster
# theme, but html_theme above is 'sphinx_rtd_theme'. Confirm whether that
# theme uses html_sidebars at all; if not, this setting can be removed.
html_sidebars = {
    '**': [
        'relations.html',  # needs 'show_related': True theme option to display
        'searchbox.html',
    ]
}


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'Larkdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'Lark.tex', 'Lark Documentation',
     author, 'manual'),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'lark', 'Lark Documentation',
     [author], 1)
]


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'Lark', 'Lark Documentation',
     author, 'Lark', 'A modern parsing library for Python.',
     'Miscellaneous'),
]
@@ -1,4 +1,6 @@ | |||
# Main Features | |||
# Features | |||
## Main Features | |||
- Earley parser, capable of parsing any context-free grammar | |||
- Implements SPPF, for efficient parsing and storing of ambiguous grammars. | |||
- LALR(1) parser, limited in power of expression, but very efficient in space and performance (O(n)). | |||
@@ -18,10 +20,10 @@ | |||
[Read more about the parsers](parsers.md) | |||
# Extra features | |||
## Extra features | |||
- Import rules and tokens from other Lark grammars, for code reuse and modularity. | |||
- Import grammars from Nearley.js ([read more](/docs/nearley.md)) | |||
- Import grammars from Nearley.js ([read more](nearley.md)) | |||
- CYK parser | |||
### Experimental features | |||
@@ -1,13 +1,5 @@ | |||
# Grammar Reference | |||
Table of contents: | |||
1. [Definitions](#defs) | |||
1. [Terminals](#terms) | |||
1. [Rules](#rules) | |||
1. [Directives](#dirs) | |||
<a name="defs"></a> | |||
## Definitions | |||
A **grammar** is a list of rules and terminals, that together define a language. | |||
@@ -20,7 +12,7 @@ Each rule is a list of terminals and rules, whose location and nesting define th | |||
A **parsing algorithm** is an algorithm that takes a grammar definition and a sequence of symbols (members of the alphabet), and matches the entirety of the sequence by searching for a structure that is allowed by the grammar. | |||
## General Syntax and notes | |||
### General Syntax and notes | |||
Grammars in Lark are based on [EBNF](https://en.wikipedia.org/wiki/Extended_Backus–Naur_form) syntax, with several enhancements. | |||
@@ -58,7 +50,6 @@ Lark begins the parse with the rule 'start', unless specified otherwise in the o | |||
Names of rules are always in lowercase, while names of terminals are always in uppercase. This distinction has practical effects, for the shape of the generated parse-tree, and the automatic construction of the lexer (aka tokenizer, or scanner). | |||
<a name="terms"></a> | |||
## Terminals | |||
Terminals are used to match text into symbols. They can be defined as a combination of literals and other terminals. | |||
@@ -190,7 +181,6 @@ _ambig | |||
``` | |||
<a name="rules"></a> | |||
## Rules | |||
**Syntax:** | |||
@@ -22,11 +22,11 @@ Of course, some specific use-cases may deviate from this process. Feel free to s | |||
Browse the [Examples](https://github.com/lark-parser/lark/tree/master/examples) to find a template that suits your purposes. | |||
Read the tutorials to get a better understanding of how everything works. (links in the [main page](/)) | |||
Read the tutorials to get a better understanding of how everything works. (links in the [main page](/index)) | |||
Use the [Cheatsheet (PDF)](lark_cheatsheet.pdf) for quick reference. | |||
Use the [Cheatsheet (PDF)](/_static/lark_cheatsheet.pdf) for quick reference. | |||
Use the reference pages for more in-depth explanations. (links in the [main page](/)] | |||
Use the reference pages for more in-depth explanations. (links in the [main page](/index)) | |||
## LALR usage | |||
@@ -1,55 +0,0 @@ | |||
# Lark | |||
A modern parsing library for Python | |||
## Overview | |||
Lark can parse any context-free grammar. | |||
Lark provides: | |||
- Advanced grammar language, based on EBNF | |||
- Three parsing algorithms to choose from: Earley, LALR(1) and CYK | |||
- Automatic tree construction, inferred from your grammar | |||
- Fast unicode lexer with regexp support, and automatic line-counting | |||
Lark's code is hosted on Github: [https://github.com/lark-parser/lark](https://github.com/lark-parser/lark) | |||
### Install | |||
```bash | |||
$ pip install lark-parser | |||
``` | |||
#### Syntax Highlighting | |||
- [Sublime Text & TextMate](https://github.com/lark-parser/lark_syntax) | |||
- [Visual Studio Code](https://github.com/lark-parser/vscode-lark) (Or install through the vscode plugin system) | |||
- [Intellij & PyCharm](https://github.com/lark-parser/intellij-syntax-highlighting) | |||
----- | |||
## Documentation Index | |||
* [Philosophy & Design Choices](philosophy.md) | |||
* [Full List of Features](features.md) | |||
* [Examples](https://github.com/lark-parser/lark/tree/master/examples) | |||
* [Online IDE](https://lark-parser.github.io/lark/ide/app.html) | |||
* Tutorials | |||
* [How to write a DSL](http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/) - Implements a toy LOGO-like language with an interpreter | |||
* [How to write a JSON parser](json_tutorial.md) - Teaches you how to use Lark | |||
* Unofficial | |||
* [Program Synthesis is Possible](https://www.cs.cornell.edu/~asampson/blog/minisynth.html) - Creates a DSL for Z3 | |||
* Guides | |||
* [How to use Lark](how_to_use.md) | |||
* [How to develop Lark](how_to_develop.md) | |||
* Reference | |||
* [Grammar](grammar.md) | |||
* [Tree Construction](tree_construction.md) | |||
* [Visitors & Transformers](visitors.md) | |||
* [Classes](classes.md) | |||
* [Cheatsheet (PDF)](lark_cheatsheet.pdf) | |||
* [Importing grammars from Nearley](nearley.md) | |||
* Discussion | |||
* [Gitter](https://gitter.im/lark-parser/Lobby) | |||
* [Forum (Google Groups)](https://groups.google.com/forum/#!forum/lark-parser) |
@@ -0,0 +1,64 @@ | |||
.. Lark documentation master file, created by | |||
sphinx-quickstart on Sun Aug 16 13:09:41 2020. | |||
You can adapt this file completely to your liking, but it should at least | |||
contain the root `toctree` directive. | |||
Welcome to Lark's documentation! | |||
================================ | |||
.. toctree:: | |||
:maxdepth: 2 | |||
:hidden: | |||
philosophy | |||
features | |||
parsers | |||
.. toctree:: | |||
:maxdepth: 2 | |||
:caption: Tutorials & Guides | |||
:hidden: | |||
json_tutorial | |||
how_to_use | |||
how_to_develop | |||
nearley | |||
recipes | |||
.. toctree:: | |||
:maxdepth: 2 | |||
:caption: Reference | |||
:hidden: | |||
grammar | |||
tree_construction | |||
visitors | |||
classes | |||
Lark is a modern parsing library for Python. Lark can parse any context-free grammar. | |||
Lark provides: | |||
- Advanced grammar language, based on EBNF | |||
- Three parsing algorithms to choose from: Earley, LALR(1) and CYK | |||
- Automatic tree construction, inferred from your grammar | |||
- Fast unicode lexer with regexp support, and automatic line-counting | |||
**Install Lark**: | |||
.. code:: bash | |||
$ pip install lark-parser | |||
**Syntax Highlighting**: | |||
- `Sublime Text & TextMate`_ | |||
- `Visual Studio Code`_ (Or install through the vscode plugin system) | |||
- `Intellij & PyCharm`_ | |||
.. _Sublime Text & TextMate: https://github.com/lark-parser/lark_syntax | |||
.. _Visual Studio Code: https://github.com/lark-parser/vscode-lark | |||
.. _Intellij & PyCharm: https://github.com/lark-parser/intellij-syntax-highlighting |
@@ -1,7 +1,6 @@ | |||
# Lark Tutorial - JSON parser | |||
# JSON parser - Tutorial | |||
Lark is a parser - a program that accepts a grammar and text, and produces a structured tree that represents that text. | |||
In this tutorial we will write a JSON parser in Lark, and explore Lark's various features in the process. | |||
It has 5 parts. | |||
@@ -0,0 +1,36 @@ | |||
@ECHO OFF

REM Run from the directory that contains this script; every exit path below
REM must popd again so the caller's working directory is restored.
pushd %~dp0

REM Command file for Sphinx documentation
REM
REM Usage: make.bat [builder]
REM With no argument the Sphinx "make mode" help is printed. SPHINXBUILD,
REM SPHINXOPTS and O are read from the environment when they are set.

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
set SPHINXPROJ=Lark

if "%1" == "" goto help

REM Probe for the executable: errorlevel 9009 is what cmd reports when the
REM command cannot be found.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	popd
	exit /b 1
)

REM %O% mirrors the $(O) shortcut that the Makefile forwards to sphinx-build.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
@@ -1,7 +1,7 @@ | |||
# Parsers | |||
Lark implements the following parsing algorithms: Earley, LALR(1), and CYK | |||
# Earley | |||
## Earley | |||
An [Earley Parser](https://www.wikiwand.com/en/Earley_parser) is a chart parser capable of parsing any context-free grammar at O(n^3), and O(n^2) when the grammar is unambiguous. It can parse most LR grammars at O(n). Most programming languages are LR, and can be parsed in linear time. | |||
@@ -30,7 +30,7 @@ Lark provides the following options to combat ambiguity: | |||
**TODO: Add documentation on dynamic_complete** | |||
# LALR(1) | |||
## LALR(1) | |||
[LALR(1)](https://www.wikiwand.com/en/LALR_parser) is a very efficient, tried-and-tested parsing algorithm. It's incredibly fast and requires very little memory. It can parse most programming languages (For example: Python and Java). | |||
@@ -42,7 +42,7 @@ The contextual lexer communicates with the parser, and uses the parser's lookahe | |||
This is an improvement to LALR(1) that is unique to Lark. | |||
# CYK Parser | |||
## CYK Parser | |||
A [CYK parser](https://www.wikiwand.com/en/CYK_algorithm) can parse any context-free grammar at O(n^3*|G|). | |||
@@ -4,7 +4,7 @@ Parsers are innately complicated and confusing. They're difficult to understand, | |||
Lark's mission is to make the process of writing them as simple and abstract as possible, by following these design principles: | |||
### Design Principles | |||
## Design Principles | |||
1. Readability matters | |||
@@ -23,7 +23,7 @@ In accordance with these principles, I arrived at the following design choices: | |||
----------- | |||
# Design Choices | |||
## Design Choices | |||
### 1. Separation of code and grammar | |||
@@ -1,4 +1,4 @@ | |||
# Automatic Tree Construction - Reference | |||
# Tree Construction Reference | |||
Lark builds a tree automatically based on the structure of the grammar, where each rule that is matched becomes a branch (node) in the tree, and its children are its matches, in the order of matching. | |||
@@ -13,7 +13,7 @@ If `maybe_placeholders=False` (the default), then `[]` behaves like `()?`. | |||
If `maybe_placeholders=True`, then using `[item]` will return the item if it matched, or the value `None`, if it didn't. | |||
### Terminals | |||
## Terminals | |||
Terminals are always values in the tree, never branches. | |||
@@ -74,7 +74,7 @@ Lark will parse "((hello world))" as: | |||
The brackets do not appear in the tree by design. The words appear because they are matched by a named terminal. | |||
# Shaping the tree | |||
## Shaping the tree | |||
Users can alter the automatic construction of the tree using a collection of grammar features. | |||
@@ -1,4 +1,4 @@ | |||
## Transformers & Visitors | |||
# Transformers & Visitors | |||
Transformers & Visitors provide a convenient interface to process the parse-trees that Lark returns. | |||