diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..58127b4 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXPROJ = Lark +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/lark_cheatsheet.pdf b/docs/_static/lark_cheatsheet.pdf similarity index 100% rename from docs/lark_cheatsheet.pdf rename to docs/_static/lark_cheatsheet.pdf diff --git a/docs/classes.md b/docs/classes.md index 7bd92fe..4ec7f4c 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -2,6 +2,8 @@ This page details the important classes in Lark. +**TODO** convert to sphinx autodoc! + ---- ## lark.Lark diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..a522559 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Lark documentation build configuration file, created by +# sphinx-quickstart on Sun Aug 16 13:09:41 2020. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.coverage', + 'recommonmark' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffixes as a list of strings: +# +# source_suffix = ['.rst', '.md'] +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown' +} + + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'Lark' +copyright = '2020, Erez Shinan' +author = 'Erez Shinan' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '' +# The full version, including alpha/beta/rc tags. +release = '' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# These patterns also affect html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. 
+pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# This is required for the alabaster theme +# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +html_sidebars = { + '**': [ + 'relations.html', # needs 'show_related': True theme option to display + 'searchbox.html', + ] +} + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Larkdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). 
+latex_documents = [ + (master_doc, 'Lark.tex', 'Lark Documentation', + 'Erez Shinan', 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'lark', 'Lark Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'Lark', 'Lark Documentation', + author, 'Lark', 'One line description of project.', + 'Miscellaneous'), +] + + + diff --git a/docs/features.md b/docs/features.md index 9346989..68cde87 100644 --- a/docs/features.md +++ b/docs/features.md @@ -1,4 +1,6 @@ -# Main Features +# Features + +## Main Features - Earley parser, capable of parsing any context-free grammar - Implements SPPF, for efficient parsing and storing of ambiguous grammars. - LALR(1) parser, limited in power of expression, but very efficient in space and performance (O(n)). @@ -18,10 +20,10 @@ [Read more about the parsers](parsers.md) -# Extra features +## Extra features - Import rules and tokens from other Lark grammars, for code reuse and modularity. - - Import grammars from Nearley.js ([read more](/docs/nearley.md)) + - Import grammars from Nearley.js ([read more](nearley.md)) - CYK parser ### Experimental features diff --git a/docs/grammar.md b/docs/grammar.md index d4ecec5..7db6a3c 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -1,13 +1,5 @@ # Grammar Reference -Table of contents: - -1. [Definitions](#defs) -1. [Terminals](#terms) -1. [Rules](#rules) -1. [Directives](#dirs) - - ## Definitions A **grammar** is a list of rules and terminals, that together define a language. 
@@ -20,7 +12,7 @@ Each rule is a list of terminals and rules, whose location and nesting define th A **parsing algorithm** is an algorithm that takes a grammar definition and a sequence of symbols (members of the alphabet), and matches the entirety of the sequence by searching for a structure that is allowed by the grammar. -## General Syntax and notes +### General Syntax and notes Grammars in Lark are based on [EBNF](https://en.wikipedia.org/wiki/Extended_Backus–Naur_form) syntax, with several enhancements. @@ -58,7 +50,6 @@ Lark begins the parse with the rule 'start', unless specified otherwise in the o Names of rules are always in lowercase, while names of terminals are always in uppercase. This distinction has practical effects, for the shape of the generated parse-tree, and the automatic construction of the lexer (aka tokenizer, or scanner). - ## Terminals Terminals are used to match text into symbols. They can be defined as a combination of literals and other terminals. @@ -190,7 +181,6 @@ _ambig ``` - ## Rules **Syntax:** diff --git a/docs/how_to_use.md b/docs/how_to_use.md index 886b440..c4ba4dd 100644 --- a/docs/how_to_use.md +++ b/docs/how_to_use.md @@ -22,11 +22,11 @@ Of course, some specific use-cases may deviate from this process. Feel free to s Browse the [Examples](https://github.com/lark-parser/lark/tree/master/examples) to find a template that suits your purposes. -Read the tutorials to get a better understanding of how everything works. (links in the [main page](/)) +Read the tutorials to get a better understanding of how everything works. (links in the [main page](/index)) -Use the [Cheatsheet (PDF)](lark_cheatsheet.pdf) for quick reference. +Use the [Cheatsheet (PDF)](/_static/lark_cheatsheet.pdf) for quick reference. -Use the reference pages for more in-depth explanations. (links in the [main page](/)] +Use the reference pages for more in-depth explanations. 
(links in the [main page](/index)) ## LALR usage diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 1310be2..0000000 --- a/docs/index.md +++ /dev/null @@ -1,55 +0,0 @@ -# Lark - -A modern parsing library for Python - -## Overview - -Lark can parse any context-free grammar. - -Lark provides: - -- Advanced grammar language, based on EBNF -- Three parsing algorithms to choose from: Earley, LALR(1) and CYK -- Automatic tree construction, inferred from your grammar -- Fast unicode lexer with regexp support, and automatic line-counting - -Lark's code is hosted on Github: [https://github.com/lark-parser/lark](https://github.com/lark-parser/lark) - -### Install -```bash -$ pip install lark-parser -``` - -#### Syntax Highlighting - -- [Sublime Text & TextMate](https://github.com/lark-parser/lark_syntax) -- [Visual Studio Code](https://github.com/lark-parser/vscode-lark) (Or install through the vscode plugin system) -- [Intellij & PyCharm](https://github.com/lark-parser/intellij-syntax-highlighting) - ------ - -## Documentation Index - - -* [Philosophy & Design Choices](philosophy.md) -* [Full List of Features](features.md) -* [Examples](https://github.com/lark-parser/lark/tree/master/examples) -* [Online IDE](https://lark-parser.github.io/lark/ide/app.html) -* Tutorials - * [How to write a DSL](http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/) - Implements a toy LOGO-like language with an interpreter - * [How to write a JSON parser](json_tutorial.md) - Teaches you how to use Lark - * Unofficial - * [Program Synthesis is Possible](https://www.cs.cornell.edu/~asampson/blog/minisynth.html) - Creates a DSL for Z3 -* Guides - * [How to use Lark](how_to_use.md) - * [How to develop Lark](how_to_develop.md) -* Reference - * [Grammar](grammar.md) - * [Tree Construction](tree_construction.md) - * [Visitors & Transformers](visitors.md) - * [Classes](classes.md) - * [Cheatsheet (PDF)](lark_cheatsheet.pdf) - * [Importing grammars from 
Nearley](nearley.md) -* Discussion - * [Gitter](https://gitter.im/lark-parser/Lobby) - * [Forum (Google Groups)](https://groups.google.com/forum/#!forum/lark-parser) diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..f6611ce --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,64 @@ +.. Lark documentation master file, created by + sphinx-quickstart on Sun Aug 16 13:09:41 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Lark's documentation! +================================ + +.. toctree:: + :maxdepth: 2 + :hidden: + + philosophy + features + parsers + +.. toctree:: + :maxdepth: 2 + :caption: Tutorials & Guides + :hidden: + + json_tutorial + how_to_use + how_to_develop + nearley + recipes + + +.. toctree:: + :maxdepth: 2 + :caption: Reference + :hidden: + + grammar + tree_construction + visitors + classes + + +Lark is a modern parsing library for Python. Lark can parse any context-free grammar. + +Lark provides: + +- Advanced grammar language, based on EBNF +- Three parsing algorithms to choose from: Earley, LALR(1) and CYK +- Automatic tree construction, inferred from your grammar +- Fast unicode lexer with regexp support, and automatic line-counting + + +**Install Lark**: + +.. code:: bash + + $ pip install lark-parser + +**Syntax Highlighting**: + +- `Sublime Text & TextMate`_ +- `Visual Studio Code`_ (Or install through the vscode plugin system) +- `Intellij & PyCharm`_ + +.. _Sublime Text & TextMate: https://github.com/lark-parser/lark_syntax +.. _Visual Studio Code: https://github.com/lark-parser/vscode-lark +.. 
_Intellij & PyCharm: https://github.com/lark-parser/intellij-syntax-highlighting \ No newline at end of file diff --git a/docs/json_tutorial.md b/docs/json_tutorial.md index 9cc87e7..aa9544d 100644 --- a/docs/json_tutorial.md +++ b/docs/json_tutorial.md @@ -1,7 +1,6 @@ -# Lark Tutorial - JSON parser +# JSON parser - Tutorial Lark is a parser - a program that accepts a grammar and text, and produces a structured tree that represents that text. - In this tutorial we will write a JSON parser in Lark, and explore Lark's various features in the process. It has 5 parts. diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..4f2e286 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=Lark + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/parsers.md b/docs/parsers.md index cff5a4b..7a05f93 100644 --- a/docs/parsers.md +++ b/docs/parsers.md @@ -1,7 +1,7 @@ - +# Parsers Lark implements the following parsing algorithms: Earley, LALR(1), and CYK -# Earley +## Earley An [Earley Parser](https://www.wikiwand.com/en/Earley_parser) is a chart parser capable of parsing any context-free grammar at O(n^3), and O(n^2) when the grammar is unambiguous. It can parse most LR grammars at O(n). 
Most programming languages are LR, and can be parsed at a linear time. @@ -30,7 +30,7 @@ Lark provides the following options to combat ambiguity: **TODO: Add documentation on dynamic_complete** -# LALR(1) +## LALR(1) [LALR(1)](https://www.wikiwand.com/en/LALR_parser) is a very efficient, true-and-tested parsing algorithm. It's incredibly fast and requires very little memory. It can parse most programming languages (For example: Python and Java). @@ -42,7 +42,7 @@ The contextual lexer communicates with the parser, and uses the parser's lookahe This is an improvement to LALR(1) that is unique to Lark. -# CYK Parser +## CYK Parser A [CYK parser](https://www.wikiwand.com/en/CYK_algorithm) can parse any context-free grammar at O(n^3*|G|). diff --git a/docs/philosophy.md b/docs/philosophy.md index a2097d0..a1d8f8c 100644 --- a/docs/philosophy.md +++ b/docs/philosophy.md @@ -4,7 +4,7 @@ Parsers are innately complicated and confusing. They're difficult to understand, Lark's mission is to make the process of writing them as simple and abstract as possible, by following these design principles: -### Design Principles +## Design Principles 1. Readability matters @@ -23,7 +23,7 @@ In accordance with these principles, I arrived at the following design choices: ----------- -# Design Choices +## Design Choices ### 1. Separation of code and grammar diff --git a/docs/tree_construction.md b/docs/tree_construction.md index a4d6088..50ce0ee 100644 --- a/docs/tree_construction.md +++ b/docs/tree_construction.md @@ -1,4 +1,4 @@ -# Automatic Tree Construction - Reference +# Tree Construction Reference Lark builds a tree automatically based on the structure of the grammar, where each rule that is matched becomes a branch (node) in the tree, and its children are its matches, in the order of matching. @@ -13,7 +13,7 @@ If `maybe_placeholders=False` (the default), then `[]` behaves like `()?`. 
If `maybe_placeholders=True`, then using `[item]` will return the item if it matched, or the value `None`, if it didn't. -### Terminals +## Terminals Terminals are always values in the tree, never branches. @@ -74,7 +74,7 @@ Lark will parse "((hello world))" as: The brackets do not appear in the tree by design. The words appear because they are matched by a named terminal. -# Shaping the tree +## Shaping the tree Users can alter the automatic construction of the tree using a collection of grammar features. diff --git a/docs/visitors.md b/docs/visitors.md index dcdc8f8..146af1c 100644 --- a/docs/visitors.md +++ b/docs/visitors.md @@ -1,4 +1,4 @@ -## Transformers & Visitors +# Transformers & Visitors Transformers & Visitors provide a convenient interface to process the parse-trees that Lark returns.