From 985c38e0e04b6641acf3ebaa3bd700d9d66cb013 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 28 Jun 2020 12:03:35 +0300 Subject: [PATCH] Documentation fix (Removed bloat from README) --- README.md | 21 --------------------- docs/classes.md | 26 +++++++++++++++++++++++++- lark/lexer.py | 4 ---- tests/test_nearley/nearley | 2 +- 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 7f62f84..464f409 100644 --- a/README.md +++ b/README.md @@ -177,27 +177,6 @@ You can use the output as a regular python module: 0.38981434460254655 ``` -### Using Unicode character classes with `regex` -Python's builtin `re` module has a few persistent known bugs and also won't parse -advanced regex features such as character classes. -With `pip install lark-parser[regex]`, the `regex` module will be installed alongside `lark` -and can act as a drop-in replacement to `re`. - -Any instance of `Lark` instantiated with `regex=True` will now use the `regex` module -instead of `re`. For example, we can now use character classes to match PEP-3131 compliant Python identifiers. -```python -from lark import Lark ->>> g = Lark(r""" - ?start: NAME - NAME: ID_START ID_CONTINUE* - ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/ - ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/ - """, regex=True) - ->>> g.parse('வணக்கம்') -'வணக்கம்' - -``` ## License diff --git a/docs/classes.md b/docs/classes.md index 084cda6..61cefb2 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -70,6 +70,8 @@ Useful for caching and multiprocessing. **g_regex_flags** - Flags that are applied to all terminals (both regex and strings) +**regex** - Use the `regex` library instead of the built-in `re` module (See below) + **keep_all_tokens** - Prevent the tree builder from automagically removing "punctuation" tokens (default: False) **cache** - Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. 
@@ -94,13 +96,35 @@ Useful for caching and multiprocessing. - "resolve": The parser will automatically choose the simplest derivation (it chooses consistently: greedy for tokens, non-greedy for rules) - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). -#### Domain Specific +#### Misc. - **postlex** - Lexer post-processing (Default: None) Only works with the standard and contextual lexers. - **priority** - How priorities should be evaluated - auto, none, normal, invert (Default: auto) - **lexer_callbacks** - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. - **edit_terminals** - A callback + +#### Using Unicode character classes with `regex` +Python's built-in `re` module has a few persistent known bugs and also doesn't support +advanced regex features such as Unicode character classes (e.g. `\p{Lu}`). +With `pip install lark-parser[regex]`, the `regex` module will be installed alongside `lark` +and can act as a drop-in replacement for `re`. + +Any instance of `Lark` instantiated with `regex=True` will now use the `regex` module +instead of `re`. For example, we can now use Unicode character classes to match PEP 3131-compliant Python identifiers. 
+```python +from lark import Lark +>>> g = Lark(r""" + ?start: NAME + NAME: ID_START ID_CONTINUE* + ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/ + ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/ + """, regex=True) + +>>> g.parse('வணக்கம்') +'வணக்கம்' + +``` ---- ## Tree diff --git a/lark/lexer.py b/lark/lexer.py index 4d5c498..8d0d03f 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -1,10 +1,6 @@ ## Lexer Implementation import re -try: - import regex -except ImportError: - regex = None from .utils import Str, classify, get_regexp_width, Py36, Serialize from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken diff --git a/tests/test_nearley/nearley b/tests/test_nearley/nearley index cf8925f..a46b374 160000 --- a/tests/test_nearley/nearley +++ b/tests/test_nearley/nearley @@ -1 +1 @@ -Subproject commit cf8925f729bde741a3076c5856c0c0862bc7f5de +Subproject commit a46b37471db486db0f6e1ce6a2934fb238346b44