From 288078a6a02ebaa8e741adf56dc46533d4677175 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 16 Aug 2020 19:53:45 +0300 Subject: [PATCH] Corrections to PR --- docs/classes.rst | 27 +++++++-- docs/index.rst | 4 +- docs/visitors.rst | 30 ++++++--- lark/exceptions.py | 9 +-- lark/lark.py | 118 ++++++++++++++++-------------------- lark/parsers/lalr_puppet.py | 15 ++--- lark/tree.py | 17 +++--- lark/visitors.py | 103 +++++++++++-------------------- 8 files changed, 154 insertions(+), 169 deletions(-) diff --git a/docs/classes.rst b/docs/classes.rst index 63f9aef..3778147 100644 --- a/docs/classes.rst +++ b/docs/classes.rst @@ -7,10 +7,29 @@ Lark .. autoclass:: lark.Lark :members: open, parse, save, load -LarkOptions ------------ +**Using Unicode character classes with regex** -.. autoclass:: lark.lark.LarkOptions +Python's builtin `re` module has a few persistent known bugs and also won't parse +advanced regex features such as character classes. +With `pip install lark-parser[regex]`, the `regex` module will be installed alongside `lark` and can act as a drop-in replacement to `re`. + +Any instance of `Lark` instantiated with `regex=True` will now use the `regex` module instead of `re`. + +For example, we can now use character classes to match PEP-3131 compliant Python identifiers. + +Example: + :: + + from lark import Lark + >>> g = Lark(r""" + ?start: NAME + NAME: ID_START ID_CONTINUE* + ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/ + ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/ + """, regex=True) + + >>> g.parse('வணக்கம்') + 'வணக்கம்' Tree ---- @@ -24,7 +43,7 @@ Token .. autoclass:: lark.Token -Transformer, Vistor & Interpretor +Transformer, Visitor & Interpreter --------------------------------- See :doc:`visitors`. diff --git a/docs/index.rst b/docs/index.rst index 8466875..ba2c241 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,10 +33,10 @@ Welcome to Lark's documentation! 
grammar tree_construction - visitors classes + visitors nearley - + Lark is a modern parsing library for Python. Lark can parse any context-free grammar. diff --git a/docs/visitors.rst b/docs/visitors.rst index 0a42c2b..a734e3b 100644 --- a/docs/visitors.rst +++ b/docs/visitors.rst @@ -17,12 +17,33 @@ See: `visitors.py`_ Visitor ------- -.. autoclass:: lark.visitors.VisitorBase +Visitors visit each node of the tree, and run the appropriate method on it according to the node's data. + +They work bottom-up, starting with the leaves and ending at the root of the tree. + +There are two classes that implement the visitor interface: + +- ``Visitor``: Visit every node (without recursion) +- ``Visitor_Recursive``: Visit every node using recursion. Slightly faster. + +Example: + :: + + class IncreaseAllNumbers(Visitor): + def number(self, tree): + assert tree.data == "number" + tree.children[0] += 1 + + IncreaseAllNumbers().visit(parse_tree) .. autoclass:: lark.visitors.Visitor .. autoclass:: lark.visitors.Visitor_Recursive +Interpreter +----------- + +.. autoclass:: lark.visitors.Interpreter Transformer ----------- @@ -30,11 +51,6 @@ Transformer .. autoclass:: lark.visitors.Transformer :members: __default__, __default_token__ -Interpreter ------------ - -.. autoclass:: lark.visitors.Interpreter - v_args ------ @@ -43,4 +59,4 @@ v_args Discard ------- -.. autoclass:: lark.visitors.Discard \ No newline at end of file +.. autoclass:: lark.visitors.Discard \ No newline at end of file diff --git a/lark/exceptions.py b/lark/exceptions.py index dcd80b5..13bf83e 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -26,11 +26,12 @@ class UnexpectedEOF(ParseError): class UnexpectedInput(LarkError): """UnexpectedInput Error. 
+ Used as a base class for the following exceptions: + - ``UnexpectedToken``: The parser recieved an unexpected token - ``UnexpectedCharacters``: The lexer encountered an unexpected string - After catching one of these exceptions, you may call the following - helper methods to create a nicer error message. + After catching one of these exceptions, you may call the following helper methods to create a nicer error message. """ pos_in_stream = None @@ -57,7 +58,7 @@ class UnexpectedInput(LarkError): def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): """Allows you to detect what's wrong in the input text by matching against example errors. - + Given a parser instance and a dictionary mapping some label with some malformed syntax examples, it'll return the label for the example that bests matches the current error. The function will @@ -66,7 +67,7 @@ class UnexpectedInput(LarkError): For an example usage, see examples/error_reporting_lalr.py - Args: + Parameters: parse_fn: parse function (usually ``lark_instance.parse``) examples: dictionary of ``{'example_string': value}``. use_accepts: Recommended to call this with ``use_accepts=True``. diff --git a/lark/lark.py b/lark/lark.py index abc87d1..b54e725 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -27,75 +27,67 @@ class LarkOptions(Serialize): """ OPTIONS_DOC = """ - **General** - + **=== General ===** + start - The start symbol. Either a string, or a list of strings for - multiple possible starts (Default: "start") + The start symbol. 
Either a string, or a list of strings for multiple possible starts (Default: "start") debug - Display debug information, such as warnings (default: False) + Display debug information, such as warnings (default: False) transformer - Applies the transformer to every parse tree (equivlent - to applying it after the parse, but faster) + Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) propagate_positions - Propagates (line, column, end_line, end_column) attributes into all tree branches. + Propagates (line, column, end_line, end_column) attributes into all tree branches. maybe_placeholders - When True, the ``[]`` operator returns ``None`` - when not matched. When ``False``, ``[]`` behaves like the ``?`` - operator, and returns no value at all. (default= ``False``. Recommended - to set to ``True``) + When True, the ``[]`` operator returns ``None`` when not matched. + + When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. + (default= ``False``. Recommended to set to ``True``) regex - When True, uses the ``regex`` module instead of the - stdlib ``re``. + When True, uses the ``regex`` module instead of the stdlib ``re``. cache - Cache the results of the Lark grammar analysis, for x2 to - x3 faster loading. LALR only for now. + Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. 
- - When ``False``, does nothing (default) - - When ``True``, caches to a temporary file in the local directory - - When given a string, caches to the path pointed by the string + - When ``False``, does nothing (default) + - When ``True``, caches to a temporary file in the local directory + - When given a string, caches to the path pointed by the string g_regex_flags - Flags that are applied to all terminals (both regex and strings) + Flags that are applied to all terminals (both regex and strings) keep_all_tokens - Prevent the tree builder from automagically removing "punctuation" tokens (default: False) + Prevent the tree builder from automagically removing "punctuation" tokens (default: False) - **Algorithm** + **=== Algorithm ===** parser - Decides which parser engine to use. Accepts "earley" or "lalr". - (Default: "earley"). (there is also a "cyk" option for legacy) + Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley"). + (there is also a "cyk" option for legacy) lexer - Decides whether or not to use a lexer stage - - - "auto" (default): Choose for me based on the parser - - "standard": Use a standard lexer - - "contextual": Stronger lexer (only works with parser="lalr") - - "dynamic": Flexible and powerful (only with parser="earley") - - "dynamic_complete": Same as dynamic, but tries *every* variation - of tokenizing possible. + Decides whether or not to use a lexer stage + + - "auto" (default): Choose for me based on the parser + - "standard": Use a standard lexer + - "contextual": Stronger lexer (only works with parser="lalr") + - "dynamic": Flexible and powerful (only with parser="earley") + - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible. ambiguity - Decides how to handle ambiguity in the parse. 
Only relevant if parser="earley" - - - "resolve" - The parser will automatically choose the simplest - derivation (it chooses consistently: greedy for tokens, - non-greedy for rules) - - "explicit": The parser will return all derivations wrapped in - "_ambig" tree nodes (i.e. a forest). + Decides how to handle ambiguity in the parse. Only relevant if parser="earley" - **Domain Specific** + - "resolve" - The parser will automatically choose the simplest derivation + (it chooses consistently: greedy for tokens, non-greedy for rules) + - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). + + **=== Misc. / Domain Specific ===** postlex - Lexer post-processing (Default: None) Only works with the - standard and contextual lexers. + Lexer post-processing (Default: None) Only works with the standard and contextual lexers. priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto) + How priorities should be evaluated - auto, none, normal, invert (Default: auto) lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. + Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. use_bytes - Accept an input of type ``bytes`` instead of ``str`` (Python 3 only). + Accept an input of type ``bytes`` instead of ``str`` (Python 3 only). edit_terminals - A callback + A callback for editing the terminals before parse. """ if __doc__: __doc__ += OPTIONS_DOC @@ -170,13 +162,11 @@ class LarkOptions(Serialize): class Lark(Serialize): """Main interface for the library. - It's mostly a thin wrapper for the many different parsers, and for - the tree constructor. + It's mostly a thin wrapper for the many different parsers, and for the tree constructor. - Args: - grammar: a string or file-object containing the - grammar spec (using Lark's ebnf syntax) - options : a dictionary controlling various aspects of Lark. 
+ Parameters: + grammar: a string or file-object containing the grammar spec (using Lark's ebnf syntax) + options: a dictionary controlling various aspects of Lark. Example: >>> Lark(r'''start: "foo" ''') @@ -317,8 +307,7 @@ class Lark(Serialize): self.save(f) # TODO: merge with above - if __init__.__doc__: - __init__.__doc__ += "\nOptions:\n" + LarkOptions.OPTIONS_DOC + __doc__ += "\nOptions:\n" + LarkOptions.OPTIONS_DOC __serialize_fields__ = 'parser', 'rules', 'options' @@ -391,8 +380,7 @@ class Lark(Serialize): def open(cls, grammar_filename, rel_to=None, **options): """Create an instance of Lark with the grammar given by its filename - If ``rel_to`` is provided, the function will find the grammar - filename in relation to it. + If ``rel_to`` is provided, the function will find the grammar filename in relation to it. Example: @@ -426,17 +414,15 @@ class Lark(Serialize): def parse(self, text, start=None, on_error=None): """Parse the given text, according to the options provided. - If a transformer is supplied to ``__init__``, returns whatever is the - result of the transformation. - - Args: + Parameters: text (str): Text to be parsed. - start (str, optional): Required if Lark was given multiple - possible start symbols (using the start option). - on_error (function, optional): if provided, will be called on - UnexpectedToken error. Return true to resume parsing. - LALR only. See examples/error_puppet.py for an example - of how to use on_error. + start (str, optional): Required if Lark was given multiple possible start symbols (using the start option). + on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing. + LALR only. See examples/error_puppet.py for an example of how to use on_error. + + Returns: + If a transformer is supplied to ``__init__``, returns whatever is the + result of the transformation. Otherwise, returns a Tree instance. 
""" diff --git a/lark/parsers/lalr_puppet.py b/lark/parsers/lalr_puppet.py index 63642ae..50272af 100644 --- a/lark/parsers/lalr_puppet.py +++ b/lark/parsers/lalr_puppet.py @@ -7,11 +7,9 @@ from .. import Token class ParserPuppet(object): - """ParserPuppet gives you advanced control over error handling when - parsing with LALR. + """ParserPuppet gives you advanced control over error handling when parsing with LALR. - For a simpler, more streamlined interface, see the ``on_error`` - argument to ``Lark.parse()``. + For a simpler, more streamlined interface, see the ``on_error`` argument to ``Lark.parse()``. """ def __init__(self, parser, state_stack, value_stack, start, stream, set_state): self.parser = parser @@ -24,8 +22,7 @@ class ParserPuppet(object): self.result = None def feed_token(self, token): - """Feed the parser with a token, and advance it to the next state, - as if it recieved it from the lexer. + """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer. Note that ``token`` has to be an instance of ``Token``. """ @@ -89,9 +86,9 @@ class ParserPuppet(object): return '\n'.join(out) def choices(self): - """Returns a dictionary of token types, matched to their action in - the parser. Only returns token types that are accepted by the - current state. + """Returns a dictionary of token types, matched to their action in the parser. + + Only returns token types that are accepted by the current state. Updated by ``feed_token()``. """ diff --git a/lark/tree.py b/lark/tree.py index b48450e..b9dddf4 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -18,15 +18,14 @@ class Meta: class Tree(object): """The main tree class. - Creates a new tree, and stores "data" and "children" in attributes of - the same name. Trees can be hashed and compared. + Creates a new tree, and stores "data" and "children" in attributes of the same name. + Trees can be hashed and compared. 
- Args: + Parameters: data: The name of the rule or alias children: List of matched sub-rules and terminals meta: Line & Column numbers (if ``propagate_positions`` is enabled). - meta attributes: line, column, start_pos, end_line, - end_column, end_pos + meta attributes: line, column, start_pos, end_line, end_column, end_pos """ def __init__(self, data, children, meta=None): self.data = data @@ -79,9 +78,8 @@ class Tree(object): def iter_subtrees(self): """Depth-first iteration. - - Iterates over all the subtrees, never returning to the - same node twice (Lark's parse-tree is actually a DAG). + + Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG). """ queue = [self] subtrees = OrderedDict() @@ -121,8 +119,7 @@ class Tree(object): def iter_subtrees_topdown(self): """Breadth-first iteration. - Iterates over all the subtrees, return nodes in order like - pretty() does. + Iterates over all the subtrees, return nodes in order like pretty() does. """ stack = [self] while stack: diff --git a/lark/visitors.py b/lark/visitors.py index 81bb831..6ea39b0 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -45,28 +45,23 @@ class _Decoratable: class Transformer(_Decoratable): - """Transformer visit each node of the tree, and run the appropriate method - on it according to the node's data. + """Transformers visit each node of the tree, and run the appropriate method on it according to the node's data. - Calls its methods (provided by user via inheritance) according to - ``tree.data``. The returned value replaces the old one in the structure. + Calls its methods (provided by user via inheritance) according to ``tree.data``. + The returned value replaces the old one in the structure. - They work bottom-up (or depth-first), starting with the leaves and - ending at the root of the tree. Transformers can be used to - implement map & reduce patterns. 
Because nodes are reduced from leaf to - root, at any point the callbacks may assume the children have already been - transformed (if applicable). ``Transformer`` can do anything ``Visitor`` - can do, but because it reconstructs the tree, it is slightly less - efficient. + They work bottom-up (or depth-first), starting with the leaves and ending at the root of the tree. + Transformers can be used to implement map & reduce patterns. Because nodes are reduced from leaf to root, + at any point the callbacks may assume the children have already been transformed (if applicable). + + ``Transformer`` can do anything ``Visitor`` can do, but because it reconstructs the tree, + it is slightly less efficient. It can be used to implement map or reduce patterns. All these classes implement the transformer interface: - - ``Transformer`` - Recursively transforms the tree. This is the one you - probably want. - - ``Transformer_InPlace`` - Non-recursive. Changes the tree in-place - instead of returning new instances - - ``Transformer_InPlaceRecursive`` - Recursive. Changes the tree in-place - instead of returning new instances + - ``Transformer`` - Recursively transforms the tree. This is the one you probably want. + - ``Transformer_InPlace`` - Non-recursive. Changes the tree in-place instead of returning new instances + - ``Transformer_InPlaceRecursive`` - Recursive. Changes the tree in-place instead of returning new instances Example: :: @@ -82,7 +77,7 @@ class Transformer(_Decoratable): # Prints: Tree(a, [3]) - Args: + Parameters: visit_tokens: By default, transformers only visit rules. visit_tokens=True will tell ``Transformer`` to visit tokens as well. This is a slightly slower alternative to lexer_callbacks @@ -164,16 +159,16 @@ class Transformer(_Decoratable): def __default__(self, data, children, meta): """Default operation on tree (for override) - Function that is called on if a function with a corresponding name has - not been found. 
Defaults to reconstruct the Tree + Function that is called on if a function with a corresponding name has not been found. + Defaults to reconstruct the Tree. """ return Tree(data, children, meta) def __default_token__(self, token): """Default operation on token (for override) - - Function that is called on if a function with a corresponding name has - not been found. Defaults to just return the argument. + + Function that is called on if a function with a corresponding name has not been found. + Defaults to just return the argument. """ return token @@ -259,25 +254,6 @@ class Transformer_InPlaceRecursive(Transformer): # Visitors class VisitorBase: - """Visitors visit each node of the tree - - Run the appropriate method on it according to the node's data. - They work bottom-up, starting with the leaves and ending at the root - of the tree. There are two classes that implement the visitor interface: - - - ``Visitor``: Visit every node (without recursion) - - ``Visitor_Recursive``: Visit every node using recursion. Slightly faster. - - Example: - :: - - class IncreaseAllNumbers(Visitor): - def number(self, tree): - assert tree.data == "number" - tree.children[0] += 1 - - IncreaseAllNumbers().visit(parse_tree) - """ def _call_userfunc(self, tree): return getattr(self, tree.data, self.__default__)(tree) @@ -293,8 +269,7 @@ class Visitor(VisitorBase): """Bottom-up visitor, non-recursive. Visits the tree, starting with the leaves and finally the root (bottom-up) - Calls its methods (provided by user via inheritance) according to - ``tree.data`` + Calls its methods (provided by user via inheritance) according to ``tree.data`` """ def visit(self, tree): @@ -312,8 +287,7 @@ class Visitor_Recursive(VisitorBase): """Bottom-up visitor, recursive. 
Visits the tree, starting with the leaves and finally the root (bottom-up) - Calls its methods (provided by user via inheritance) according to - ``tree.data`` + Calls its methods (provided by user via inheritance) according to ``tree.data`` """ def visit(self, tree): @@ -348,13 +322,12 @@ class Interpreter(_Decoratable): """Interpreter walks the tree starting at the root. Visits the tree, starting with the root and finally the leaves (top-down) - Calls its methods (provided by user via inheritance) according to - ``tree.data`` - Unlike ``Transformer`` and ``Visitor``, the Interpreter doesn't - automatically visit its sub-branches. The user has to explicitly call ``visit``, - ``visit_children``, or use the ``@visit_children_decor``. This allows the - user to implement branching and loops. + For each tree node, it calls its methods (provided by user via inheritance) according to ``tree.data``. + + Unlike ``Transformer`` and ``Visitor``, the Interpreter doesn't automatically visit its sub-branches. + The user has to explicitly call ``visit``, ``visit_children``, or use the ``@visit_children_decor``. + This allows the user to implement branching and loops. Example: :: @@ -452,21 +425,17 @@ def _vargs_tree(f, data, children, meta): def v_args(inline=False, meta=False, tree=False, wrapper=None): - """A convenience decorator factory for modifying the behavior of - user-supplied visitor methods. - - By default, callback methods of transformers/visitors accept one argument - - a list of the node's children. ``v_args`` can modify this behavior. When - used on a transformer/visitor class definition, it applies to all the - callback methods inside it. Accepts one of three following flags. - - Args: - inline: Children are provided as ``*args`` instead of a list - argument (not recommended for very long lists). - meta: Provides two arguments: ``children`` and ``meta`` (instead of - just the first) - tree: Provides the entire tree as the argument, instead of the - children. 
+ """A convenience decorator factory for modifying the behavior of user-supplied visitor methods. + + By default, callback methods of transformers/visitors accept one argument - a list of the node's children. + + ``v_args`` can modify this behavior. When used on a transformer/visitor class definition, + it applies to all the callback methods inside it. + + Parameters: + inline: Children are provided as ``*args`` instead of a list argument (not recommended for very long lists). + meta: Provides two arguments: ``children`` and ``meta`` (instead of just the first) + tree: Provides the entire tree as the argument, instead of the children. Example: ::