From c59249301277889b71298ea9ed7a0e60299b5d52 Mon Sep 17 00:00:00 2001 From: MegaIng Date: Wed, 18 Aug 2021 00:35:30 +0200 Subject: [PATCH 1/2] Added example using dynamic_complete (Thanks @x_TheGreyHat_x) --- examples/advanced/dynamic_complete.py | 103 ++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 examples/advanced/dynamic_complete.py diff --git a/examples/advanced/dynamic_complete.py b/examples/advanced/dynamic_complete.py new file mode 100644 index 0000000..0458a9e --- /dev/null +++ b/examples/advanced/dynamic_complete.py @@ -0,0 +1,103 @@ +""" +Using lexer dynamic_complete +============================ + +Demonstrates how to use ``lexer='dynamic_complete'`` and ``ambiguity='explicit'`` + +Sometimes you have data that is highly ambiguous or 'broken' in some sense. +When using ``parser='earley'`` and ``lexer='dynamic_complete'``, Lark will be able to +parse just about anything as long as there is a valid way to generate it from +the Grammar, including looking 'into' the Regexes. 
+ +This examples shows how to parse a json input where are quotes have been +replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}`` +Notice that underscores might still appear inside strings, so a potentially +valid reading of the above might in normal json be: +``{"foo_:{}, _bar": [], "baz": ""}`` +""" +from pprint import pprint + +from lark import Lark, Tree, Transformer, v_args +from lark.visitors import Transformer_InPlace + +GRAMMAR = r""" +%import common.SIGNED_NUMBER +%import common.WS_INLINE +%import common.NEWLINE +%ignore WS_INLINE + +?start: value + +?value: object + | array + | string + | SIGNED_NUMBER -> number + | "true" -> true + | "false" -> false + | "null" -> null + +array : "[" [value ("," value)*] "]" +object : "{" [pair ("," pair)*] "}" +pair : string ":" value + +string: STRING +STRING : ESCAPED_STRING + +ESCAPED_STRING: QUOTE_CHAR _STRING_ESC_INNER QUOTE_CHAR +QUOTE_CHAR: "_" + +_STRING_INNER: /.*/ +_STRING_ESC_INNER: _STRING_INNER /(? Date: Wed, 18 Aug 2021 01:08:19 +0200 Subject: [PATCH 2/2] Updated docstrings --- examples/advanced/dynamic_complete.py | 45 +++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/examples/advanced/dynamic_complete.py b/examples/advanced/dynamic_complete.py index 0458a9e..48d5048 100644 --- a/examples/advanced/dynamic_complete.py +++ b/examples/advanced/dynamic_complete.py @@ -9,10 +9,10 @@ When using ``parser='earley'`` and ``lexer='dynamic_complete'``, Lark will be ab parse just about anything as long as there is a valid way to generate it from the Grammar, including looking 'into' the Regexes. 
-This examples shows how to parse a json input where are quotes have been +This example shows how to parse a json input where the quotes have been replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}`` Notice that underscores might still appear inside strings, so a potentially -valid reading of the above might in normal json be: +valid reading of the above is: ``{"foo_:{}, _bar": [], "baz": ""}`` """ from pprint import pprint @@ -53,15 +53,56 @@ _STRING_ESC_INNER: _STRING_INNER /(?