| @@ -0,0 +1,103 @@ | |||
| """ | |||
| Using lexer dynamic_complete | |||
| ============================ | |||
| Demonstrates how to use ``lexer='dynamic_complete'`` and ``ambiguity='explicit'`` | |||
| Sometimes you have data that is highly ambiguous or 'broken' in some sense. | |||
| When using ``parser='earley'`` and ``lexer='dynamic_complete'``, Lark will be able | |||
| to parse just about anything as long as there is a valid way to generate it from | |||
| the Grammar, including looking 'into' the Regexes. | |||
| This example shows how to parse a JSON input where all quotes have been | |||
| replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}`` | |||
| Notice that underscores might still appear inside strings, so a potentially | |||
| valid reading of the above might in normal json be: | |||
| ``{"foo_:{}, _bar": [], "baz": ""}`` | |||
| """ | |||
| from pprint import pprint | |||
| from lark import Lark, Tree, Transformer, v_args | |||
| from lark.visitors import Transformer_InPlace | |||
# Lark grammar for JSON-like input in which the string delimiter is an
# underscore (see QUOTE_CHAR) instead of a double quote.
#
# _STRING_INNER matches any run of characters, including underscores, so the
# point where a string ends is ambiguous by design.  _STRING_ESC_INNER appends
# a negative lookbehind for a backslash followed by a lazy run of backslash
# pairs in front of the closing QUOTE_CHAR; presumably this keeps an escaped
# delimiter from terminating the string -- confirm against Lark's
# common.ESCAPED_STRING, which this appears to be modelled on.
GRAMMAR = r"""
%import common.SIGNED_NUMBER
%import common.WS_INLINE
%import common.NEWLINE
%ignore WS_INLINE
?start: value
?value: object
| array
| string
| SIGNED_NUMBER -> number
| "true" -> true
| "false" -> false
| "null" -> null
array : "[" [value ("," value)*] "]"
object : "{" [pair ("," pair)*] "}"
pair : string ":" value
string: STRING
STRING : ESCAPED_STRING
ESCAPED_STRING: QUOTE_CHAR _STRING_ESC_INNER QUOTE_CHAR
QUOTE_CHAR: "_"
_STRING_INNER: /.*/
_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/
"""
def score(tree: Tree):
    """Return the total number of children summed over every subtree of *tree*.

    The value is used as a ranking key when choosing between alternative
    parses: a derivation that splits the input into more nodes gets a
    higher score than one that swallows the same text into fewer nodes.
    """
    total = 0
    for subtree in tree.iter_subtrees():
        total += len(subtree.children)
    return total
class RemoveAmbiguities(Transformer_InPlace):
    """Collapse every ``_ambig`` node to a single one of its alternatives."""

    def _ambig(self, options):
        """Return the alternative in *options* with the highest ``score``.

        When several alternatives share the top score, the earliest one in
        *options* is returned.
        """
        scores = [score(option) for option in options]
        # list.index() yields the first position holding the maximum, so
        # ties resolve to the earliest alternative.
        return options[scores.index(max(scores))]
class TreeToJson(Transformer):
    """Turn a parse tree produced from ``GRAMMAR`` into plain Python values.

    Each attribute is named after the grammar rule (or alias) whose node it
    converts: ``array`` -> list, ``pair`` -> tuple, ``object`` -> dict,
    ``number`` -> float, ``string`` -> str, and the three literals map to
    ``None`` / ``True`` / ``False``.
    """

    @v_args(inline=True)
    def string(self, s):
        """Drop the first and last character of *s* and unescape ``\\"``."""
        # NOTE(review): the delimiter in this grammar is "_", yet the escape
        # sequence undone here is backslash + double quote -- confirm this is
        # the intended behaviour.
        inner = s[1:-1]
        return inner.replace('\\"', '"')

    def null(self, _):
        """Value for the ``null`` literal."""
        return None

    def true(self, _):
        """Value for the ``true`` literal."""
        return True

    def false(self, _):
        """Value for the ``false`` literal."""
        return False

    # Containers are built by handing the node's children straight to the
    # matching builtin constructor.
    array = list
    pair = tuple
    object = dict

    # The single child of a ``number`` node is passed inline to float().
    number = v_args(inline=True)(float)
# Earley parser combined with the 'dynamic_complete' lexer, as described in
# the module docstring.  ambiguity="explicit" is requested so that competing
# derivations stay in the tree (as `_ambig` nodes, which RemoveAmbiguities
# resolves) instead of being resolved by the parser itself -- see the Lark
# documentation for the exact semantics of this option.
parser = Lark(GRAMMAR, parser='earley', ambiguity="explicit", lexer='dynamic_complete')

# Sample inputs: JSON-like text whose string delimiters are underscores.
# Several of them also contain underscores *inside* string values, which is
# what makes them ambiguous.  The last entry is a single long string built
# from adjacent raw-string literals.
EXAMPLES = [
    r'{_array_:[1,2,3]}',
    r'{_abc_: _array must be of the following format [_1_, _2_, _3_]_}',
    r'{_foo_:{}, _bar_: [], _baz_: __}',
    r'{_error_:_invalid_client_, _error_description_:_AADSTS7000215: Invalid '
    r'client secret is provided.\r\nTrace ID: '
    r'a0a0aaaa-a0a0-0a00-000a-00a00aaa0a00\r\nCorrelation ID: '
    r'aa0aaa00-0aaa-0000-00a0-00000aaaa0aa\r\nTimestamp: 1997-10-10 00:00:00Z_, '
    r'_error_codes_:[7000215], _timestamp_:_1997-10-10 00:00:00Z_, '
    r'_trace_id_:_a0a0aaaa-a0a0-0a00-000a-00a00aaa0a00_, '
    r'_correlation_id_:_aa0aaa00-0aaa-0000-00a0-00000aaaa0aa_, '
    r'_error_uri_:_https://example.com_}',
]
# Parse every sample, keep the best-scoring derivation, convert it to plain
# Python data and pretty-print the result under a separator line.
for example in EXAMPLES:
    # Step 1: parse; the tree may still contain `_ambig` nodes.
    ambiguous_tree = parser.parse(example)
    # Step 2: replace each `_ambig` node with its highest-scoring option.
    resolved_tree = RemoveAmbiguities().transform(ambiguous_tree)
    # Step 3: convert the remaining tree into dicts / lists / scalars.
    as_python = TreeToJson().transform(resolved_tree)

    print('-' * 100)
    pprint(as_python)