|
- """
- Using lexer dynamic_complete
- ============================
-
- Demonstrates how to use ``lexer='dynamic_complete'`` and ``ambiguity='explicit'``
-
- Sometimes you have data that is highly ambiguous or 'broken' in some sense.
- When using ``parser='earley'`` and ``lexer='dynamic_complete'``, Lark will be able
- to parse just about anything as long as there is a valid way to generate it from
- the Grammar, including looking 'into' the Regexes.
-
- This example shows how to parse a JSON input where the quotes have been
- replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}``
- Notice that underscores might still appear inside strings, so a potentially
- valid reading of the above is:
- ``{"foo_:{}, _bar": [], "baz": ""}``
- """
- from pprint import pprint
-
- from lark import Lark, Tree, Transformer, v_args
- from lark.visitors import Transformer_InPlace
-
# Lark grammar for a JSON-like language in which the string delimiter
# (QUOTE_CHAR) is an underscore instead of a double quote.
#
# * WS_INLINE is ignored between tokens.
# * NEWLINE is imported but not referenced by any rule in this grammar.
# * _STRING_INNER is ``/.*/``, i.e. any run of characters, so underscores may
#   also appear *inside* a string; this is what makes the input ambiguous.
# * _STRING_ESC_INNER appends ``/(?<!\\)(\\\\)*?/`` to _STRING_INNER: a
#   position not preceded by a backslash, followed by zero or more pairs of
#   backslashes. This appears intended to stop a backslash-escaped delimiter
#   from closing the string.
GRAMMAR = r"""
%import common.SIGNED_NUMBER
%import common.WS_INLINE
%import common.NEWLINE
%ignore WS_INLINE

?start: value

?value: object
| array
| string
| SIGNED_NUMBER -> number
| "true" -> true
| "false" -> false
| "null" -> null

array : "[" [value ("," value)*] "]"
object : "{" [pair ("," pair)*] "}"
pair : string ":" value

string: STRING
STRING : ESCAPED_STRING

ESCAPED_STRING: QUOTE_CHAR _STRING_ESC_INNER QUOTE_CHAR
QUOTE_CHAR: "_"

_STRING_INNER: /.*/
_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/

"""
-
-
def score(tree: Tree):
    """
    Rate a candidate parse by the total number of children it contains.

    Every subtree yielded by ``tree.iter_subtrees()`` contributes the length
    of its ``children`` list, and the contributions are added up. A reading
    that splits the input into many small nodes therefore scores higher than
    one that swallows a long stretch of text into a single large terminal.

    For example, between

        object
          pair
            string  _foo_
            object
              pair
                string  _bar_: [], _baz_
                string  __

    and

        object
          pair
            string  _foo_
            object
              pair
                string  _bar_
                array
              pair
                string  _baz_
                string  __

    the second option gets the higher score (9 vs 13).

    :param tree: the tree (one option of an ambiguity) to rate.
    :return: the summed child count over all subtrees, as an ``int``.
    """
    total = 0
    for subtree in tree.iter_subtrees():
        total += len(subtree.children)
    return total
-
-
class RemoveAmbiguities(Transformer_InPlace):
    """
    Resolves every ambiguity by keeping a single option, chosen with the
    module-level ``score`` function.

    Each option is scored and the one with the highest score, i.e. the one
    with the most nodes, is kept. When several options share the highest
    score, the first of them is kept.

    If the tree has too many _ambig nodes, making this approach too slow or
    the tree too large, the same selection can instead be written as a
    ForestVisitor. Look at the 'Custom SPPF Prioritizer' example.
    """

    def _ambig(self, options):
        # ``options`` holds the alternative subtrees of one ambiguity;
        # keep the highest-scoring one.
        best_option = max(options, key=score)
        return best_option
-
-
class TreeToJson(Transformer):
    """
    Turns the parse tree into plain Python values.

    This is the same Transformer as the json_parser example.
    """

    # These rules map straight onto built-in constructors, which receive the
    # rule's children.
    array = list
    pair = tuple
    object = dict
    number = v_args(inline=True)(float)

    @v_args(inline=True)
    def string(self, s):
        # Drop the first and last character (the delimiters), then replace
        # every backslash-escaped double quote with a plain double quote.
        unquoted = s[1:-1]
        return unquoted.replace('\\"', '"')

    def null(self, _children):
        return None

    def true(self, _children):
        return True

    def false(self, _children):
        return False
-
-
# Earley parser with the 'dynamic_complete' lexer. ambiguity="explicit" keeps
# all readings in the resulting tree so that RemoveAmbiguities can pick one.
parser = Lark(GRAMMAR, parser='earley', ambiguity="explicit", lexer='dynamic_complete')

# Sample inputs: JSON documents whose double quotes were replaced by
# underscores. Several also contain underscores inside string values.
EXAMPLES = [
    r'{_array_:[1,2,3]}',

    r'{_abc_: _array must be of the following format [_1_, _2_, _3_]_}',

    r'{_foo_:{}, _bar_: [], _baz_: __}',

    r'{_error_:_invalid_client_, _error_description_:_AADSTS7000215: Invalid '
    r'client secret is provided.\r\nTrace ID: '
    r'a0a0aaaa-a0a0-0a00-000a-00a00aaa0a00\r\nCorrelation ID: '
    r'aa0aaa00-0aaa-0000-00a0-00000aaaa0aa\r\nTimestamp: 1997-10-10 00:00:00Z_, '
    r'_error_codes_:[7000215], _timestamp_:_1997-10-10 00:00:00Z_, '
    r'_trace_id_:_a0a0aaaa-a0a0-0a00-000a-00a00aaa0a00_, '
    r'_correlation_id_:_aa0aaa00-0aaa-0000-00a0-00000aaaa0aa_, '
    r'_error_uri_:_https://example.com_}',

]
# For each sample: parse it, reduce each ambiguity to its best-scoring option,
# convert the tree to Python values, then print a separator line and the
# pretty-printed result.
for example in EXAMPLES:
    tree = parser.parse(example)
    tree = RemoveAmbiguities().transform(tree)
    result = TreeToJson().transform(tree)
    print('-' * 100)
    pprint(result)
|