|
@@ -0,0 +1,103 @@ |
|
|
|
|
|
""" |
|
|
|
|
|
Using lexer dynamic_complete |
|
|
|
|
|
============================ |
|
|
|
|
|
|
|
|
|
|
|
Demonstrates how to use ``lexer='dynamic_complete'`` and ``ambiguity='explicit'`` |
|
|
|
|
|
|
|
|
|
|
|
Sometimes you have data that is highly ambiguous or 'broken' in some sense. |
|
|
|
|
|
When using ``parser='earley'`` and ``lexer='dynamic_complete'``, Lark will be able |
|
|
|
|
|
to parse just about anything as long as there is a valid way to generate it from |
|
|
|
|
|
the Grammar, including looking 'into' the Regexes. |
|
|
|
|
|
|
|
|
|
|
|
This example shows how to parse a JSON input where all quotes have been |
|
|
|
|
|
replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}`` |
|
|
|
|
|
Notice that underscores might still appear inside strings, so a potentially |
|
|
|
|
|
valid reading of the above might in normal json be: |
|
|
|
|
|
``{"foo_:{}, _bar": [], "baz": ""}`` |
|
|
|
|
|
""" |
|
|
|
|
|
from pprint import pprint |
|
|
|
|
|
|
|
|
|
|
|
from lark import Lark, Tree, Transformer, v_args |
|
|
|
|
|
from lark.visitors import Transformer_InPlace |
|
|
|
|
|
|
|
|
|
|
|
# Lark grammar for a JSON dialect in which strings are delimited by ``_``
# (QUOTE_CHAR) instead of a double quote. The trailing regex in
# _STRING_ESC_INNER only lets a string end after an even number (possibly
# zero) of backslashes, so a backslash-escaped ``_`` never terminates it.
GRAMMAR = r"""
%import common.SIGNED_NUMBER
%import common.WS_INLINE
%import common.NEWLINE
%ignore WS_INLINE

?start: value

?value: object
      | array
      | string
      | SIGNED_NUMBER -> number
      | "true" -> true
      | "false" -> false
      | "null" -> null

array : "[" [value ("," value)*] "]"
object : "{" [pair ("," pair)*] "}"
pair : string ":" value

string: STRING
STRING : ESCAPED_STRING

ESCAPED_STRING: QUOTE_CHAR _STRING_ESC_INNER QUOTE_CHAR
QUOTE_CHAR: "_"

_STRING_INNER: /.*/
_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/

"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def score(tree: Tree):
    """Return the total number of children over every subtree of *tree*.

    Used as the ranking key when choosing between ambiguous derivations:
    the derivation whose subtrees hold the most children in total wins.
    """
    total = 0
    for subtree in tree.iter_subtrees():
        total += len(subtree.children)
    return total
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RemoveAmbiguities(Transformer_InPlace):
    """In-place transformer that resolves every ``_ambig`` node to one option."""

    def _ambig(self, options):
        """Pick the alternative with the highest ``score``.

        ``max`` returns the first of several equally scored options.
        """
        best = max(options, key=score)
        return best
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TreeToJson(Transformer):
    """Turn a disambiguated parse tree into plain Python values.

    Every attribute is named after a rule or alias in ``GRAMMAR`` and builds
    the Python value for nodes of that name.
    """

    @v_args(inline=True)
    def string(self, s):
        # Strip the opening and closing delimiter, then unescape \" sequences.
        # NOTE(review): the delimiter in GRAMMAR is `_`, so `\_` may be the
        # escape that was meant here -- confirm before changing.
        inner = s[1:-1]
        return inner.replace('\\"', '"')

    # Containers and numbers map straight onto builtin constructors.
    array = list
    pair = tuple
    object = dict
    number = v_args(inline=True)(float)

    # The keyword aliases ignore their children and yield a constant.
    def null(self, _):
        return None

    def true(self, _):
        return True

    def false(self, _):
        return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Earley with the 'dynamic_complete' lexer, as described in the module
# docstring. ambiguity="explicit" leaves ambiguous derivations in the tree so
# that RemoveAmbiguities can choose between them afterwards.
parser = Lark(
    GRAMMAR,
    parser='earley',
    lexer='dynamic_complete',
    ambiguity="explicit",
)
|
|
|
|
|
|
|
|
|
|
|
# Sample inputs: JSON documents whose double quotes were replaced by ``_``.
EXAMPLES = [
    # Object with a single array value.
    r'{_array_:[1,2,3]}',

    # String value that itself contains underscores, commas and brackets.
    r'{_abc_: _array must be of the following format [_1_, _2_, _3_]_}',

    # Empty object, empty array and empty string (``__``).
    r'{_foo_:{}, _bar_: [], _baz_: __}',

    # Long error payload, written as adjacent raw literals that Python joins
    # into one string; keys and values both contain underscores.
    (
        r'{_error_:_invalid_client_, _error_description_:_AADSTS7000215: Invalid '
        r'client secret is provided.\r\nTrace ID: '
        r'a0a0aaaa-a0a0-0a00-000a-00a00aaa0a00\r\nCorrelation ID: '
        r'aa0aaa00-0aaa-0000-00a0-00000aaaa0aa\r\nTimestamp: 1997-10-10 00:00:00Z_, '
        r'_error_codes_:[7000215], _timestamp_:_1997-10-10 00:00:00Z_, '
        r'_trace_id_:_a0a0aaaa-a0a0-0a00-000a-00a00aaa0a00_, '
        r'_correlation_id_:_aa0aaa00-0aaa-0000-00a0-00000aaaa0aa_, '
        r'_error_uri_:_https://example.com_}'
    ),
]
|
|
|
|
|
# Parse every sample, collapse its ambiguities, convert the tree to Python
# values and pretty-print the result below a separator line.
for example in EXAMPLES:
    tree = RemoveAmbiguities().transform(parser.parse(example))
    result = TreeToJson().transform(tree)

    print('-' * 100)
    pprint(result)