|
@@ -9,10 +9,10 @@ When using ``parser='earley'`` and ``lexer='dynamic_complete'``, Lark will be ab |
|
|
parse just about anything as long as there is a valid way to generate it from |
|
|
parse just about anything as long as there is a valid way to generate it from |
|
|
the Grammar, including looking 'into' the Regexes. |
|
|
the Grammar, including looking 'into' the Regexes. |
|
|
|
|
|
|
|
|
This examples shows how to parse a json input where are quotes have been |
|
|
|
|
|
|
|
|
This example shows how to parse a json input where the quotes have been |
|
|
replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}`` |
|
|
replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}`` |
|
|
Notice that underscores might still appear inside strings, so a potentially |
|
|
Notice that underscores might still appear inside strings, so a potentially |
|
|
valid reading of the above might in normal json be: |
|
|
|
|
|
|
|
|
valid reading of the above is: |
|
|
``{"foo_:{}, _bar": [], "baz": ""}`` |
|
|
``{"foo_:{}, _bar": [], "baz": ""}`` |
|
|
""" |
|
|
""" |
|
|
from pprint import pprint |
|
|
from pprint import pprint |
|
@@ -53,15 +53,56 @@ _STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def score(tree: Tree): |
|
|
def score(tree: Tree): |
|
|
|
|
|
""" |
|
|
|
|
|
Scores an option by how many children (and grand-children, and |
|
|
|
|
|
grand-grand-children, ...) it has. |
|
|
|
|
|
This means that the option with fewer large terminals gets selected |
|
|
|
|
|
|
|
|
|
|
|
Between |
|
|
|
|
|
object |
|
|
|
|
|
pair |
|
|
|
|
|
string _foo_ |
|
|
|
|
|
object |
|
|
|
|
|
pair |
|
|
|
|
|
string _bar_: [], _baz_ |
|
|
|
|
|
string __ |
|
|
|
|
|
|
|
|
|
|
|
and |
|
|
|
|
|
|
|
|
|
|
|
object |
|
|
|
|
|
pair |
|
|
|
|
|
string _foo_ |
|
|
|
|
|
object |
|
|
|
|
|
pair |
|
|
|
|
|
string _bar_ |
|
|
|
|
|
array |
|
|
|
|
|
pair |
|
|
|
|
|
string _baz_ |
|
|
|
|
|
string __ |
|
|
|
|
|
|
|
|
|
|
|
this will give the second a higher score. (9 vs 13) |
|
|
|
|
|
""" |
|
|
return sum(len(t.children) for t in tree.iter_subtrees()) |
|
|
return sum(len(t.children) for t in tree.iter_subtrees()) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RemoveAmbiguities(Transformer_InPlace): |
|
|
class RemoveAmbiguities(Transformer_InPlace): |
|
|
|
|
|
""" |
|
|
|
|
|
Selects an option to resolve an ambiguity using the score function above. |
|
|
|
|
|
Scores each option and selects the one with the highest score, i.e. the one |
|
|
|
|
|
with more nodes. |
|
|
|
|
|
|
|
|
|
|
|
If there is a performance problem with the Tree having too many _ambig and |
|
|
|
|
|
being slow and too large, this can instead be written as a ForestVisitor. |
|
|
|
|
|
Look at the 'Custom SPPF Prioritizer' example. |
|
|
|
|
|
""" |
|
|
def _ambig(self, options): |
|
|
def _ambig(self, options): |
|
|
return max(options, key=score) |
|
|
return max(options, key=score) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TreeToJson(Transformer): |
|
|
class TreeToJson(Transformer): |
|
|
|
|
|
""" |
|
|
|
|
|
This is the same Transformer as the json_parser example. |
|
|
|
|
|
""" |
|
|
@v_args(inline=True) |
|
|
@v_args(inline=True) |
|
|
def string(self, s): |
|
|
def string(self, s): |
|
|
return s[1:-1].replace('\\"', '"') |
|
|
return s[1:-1].replace('\\"', '"') |
|
|