From 7a67f0d027a820543ba5dfe4e429c1c9a3cdeeeb Mon Sep 17 00:00:00 2001 From: julienmalard Date: Sun, 16 Aug 2020 10:47:36 -0400 Subject: [PATCH 1/3] Postproc option for reconstruct, and fixed isalnum bug --- lark/reconstruct.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 89967b2..dfdaae1 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -1,3 +1,5 @@ +import unicodedata + from collections import defaultdict from .tree import Tree @@ -93,6 +95,8 @@ def make_recons_rule(origin, expansion, old_expansion): def make_recons_rule_to_term(origin, term): return make_recons_rule(origin, [Terminal(term.name)], [term]) +def _isalnum(x): + return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'] class Reconstructor: """ @@ -193,12 +197,15 @@ class Reconstructor: else: yield item - def reconstruct(self, tree): - x = self._reconstruct(tree) + def reconstruct(self, tree, postproc=None): + if postproc is None: + x = self._reconstruct(tree) + else: + x = postproc(self._reconstruct(tree)) y = [] prev_item = '' for item in x: - if prev_item and item and prev_item[-1].isalnum() and item[0].isalnum(): + if prev_item and item and _isalnum(prev_item[-1]) and _isalnum(item[0]): y.append(' ') y.append(item) prev_item = item From a768506945a56ed1503cba230fb79d7beaa4dacf Mon Sep 17 00:00:00 2001 From: julienmalard Date: Mon, 17 Aug 2020 08:23:44 -0400 Subject: [PATCH 2/3] Erez's idea --- lark/reconstruct.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lark/reconstruct.py b/lark/reconstruct.py index dfdaae1..1091681 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -198,10 +198,9 @@ class Reconstructor: yield item def reconstruct(self, tree, postproc=None): - if postproc is None: - x = self._reconstruct(tree) - else: - x = postproc(self._reconstruct(tree)) + x = self._reconstruct(tree) + if postproc: + x = postproc(x) y = [] prev_item = '' for item in x: From 023709f7104166e790d8dd0c7e88d8070cc8e4e9 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 17 Aug 2020 16:40:12 +0300 Subject: [PATCH 3/3] Added comment --- lark/reconstruct.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 1091681..35e5994 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -96,6 +96,7 @@ def make_recons_rule_to_term(origin, term): return make_recons_rule(origin, [Terminal(term.name)], [term]) def _isalnum(x): + # Categories defined here: https://www.python.org/dev/peps/pep-3131/ return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'] class Reconstructor: