| @@ -146,6 +146,7 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail | |||||
| ### Projects using Lark | ### Projects using Lark | ||||
| - [Poetry](https://github.com/python-poetry/poetry-core) - A utility for dependency management and packaging | |||||
| - [tartiflette](https://github.com/dailymotion/tartiflette) - a GraphQL server by Dailymotion | - [tartiflette](https://github.com/dailymotion/tartiflette) - a GraphQL server by Dailymotion | ||||
| - [Hypothesis](https://github.com/HypothesisWorks/hypothesis) - Library for property-based testing | - [Hypothesis](https://github.com/HypothesisWorks/hypothesis) - Library for property-based testing | ||||
| - [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration | - [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration | ||||
| @@ -40,6 +40,9 @@ class Tree: | |||||
| def expand_kids_by_index(self, *indices: int) -> None: | def expand_kids_by_index(self, *indices: int) -> None: | ||||
| ... | ... | ||||
| def expand_kids_by_data(self, *data_values: str) -> bool: | |||||
| ... | |||||
| def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]: | def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]: | ||||
| ... | ... | ||||
| @@ -210,7 +210,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||||
| # TODO considered_rules and expected can be figured out using state | # TODO considered_rules and expected can be figured out using state | ||||
| self.line = getattr(token, 'line', '?') | self.line = getattr(token, 'line', '?') | ||||
| self.column = getattr(token, 'column', '?') | self.column = getattr(token, 'column', '?') | ||||
| self.pos_in_stream = getattr(token, 'pos_in_stream', None) | |||||
| self.pos_in_stream = getattr(token, 'start_pos', None) | |||||
| self.state = state | self.state = state | ||||
| self.token = token | self.token = token | ||||
| @@ -150,7 +150,7 @@ class Token(Str): | |||||
| @property | @property | ||||
| def pos_in_stream(self): | def pos_in_stream(self): | ||||
| warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning) | |||||
| warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning, 2) | |||||
| return self.start_pos | return self.start_pos | ||||
| def update(self, type_=None, value=None): | def update(self, type_=None, value=None): | ||||
| @@ -91,6 +91,7 @@ TERMINALS = { | |||||
| 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', | 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', | ||||
| 'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS, | 'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS, | ||||
| '_NL': r'(\r?\n)+\s*', | '_NL': r'(\r?\n)+\s*', | ||||
| '_NL_OR': r'(\r?\n)+\s*\|', | |||||
| 'WS': r'[ \t]+', | 'WS': r'[ \t]+', | ||||
| 'COMMENT': r'\s*//[^\n]*', | 'COMMENT': r'\s*//[^\n]*', | ||||
| '_TO': '->', | '_TO': '->', | ||||
| @@ -113,9 +114,10 @@ RULES = { | |||||
| ''], | ''], | ||||
| '_template_params': ['RULE', | '_template_params': ['RULE', | ||||
| '_template_params _COMMA RULE'], | '_template_params _COMMA RULE'], | ||||
| 'expansions': ['alias', | |||||
| 'expansions _OR alias', | |||||
| 'expansions _NL _OR alias'], | |||||
| 'expansions': ['_expansions'], | |||||
| '_expansions': ['alias', | |||||
| '_expansions _OR alias', | |||||
| '_expansions _NL_OR alias'], | |||||
| '?alias': ['expansion _TO RULE', 'expansion'], | '?alias': ['expansion _TO RULE', 'expansion'], | ||||
| 'expansion': ['_expansion'], | 'expansion': ['_expansion'], | ||||
| @@ -356,12 +358,8 @@ class SimplifyRule_Visitor(Visitor): | |||||
| @staticmethod | @staticmethod | ||||
| def _flatten(tree): | def _flatten(tree): | ||||
| while True: | |||||
| to_expand = [i for i, child in enumerate(tree.children) | |||||
| if isinstance(child, Tree) and child.data == tree.data] | |||||
| if not to_expand: | |||||
| break | |||||
| tree.expand_kids_by_index(*to_expand) | |||||
| while tree.expand_kids_by_data(tree.data): | |||||
| pass | |||||
| def expansion(self, tree): | def expansion(self, tree): | ||||
| # rules_list unpacking | # rules_list unpacking | ||||
| @@ -599,8 +597,7 @@ def _make_joined_pattern(regexp, flags_set): | |||||
| return PatternRE(regexp, flags) | return PatternRE(regexp, flags) | ||||
| class TerminalTreeToPattern(Transformer): | |||||
| class TerminalTreeToPattern(Transformer_NonRecursive): | |||||
| def pattern(self, ps): | def pattern(self, ps): | ||||
| p ,= ps | p ,= ps | ||||
| return p | return p | ||||
| @@ -670,8 +667,8 @@ class Grammar: | |||||
| def compile(self, start, terminals_to_keep): | def compile(self, start, terminals_to_keep): | ||||
| # We change the trees in-place (to support huge grammars) | # We change the trees in-place (to support huge grammars) | ||||
| # So deepcopy allows calling compile more than once. | # So deepcopy allows calling compile more than once. | ||||
| term_defs = deepcopy(list(self.term_defs)) | |||||
| rule_defs = [(n,p,nr_deepcopy_tree(t),o) for n,p,t,o in self.rule_defs] | |||||
| term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs] | |||||
| rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs] | |||||
| # =================== | # =================== | ||||
| # Compile Terminals | # Compile Terminals | ||||
| @@ -919,7 +916,7 @@ def _get_parser(): | |||||
| parser_conf = ParserConf(rules, callback, ['start']) | parser_conf = ParserConf(rules, callback, ['start']) | ||||
| lexer_conf.lexer_type = 'standard' | lexer_conf.lexer_type = 'standard' | ||||
| parser_conf.parser_type = 'lalr' | parser_conf.parser_type = 'lalr' | ||||
| _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, {}) | |||||
| _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None) | |||||
| return _get_parser.cache | return _get_parser.cache | ||||
| GRAMMAR_ERRORS = [ | GRAMMAR_ERRORS = [ | ||||
| @@ -1096,9 +1093,7 @@ class GrammarBuilder: | |||||
| # TODO: think about what to do with 'options' | # TODO: think about what to do with 'options' | ||||
| base = self._definitions[name][1] | base = self._definitions[name][1] | ||||
| while len(base.children) == 2: | |||||
| assert isinstance(base.children[0], Tree) and base.children[0].data == 'expansions', base | |||||
| base = base.children[0] | |||||
| assert isinstance(base, Tree) and base.data == 'expansions' | |||||
| base.children.insert(0, exp) | base.children.insert(0, exp) | ||||
| def _ignore(self, exp_or_name): | def _ignore(self, exp_or_name): | ||||
| @@ -204,8 +204,7 @@ class AmbiguousExpander: | |||||
| if i in self.to_expand: | if i in self.to_expand: | ||||
| ambiguous.append(i) | ambiguous.append(i) | ||||
| to_expand = [j for j, grandchild in enumerate(child.children) if _is_ambig_tree(grandchild)] | |||||
| child.expand_kids_by_index(*to_expand) | |||||
| child.expand_kids_by_data('_ambig') | |||||
| if not ambiguous: | if not ambiguous: | ||||
| return self.node_builder(children) | return self.node_builder(children) | ||||
| @@ -107,6 +107,17 @@ class Tree(object): | |||||
| kid = self.children[i] | kid = self.children[i] | ||||
| self.children[i:i+1] = kid.children | self.children[i:i+1] = kid.children | ||||
| def expand_kids_by_data(self, *data_values): | |||||
| """Expand (inline) children with any of the given data values. Returns True if anything changed""" | |||||
| changed = False | |||||
| for i in range(len(self.children)-1, -1, -1): | |||||
| child = self.children[i] | |||||
| if isinstance(child, Tree) and child.data in data_values: | |||||
| self.children[i:i+1] = child.children | |||||
| changed = True | |||||
| return changed | |||||
| def scan_values(self, pred): | def scan_values(self, pred): | ||||
| """Return all values in the tree that evaluate pred(value) as true. | """Return all values in the tree that evaluate pred(value) as true. | ||||
| @@ -246,6 +246,18 @@ class TestGrammar(TestCase): | |||||
| self.assertRaises(UnexpectedInput, l.parse, u'A' * 8190) | self.assertRaises(UnexpectedInput, l.parse, u'A' * 8190) | ||||
| self.assertRaises(UnexpectedInput, l.parse, u'A' * 8192) | self.assertRaises(UnexpectedInput, l.parse, u'A' * 8192) | ||||
| def test_large_terminal(self): | |||||
| # TODO: The `reversed` below is required because otherwise the regex engine is happy | |||||
| # with just parsing 9 from the string 999 instead of consuming the longest | |||||
| g = "start: NUMBERS\n" | |||||
| g += "NUMBERS: " + '|'.join('"%s"' % i for i in reversed(range(0, 1000))) | |||||
| l = Lark(g, parser='lalr') | |||||
| for i in (0, 9, 99, 999): | |||||
| self.assertEqual(l.parse(str(i)), Tree('start', [str(i)])) | |||||
| for i in (-1, 1000): | |||||
| self.assertRaises(UnexpectedInput, l.parse, str(i)) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| main() | main() | ||||