| @@ -427,9 +427,9 @@ I measured memory consumption using a little script called [memusg](https://gist | |||
| | Lark - Earley *(with lexer)* | 42s | 4s | 1167M | 608M | | |||
| | Lark - LALR(1) | 8s | 1.53s | 453M | 266M | | |||
| | Lark - LALR(1) tree-less | 4.76s | 1.23s | 70M | 134M | | |||
| | PyParsing ([Parser](http://pyparsing.wikispaces.com/file/view/jsonParser.py)) | 32s | 3.53s | 443M | 225M | | |||
| | funcparserlib ([Parser](https://github.com/vlasovskikh/funcparserlib/blob/master/funcparserlib/tests/json.py)) | 8.5s | 1.3s | 483M | 293M | | |||
| | Parsimonious ([Parser](https://gist.githubusercontent.com/reclosedev/5222560/raw/5e97cf7eb62c3a3671885ec170577285e891f7d5/parsimonious_json.py)) | ? | 5.7s | ? | 1545M | | |||
| | PyParsing ([Parser](https://github.com/pyparsing/pyparsing/blob/master/examples/jsonParser.py)) | 32s | 3.53s | 443M | 225M | | |||
| | funcparserlib ([Parser](https://github.com/vlasovskikh/funcparserlib/blob/master/tests/json.py)) | 8.5s | 1.3s | 483M | 293M | | |||
| | Parsimonious ([Parser](https://gist.github.com/reclosedev/5222560)) | ? | 5.7s | ? | 1545M | | |||
| I added a few other parsers for comparison. PyParsing and funcparserlib fare pretty well in their memory usage (they don't build a tree), but they can't compete with the run-time speed of LALR(1). | |||
| @@ -442,7 +442,7 @@ Once again, shout-out to PyPy for being so effective. | |||
| This is the end of the tutorial. I hope you liked it and learned a little about Lark. | |||
| To see what else you can do with Lark, check out the [examples](examples). | |||
| To see what else you can do with Lark, check out the [examples](/examples). | |||
| For questions or any other subject, feel free to email me at erezshin at gmail dot com. | |||
| @@ -33,7 +33,7 @@ class LarkOptions: | |||
| regex: bool | |||
| debug: bool | |||
| keep_all_tokens: bool | |||
| propagate_positions: Union[bool, str] | |||
| propagate_positions: Union[bool, Callable] | |||
| maybe_placeholders: bool | |||
| lexer_callbacks: Dict[str, Callable[[Token], Token]] | |||
| cache: Union[bool, str] | |||
| @@ -77,7 +77,7 @@ class Lark: | |||
| regex: bool = False, | |||
| debug: bool = False, | |||
| keep_all_tokens: bool = False, | |||
| propagate_positions: Union[bool, str] = False, | |||
| propagate_positions: Union[bool, Callable] = False, | |||
| maybe_placeholders: bool = False, | |||
| lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None, | |||
| cache: Union[bool, str] = False, | |||
| @@ -1,3 +1,5 @@ | |||
| from copy import deepcopy | |||
| from .utils import Serialize | |||
| from .lexer import TerminalDef | |||
| @@ -24,6 +26,17 @@ class LexerConf(Serialize): | |||
| def _deserialize(self): | |||
| self.terminals_by_name = {t.name: t for t in self.terminals} | |||
| def __deepcopy__(self, memo=None): | |||
| return type(self)( | |||
| deepcopy(self.terminals, memo), | |||
| self.re_module, | |||
| deepcopy(self.ignore, memo), | |||
| deepcopy(self.postlex, memo), | |||
| deepcopy(self.callbacks, memo), | |||
| deepcopy(self.g_regex_flags, memo), | |||
| deepcopy(self.skip_validation, memo), | |||
| deepcopy(self.use_bytes, memo), | |||
| ) | |||
| class ParserConf(Serialize): | |||
| @@ -127,6 +127,8 @@ class UnexpectedInput(LarkError): | |||
| class UnexpectedEOF(ParseError, UnexpectedInput): | |||
| def __init__(self, expected, state=None, terminals_by_name=None): | |||
| super(UnexpectedEOF, self).__init__() | |||
| self.expected = expected | |||
| self.state = state | |||
| from .lexer import Token | |||
| @@ -136,7 +138,6 @@ class UnexpectedEOF(ParseError, UnexpectedInput): | |||
| self.column = -1 | |||
| self._terminals_by_name = terminals_by_name | |||
| super(UnexpectedEOF, self).__init__() | |||
| def __str__(self): | |||
| message = "Unexpected end-of-input. " | |||
| @@ -147,6 +148,8 @@ class UnexpectedEOF(ParseError, UnexpectedInput): | |||
| class UnexpectedCharacters(LexError, UnexpectedInput): | |||
| def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, | |||
| terminals_by_name=None, considered_rules=None): | |||
| super(UnexpectedCharacters, self).__init__() | |||
| # TODO considered_tokens and allowed can be figured out using state | |||
| self.line = line | |||
| self.column = column | |||
| @@ -165,7 +168,6 @@ class UnexpectedCharacters(LexError, UnexpectedInput): | |||
| self.char = seq[lex_pos] | |||
| self._context = self.get_context(seq) | |||
| super(UnexpectedCharacters, self).__init__() | |||
| def __str__(self): | |||
| message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column) | |||
| @@ -188,6 +190,8 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||
| """ | |||
| def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): | |||
| super(UnexpectedToken, self).__init__() | |||
| # TODO considered_rules and expected can be figured out using state | |||
| self.line = getattr(token, 'line', '?') | |||
| self.column = getattr(token, 'column', '?') | |||
| @@ -202,7 +206,6 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||
| self._terminals_by_name = terminals_by_name | |||
| self.token_history = token_history | |||
| super(UnexpectedToken, self).__init__() | |||
| @property | |||
| def accepts(self): | |||
| @@ -229,10 +232,10 @@ class VisitError(LarkError): | |||
| """ | |||
| def __init__(self, rule, obj, orig_exc): | |||
| self.obj = obj | |||
| self.orig_exc = orig_exc | |||
| message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) | |||
| super(VisitError, self).__init__(message) | |||
| self.obj = obj | |||
| self.orig_exc = orig_exc | |||
| ###} | |||
| @@ -39,7 +39,7 @@ class LarkOptions(Serialize): | |||
| Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) | |||
| propagate_positions | |||
| Propagates (line, column, end_line, end_column) attributes into all tree branches. | |||
| Accepts ``False``, ``True``, or "ignore_ws", which will trim the whitespace around your trees. | |||
| Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. | |||
| maybe_placeholders | |||
| When ``True``, the ``[]`` operator returns ``None`` when not matched. | |||
| @@ -157,7 +157,7 @@ class LarkOptions(Serialize): | |||
| assert_config(self.parser, ('earley', 'lalr', 'cyk', None)) | |||
| if self.parser == 'earley' and self.transformer: | |||
| raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm.' | |||
| raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. ' | |||
| 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') | |||
| if o: | |||
| @@ -443,11 +443,11 @@ class Lark(Serialize): | |||
| d = f | |||
| else: | |||
| d = pickle.load(f) | |||
| memo = d['memo'] | |||
| memo_json = d['memo'] | |||
| data = d['data'] | |||
| assert memo | |||
| memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) | |||
| assert memo_json | |||
| memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) | |||
| options = dict(data['options']) | |||
| if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults): | |||
| raise ConfigurationError("Some options are not allowed when loading a Parser: {}" | |||
| @@ -504,11 +504,11 @@ class Lark(Serialize): | |||
| Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...) | |||
| """ | |||
| package = FromPackageLoader(package, search_paths) | |||
| full_path, text = package(None, grammar_path) | |||
| package_loader = FromPackageLoader(package, search_paths) | |||
| full_path, text = package_loader(None, grammar_path) | |||
| options.setdefault('source_path', full_path) | |||
| options.setdefault('import_paths', []) | |||
| options['import_paths'].append(package) | |||
| options['import_paths'].append(package_loader) | |||
| return cls(text, **options) | |||
| def __repr__(self): | |||
| @@ -124,20 +124,20 @@ class Token(str): | |||
| def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): | |||
| try: | |||
| self = super(Token, cls).__new__(cls, value) | |||
| inst = super(Token, cls).__new__(cls, value) | |||
| except UnicodeDecodeError: | |||
| value = value.decode('latin1') | |||
| self = super(Token, cls).__new__(cls, value) | |||
| self.type = type_ | |||
| self.start_pos = start_pos | |||
| self.value = value | |||
| self.line = line | |||
| self.column = column | |||
| self.end_line = end_line | |||
| self.end_column = end_column | |||
| self.end_pos = end_pos | |||
| return self | |||
| inst = super(Token, cls).__new__(cls, value) | |||
| inst.type = type_ | |||
| inst.start_pos = start_pos | |||
| inst.value = value | |||
| inst.line = line | |||
| inst.column = column | |||
| inst.end_line = end_line | |||
| inst.end_column = end_column | |||
| inst.end_pos = end_pos | |||
| return inst | |||
| def update(self, type_=None, value=None): | |||
| return Token.new_borrow_pos( | |||
| @@ -200,15 +200,13 @@ class LineCounter: | |||
| class UnlessCallback: | |||
| def __init__(self, mres): | |||
| self.mres = mres | |||
| def __init__(self, scanner): | |||
| self.scanner = scanner | |||
| def __call__(self, t): | |||
| for mre, type_from_index in self.mres: | |||
| m = mre.match(t.value) | |||
| if m: | |||
| t.type = type_from_index[m.lastindex] | |||
| break | |||
| res = self.scanner.match(t.value, 0) | |||
| if res: | |||
| _value, t.type = res | |||
| return t | |||
| @@ -223,6 +221,11 @@ class CallChain: | |||
| return self.callback2(t) if self.cond(t2) else t2 | |||
| def _get_match(re_, regexp, s, flags): | |||
| m = re_.match(regexp, s, flags) | |||
| if m: | |||
| return m.group(0) | |||
| def _create_unless(terminals, g_regex_flags, re_, use_bytes): | |||
| tokens_by_type = classify(terminals, lambda t: type(t.pattern)) | |||
| assert len(tokens_by_type) <= 2, tokens_by_type.keys() | |||
| @@ -234,40 +237,54 @@ def _create_unless(terminals, g_regex_flags, re_, use_bytes): | |||
| if strtok.priority > retok.priority: | |||
| continue | |||
| s = strtok.pattern.value | |||
| m = re_.match(retok.pattern.to_regexp(), s, g_regex_flags) | |||
| if m and m.group(0) == s: | |||
| if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags): | |||
| unless.append(strtok) | |||
| if strtok.pattern.flags <= retok.pattern.flags: | |||
| embedded_strs.add(strtok) | |||
| if unless: | |||
| callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) | |||
| terminals = [t for t in terminals if t not in embedded_strs] | |||
| return terminals, callback | |||
| def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes): | |||
| # Python sets an unreasonable group limit (currently 100) in its re module | |||
| # Worse, the only way to know we reached it is by catching an AssertionError! | |||
| # This function recursively tries less and less groups until it's successful. | |||
| postfix = '$' if match_whole else '' | |||
| mres = [] | |||
| while terminals: | |||
| pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size]) | |||
| if use_bytes: | |||
| pattern = pattern.encode('latin-1') | |||
| try: | |||
| mre = re_.compile(pattern, g_regex_flags) | |||
| except AssertionError: # Yes, this is what Python provides us.. :/ | |||
| return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes) | |||
| callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) | |||
| mres.append((mre, {i: n for n, i in mre.groupindex.items()})) | |||
| terminals = terminals[max_size:] | |||
| return mres | |||
| new_terminals = [t for t in terminals if t not in embedded_strs] | |||
| return new_terminals, callback | |||
| def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False): | |||
| return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes) | |||
| class Scanner: | |||
| def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False): | |||
| self.terminals = terminals | |||
| self.g_regex_flags = g_regex_flags | |||
| self.re_ = re_ | |||
| self.use_bytes = use_bytes | |||
| self.match_whole = match_whole | |||
| self.allowed_types = {t.name for t in self.terminals} | |||
| self._mres = self._build_mres(terminals, len(terminals)) | |||
| def _build_mres(self, terminals, max_size): | |||
| # Python sets an unreasonable group limit (currently 100) in its re module | |||
| # Worse, the only way to know we reached it is by catching an AssertionError! | |||
| # This function recursively tries less and less groups until it's successful. | |||
| postfix = '$' if self.match_whole else '' | |||
| mres = [] | |||
| while terminals: | |||
| pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size]) | |||
| if self.use_bytes: | |||
| pattern = pattern.encode('latin-1') | |||
| try: | |||
| mre = self.re_.compile(pattern, self.g_regex_flags) | |||
| except AssertionError: # Yes, this is what Python provides us.. :/ | |||
| return self._build_mres(terminals, max_size//2) | |||
| mres.append((mre, {i: n for n, i in mre.groupindex.items()})) | |||
| terminals = terminals[max_size:] | |||
| return mres | |||
| def match(self, text, pos): | |||
| for mre, type_from_index in self._mres: | |||
| m = mre.match(text, pos) | |||
| if m: | |||
| return m.group(0), type_from_index[m.lastindex] | |||
| def _regexp_has_newline(r): | |||
| @@ -327,9 +344,9 @@ class TraditionalLexer(Lexer): | |||
| self.use_bytes = conf.use_bytes | |||
| self.terminals_by_name = conf.terminals_by_name | |||
| self._mres = None | |||
| self._scanner = None | |||
| def _build(self): | |||
| def _build_scanner(self): | |||
| terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes) | |||
| assert all(self.callback.values()) | |||
| @@ -340,19 +357,16 @@ class TraditionalLexer(Lexer): | |||
| else: | |||
| self.callback[type_] = f | |||
| self._mres = build_mres(terminals, self.g_regex_flags, self.re, self.use_bytes) | |||
| self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes) | |||
| @property | |||
| def mres(self): | |||
| if self._mres is None: | |||
| self._build() | |||
| return self._mres | |||
| def scanner(self): | |||
| if self._scanner is None: | |||
| self._build_scanner() | |||
| return self._scanner | |||
| def match(self, text, pos): | |||
| for mre, type_from_index in self.mres: | |||
| m = mre.match(text, pos) | |||
| if m: | |||
| return m.group(0), type_from_index[m.lastindex] | |||
| return self.scanner.match(text, pos) | |||
| def lex(self, state, parser_state): | |||
| with suppress(EOFError): | |||
| @@ -364,7 +378,7 @@ class TraditionalLexer(Lexer): | |||
| while line_ctr.char_pos < len(lex_state.text): | |||
| res = self.match(lex_state.text, line_ctr.char_pos) | |||
| if not res: | |||
| allowed = {v for m, tfi in self.mres for v in tfi.values()} - self.ignore_types | |||
| allowed = self.scanner.allowed_types - self.ignore_types | |||
| if not allowed: | |||
| allowed = {"<END-OF-FILE>"} | |||
| raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column, | |||
| @@ -22,54 +22,59 @@ class ExpandSingleChild: | |||
| class PropagatePositions: | |||
| def __init__(self, node_builder): | |||
| def __init__(self, node_builder, node_filter=None): | |||
| self.node_builder = node_builder | |||
| self.node_filter = node_filter | |||
| def __call__(self, children): | |||
| res = self.node_builder(children) | |||
| # local reference to Tree.meta reduces number of presence checks | |||
| if isinstance(res, Tree): | |||
| res_meta = res.meta | |||
| # Calculate positions while the tree is streaming, according to the rule: | |||
| # - nodes start at the start of their first child's container, | |||
| # and end at the end of their last child's container. | |||
| # Containers are nodes that take up space in text, but have been inlined in the tree. | |||
| src_meta = self._pp_get_meta(children) | |||
| if src_meta is not None: | |||
| res_meta.line = src_meta.line | |||
| res_meta.column = src_meta.column | |||
| res_meta.start_pos = src_meta.start_pos | |||
| res_meta.empty = False | |||
| res_meta = res.meta | |||
| src_meta = self._pp_get_meta(reversed(children)) | |||
| if src_meta is not None: | |||
| res_meta.end_line = src_meta.end_line | |||
| res_meta.end_column = src_meta.end_column | |||
| res_meta.end_pos = src_meta.end_pos | |||
| res_meta.empty = False | |||
| first_meta = self._pp_get_meta(children) | |||
| if first_meta is not None: | |||
| if not hasattr(res_meta, 'line'): | |||
| # meta was already set, probably because the rule has been inlined (e.g. `?rule`) | |||
| res_meta.line = getattr(first_meta, 'container_line', first_meta.line) | |||
| res_meta.column = getattr(first_meta, 'container_column', first_meta.column) | |||
| res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos) | |||
| res_meta.empty = False | |||
| res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line) | |||
| res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column) | |||
| last_meta = self._pp_get_meta(reversed(children)) | |||
| if last_meta is not None: | |||
| if not hasattr(res_meta, 'end_line'): | |||
| res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) | |||
| res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) | |||
| res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos) | |||
| res_meta.empty = False | |||
| res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) | |||
| res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) | |||
| return res | |||
| def _pp_get_meta(self, children): | |||
| for c in children: | |||
| if self.node_filter is not None and not self.node_filter(c): | |||
| continue | |||
| if isinstance(c, Tree): | |||
| if not c.meta.empty: | |||
| return c.meta | |||
| elif isinstance(c, Token): | |||
| return c | |||
| class PropagatePositions_IgnoreWs(PropagatePositions): | |||
| def _pp_get_meta(self, children): | |||
| for c in children: | |||
| if isinstance(c, Tree): | |||
| if not c.meta.empty: | |||
| return c.meta | |||
| elif isinstance(c, Token): | |||
| if c and not c.isspace(): # Disregard whitespace-only tokens | |||
| return c | |||
| def make_propagate_positions(option): | |||
| if option == "ignore_ws": | |||
| return PropagatePositions_IgnoreWs | |||
| if callable(option): | |||
| return partial(PropagatePositions, node_filter=option) | |||
| elif option is True: | |||
| return PropagatePositions | |||
| elif option is False: | |||
| @@ -39,8 +39,7 @@ class MakeParsingFrontend: | |||
| lexer_conf.lexer_type = self.lexer_type | |||
| return ParsingFrontend(lexer_conf, parser_conf, options) | |||
| @classmethod | |||
| def deserialize(cls, data, memo, lexer_conf, callbacks, options): | |||
| def deserialize(self, data, memo, lexer_conf, callbacks, options): | |||
| parser_conf = ParserConf.deserialize(data['parser_conf'], memo) | |||
| parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug) | |||
| parser_conf.callbacks = callbacks | |||
| @@ -92,26 +91,26 @@ class ParsingFrontend(Serialize): | |||
| def _verify_start(self, start=None): | |||
| if start is None: | |||
| start = self.parser_conf.start | |||
| if len(start) > 1: | |||
| raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start) | |||
| start ,= start | |||
| start_decls = self.parser_conf.start | |||
| if len(start_decls) > 1: | |||
| raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls) | |||
| start ,= start_decls | |||
| elif start not in self.parser_conf.start: | |||
| raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start)) | |||
| return start | |||
| def parse(self, text, start=None, on_error=None): | |||
| start = self._verify_start(start) | |||
| chosen_start = self._verify_start(start) | |||
| stream = text if self.skip_lexer else LexerThread(self.lexer, text) | |||
| kw = {} if on_error is None else {'on_error': on_error} | |||
| return self.parser.parse(stream, start, **kw) | |||
| return self.parser.parse(stream, chosen_start, **kw) | |||
| def parse_interactive(self, text=None, start=None): | |||
| start = self._verify_start(start) | |||
| chosen_start = self._verify_start(start) | |||
| if self.parser_conf.parser_type != 'lalr': | |||
| raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") | |||
| stream = text if self.skip_lexer else LexerThread(self.lexer, text) | |||
| return self.parser.parse_interactive(stream, start) | |||
| return self.parser.parse_interactive(stream, chosen_start) | |||
| def get_frontend(parser, lexer): | |||
| @@ -178,8 +178,8 @@ class _Parser(object): | |||
| for token in state.lexer.lex(state): | |||
| state.feed_token(token) | |||
| token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) | |||
| return state.feed_token(token, True) | |||
| end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) | |||
| return state.feed_token(end_token, True) | |||
| except UnexpectedInput as e: | |||
| try: | |||
| e.interactive_parser = InteractiveParser(self, state, state.lexer) | |||
| @@ -61,14 +61,13 @@ class Serialize(object): | |||
| fields = getattr(self, '__serialize_fields__') | |||
| res = {f: _serialize(getattr(self, f), memo) for f in fields} | |||
| res['__type__'] = type(self).__name__ | |||
| postprocess = getattr(self, '_serialize', None) | |||
| if postprocess: | |||
| postprocess(res, memo) | |||
| if hasattr(self, '_serialize'): | |||
| self._serialize(res, memo) | |||
| return res | |||
| @classmethod | |||
| def deserialize(cls, data, memo): | |||
| namespace = getattr(cls, '__serialize_namespace__', {}) | |||
| namespace = getattr(cls, '__serialize_namespace__', []) | |||
| namespace = {c.__name__:c for c in namespace} | |||
| fields = getattr(cls, '__serialize_fields__') | |||
| @@ -82,9 +81,10 @@ class Serialize(object): | |||
| setattr(inst, f, _deserialize(data[f], namespace, memo)) | |||
| except KeyError as e: | |||
| raise KeyError("Cannot find key for class", cls, e) | |||
| postprocess = getattr(inst, '_deserialize', None) | |||
| if postprocess: | |||
| postprocess() | |||
| if hasattr(inst, '_deserialize'): | |||
| inst._deserialize() | |||
| return inst | |||
| @@ -198,14 +198,6 @@ def dedup_list(l): | |||
| return [x for x in l if not (x in dedup or dedup.add(x))] | |||
| def compare(a, b): | |||
| if a == b: | |||
| return 0 | |||
| elif a > b: | |||
| return 1 | |||
| return -1 | |||
| class Enumerator(Serialize): | |||
| def __init__(self): | |||
| self.enums = {} | |||
| @@ -94,6 +94,26 @@ class TestParsers(unittest.TestCase): | |||
| r = g.parse('a') | |||
| self.assertEqual( r.children[0].meta.line, 1 ) | |||
| def test_propagate_positions2(self): | |||
| g = Lark("""start: a | |||
| a: b | |||
| ?b: "(" t ")" | |||
| !t: "t" | |||
| """, propagate_positions=True) | |||
| start = g.parse("(t)") | |||
| a ,= start.children | |||
| t ,= a.children | |||
| assert t.children[0] == "t" | |||
| assert t.meta.column == 2 | |||
| assert t.meta.end_column == 3 | |||
| assert start.meta.column == a.meta.column == 1 | |||
| assert start.meta.end_column == a.meta.end_column == 4 | |||
| def test_expand1(self): | |||
| g = Lark("""start: a | |||