@@ -4,6 +4,7 @@
 /lark_parser.egg-info/**
 tags
 .vscode
+.idea
 .ropeproject
 .cache
 /dist
@@ -72,7 +72,7 @@ Lark is great at handling ambiguity. Let's parse the phrase "fruit flies like ba
 
-See more [examples in the wiki](https://github.com/erezsh/lark/wiki/Examples)
+See more [examples here](https://github.com/lark-parser/lark/tree/master/examples)
@@ -95,7 +95,7 @@ See more [examples in the wiki](https://github.com/erezsh/lark/wiki/Examples)
 - Extensive test suite [](https://codecov.io/gh/erezsh/lark)
 - And much more!
-See the full list of [features in the wiki](https://github.com/erezsh/lark/wiki/Features)
+See the full list of [features here](https://lark-parser.readthedocs.io/en/latest/features/)
 ### Comparison to other libraries
@@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une
 from .lexer import Token
 from .lark import Lark
-__version__ = "0.7.2"
+__version__ = "0.7.4"
@@ -8,7 +8,6 @@ from .exceptions import UnexpectedCharacters, LexError
 ###{standalone
 class Pattern(Serialize):
-    __serialize_fields__ = 'value', 'flags'
     def __init__(self, value, flags=()):
         self.value = value
@@ -41,6 +40,8 @@ class Pattern(Serialize):
 class PatternStr(Pattern):
+    __serialize_fields__ = 'value', 'flags'
     type = "str"
     def to_regexp(self):
@@ -52,6 +53,8 @@ class PatternStr(Pattern):
     max_width = min_width
 class PatternRE(Pattern):
+    __serialize_fields__ = 'value', 'flags', '_width'
     type = "re"
     def to_regexp(self):
@@ -98,7 +101,7 @@ class Token(Str):
         self.type = type_
         self.pos_in_stream = pos_in_stream
-        self.value = Str(value)
+        self.value = value
         self.line = line
         self.column = column
         self.end_line = end_line
@@ -265,13 +268,14 @@ def build_mres(terminals, match_whole=False):
     return _build_mres(terminals, len(terminals), match_whole)
 def _regexp_has_newline(r):
-    """Expressions that may indicate newlines in a regexp:
+    r"""Expressions that may indicate newlines in a regexp:
         - newlines (\n)
        - escaped newline (\\n)
        - anything but ([^...])
        - any-char (.) when the flag (?s) exists
+        - spaces (\s)
    """
-    return '\n' in r or '\\n' in r or '[^' in r or ('(?s' in r and '.' in r)
+    return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
 class Lexer(object):
     """Lexer interface
@@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR_TraditionalLexer
 from .common import LexerConf, ParserConf
 from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
-from .utils import classify, suppress, dedup_list
+from .utils import classify, suppress, dedup_list, Str
 from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken
 from .tree import Tree, SlottedTree as ST
@@ -351,7 +351,10 @@ def _fix_escaping(s):
     for n in i:
         w += n
         if n == '\\':
-            n2 = next(i)
+            try:
+                n2 = next(i)
+            except StopIteration:
+                raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
             if n2 == '\\':
                 w += '\\\\'
             elif n2 not in 'uxnftr':
@@ -451,9 +454,9 @@ class PrepareSymbols(Transformer_InPlace):
         if isinstance(v, Tree):
             return v
         elif v.type == 'RULE':
-            return NonTerminal(v.value)
+            return NonTerminal(Str(v.value))
         elif v.type == 'TERMINAL':
-            return Terminal(v.value, filter_out=v.startswith('_'))
+            return Terminal(Str(v.value), filter_out=v.startswith('_'))
         assert False
 def _choice_of_rules(rules):
@@ -511,12 +514,12 @@ class Grammar:
         simplify_rule = SimplifyRule_Visitor()
         compiled_rules = []
-        for i, rule_content in enumerate(rules):
+        for rule_content in rules:
             name, tree, options = rule_content
             simplify_rule.visit(tree)
             expansions = rule_tree_to_text.transform(tree)
-            for expansion, alias in expansions:
+            for i, (expansion, alias) in enumerate(expansions):
                 if alias and name.startswith('_'):
                     raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))
@@ -538,7 +541,7 @@ class Grammar:
         for dups in duplicates.values():
             if len(dups) > 1:
                 if dups[0].expansion:
-                    raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))
+                    raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)" % ''.join('\n * %s' % i for i in dups))
                 # Empty rule; assert all other attributes are equal
                 assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)
@@ -605,7 +608,9 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
             _, tree, _ = imported_rules[symbol]
         except KeyError:
             raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace))
-        return tree.scan_values(lambda x: x.type in ('RULE', 'TERMINAL'))
+        return _find_used_symbols(tree)
     def get_namespace_name(name):
         try:
@@ -682,6 +687,11 @@ class PrepareGrammar(Transformer_InPlace):
         return name
+def _find_used_symbols(tree):
+    assert tree.data == 'expansions'
+    return {t for x in tree.find_data('expansion')
+            for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}
 class GrammarLoader:
     def __init__(self):
         terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
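A minimal sketch of what the extracted helper yields, built from hand-made nodes rather than a loaded grammar (the token values are made up, and the helper is private to lark.load_grammar):
from lark.load_grammar import _find_used_symbols
from lark.tree import Tree
from lark.lexer import Token

expansions = Tree('expansions', [
    Tree('expansion', [Token('RULE', 'value'), Token('TERMINAL', 'COMMA')]),
])
print(_find_used_symbols(expansions))   # a set holding the RULE and TERMINAL tokens used in the expansions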
@@ -843,9 +853,7 @@ class GrammarLoader:
             rule_names.add(name)
         for name, expansions, _o in rules:
-            used_symbols = {t for x in expansions.find_data('expansion')
-                            for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}
-            for sym in used_symbols:
+            for sym in _find_used_symbols(expansions):
                 if sym.type == 'TERMINAL':
                     if sym not in terminal_names:
                         raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name))
@@ -118,7 +118,7 @@ class LALR_ContextualLexer(LALR_WithLexer):
 class LALR_CustomLexer(LALR_WithLexer):
     def __init__(self, lexer_cls, lexer_conf, parser_conf, options=None):
-        self.lexer = lexer_cls(self.lexer_conf)
+        self.lexer = lexer_cls(lexer_conf)
         debug = options.debug if options else False
         self.parser = LALR_Parser(parser_conf, debug=debug)
         WithLexer.__init__(self, lexer_conf, parser_conf, options)
@@ -139,7 +139,8 @@ class Earley(WithLexer):
         self.init_traditional_lexer()
         resolve_ambiguity = options.ambiguity == 'resolve'
-        self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity)
+        debug = options.debug if options else False
+        self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug)
     def match(self, term, token):
         return term.name == token.type
@@ -152,10 +153,12 @@ class XEarley(_ParserFrontend):
         self._prepare_match(lexer_conf)
         resolve_ambiguity = options.ambiguity == 'resolve'
+        debug = options.debug if options else False
         self.parser = xearley.Parser(parser_conf,
                                      self.match,
                                      ignore=lexer_conf.ignore,
                                      resolve_ambiguity=resolve_ambiguity,
+                                     debug=debug,
                                      **kw
                                      )
@@ -20,10 +20,11 @@ from .earley_common import Item, TransitiveItem
 from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor
 class Parser:
-    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True):
+    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False):
         analysis = GrammarAnalyzer(parser_conf)
         self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
+        self.debug = debug
         self.FIRST = analysis.FIRST
         self.NULLABLE = analysis.NULLABLE
@@ -296,6 +297,10 @@ class Parser:
         # symbol should have been completed in the last step of the Earley cycle, and will be in
         # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
         solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
+        if self.debug:
+            from .earley_forest import ForestToPyDotVisitor
+            debug_walker = ForestToPyDotVisitor()
+            debug_walker.visit(solutions[0], "sppf.png")
         if not solutions:
             expected_tokens = [t.expect for t in to_scan]
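With the debug flag threaded through the frontends above, a call along these lines should render the SPPF to sppf.png after an Earley parse; this is only a sketch, and it assumes the optional pydot dependency is installed, since ForestToPyDotVisitor relies on it.
from lark import Lark

parser = Lark(r'''
    start: a | b
    a: "x"
    b: "x"
''', parser='earley', ambiguity='explicit', debug=True)
print(parser.parse("x").pretty())   # ambiguous tree; the forest is also written to sppf.png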
@@ -122,7 +122,7 @@ class PackedNode(ForestNode):
         ambiguously. Hence, we use the sort order to identify
         the order in which ambiguous children should be considered.
         """
-        return self.is_empty, -self.priority, -self.rule.order
+        return self.is_empty, -self.priority, self.rule.order
     def __iter__(self):
         return iter([self.left, self.right])
@@ -24,8 +24,8 @@ from .earley_forest import SymbolNode
 class Parser(BaseParser):
-    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False):
-        BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity)
+    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False, debug=False):
+        BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity, debug)
         self.ignore = [Terminal(t) for t in ignore]
         self.complete_lex = complete_lex
@@ -0,0 +1,39 @@
+import codecs
+import sys
+import json
+from lark import Lark
+from lark.grammar import RuleOptions, Rule
+from lark.lexer import TerminalDef
+import argparse
+argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize') #description='''Lark Serialization Tool -- Stores Lark's internal state & LALR analysis as a convenient JSON file''')
+argparser.add_argument('grammar_file', type=argparse.FileType('r'), help='A valid .lark file')
+argparser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help='json file path to create (default=stdout)')
+argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")', nargs='+')
+argparser.add_argument('-l', '--lexer', default='standard', choices=['standard', 'contextual'], help='lexer type (default="standard")')
+def serialize(infile, outfile, lexer, start):
+    lark_inst = Lark(infile, parser="lalr", lexer=lexer, start=start)    # TODO contextual
+    data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
+    outfile.write('{\n')
+    outfile.write(' "data": %s,\n' % json.dumps(data))
+    outfile.write(' "memo": %s\n' % json.dumps(memo))
+    outfile.write('}\n')
+def main():
+    if len(sys.argv) == 1 or '-h' in sys.argv or '--help' in sys.argv:
+        print("Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file")
+        print("")
+        argparser.print_help()
+    else:
+        args = argparser.parse_args()
+        serialize(args.grammar_file, args.out, args.lexer, args.start)
+if __name__ == '__main__':
+    main()
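A hedged usage sketch for the new tool; the grammar and output file names here are hypothetical:
#   python -m lark.tools.serialize calc.lark -o calc.json
import json
with open('calc.json') as f:
    blob = json.load(f)
print(sorted(blob))   # -> ['data', 'memo']: the serialized grammar/parser state plus the memoized rules and terminals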
@@ -56,30 +56,6 @@ class Tree(object):
     def __hash__(self):
         return hash((self.data, tuple(self.children)))
-###}
-    def expand_kids_by_index(self, *indices):
-        "Expand (inline) children at the given indices"
-        for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
-            kid = self.children[i]
-            self.children[i:i+1] = kid.children
-    def find_pred(self, pred):
-        "Find all nodes where pred(tree) == True"
-        return filter(pred, self.iter_subtrees())
-    def find_data(self, data):
-        "Find all nodes where tree.data == data"
-        return self.find_pred(lambda t: t.data == data)
-    def scan_values(self, pred):
-        for c in self.children:
-            if isinstance(c, Tree):
-                for t in c.scan_values(pred):
-                    yield t
-            else:
-                if pred(c):
-                    yield c
     def iter_subtrees(self):
         # TODO: Re-write as a more efficient version
@@ -102,6 +78,31 @@ class Tree(object):
                 yield x
                 seen.add(id(x))
+    def find_pred(self, pred):
+        "Find all nodes where pred(tree) == True"
+        return filter(pred, self.iter_subtrees())
+    def find_data(self, data):
+        "Find all nodes where tree.data == data"
+        return self.find_pred(lambda t: t.data == data)
+###}
+    def expand_kids_by_index(self, *indices):
+        "Expand (inline) children at the given indices"
+        for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
+            kid = self.children[i]
+            self.children[i:i+1] = kid.children
+    def scan_values(self, pred):
+        for c in self.children:
+            if isinstance(c, Tree):
+                for t in c.scan_values(pred):
+                    yield t
+            else:
+                if pred(c):
+                    yield c
     def iter_subtrees_topdown(self):
         stack = [self]
         while stack:
@@ -160,7 +160,7 @@ def smart_decorator(f, create_decorator):
     elif isinstance(f, partial):
         # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
-        return create_decorator(f.__func__, True)
+        return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
     else:
         return create_decorator(f.__func__.__call__, True)
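A small check that shows why the old branch failed: functools.partial objects expose .func but have no __func__, so the previous attribute access raised AttributeError; the new branch wraps the partial itself and strips the implicit first argument (self/cls) before calling it.
from functools import partial

p = partial(lambda prefix, s: prefix + s, "@")
print(hasattr(p, '__func__'), hasattr(p, 'func'))   # -> False True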
@@ -172,7 +172,7 @@ import sre_parse
 import sre_constants
 def get_regexp_width(regexp):
     try:
-        return sre_parse.parse(regexp).getwidth()
+        return [int(x) for x in sre_parse.parse(regexp).getwidth()]
     except sre_constants.error:
         raise ValueError(regexp)
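A small illustration of the normalization, relying only on stdlib behaviour: getwidth() can hand back special constants such as sre_constants.MAXREPEAT for unbounded patterns, and casting to plain ints keeps the (min, max) pair easy to compare and serialize.
import sre_parse

lo, hi = sre_parse.parse(r'a+').getwidth()
print([int(x) for x in (lo, hi)])   # e.g. [1, 4294967295]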
@@ -0,0 +1,10 @@
+version: 2
+mkdocs:
+  configuration: mkdocs.yml
+  fail_on_warning: false
+formats: all
+python:
+  version: 3.5
@@ -21,6 +21,7 @@ from .test_parser import (
         TestCykStandard,
         TestLalrContextual,
         TestEarleyDynamic,
+        TestLalrCustom,
         # TestFullEarleyStandard,
         TestFullEarleyDynamic,
@@ -22,7 +22,7 @@ from lark.exceptions import GrammarError, ParseError, UnexpectedToken, Unexpecte
 from lark.tree import Tree
 from lark.visitors import Transformer, Transformer_InPlace, v_args
 from lark.grammar import Rule
-from lark.lexer import TerminalDef
+from lark.lexer import TerminalDef, Lexer, TraditionalLexer
 __path__ = os.path.dirname(__file__)
 def _read(n, *args):
@@ -431,12 +431,22 @@ def _make_full_earley_test(LEXER):
     _TestFullEarley.__name__ = _NAME
     globals()[_NAME] = _TestFullEarley
+class CustomLexer(Lexer):
+    """
+    Purpose of this custom lexer is to test the integration,
+    so it uses the traditionalparser as implementation without custom lexing behaviour.
+    """
+    def __init__(self, lexer_conf):
+        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)
+    def lex(self, *args, **kwargs):
+        return self.lexer.lex(*args, **kwargs)
 def _make_parser_test(LEXER, PARSER):
+    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
     def _Lark(grammar, **kwargs):
-        return Lark(grammar, lexer=LEXER, parser=PARSER, propagate_positions=True, **kwargs)
+        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
     def _Lark_open(gfilename, **kwargs):
-        return Lark.open(gfilename, lexer=LEXER, parser=PARSER, propagate_positions=True, **kwargs)
+        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
     class _TestParser(unittest.TestCase):
         def test_basic1(self):
             g = _Lark("""start: a+ b a* "b" a*
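The wiring the new test exercises, reduced to a standalone sketch: any class that accepts a lexer_conf and exposes lex() can be handed to Lark as lexer= together with parser='lalr'. The class name and grammar below are made up for illustration.
from lark import Lark
from lark.lexer import Lexer, TraditionalLexer

class PassthroughLexer(Lexer):
    # Delegates to the built-in lexer; a real custom lexer would do its own tokenizing.
    def __init__(self, lexer_conf):
        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore,
                                      user_callbacks=lexer_conf.callbacks)
    def lex(self, *args, **kwargs):
        return self.lexer.lex(*args, **kwargs)

parser = Lark(r'''
    start: WORD+
    WORD: /\w+/
    %ignore " "
''', parser='lalr', lexer=PassthroughLexer)
print(parser.parse("hello world").children)   # -> [Token(WORD, 'hello'), Token(WORD, 'world')]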
@@ -1532,7 +1542,7 @@ def _make_parser_test(LEXER, PARSER):
             parser = _Lark(grammar)
-        @unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)")
+        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
         def test_serialize(self):
             grammar = """
             start: _ANY b "C"
@@ -1558,6 +1568,28 @@ def _make_parser_test(LEXER, PARSER):
             self.assertEqual(parser.parse('xa', 'a'), Tree('a', []))
             self.assertEqual(parser.parse('xb', 'b'), Tree('b', []))
+        def test_lexer_detect_newline_tokens(self):
+            # Detect newlines in regular tokens
+            g = _Lark(r"""start: "go" tail*
+            !tail : SA "@" | SB "@" | SC "@" | SD "@"
+            SA : "a" /\n/
+            SB : /b./s
+            SC : "c" /[^a-z]/
+            SD : "d" /\s/
+            """)
+            a,b,c,d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
+            self.assertEqual(a.line, 2)
+            self.assertEqual(b.line, 3)
+            self.assertEqual(c.line, 4)
+            self.assertEqual(d.line, 5)
+            # Detect newlines in ignored tokens
+            for re in ['/\\n/', '/[^a-z]/', '/\\s/']:
+                g = _Lark('''!start: "a" "a"
+                            %ignore {}'''.format(re))
+                a, b = g.parse('a\na').children
+                self.assertEqual(a.line, 1)
+                self.assertEqual(b.line, 2)
     _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
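Outside the test harness, the broadened newline check that this test exercises can be poked at directly, assuming the private helper stays importable from lark.lexer: any terminal whose regexp can match a newline (now including a bare \s) makes the lexer keep line counts accurate.
from lark.lexer import _regexp_has_newline

# Regexp sources as they would appear in a grammar; \s now counts as "may match a newline".
for source in (r'\n', r'\\n', r'[^a-z]', r'(?s).', r'\s', r'[a-z]+'):
    print(source, _regexp_has_newline(source))
# Expected: True for the first five, False for the plain character class.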
@@ -1572,6 +1604,7 @@ _TO_TEST = [
         ('dynamic_complete', 'earley'),
         ('standard', 'lalr'),
         ('contextual', 'lalr'),
+        ('custom', 'lalr'),
         # (None, 'earley'),
 ]
@@ -4,6 +4,7 @@ import unittest
 from unittest import TestCase
 import copy
 import pickle
+import functools
 from lark.tree import Tree
 from lark.visitors import Transformer, Interpreter, visit_children_decor, v_args, Discard
@@ -146,6 +147,22 @@ class TestTrees(TestCase):
         res = T().transform(t)
         self.assertEqual(res, 2.9)
+    def test_partial(self):
+        tree = Tree("start", [Tree("a", ["test1"]), Tree("b", ["test2"])])
+        def test(prefix, s, postfix):
+            return prefix + s.upper() + postfix
+        @v_args(inline=True)
+        class T(Transformer):
+            a = functools.partial(test, "@", postfix="!")
+            b = functools.partial(lambda s: s + "!")
+        res = T().transform(tree)
+        assert res.children == ["@TEST1!", "test2!"]
     def test_discard(self):
         class MyTransformer(Transformer):
             def a(self, args):