@@ -25,3 +25,16 @@ class UnexpectedToken(ParseError): | |||
def is_terminal(sym):
    """Return True if *sym* names a terminal symbol.

    Terminals are written in all-uppercase, or start with '$' (internal
    symbols such as '$end').  Uses startswith so an empty symbol name
    returns False instead of raising IndexError on sym[0].
    """
    return sym.isupper() or sym.startswith('$')
class LexerConf:
    """Plain configuration bundle describing how to build a lexer."""

    def __init__(self, tokens, ignore, postlex):
        # tokens: token definitions to match
        # ignore: names of tokens to drop from the stream
        # postlex: optional post-lexing processor (or None)
        self.tokens, self.ignore, self.postlex = tokens, ignore, postlex
class ParserConf:
    """Plain configuration bundle describing how to build a parser."""

    def __init__(self, rules, callback, start):
        # rules: grammar rules
        # callback: tree-building callback object
        # start: name of the start rule
        self.rules, self.callback, self.start = rules, callback, start
@@ -5,7 +5,7 @@ import os | |||
from .utils import STRING_TYPE, inline_args | |||
from .load_grammar import load_grammar | |||
from .tree import Tree, Transformer | |||
from .common import GrammarError | |||
from .common import GrammarError, LexerConf, ParserConf | |||
from .lexer import Lexer | |||
from .parse_tree_builder import ParseTreeBuilder | |||
@@ -105,45 +105,46 @@ class Lark: | |||
assert isinstance(grammar, STRING_TYPE) | |||
if self.options.cache_grammar: | |||
if self.options.cache_grammar or self.options.keep_all_tokens: | |||
raise NotImplementedError("Not available yet") | |||
assert not self.options.profile, "Feature temporarily disabled" | |||
self.profiler = Profiler() if self.options.profile else None | |||
self.tokens, self.rules = load_grammar(grammar) | |||
tokens, self.rules = load_grammar(grammar) | |||
self.ignore_tokens = [] | |||
for tokendef, flags in tokens: | |||
for flag in flags: | |||
if flag == 'ignore': | |||
self.ignore_tokens.append(tokendef.name) | |||
else: | |||
raise GrammarError("No such flag: %s" % flag) | |||
self.lexer_conf = LexerConf([t[0] for t in tokens], self.ignore_tokens, self.options.postlex) | |||
if not self.options.only_lex: | |||
self.parser_engine = ENGINE_DICT[self.options.parser]() | |||
self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class) | |||
self.parser = self._build_parser() | |||
self.lexer = self._build_lexer() | |||
else: | |||
self.lexer = self._build_lexer() | |||
if self.profiler: self.profiler.enter_section('outside_lark') | |||
__init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC | |||
def _build_lexer(self):
    """Construct the Lexer from the prepared lexer configuration.

    The token and ignore lists are already collected into self.lexer_conf
    during __init__; the pre-refactor body that re-scanned self.tokens
    (an attribute that no longer exists) is removed.
    """
    return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore)
def _build_parser(self):
    """Construct the parser frontend selected by options.parser.

    Builds the tree-builder callbacks, optionally wraps them with the
    profiler, and hands both lexer and parser configuration to the
    frontend class.
    """
    self.parser_class = ENGINE_DICT[self.options.parser]
    self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
    rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
    if self.profiler:
        # Wrap every non-dunder callback so transformer time is attributed
        # to the 'transformer' section.  Skips only true dunder names
        # (both leading AND trailing underscores).
        for f in dir(callback):
            if not (f.startswith('__') and f.endswith('__')):
                setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))
    parser_conf = ParserConf(rules, callback, self.options.start)
    return self.parser_class(self.lexer_conf, parser_conf)
def lex(self, text): | |||
@@ -156,15 +157,17 @@ class Lark: | |||
def parse(self, text):
    """Parse *text* with the configured frontend and return the result.

    Lexing now happens inside the parser frontend, so the raw text is
    passed straight through.  The dead commented-out profiler block
    (a stale copy of the pre-refactor body) is removed.
    """
    assert not self.options.only_lex
    return self.parser.parse(text)
@@ -6,7 +6,7 @@ from .lexer import Lexer, Token, UnexpectedInput, TokenDef__Str, TokenDef__Regex | |||
from .parse_tree_builder import ParseTreeBuilder | |||
from .parser_frontends import LALR | |||
from .parsers.lalr_parser import UnexpectedToken | |||
from .common import is_terminal, GrammarError | |||
from .common import is_terminal, GrammarError, LexerConf, ParserConf | |||
from .tree import Tree as T, Transformer, InlineTransformer, Visitor | |||
@@ -279,11 +279,12 @@ class ExtractAnonTokens(InlineTransformer): | |||
class GrammarLoader: | |||
def __init__(self):
    """Build the bootstrap lexer/parser for lark's own grammar syntax.

    The old direct Lexer construction and LALR().build_parser call are
    replaced by LexerConf/ParserConf bundles handed to the LALR frontend.
    """
    tokens = [TokenDef__Regexp(name, value) for name, value in TOKENS.items()]

    d = {r: [(x.split(), None) for x in xs] for r, xs in RULES.items()}
    rules, callback = ParseTreeBuilder(T).create_tree_builder(d, None)
    lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'], None)
    parser_conf = ParserConf(rules, callback, 'start')
    self.parser = LALR(lexer_conf, parser_conf)

    self.simplify_tree = SimplifyTree()
    self.simplify_rule = SimplifyRule_Visitor()
@@ -291,12 +292,9 @@ class GrammarLoader: | |||
def load_grammar(self, grammar_text): | |||
try: | |||
token_stream = list(self.lexer.lex(grammar_text+"\n")) | |||
tree = self.simplify_tree.transform( self.parser.parse(grammar_text+'\n') ) | |||
except UnexpectedInput as e: | |||
raise GrammarError("Unexpected input %r at line %d column %d" % (e.context, e.line, e.column)) | |||
try: | |||
tree = self.simplify_tree.transform( self.parser.parse(token_stream) ) | |||
except UnexpectedToken as e: | |||
if '_COLON' in e.expected: | |||
raise GrammarError("Missing colon at line %s column %s" % (e.line, e.column)) | |||
@@ -1,32 +1,55 @@ | |||
from .lexer import Lexer | |||
from .parsers.lalr_analysis import GrammarAnalyzer | |||
from .common import is_terminal | |||
from .parsers import lalr_parser, earley | |||
class WithLexer:
    """Mixin base for parser frontends that drive a standard Lexer.

    Builds the lexer from a LexerConf and exposes lex(), applying the
    optional post-lexing processor when one is configured.  The stale
    removed-side diff lines (old LALR.build_parser and the old Earley
    class fragments) interleaved in this span are dropped.
    """

    def __init__(self, lexer_conf):
        self.lexer_conf = lexer_conf
        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)

    def lex(self, text):
        # Run the raw lexer; postlex (if any) may rewrite the token stream.
        stream = self.lexer.lex(text)
        if self.lexer_conf.postlex:
            return self.lexer_conf.postlex.process(stream)
        else:
            return stream
class LALR(WithLexer):
    """LALR(1) frontend: standard lexer plus table-driven LALR parser.

    Replaces the old build_parser factory and drops the removed-side
    EarleyParser wrapper lines interleaved in this span.
    """

    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)
        # Analyze the grammar once up front; the parser reuses the result.
        analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
        analyzer.analyze()
        self.parser = lalr_parser.Parser(analyzer, parser_conf.callback)

    def parse(self, text):
        tokens = list(self.lex(text))
        return self.parser.parse(tokens)
class Earley(WithLexer):
    """Earley frontend: standard lexer plus Earley parser.

    Also fixes the misspelled assert message ('Ambiguious' -> 'Ambiguous').
    """

    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)
        rules = [{'name': n,
                  'symbols': self._process_expansion(x),
                  'postprocess': getattr(parser_conf.callback, a)}
                 for n, x, a in parser_conf.rules]
        self.parser = earley.Parser(rules, parser_conf.start)

    def parse(self, text):
        tokens = list(self.lex(text))
        res = self.parser.parse(tokens)
        # Only a single (unambiguous) parse result is supported for now.
        assert len(res) == 1, 'Ambiguous Parse! Not handled yet'
        return res[0]

    @staticmethod
    def _process_expansion(x):
        # Terminals become literal matchers; rule names pass through as-is.
        return [{'literal': s} if is_terminal(s) else s for s in x]
ENGINE_DICT = { 'lalr': LALR, 'earley': Earley } | |||