From 7182ba399136bf2c0f1f74d6652e60ffeb55d448 Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Sun, 7 Jan 2018 22:33:37 +0200
Subject: [PATCH] Minor refactoring for the standalone tool (in progress)

---
 lark/grammar.py               | 38 ++++++++++++++++++++++++-
 lark/lexer.py                 | 52 +++++++++++++++++------------------
 lark/load_grammar.py          | 28 +------------------
 lark/parsers/lalr_analysis.py |  8 ++++--
 lark/parsers/lalr_parser.py   |  5 +---
 5 files changed, 70 insertions(+), 61 deletions(-)

diff --git a/lark/grammar.py b/lark/grammar.py
index 281c21c..f853182 100644
--- a/lark/grammar.py
+++ b/lark/grammar.py
@@ -10,7 +10,43 @@ class Rule(object):
         self.alias = alias
         self.options = options
 
-    def __repr__(self):
+    def __str__(self):
         return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))
 
+    def __repr__(self):
+        return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
+
+
+class RuleOptions:
+    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
+        self.keep_all_tokens = keep_all_tokens
+        self.expand1 = expand1
+        self.create_token = create_token  # used for scanless postprocessing
+        self.priority = priority
+        self.filter_out = filter_out  # remove this rule from the tree
+                                      # used for "token"-rules in scanless
+    @classmethod
+    def from_rule(cls, name, *x):
+        if len(x) > 1:
+            priority, expansions = x
+            priority = int(priority)
+        else:
+            expansions ,= x
+            priority = None
+
+        keep_all_tokens = name.startswith('!')
+        name = name.lstrip('!')
+        expand1 = name.startswith('?')
+        name = name.lstrip('?')
+
+        return name, expansions, cls(keep_all_tokens, expand1, priority=priority)
+
+    def __repr__(self):
+        return 'RuleOptions(%r, %r, %r, %r, %r)' % (
+            self.keep_all_tokens,
+            self.expand1,
+            self.create_token,
+            self.priority,
+            self.filter_out
+        )
diff --git a/lark/lexer.py b/lark/lexer.py
index ba920c6..5ca77de 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -111,35 +111,11 @@ def build_mres(tokens, match_whole=False):
     return _build_mres(tokens, len(tokens), match_whole)
 
 
-class LineCounter:
-    def __init__(self):
-        self.newline_char = '\n'
-        self.char_pos = 0
-        self.line = 1
-        self.column = 0
-        self.line_start_pos = 0
-
-    def feed(self, token, test_newline=True):
-        """Consume a token and calculat the new line & column.
-
-        As an optional optimization, set test_newline=False is token doesn't contain a newline.
-        """
-        if test_newline:
-            newlines = token.count(self.newline_char)
-            if newlines:
-                self.line += newlines
-                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
-
-        self.char_pos += len(token)
-        self.column = self.char_pos - self.line_start_pos
-
-
 class Lexer:
     def __init__(self, tokens, ignore=()):
         assert all(isinstance(t, TokenDef) for t in tokens), tokens
 
-        self.ignore = ignore
         tokens = list(tokens)
 
         # Sanitization
@@ -156,7 +132,7 @@ class Lexer:
 
         # Init
         self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
-        self.ignore_types = [t for t in ignore]
+        self.ignore_types = list(ignore)
 
         tokens.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
 
@@ -206,6 +182,30 @@ class ContextualLexer:
             l.lexer = self.lexers[self.parser_state]
 
 
+###{lexer
+
+class LineCounter:
+    def __init__(self):
+        self.newline_char = '\n'
+        self.char_pos = 0
+        self.line = 1
+        self.column = 0
+        self.line_start_pos = 0
+
+    def feed(self, token, test_newline=True):
+        """Consume a token and calculate the new line & column.
+
+        As an optional optimization, set test_newline=False if the token doesn't contain a newline.
+        """
+        if test_newline:
+            newlines = token.count(self.newline_char)
+            if newlines:
+                self.line += newlines
+                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
+
+        self.char_pos += len(token)
+        self.column = self.char_pos - self.line_start_pos
+
 class _Lex:
     "Built to serve both Lexer and ContextualLexer"
     def __init__(self, lexer):
@@ -235,4 +235,4 @@ class _Lex:
                 if line_ctr.char_pos < len(stream):
                     raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                 break
-
+###}
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 7726845..ce4ec5a 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -12,6 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR
 from .parsers.lalr_parser import UnexpectedToken
 from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
+from .grammar import RuleOptions
 from .tree import Tree as T, Transformer, InlineTransformer, Visitor
@@ -494,33 +495,6 @@ class Grammar:
 
 
 
-class RuleOptions:
-    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
-        self.keep_all_tokens = keep_all_tokens
-        self.expand1 = expand1
-        self.create_token = create_token  # used for scanless postprocessing
-        self.priority = priority
-
-        self.filter_out = filter_out  # remove this rule from the tree
-                                      # used for "token"-rules in scanless
-    @classmethod
-    def from_rule(cls, name, *x):
-        if len(x) > 1:
-            priority, expansions = x
-            priority = int(priority)
-        else:
-            expansions ,= x
-            priority = None
-
-        keep_all_tokens = name.startswith('!')
-        name = name.lstrip('!')
-        expand1 = name.startswith('?')
-        name = name.lstrip('?')
-
-        return name, expansions, cls(keep_all_tokens, expand1, priority=priority)
-
-
-
 _imported_grammars = {}
 def import_grammar(grammar_path):
     if grammar_path not in _imported_grammars:
diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py
index 2c9e8a4..6eb3fdf 100644
--- a/lark/parsers/lalr_analysis.py
+++ b/lark/parsers/lalr_analysis.py
@@ -15,13 +15,15 @@ from ..common import GrammarError, is_terminal
 from .grammar_analysis import GrammarAnalyzer
 
 class Action:
+    def __init__(self, name):
+        self.name = name
     def __str__(self):
-        return self.__name__
+        return self.name
     def __repr__(self):
         return str(self)
 
-class Shift(Action): pass
-class Reduce(Action): pass
+Shift = Action('Shift')
+Reduce = Action('Reduce')
 
 class ParseTable:
     def __init__(self, states, start_state, end_state):
diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py
index bc45d4e..b093990 100644
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -7,10 +7,6 @@ from ..common import ParseError, UnexpectedToken
 
 from .lalr_analysis import LALR_Analyzer, Shift
 
-class FinalReduce:
-    def __init__(self, value):
-        self.value = value
-
 class Parser:
     def __init__(self, parser_conf):
         assert all(r.options is None or r.options.priority is None
@@ -20,6 +16,7 @@ class Parser:
         callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
                      for rule in analysis.rules}
 
+        self.parser_conf = parser_conf
        self.parser = _Parser(analysis.parse_table, callbacks)
        self.parse = self.parser.parse
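
Not part of the patch: a minimal sketch of how the relocated RuleOptions.from_rule helper and the new Shift/Reduce Action instances behave, assuming the module layout this patch introduces. The `expansions` placeholder below is hypothetical; real call sites in load_grammar.py pass a parsed expansions tree.

    # Sketch only: exercises RuleOptions.from_rule and the Action singletons.
    from lark.grammar import RuleOptions
    from lark.parsers.lalr_analysis import Action, Shift, Reduce

    expansions = object()   # hypothetical stand-in for a parsed expansions tree

    # '?' marks the rule as expand1, '!' would keep all tokens; the optional
    # leading argument is a priority and is converted to int.
    name, exps, opts = RuleOptions.from_rule('?expr', '2', expansions)
    assert name == 'expr'
    assert opts.expand1 and not opts.keep_all_tokens
    assert opts.priority == 2

    # Shift and Reduce are now shared Action instances rather than classes,
    # so parse-table entries compare by identity and print their names.
    assert isinstance(Shift, Action) and Shift is not Reduce
    assert str(Shift) == 'Shift' and repr(Reduce) == 'Reduce'

The switch from Action subclasses to named instances, and the new ###{lexer ... ###} markers in lexer.py, both appear to serve the standalone tool named in the subject line (presumably for emitting a self-contained parser), though the patch itself is marked as in progress and does not include that tool.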