
Minor refactoring for the standalone tool (in progress)

Erez Shinan · commit 7182ba3991 · 6 years ago
5 changed files with 70 additions and 61 deletions:

  1. lark/grammar.py                +37  -1
  2. lark/lexer.py                  +26  -26
  3. lark/load_grammar.py           +1   -27
  4. lark/parsers/lalr_analysis.py  +5   -3
  5. lark/parsers/lalr_parser.py    +1   -4

lark/grammar.py (+37, -1)

@@ -10,7 +10,43 @@ class Rule(object):
         self.alias = alias
         self.options = options
 
-    def __repr__(self):
+    def __str__(self):
         return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))
 
+    def __repr__(self):
+        return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
+
+
+class RuleOptions:
+    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
+        self.keep_all_tokens = keep_all_tokens
+        self.expand1 = expand1
+        self.create_token = create_token  # used for scanless postprocessing
+        self.priority = priority
+
+        self.filter_out = filter_out  # remove this rule from the tree
+                                      # used for "token"-rules in scanless
+    @classmethod
+    def from_rule(cls, name, *x):
+        if len(x) > 1:
+            priority, expansions = x
+            priority = int(priority)
+        else:
+            expansions ,= x
+            priority = None
+
+        keep_all_tokens = name.startswith('!')
+        name = name.lstrip('!')
+        expand1 = name.startswith('?')
+        name = name.lstrip('?')
+
+        return name, expansions, cls(keep_all_tokens, expand1, priority=priority)
+
+    def __repr__(self):
+        return 'RuleOptions(%r, %r, %r, %r, %r)' % (
+            self.keep_all_tokens,
+            self.expand1,
+            self.create_token,
+            self.priority,
+            self.filter_out
+        )
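For context, a quick sketch of how RuleOptions.from_rule consumes the '!' and '?' prefixes on a rule name. The rule name and expansion below are invented for illustration, and the import path assumes the layout this commit introduces:

    from lark.grammar import RuleOptions

    # '!' keeps all tokens in the tree; '?' marks the rule for
    # single-child inlining. Both prefixes are stripped from the name.
    name, expansions, opts = RuleOptions.from_rule('!?maybe_stmt', ['expr'])

    assert name == 'maybe_stmt'
    assert opts.keep_all_tokens and opts.expand1
    assert opts.priority is None  # no priority was given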

lark/lexer.py (+26, -26)

@@ -111,35 +111,11 @@ def build_mres(tokens, match_whole=False):
     return _build_mres(tokens, len(tokens), match_whole)
 
 
-class LineCounter:
-    def __init__(self):
-        self.newline_char = '\n'
-        self.char_pos = 0
-        self.line = 1
-        self.column = 0
-        self.line_start_pos = 0
-
-    def feed(self, token, test_newline=True):
-        """Consume a token and calculat the new line & column.
-
-        As an optional optimization, set test_newline=False is token doesn't contain a newline.
-        """
-        if test_newline:
-            newlines = token.count(self.newline_char)
-            if newlines:
-                self.line += newlines
-                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
-
-        self.char_pos += len(token)
-        self.column = self.char_pos - self.line_start_pos
-
-
-
 class Lexer:
     def __init__(self, tokens, ignore=()):
         assert all(isinstance(t, TokenDef) for t in tokens), tokens
 
         self.ignore = ignore
         tokens = list(tokens)
 
         # Sanitization

@@ -156,7 +132,7 @@ class Lexer:
 
         # Init
         self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
-        self.ignore_types = [t for t in ignore]
+        self.ignore_types = list(ignore)
 
         tokens.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))

@@ -206,6 +182,30 @@ class ContextualLexer:
             l.lexer = self.lexers[self.parser_state]
 
 
+###{lexer
+
+class LineCounter:
+    def __init__(self):
+        self.newline_char = '\n'
+        self.char_pos = 0
+        self.line = 1
+        self.column = 0
+        self.line_start_pos = 0
+
+    def feed(self, token, test_newline=True):
+        """Consume a token and calculate the new line & column.
+
+        As an optional optimization, set test_newline=False if token doesn't contain a newline.
+        """
+        if test_newline:
+            newlines = token.count(self.newline_char)
+            if newlines:
+                self.line += newlines
+                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
+
+        self.char_pos += len(token)
+        self.column = self.char_pos - self.line_start_pos
+
 class _Lex:
     "Built to serve both Lexer and ContextualLexer"
     def __init__(self, lexer):

@@ -235,4 +235,4 @@ class _Lex:
                 if line_ctr.char_pos < len(stream):
                     raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                 break
-
+###}
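The `###{lexer` / `###}` markers delimit a section intended for extraction by the standalone tool; moving LineCounter inside them is what makes it part of the extracted code. As a rough illustration only (extract_sections is a hypothetical helper, not lark's actual tool), markers of this shape can be harvested like so:

    def extract_sections(source):
        """Collect the text between '###{name' and '###}' marker lines."""
        sections = {}
        name, buf = None, []
        for line in source.splitlines():
            stripped = line.strip()
            if stripped.startswith('###{'):
                name, buf = stripped[4:], []
            elif stripped.startswith('###}'):
                sections[name] = '\n'.join(buf)
                name = None
            elif name is not None:
                buf.append(line)
        return sections

    with open('lark/lexer.py') as f:
        print(sorted(extract_sections(f.read())))  # expected here: ['lexer']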

lark/load_grammar.py (+1, -27)

@@ -12,6 +12,7 @@
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR
 from .parsers.lalr_parser import UnexpectedToken
 from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
+from .grammar import RuleOptions
 
 from .tree import Tree as T, Transformer, InlineTransformer, Visitor

@@ -494,33 +495,6 @@ class Grammar:
 
 
 
-class RuleOptions:
-    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
-        self.keep_all_tokens = keep_all_tokens
-        self.expand1 = expand1
-        self.create_token = create_token  # used for scanless postprocessing
-        self.priority = priority
-
-        self.filter_out = filter_out  # remove this rule from the tree
-                                      # used for "token"-rules in scanless
-    @classmethod
-    def from_rule(cls, name, *x):
-        if len(x) > 1:
-            priority, expansions = x
-            priority = int(priority)
-        else:
-            expansions ,= x
-            priority = None
-
-        keep_all_tokens = name.startswith('!')
-        name = name.lstrip('!')
-        expand1 = name.startswith('?')
-        name = name.lstrip('?')
-
-        return name, expansions, cls(keep_all_tokens, expand1, priority=priority)
-
-
 
 _imported_grammars = {}
 def import_grammar(grammar_path):
     if grammar_path not in _imported_grammars:
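A sanity check on the move (assuming the 0.5.x package layout): since load_grammar now re-imports RuleOptions from its new home in grammar, both import paths keep working and resolve to the same class:

    from lark.grammar import RuleOptions as from_grammar
    from lark.load_grammar import RuleOptions as from_load_grammar

    assert from_grammar is from_load_grammar  # one class, two import paths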


lark/parsers/lalr_analysis.py (+5, -3)

@@ -15,13 +15,15 @@ from ..common import GrammarError, is_terminal
 from .grammar_analysis import GrammarAnalyzer
 
 class Action:
+    def __init__(self, name):
+        self.name = name
     def __str__(self):
-        return self.__name__
+        return self.name
     def __repr__(self):
         return str(self)
 
-class Shift(Action): pass
-class Reduce(Action): pass
+Shift = Action('Shift')
+Reduce = Action('Reduce')
 
 class ParseTable:
     def __init__(self, states, start_state, end_state):
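Worth noting: the old Action.__str__ returned self.__name__, which exists on a class but not on its instances, so it only made sense while Shift and Reduce were subclasses. Named singleton instances stringify cleanly and are still compared by identity. A small sketch of how consuming code can treat them (import path as in this commit; describe is a hypothetical helper):

    from lark.parsers.lalr_analysis import Shift, Reduce

    def describe(action, arg):
        # Actions are singletons, so identity comparison suffices.
        if action is Shift:
            return 'shift to state %s' % arg
        assert action is Reduce
        return 'reduce by rule %s' % arg  # str(Reduce) == 'Reduce'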


lark/parsers/lalr_parser.py (+1, -4)

@@ -7,10 +7,6 @@ from ..common import ParseError, UnexpectedToken
 
 from .lalr_analysis import LALR_Analyzer, Shift
 
-class FinalReduce:
-    def __init__(self, value):
-        self.value = value
-
 class Parser:
     def __init__(self, parser_conf):
         assert all(r.options is None or r.options.priority is None

@@ -20,6 +16,7 @@ class Parser:
         callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
                      for rule in analysis.rules}
 
+        self.parser_conf = parser_conf
         self.parser = _Parser(analysis.parse_table, callbacks)
         self.parse = self.parser.parse


