
Symbols instead of strings - initial

Erez Shinan, 6 years ago · commit 67f372c994
8 changed files with 56 additions and 28 deletions
  1. examples/python_parser.py          +1   -1
  2. lark/grammar.py                    +22   -0
  3. lark/lexer.py                      +2    -2
  4. lark/load_grammar.py               +10   -4
  5. lark/parse_tree_builder.py         +5    -5
  6. lark/parsers/grammar_analysis.py   +10  -10
  7. lark/parsers/lalr_analysis.py      +5    -5
  8. lark/parsers/lalr_parser.py        +1    -1

examples/python_parser.py  (+1, -1)

@@ -78,6 +78,6 @@ def test_earley_equals_lalr():


 if __name__ == '__main__':
     test_python_lib()
-    test_earley_equals_lalr()
+    # test_earley_equals_lalr()
     # python_parser3.parse(_read(sys.argv[1]) + '\n')



lark/grammar.py  (+22, -0)

@@ -1,3 +1,25 @@
+class Symbol(object):
+    is_term = NotImplemented
+
+    def __init__(self, name):
+        self.name = name
+
+    def __eq__(self, other):
+        assert isinstance(other, Symbol), other
+        return self.is_term == other.is_term and self.name == other.name
+
+    def __hash__(self):
+        return hash(self.name)
+
+class Terminal(Symbol):
+    is_term = True
+
+    @property
+    def filter_out(self):
+        return self.name.startswith('_')
+
+class NonTerminal(Symbol):
+    is_term = False
 class Rule(object):
     """


lark/lexer.py  (+2, -2)

@@ -3,7 +3,7 @@
 import re

 from .utils import Str, classify
-from .common import is_terminal, PatternStr, PatternRE, TokenDef
+from .common import PatternStr, PatternRE, TokenDef

 ###{standalone
 class LexError(Exception):
@@ -234,7 +234,7 @@ class ContextualLexer:
             lexer = lexer_by_tokens[key]
         except KeyError:
             accepts = set(accepts) | set(ignore) | set(always_accept)
-            state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END']
+            state_tokens = [tokens_by_name[n] for n in accepts if n.is_term and n.name!='$END']
             lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
             lexer_by_tokens[key] = lexer




lark/load_grammar.py  (+10, -4)

@@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR
 from .parsers.lalr_parser import UnexpectedToken
 from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
-from .grammar import RuleOptions, Rule
+from .grammar import RuleOptions, Rule, Terminal, NonTerminal

 from .tree import Tree, Transformer, InlineTransformer, Visitor, SlottedTree as ST

@@ -523,7 +523,9 @@ class Grammar:
                if alias and name.startswith('_'):
                    raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

-               rule = Rule(name, expansion, alias, options)
+               expansion = [Terminal(x) if is_terminal(x) else NonTerminal(x) for x in expansion]
+
+               rule = Rule(NonTerminal(name), expansion, alias, options)
                compiled_rules.append(rule)

        return tokens, compiled_rules, self.ignore
@@ -578,12 +580,16 @@ def options_from_rule(name, *x):

     return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority)

+
+def symbols_from_strcase(expansion):
+    return [Terminal(x) if is_terminal(x) else NonTerminal(x) for x in expansion]
+
 class GrammarLoader:
     def __init__(self):
         tokens = [TokenDef(name, PatternRE(value)) for name, value in TOKENS.items()]

         rules = [options_from_rule(name, x) for name, x in RULES.items()]
-        rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs]
+        rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), None, o) for r, xs, o in rules for x in xs]
         callback = ParseTreeBuilder(rules, ST).create_callback()
         lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'])
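
symbols_from_strcase (and the inline copy of the same comprehension in Grammar.compile above) leans on the existing is_terminal predicate from lark.common, which in this era of the codebase classifies a symbol by the case of its name (roughly: uppercase names are terminals, lowercase names are rules). A small sketch of the mapping, with that case test inlined as an assumption rather than imported:

    from lark.grammar import Terminal, NonTerminal

    def symbols_from_strcase(expansion):
        # assumption: is_terminal(x) amounts to x.isupper() at this point
        return [Terminal(x) if x.isupper() else NonTerminal(x) for x in expansion]

    syms = symbols_from_strcase(['atom', '_PLUS', 'STRING'])
    assert [s.is_term for s in syms] == [False, True, True]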




lark/parse_tree_builder.py  (+5, -5)

@@ -84,7 +84,7 @@ class ChildFilterLALR(ChildFilter):
         return self.node_builder(filtered)

 def _should_expand(sym):
-    return not is_terminal(sym) and sym.startswith('_')
+    return not sym.is_term and sym.name.startswith('_')

 def maybe_create_child_filter(expansion, filter_out, ambiguous):
     to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out]
@@ -109,8 +109,8 @@ class ParseTreeBuilder:
     def _init_builders(self, rules):
         filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out}
-        filter_out |= {sym for rule in rules for sym in rule.expansion if is_terminal(sym) and sym.startswith('_')}
-        assert all(x.startswith('_') for x in filter_out)
+        filter_out |= {sym for rule in rules for sym in rule.expansion if sym.is_term and sym.filter_out}
+        assert all(t.filter_out for t in filter_out)

         for rule in rules:
             options = rule.options
@@ -132,9 +132,9 @@ class ParseTreeBuilder:
         callback = Callback()

         for rule, wrapper_chain in self.rule_builders:
-            internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion))
+            internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(x.name for x in rule.expansion))

-            user_callback_name = rule.alias or rule.origin
+            user_callback_name = rule.alias or rule.origin.name
             try:
                 f = transformer._get_func(user_callback_name)
             except AttributeError:


lark/parsers/grammar_analysis.py  (+10, -10)

@@ -1,7 +1,7 @@

 from ..utils import bfs, fzset, classify
-from ..common import GrammarError, is_terminal
-from ..grammar import Rule
+from ..common import GrammarError
+from ..grammar import Rule, Terminal, NonTerminal


 class RulePtr(object):
@@ -67,7 +67,7 @@ def calculate_sets(rules):
     FIRST = {}
     FOLLOW = {}
     for sym in symbols:
-        FIRST[sym]={sym} if is_terminal(sym) else set()
+        FIRST[sym]={sym} if sym.is_term else set()
         FOLLOW[sym]=set()

     # Calculate NULLABLE and FIRST
@@ -108,16 +108,16 @@ class GrammarAnalyzer(object):
     def __init__(self, parser_conf, debug=False):
         self.debug = debug

-        rules = parser_conf.rules + [Rule('$root', [parser_conf.start, '$END'])]
+        rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])]
         self.rules_by_origin = classify(rules, lambda r: r.origin)

         assert len(rules) == len(set(rules))
         for r in rules:
             for sym in r.expansion:
-                if not (is_terminal(sym) or sym in self.rules_by_origin):
+                if not (sym.is_term or sym in self.rules_by_origin):
                     raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation

-        self.start_state = self.expand_rule('$root')
+        self.start_state = self.expand_rule(NonTerminal('$root'))

         self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

@@ -125,7 +125,7 @@ class GrammarAnalyzer(object):
         "Returns all init_ptrs accessible by rule (recursive)"
         init_ptrs = set()
         def _expand_rule(rule):
-            assert not is_terminal(rule), rule
+            assert not rule.is_term, rule

             for r in self.rules_by_origin[rule]:
                 init_ptr = RulePtr(r, 0)
@@ -133,7 +133,7 @@ class GrammarAnalyzer(object):

                 if r.expansion: # if not empty rule
                     new_r = init_ptr.next
-                    if not is_terminal(new_r):
+                    if not new_r.is_term:
                         yield new_r

         for _ in bfs([rule], _expand_rule):
@@ -142,8 +142,8 @@ class GrammarAnalyzer(object):
         return fzset(init_ptrs)

     def _first(self, r):
-        if is_terminal(r):
+        if r.is_term:
             return {r}
         else:
-            return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}
+            return {rp.next for rp in self.expand_rule(r) if rp.next.is_term}



lark/parsers/lalr_analysis.py  (+5, -5)

@@ -10,9 +10,9 @@ import logging
 from collections import defaultdict

 from ..utils import classify, classify_bool, bfs, fzset
-from ..common import GrammarError, is_terminal
+from ..common import GrammarError

-from .grammar_analysis import GrammarAnalyzer
+from .grammar_analysis import GrammarAnalyzer, Terminal

 class Action:
     def __init__(self, name):
@@ -70,12 +70,12 @@ class LALR_Analyzer(GrammarAnalyzer):
             rps = {rp.advance(sym) for rp in rps}

             for rp in set(rps):
-                if not rp.is_satisfied and not is_terminal(rp.next):
+                if not rp.is_satisfied and not rp.next.is_term:
                     rps |= self.expand_rule(rp.next)

             new_state = fzset(rps)
             lookahead[sym].append((Shift, new_state))
-            if sym == '$END':
+            if sym == Terminal('$END'):
                 self.end_states.append( new_state )
             yield new_state

@@ -93,7 +93,7 @@ class LALR_Analyzer(GrammarAnalyzer):
             if not len(v) == 1:
                 raise GrammarError("Collision in %s: %s" %(k, ', '.join(['\n * %s: %s' % x for x in v])))

-        self.states[state] = {k:v[0] for k, v in lookahead.items()}
+        self.states[state] = {k.name:v[0] for k, v in lookahead.items()}

         for _ in bfs([self.start_state], step):
             pass
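
Worth noting the boundary the last hunk draws: the analysis now works on Symbol objects, so sym == Terminal('$END') matches by kind and name, but the finished table is keyed by k.name because the running parser looks actions up with plain strings (the lexer's token types, and rule.origin.name in lalr_parser.py below). A hypothetical miniature of that boundary:

    from lark.grammar import Terminal

    # During analysis: a freshly constructed Symbol compares equal to a stored one.
    assert Terminal('$END') == Terminal('$END')

    # At the table boundary: drop to names (this table shape is illustrative only),
    # so runtime code can index with string token types.
    lookahead = {Terminal('NUMBER'): ('shift', 7), Terminal('$END'): ('reduce', 3)}
    states = {k.name: v for k, v in lookahead.items()}
    assert states['NUMBER'] == ('shift', 7)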


lark/parsers/lalr_parser.py  (+1, -1)

@@ -59,7 +59,7 @@ class _Parser:

             value = self.callbacks[rule](s)

-            _action, new_state = get_action(rule.origin)
+            _action, new_state = get_action(rule.origin.name)
             assert _action is Shift
             state_stack.append(new_state)
             value_stack.append(value)

