Browse Source

Symbols instead of strings - initial

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
Erez Shinan 6 years ago
parent
commit
67f372c994
8 changed files with 56 additions and 28 deletions
  1. +1
    -1
      examples/python_parser.py
  2. +22
    -0
      lark/grammar.py
  3. +2
    -2
      lark/lexer.py
  4. +10
    -4
      lark/load_grammar.py
  5. +5
    -5
      lark/parse_tree_builder.py
  6. +10
    -10
      lark/parsers/grammar_analysis.py
  7. +5
    -5
      lark/parsers/lalr_analysis.py
  8. +1
    -1
      lark/parsers/lalr_parser.py

+ 1
- 1
examples/python_parser.py View File

@@ -78,6 +78,6 @@ def test_earley_equals_lalr():

if __name__ == '__main__':
test_python_lib()
test_earley_equals_lalr()
# test_earley_equals_lalr()
# python_parser3.parse(_read(sys.argv[1]) + '\n')


+ 22
- 0
lark/grammar.py View File

@@ -1,3 +1,25 @@
class Symbol(object):
    """Base class for grammar symbols, identified by a string name.

    Subclasses set ``is_term`` so that a Terminal and a NonTerminal with the
    same name compare unequal. They still hash alike (hash uses only the
    name), which is valid: equal objects must hash equal, not the converse.
    """
    is_term = NotImplemented

    def __init__(self, name):
        self.name = name

    def __eq__(self, other):
        # Return NotImplemented rather than asserting: comparisons against
        # non-Symbol objects then cleanly evaluate to False instead of
        # raising, and the check can't vanish under ``python -O``.
        if not isinstance(other, Symbol):
            return NotImplemented
        return self.is_term == other.is_term and self.name == other.name

    def __ne__(self, other):
        # Required for Python 2, which does not derive __ne__ from __eq__.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self.name)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self.name)

class Terminal(Symbol):
    "A terminal (token) symbol of the grammar."
    is_term = True

    @property
    def filter_out(self):
        # Terminals named with a leading underscore are filtered out of
        # the resulting parse tree.
        return self.name.startswith('_')

class NonTerminal(Symbol):
    "A nonterminal (rule) symbol of the grammar."
    is_term = False
class Rule(object):
"""


+ 2
- 2
lark/lexer.py View File

@@ -3,7 +3,7 @@
import re

from .utils import Str, classify
from .common import is_terminal, PatternStr, PatternRE, TokenDef
from .common import PatternStr, PatternRE, TokenDef

###{standalone
class LexError(Exception):
@@ -234,7 +234,7 @@ class ContextualLexer:
lexer = lexer_by_tokens[key]
except KeyError:
accepts = set(accepts) | set(ignore) | set(always_accept)
state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END']
state_tokens = [tokens_by_name[n] for n in accepts if n.is_term and n.name!='$END']
lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
lexer_by_tokens[key] = lexer



+ 10
- 4
lark/load_grammar.py View File

@@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import LALR
from .parsers.lalr_parser import UnexpectedToken
from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
from .grammar import RuleOptions, Rule
from .grammar import RuleOptions, Rule, Terminal, NonTerminal

from .tree import Tree, Transformer, InlineTransformer, Visitor, SlottedTree as ST

@@ -523,7 +523,9 @@ class Grammar:
if alias and name.startswith('_'):
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

rule = Rule(name, expansion, alias, options)
expansion = [Terminal(x) if is_terminal(x) else NonTerminal(x) for x in expansion]

rule = Rule(NonTerminal(name), expansion, alias, options)
compiled_rules.append(rule)

return tokens, compiled_rules, self.ignore
@@ -578,12 +580,16 @@ def options_from_rule(name, *x):

return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority)


def symbols_from_strcase(expansion):
    """Turn a sequence of symbol names (strings) into Symbol instances.

    Names that ``is_terminal()`` classifies as terminals become Terminal
    objects; all other names become NonTerminal objects.
    """
    symbols = []
    for sym_name in expansion:
        if is_terminal(sym_name):
            symbols.append(Terminal(sym_name))
        else:
            symbols.append(NonTerminal(sym_name))
    return symbols

class GrammarLoader:
def __init__(self):
tokens = [TokenDef(name, PatternRE(value)) for name, value in TOKENS.items()]

rules = [options_from_rule(name, x) for name, x in RULES.items()]
rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs]
rules = [options_from_rule(name, x) for name, x in RULES.items()]
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), None, o) for r, xs, o in rules for x in xs]
callback = ParseTreeBuilder(rules, ST).create_callback()
lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'])



+ 5
- 5
lark/parse_tree_builder.py View File

@@ -84,7 +84,7 @@ class ChildFilterLALR(ChildFilter):
return self.node_builder(filtered)

def _should_expand(sym):
return not is_terminal(sym) and sym.startswith('_')
return not sym.is_term and sym.name.startswith('_')

def maybe_create_child_filter(expansion, filter_out, ambiguous):
to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out]
@@ -109,8 +109,8 @@ class ParseTreeBuilder:

def _init_builders(self, rules):
filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out}
filter_out |= {sym for rule in rules for sym in rule.expansion if is_terminal(sym) and sym.startswith('_')}
assert all(x.startswith('_') for x in filter_out)
filter_out |= {sym for rule in rules for sym in rule.expansion if sym.is_term and sym.filter_out}
assert all(t.filter_out for t in filter_out)

for rule in rules:
options = rule.options
@@ -132,9 +132,9 @@ class ParseTreeBuilder:
callback = Callback()

for rule, wrapper_chain in self.rule_builders:
internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion))
internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(x.name for x in rule.expansion))

user_callback_name = rule.alias or rule.origin
user_callback_name = rule.alias or rule.origin.name
try:
f = transformer._get_func(user_callback_name)
except AttributeError:


+ 10
- 10
lark/parsers/grammar_analysis.py View File

@@ -1,7 +1,7 @@

from ..utils import bfs, fzset, classify
from ..common import GrammarError, is_terminal
from ..grammar import Rule
from ..common import GrammarError
from ..grammar import Rule, Terminal, NonTerminal


class RulePtr(object):
@@ -67,7 +67,7 @@ def calculate_sets(rules):
FIRST = {}
FOLLOW = {}
for sym in symbols:
FIRST[sym]={sym} if is_terminal(sym) else set()
FIRST[sym]={sym} if sym.is_term else set()
FOLLOW[sym]=set()

# Calculate NULLABLE and FIRST
@@ -108,16 +108,16 @@ class GrammarAnalyzer(object):
def __init__(self, parser_conf, debug=False):
self.debug = debug

rules = parser_conf.rules + [Rule('$root', [parser_conf.start, '$END'])]
rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])]
self.rules_by_origin = classify(rules, lambda r: r.origin)

assert len(rules) == len(set(rules))
for r in rules:
for sym in r.expansion:
if not (is_terminal(sym) or sym in self.rules_by_origin):
if not (sym.is_term or sym in self.rules_by_origin):
raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation

self.start_state = self.expand_rule('$root')
self.start_state = self.expand_rule(NonTerminal('$root'))

self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

@@ -125,7 +125,7 @@ class GrammarAnalyzer(object):
"Returns all init_ptrs accessible by rule (recursive)"
init_ptrs = set()
def _expand_rule(rule):
assert not is_terminal(rule), rule
assert not rule.is_term, rule

for r in self.rules_by_origin[rule]:
init_ptr = RulePtr(r, 0)
@@ -133,7 +133,7 @@ class GrammarAnalyzer(object):

if r.expansion: # if not empty rule
new_r = init_ptr.next
if not is_terminal(new_r):
if not new_r.is_term:
yield new_r

for _ in bfs([rule], _expand_rule):
@@ -142,8 +142,8 @@ class GrammarAnalyzer(object):
return fzset(init_ptrs)

def _first(self, r):
if is_terminal(r):
if r.is_term:
return {r}
else:
return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}
return {rp.next for rp in self.expand_rule(r) if rp.next.is_term}


+ 5
- 5
lark/parsers/lalr_analysis.py View File

@@ -10,9 +10,9 @@ import logging
from collections import defaultdict

from ..utils import classify, classify_bool, bfs, fzset
from ..common import GrammarError, is_terminal
from ..common import GrammarError

from .grammar_analysis import GrammarAnalyzer
from .grammar_analysis import GrammarAnalyzer, Terminal

class Action:
def __init__(self, name):
@@ -70,12 +70,12 @@ class LALR_Analyzer(GrammarAnalyzer):
rps = {rp.advance(sym) for rp in rps}

for rp in set(rps):
if not rp.is_satisfied and not is_terminal(rp.next):
if not rp.is_satisfied and not rp.next.is_term:
rps |= self.expand_rule(rp.next)

new_state = fzset(rps)
lookahead[sym].append((Shift, new_state))
if sym == '$END':
if sym == Terminal('$END'):
self.end_states.append( new_state )
yield new_state

@@ -93,7 +93,7 @@ class LALR_Analyzer(GrammarAnalyzer):
if not len(v) == 1:
raise GrammarError("Collision in %s: %s" %(k, ', '.join(['\n * %s: %s' % x for x in v])))

self.states[state] = {k:v[0] for k, v in lookahead.items()}
self.states[state] = {k.name:v[0] for k, v in lookahead.items()}

for _ in bfs([self.start_state], step):
pass


+ 1
- 1
lark/parsers/lalr_parser.py View File

@@ -59,7 +59,7 @@ class _Parser:

value = self.callbacks[rule](s)

_action, new_state = get_action(rule.origin)
_action, new_state = get_action(rule.origin.name)
assert _action is Shift
state_stack.append(new_state)
value_stack.append(value)


Loading…
Cancel
Save