Ver a proveniência

All tests passing

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
Erez Shinan há 6 anos
ascendente
commit
4a5aa745ea
6 ficheiros alterados com 37 adições e 50 eliminações
  1. +6
    -0
      lark/grammar.py
  2. +2
    -2
      lark/lexer.py
  3. +1
    -1
      lark/parse_tree_builder.py
  4. +10
    -3
      lark/parser_frontends.py
  5. +16
    -42
      lark/parsers/cyk.py
  6. +2
    -2
      lark/parsers/xearley.py

+ 6
- 0
lark/grammar.py Ver ficheiro

@@ -8,9 +8,15 @@ class Symbol(object):
assert isinstance(other, Symbol), other
return self.is_term == other.is_term and self.name == other.name

def __ne__(self, other):
return not (self == other)

def __hash__(self):
return hash(self.name)

def __repr__(self):
return '%s(%r)' % (type(self).__name__, self.name)

class Terminal(Symbol):
is_term = True



+ 2
- 2
lark/lexer.py Ver ficheiro

@@ -3,7 +3,7 @@
import re

from .utils import Str, classify
from .common import PatternStr, PatternRE, TokenDef
from .common import is_terminal, PatternStr, PatternRE, TokenDef

###{standalone
class LexError(Exception):
@@ -234,7 +234,7 @@ class ContextualLexer:
lexer = lexer_by_tokens[key]
except KeyError:
accepts = set(accepts) | set(ignore) | set(always_accept)
state_tokens = [tokens_by_name[n] for n in accepts if n.is_term and n.name!='$END']
state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END']
lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
lexer_by_tokens[key] = lexer



+ 1
- 1
lark/parse_tree_builder.py Ver ficheiro

@@ -110,7 +110,7 @@ class ParseTreeBuilder:
def _init_builders(self, rules):
filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out}
filter_out |= {sym for rule in rules for sym in rule.expansion if sym.is_term and sym.filter_out}
assert all(t.filter_out for t in filter_out)
assert all(t.name.startswith('_') for t in filter_out)

for rule in rules:
options = rule.options


+ 10
- 3
lark/parser_frontends.py Ver ficheiro

@@ -7,7 +7,11 @@ from .lexer import Lexer, ContextualLexer, Token
from .common import GrammarError
from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk
from .tree import Tree
from .grammar import Terminal
from .grammar import Terminal, NonTerminal

def terminals(seq):
# return [Terminal(t) for t in seq]
return seq

class WithLexer:
def init_traditional_lexer(self, lexer_conf):
@@ -18,7 +22,10 @@ class WithLexer:
self.lexer_conf = lexer_conf
states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
self.lexer = ContextualLexer(lexer_conf.tokens, states, ignore=lexer_conf.ignore, always_accept=always_accept, user_callbacks=lexer_conf.callbacks)
self.lexer = ContextualLexer(lexer_conf.tokens, states,
ignore=terminals(lexer_conf.ignore),
always_accept=terminals(always_accept),
user_callbacks=lexer_conf.callbacks)

def lex(self, text):
stream = self.lexer.lex(text)
@@ -74,7 +81,7 @@ class Earley_NoLex:


def match(self, term, text, index=0):
return self.regexps[term].match(text, index)
return self.regexps[term.name].match(text, index)

def _prepare_match(self, lexer_conf):
self.regexps = {}


+ 16
- 42
lark/parsers/cyk.py Ver ficheiro

@@ -8,47 +8,19 @@
from collections import defaultdict
import itertools

from ..common import ParseError, is_terminal
from ..common import ParseError
from ..lexer import Token
from ..tree import Tree
from ..grammar import Terminal as T, NonTerminal as NT, Symbol

try:
xrange
except NameError:
xrange = range

class Symbol(object):
"""Any grammar symbol."""

def __init__(self, s):
self.s = s

def __repr__(self):
return '%s(%s)' % (type(self).__name__, str(self))

def __str__(self):
return str(self.s)

def __eq__(self, other):
return self.s == str(other)

def __ne__(self, other):
return not self.__eq__(other)

def __hash__(self):
return hash((type(self), str(self.s)))


class T(Symbol):
"""Terminal."""

def match(self, s):
return self.s == s.type


class NT(Symbol):
"""Non-terminal."""
pass
def match(t, s):
assert isinstance(t, T)
return t.name == s.type


class Rule(object):
@@ -121,10 +93,12 @@ class Parser(object):

def _to_rule(self, lark_rule):
"""Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
assert isinstance(lark_rule.origin, NT)
assert all(isinstance(x, Symbol) for x in lark_rule.expansion)
return Rule(
NT(lark_rule.origin), [
T(x) if is_terminal(x) else NT(x) for x in lark_rule.expansion
], weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0, alias=lark_rule.alias)
lark_rule.origin, lark_rule.expansion,
weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0,
alias=lark_rule.alias)

def parse(self, tokenized): # pylint: disable=invalid-name
"""Parses input, which is a list of tokens."""
@@ -132,7 +106,7 @@ class Parser(object):
# Check if the parse succeeded.
if all(r.lhs != self.start for r in table[(0, len(tokenized) - 1)]):
raise ParseError('Parsing failed.')
parse = trees[(0, len(tokenized) - 1)][NT(self.start)]
parse = trees[(0, len(tokenized) - 1)][self.start]
return self._to_tree(revert_cnf(parse))

def _to_tree(self, rule_node):
@@ -143,8 +117,8 @@ class Parser(object):
if isinstance(child, RuleNode):
children.append(self._to_tree(child))
else:
assert isinstance(child.s, Token)
children.append(child.s)
assert isinstance(child.name, Token)
children.append(child.name)
t = Tree(orig_rule.origin, children)
t.rule=orig_rule
return t
@@ -169,7 +143,7 @@ def _parse(s, g):
# Populate base case with existing terminal production rules
for i, w in enumerate(s):
for terminal, rules in g.terminal_rules.items():
if terminal.match(w):
if match(terminal, w):
for rule in rules:
table[(i, i)].add(rule)
if (rule.lhs not in trees[(i, i)] or
@@ -349,13 +323,13 @@ def revert_cnf(node):
if isinstance(node, T):
return node
# Reverts TERM rule.
if node.rule.lhs.s.startswith('__T_'):
if node.rule.lhs.name.startswith('__T_'):
return node.children[0]
else:
children = []
for child in map(revert_cnf, node.children):
# Reverts BIN rule.
if isinstance(child, RuleNode) and child.rule.lhs.s.startswith('__SP_'):
if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
children += child.children
else:
children.append(child)


+ 2
- 2
lark/parsers/xearley.py Ver ficheiro

@@ -98,14 +98,14 @@ class Parser:
for item in to_scan:
m = match(item.expect, stream, i)
if m:
t = Token(item.expect, m.group(0), i, text_line, text_column)
t = Token(item.expect.name, m.group(0), i, text_line, text_column)
delayed_matches[m.end()].append(item.advance(t))

s = m.group(0)
for j in range(1, len(s)):
m = match(item.expect, s[:-j])
if m:
t = Token(item.expect, m.group(0), i, text_line, text_column)
t = Token(item.expect.name, m.group(0), i, text_line, text_column)
delayed_matches[i+m.end()].append(item.advance(t))

next_set = Column(i+1, self.FIRST, predict_all=self.predict_all)


Carregando…
Cancelar
Guardar