import re
import sys

from .utils import get_regexp_width, STRING_TYPE

Py36 = (sys.version_info[:2] >= (3, 6))


###{standalone
def is_terminal(sym):
    """Return whether the symbol name `sym` is upper-case (see str.isupper)."""
    return sym.isupper()


class GrammarError(Exception):
    pass


class ParseError(Exception):
    """Base class of the parse-time exceptions defined in this module."""
    pass


class UnexpectedToken(ParseError):
    """Parse error carrying the offending token and what was expected instead.

    Attributes set by the constructor:
        token: the token object passed in.
        expected: the `expected` value passed in (used as-is in the message).
        line, column: read from the token, or '?' when the token lacks them.
        considered_rules, state: stored unchanged; `state` is what
            match_examples() compares between errors.
    """

    def __init__(self, token, expected, seq, index, considered_rules=None, state=None):
        self.token = token
        self.expected = expected
        # Not every token object carries position info; fall back to '?'.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
        self.considered_rules = considered_rules
        self.state = state

        # Build a short preview of up to five items starting at `index`.
        try:
            context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        except AttributeError:
            # Items without .value/.type: show the raw slice instead.
            context = seq[index:index+5]
        except TypeError:
            # `seq` cannot be sliced/iterated (e.g. it is None).
            context = ""

        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token, self.line, self.column, expected, context))

        super(UnexpectedToken, self).__init__(message)

    def match_examples(self, parse_fn, examples):
        """Return the label of the example that best matches this error.

        Given a parse function and a dictionary mapping each label to a
        collection of malformed syntax examples, every example is parsed with
        `parse_fn`.  An example that raises an UnexpectedToken with the same
        state *and* the same token as this error is an exact match and its
        label is returned immediately.  Otherwise the label of the first
        example that failed in the same state is returned, or None when no
        example matched.

        Raises:
            AssertionError: if this exception has no state, or if an entry of
                `examples` is a single string instead of a collection.
        """
        # Compare against None explicitly: a state of 0 is falsy but is still
        # a real state and must not be rejected.
        assert self.state is not None, "Not supported for this exception"

        candidate = None
        for label, example in examples.items():
            # A bare string would be iterated character by character.
            assert not isinstance(example, STRING_TYPE)

            for malformed in example:
                try:
                    parse_fn(malformed)
                except UnexpectedToken as ut:
                    if ut.state == self.state:
                        if ut.token == self.token:  # Try exact match first
                            return label
                        elif candidate is None:
                            # Keep the first same-state match, even when its
                            # label is falsy (0, '', ...).
                            candidate = label

        return candidate

    def get_context(self, text, span=10):
        """Return a two-line snippet of `text` pointing at the token.

        The first line holds up to `span` characters on either side of
        `self.token.pos_in_stream`, cut at the nearest newline in each
        direction.  The second line is a '^' placed under the token position.
        """
        pos = self.token.pos_in_stream
        start = max(pos - span, 0)
        end = pos + span
        # Only keep what is on the same line as the token.
        before = text[start:pos].rsplit('\n', 1)[-1]
        after = text[pos:end].split('\n', 1)[0]
        return before + after + '\n' + ' ' * len(before) + '^\n'
###}


class LexerConf:
    """Plain container for the lexer settings passed to the constructor."""

    def __init__(self, tokens, ignore=(), postlex=None, callbacks=None):
        self.tokens = tokens
        self.ignore = ignore
        self.postlex = postlex
        # A fresh dict per instance when no callbacks are supplied.
        self.callbacks = callbacks or {}


class ParserConf:
    """Plain container for the parser settings passed to the constructor."""

    def __init__(self, rules, callback, start):
        self.rules = rules
        self.callback = callback
        self.start = start


class Pattern(object):
    """Base class for patterns: a value plus a set of regexp flag letters.

    Subclasses provide to_regexp(), which __repr__ relies on.
    """

    def __init__(self, value, flags=()):
        self.value = value
        self.flags = frozenset(flags)

    def __repr__(self):
        return repr(self.to_regexp())

    # Pattern Hashing assumes all subclasses have a different priority!
    def __hash__(self):
        return hash((type(self), self.value, self.flags))

    def __eq__(self, other):
        return type(self) == type(other) and self.value == other.value and self.flags == other.flags

    if Py36:
        # Python 3.6 changed syntax for flags in regular expression
        def _get_flags(self, value):
            """Wrap `value` in a scoped `(?f:...)` group for every flag."""
            for f in self.flags:
                value = ('(?%s:%s)' % (f, value))
            return value

    else:
        def _get_flags(self, value):
            """Prefix `value` with a `(?f)` group for every flag."""
            for f in self.flags:
                value = ('(?%s)' % f) + value
            return value


class PatternStr(Pattern):
    """Pattern whose value is a literal string."""

    def to_regexp(self):
        """Return the escaped literal, with the flags applied."""
        return self._get_flags(re.escape(self.value))

    @property
    def min_width(self):
        return len(self.value)

    # A literal always matches exactly len(value) characters.
    max_width = min_width


class PatternRE(Pattern):
    """Pattern whose value is already a regular expression."""

    def to_regexp(self):
        """Return the regular expression, with the flags applied."""
        return self._get_flags(self.value)

    @property
    def min_width(self):
        # First element of the pair returned by get_regexp_width().
        return get_regexp_width(self.to_regexp())[0]

    @property
    def max_width(self):
        # Second element of the pair returned by get_regexp_width().
        return get_regexp_width(self.to_regexp())[1]


class TokenDef(object):
    """A named token definition: name, Pattern instance and priority."""

    def __init__(self, name, pattern, priority=1):
        assert isinstance(pattern, Pattern), pattern
        self.name = name
        self.pattern = pattern
        self.priority = priority

    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)