
Merge branch 'pwwang-master'

Erez Sh, 4 years ago
commit 7010f96825
11 changed files with 146 additions and 72 deletions:

  1. docs/how_to_use.md (+4, -3)
  2. lark/__init__.py (+1, -0)
  3. lark/exceptions.py (+5, -6)
  4. lark/lark.py (+4, -4)
  5. lark/parsers/earley.py (+2, -2)
  6. lark/parsers/lalr_analysis.py (+4, -5)
  7. lark/utils.py (+50, -46)
  8. tests/__main__.py (+5, -2)
  9. tests/test_logger.py (+65, -0)
  10. tests/test_nearley/test_nearley.py (+4, -3)
  11. tests/test_parser.py (+2, -1)

docs/how_to_use.md (+4, -3)

@@ -30,12 +30,13 @@ Use the reference pages for more in-depth explanations. (links in the [main page

## LALR usage

-By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure `logging` framework beforehand. For example:
+By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure the `logger` beforehand. For example:

```python
-from lark import Lark
import logging
-logging.basicConfig(level=logging.DEBUG)
+from lark import Lark, logger
+
+logger.setLevel(logging.DEBUG)

collision_grammar = '''
start: as as

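Since the hunk above is cut off mid-example, here is how the updated snippet reads in full once this change is applied; the remaining grammar rules and the `Lark(...)` call are taken from `tests/test_logger.py` later in this commit, so nothing here is new beyond assembling them:

```python
import logging
from lark import Lark, logger

logger.setLevel(logging.DEBUG)

# Grammar with a deliberate Shift/Reduce conflict (same rules as in tests/test_logger.py)
collision_grammar = '''
start: as as
as: a*
a: "a"
'''

# With debug=True, the LALR analyzer reports the conflict through the 'lark' logger
Lark(collision_grammar, parser='lalr', debug=True)
```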

lark/__init__.py (+1, -0)

@@ -1,3 +1,4 @@
+from .utils import logger
from .tree import Tree
from .visitors import Transformer, Visitor, v_args, Discard
from .visitors import InlineTransformer, inline_args  # XXX Deprecated


lark/exceptions.py (+5, -6)

@@ -1,7 +1,6 @@
-from .utils import STRING_TYPE
+from .utils import STRING_TYPE, logger

###{standalone
-import logging


class LarkError(Exception):
@@ -62,24 +61,24 @@ class UnexpectedInput(LarkError):
                except UnexpectedInput as ut:
                    if ut.state == self.state:
                        if use_accepts and ut.accepts != self.accepts:
-                            logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
+                            logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                (self.state, self.accepts, ut.accepts, i, j))
                            continue
                        try:
                            if ut.token == self.token:  # Try exact match first
-                                logging.debug("Exact Match at example [%s][%s]" % (i, j))
+                                logger.debug("Exact Match at example [%s][%s]" % (i, j))
                                return label

                            if token_type_match_fallback:
                                # Fallback to token types match
                                if (ut.token.type == self.token.type) and not candidate[-1]:
-                                    logging.debug("Token Type Fallback at example [%s][%s]" % (i, j))
+                                    logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
                                    candidate = label, True

                        except AttributeError:
                            pass
                        if not candidate[0]:
-                            logging.debug("Same State match at example [%s][%s]" % (i, j))
+                            logger.debug("Same State match at example [%s][%s]" % (i, j))
                            candidate = label, False

        return candidate[0]

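The debug messages changed above are emitted by `UnexpectedInput.match_examples`, which replays a dictionary of known-bad inputs through the parser to find the one that best matches the current error. A rough, hypothetical sketch of the calling pattern; the grammar, labels, and inputs below are invented for illustration and are not part of this commit:

```python
import logging
from lark import Lark, UnexpectedInput, logger

logger.setLevel(logging.DEBUG)

# lexer='standard' so a wrong token surfaces as UnexpectedToken at parse time
parser = Lark(r'start: "a" "b"', parser='lalr', lexer='standard')

# Hypothetical mapping of error labels to malformed example inputs
examples = {
    'second item is missing': ['a'],
    'second item is wrong': ['aa'],
}

try:
    parser.parse('aa')
except UnexpectedInput as u:
    # match_examples re-parses each example; the logger.debug calls patched above
    # report exact matches, token-type fallbacks, and same-state matches.
    print(u.match_examples(parser.parse, examples))  # expected to print 'second item is wrong'
```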

lark/lark.py (+4, -4)

@@ -1,10 +1,10 @@
from __future__ import absolute_import

-import sys, os, pickle, hashlib, logging
+import sys, os, pickle, hashlib
from io import open


-from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii
+from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar
from .tree import Tree
from .common import LexerConf, ParserConf
@@ -214,7 +214,7 @@ class Lark(Serialize):
            cache_fn = '.lark_cache_%s.tmp' % md5

            if FS.exists(cache_fn):
-                logging.debug('Loading grammar from cache: %s', cache_fn)
+                logger.debug('Loading grammar from cache: %s', cache_fn)
                with FS.open(cache_fn, 'rb') as f:
                    self._load(f, self.options.transformer, self.options.postlex)
                return
@@ -291,7 +291,7 @@ class Lark(Serialize):
            self.lexer = self._build_lexer()

        if cache_fn:
-            logging.debug('Saving grammar to cache: %s', cache_fn)
+            logger.debug('Saving grammar to cache: %s', cache_fn)
            with FS.open(cache_fn, 'wb') as f:
                self.save(f)


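For context, the two messages above fire when grammar caching is enabled. A small sketch of triggering them, assuming the public `cache=True` keyword drives this code path (the grammar is illustrative):

```python
import logging
from lark import Lark, logger

logger.setLevel(logging.DEBUG)

grammar = 'start: "a"+'

# First construction builds the parser and logs 'Saving grammar to cache: ...'
Lark(grammar, parser='lalr', cache=True)

# An identical construction finds the '.lark_cache_<md5>.tmp' file and logs
# 'Loading grammar from cache: ...' instead of rebuilding the parser tables.
Lark(grammar, parser='lalr', cache=True)
```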


lark/parsers/earley.py (+2, -2)

@@ -10,11 +10,11 @@ is better documented here:
http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
"""

-import logging
from collections import deque

from ..visitors import Transformer_InPlace, v_args
from ..exceptions import UnexpectedEOF, UnexpectedToken
+from ..utils import logger
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item, TransitiveItem
@@ -301,7 +301,7 @@ class Parser:
            try:
                debug_walker = ForestToPyDotVisitor()
            except ImportError:
-                logging.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
+                logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
            else:
                debug_walker.visit(solutions[0], "sppf.png")




lark/parsers/lalr_analysis.py (+4, -5)

@@ -6,10 +6,9 @@ For now, shift/reduce conflicts are automatically resolved as shifts.
# Author: Erez Shinan (2017)
# Email : erezshin@gmail.com

-import logging
-from collections import defaultdict, deque
+from collections import defaultdict

-from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
+from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger
from ..exceptions import GrammarError

from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
@@ -256,8 +255,8 @@ class LALR_Analyzer(GrammarAnalyzer):
                    raise GrammarError('Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ])))
                if la in actions:
                    if self.debug:
-                        logging.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
-                        logging.warning(' * %s', list(rules)[0])
+                        logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
+                        logger.warning(' * %s', list(rules)[0])
                else:
                    actions[la] = (Reduce, list(rules)[0])
            m[state] = { k.name: v for k, v in actions.items() }


lark/utils.py (+50, -46)

@@ -4,51 +4,15 @@ from functools import reduce
from ast import literal_eval
from collections import deque

-class fzset(frozenset):
-    def __repr__(self):
-        return '{%s}' % ', '.join(map(repr, self))
-
-
-def classify_bool(seq, pred):
-    true_elems = []
-    false_elems = []
-
-    for elem in seq:
-        if pred(elem):
-            true_elems.append(elem)
-        else:
-            false_elems.append(elem)
-
-    return true_elems, false_elems
-
-
-def bfs(initial, expand):
-    open_q = deque(list(initial))
-    visited = set(open_q)
-    while open_q:
-        node = open_q.popleft()
-        yield node
-        for next_node in expand(node):
-            if next_node not in visited:
-                visited.add(next_node)
-                open_q.append(next_node)
-
-
-def _serialize(value, memo):
-    if isinstance(value, Serialize):
-        return value.serialize(memo)
-    elif isinstance(value, list):
-        return [_serialize(elem, memo) for elem in value]
-    elif isinstance(value, frozenset):
-        return list(value)  # TODO reversible?
-    elif isinstance(value, dict):
-        return {key:_serialize(elem, memo) for key, elem in value.items()}
-    return value
-
-###{standalone
+###{standalone
+import logging
+logger = logging.getLogger("lark")
+logger.addHandler(logging.StreamHandler())
+# Set to highest level, since we have some warnings amongst the code
+# By default, we should not output any log messages
+logger.setLevel(logging.CRITICAL)
+
+
def classify(seq, key=None, value=None):
    d = {}
    for item in seq:
@@ -302,13 +266,11 @@ def combine_alternatives(lists):
    return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)

-

class FS:
    open = open
    exists = os.path.exists

-

def isascii(s):
    """ str.isascii only exists in python3.7+ """
    try:
@@ -318,4 +280,46 @@ def isascii(s):
            s.encode('ascii')
            return True
        except (UnicodeDecodeError, UnicodeEncodeError):
            return False
+
+
+class fzset(frozenset):
+    def __repr__(self):
+        return '{%s}' % ', '.join(map(repr, self))
+
+
+def classify_bool(seq, pred):
+    true_elems = []
+    false_elems = []
+
+    for elem in seq:
+        if pred(elem):
+            true_elems.append(elem)
+        else:
+            false_elems.append(elem)
+
+    return true_elems, false_elems
+
+
+def bfs(initial, expand):
+    open_q = deque(list(initial))
+    visited = set(open_q)
+    while open_q:
+        node = open_q.popleft()
+        yield node
+        for next_node in expand(node):
+            if next_node not in visited:
+                visited.add(next_node)
+                open_q.append(next_node)
+
+
+def _serialize(value, memo):
+    if isinstance(value, Serialize):
+        return value.serialize(memo)
+    elif isinstance(value, list):
+        return [_serialize(elem, memo) for elem in value]
+    elif isinstance(value, frozenset):
+        return list(value)  # TODO reversible?
+    elif isinstance(value, dict):
+        return {key:_serialize(elem, memo) for key, elem in value.items()}
+    return value

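The new standalone block above introduces the shared `lark` logger: it gets a plain `StreamHandler` and is silenced by default (`CRITICAL`), so applications have to opt in explicitly. A minimal sketch of routing it into an application's own logging setup; the handler and format below are illustrative choices, not part of the commit:

```python
import logging
from lark import logger

# Lower the level from the library default of logging.CRITICAL
logger.setLevel(logging.DEBUG)

# Optionally replace the default StreamHandler with an application handler
handler = logging.FileHandler('lark.log')  # hypothetical destination
handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
logger.handlers[:] = [handler]
```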
tests/__main__.py (+5, -2)

@@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function

import unittest
import logging
+from lark import logger

from .test_trees import TestTrees
from .test_tools import TestStandalone
@@ -11,11 +12,13 @@ from .test_reconstructor import TestReconstructor
try:
    from .test_nearley.test_nearley import TestNearley
except ImportError:
-    logging.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)")
+    logger.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)")

# from .test_selectors import TestSelectors
# from .test_grammars import TestPythonG, TestConfigG

+from .test_logger import Testlogger
+
from .test_parser import (
        TestLalrStandard,
        TestEarleyStandard,
@@ -31,7 +34,7 @@ from .test_parser import (
        TestParsers,
        )

-logging.basicConfig(level=logging.INFO)
+logger.setLevel(logging.INFO)

if __name__ == '__main__':
    unittest.main()

tests/test_logger.py (+65, -0, new file)

@@ -0,0 +1,65 @@
+import logging
+from contextlib import contextmanager
+from lark import Lark, logger
+from unittest import TestCase, main
+
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
+
+@contextmanager
+def capture_log():
+    stream = StringIO()
+    orig_handler = logger.handlers[0]
+    del logger.handlers[:]
+    logger.addHandler(logging.StreamHandler(stream))
+    yield stream
+    del logger.handlers[:]
+    logger.addHandler(orig_handler)
+
+class Testlogger(TestCase):
+
+    def test_debug(self):
+        logger.setLevel(logging.DEBUG)
+        collision_grammar = '''
+        start: as as
+        as: a*
+        a: "a"
+        '''
+        with capture_log() as log:
+            Lark(collision_grammar, parser='lalr', debug=True)
+
+        log = log.getvalue()
+        # since there are conflicts about A
+        # symbol A should appear in the log message for hint
+        self.assertIn("A", log)
+
+    def test_non_debug(self):
+        logger.setLevel(logging.DEBUG)
+        collision_grammar = '''
+        start: as as
+        as: a*
+        a: "a"
+        '''
+        with capture_log() as log:
+            Lark(collision_grammar, parser='lalr', debug=False)
+        log = log.getvalue()
+        # no log messge
+        self.assertEqual(len(log), 0)
+
+    def test_loglevel_higher(self):
+        logger.setLevel(logging.ERROR)
+        collision_grammar = '''
+        start: as as
+        as: a*
+        a: "a"
+        '''
+        with capture_log() as log:
+            Lark(collision_grammar, parser='lalr', debug=True)
+        log = log.getvalue()
+        # no log messge
+        self.assertEqual(len(log), 0)
+
+if __name__ == '__main__':
+    main()

tests/test_nearley/test_nearley.py (+4, -3)

@@ -6,16 +6,17 @@ import logging
import os
import codecs

-logging.basicConfig(level=logging.INFO)
-
+from lark import logger
from lark.tools.nearley import create_code_for_nearley_grammar, main as nearley_tool_main

+logger.setLevel(logging.INFO)
+
TEST_PATH = os.path.abspath(os.path.dirname(__file__))
NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley')
BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin')

if not os.path.exists(NEARLEY_PATH):
    -logging.warn("Nearley not installed. Skipping Nearley tests!")
    +logger.warn("Nearley not installed. Skipping Nearley tests!")
    raise ImportError("Skipping Nearley tests!")

import js2py  # Ensures that js2py exists, to avoid failing tests


tests/test_parser.py (+2, -1)

@@ -23,13 +23,13 @@ from io import (
        open,
    )

-logging.basicConfig(level=logging.INFO)

try:
    import regex
except ImportError:
    regex = None

+from lark import logger
from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
@@ -37,6 +37,7 @@ from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

+logger.setLevel(logging.INFO)


__path__ = os.path.dirname(__file__)

