
Merge branch 'pwwang-master'

Erez Sh, 4 years ago
commit 7010f96825
11 changed files with 146 additions and 72 deletions:

  1. docs/how_to_use.md (+4, -3)
  2. lark/__init__.py (+1, -0)
  3. lark/exceptions.py (+5, -6)
  4. lark/lark.py (+4, -4)
  5. lark/parsers/earley.py (+2, -2)
  6. lark/parsers/lalr_analysis.py (+4, -5)
  7. lark/utils.py (+50, -46)
  8. tests/__main__.py (+5, -2)
  9. tests/test_logger.py (+65, -0)
  10. tests/test_nearley/test_nearley.py (+4, -3)
  11. tests/test_parser.py (+2, -1)

docs/how_to_use.md (+4, -3)

@@ -30,12 +30,13 @@ Use the reference pages for more in-depth explanations. (links in the [main page

## LALR usage

-By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure `logging` framework beforehand. For example:
+By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure the `logger` beforehand. For example:

```python
-from lark import Lark
import logging
-logging.basicConfig(level=logging.DEBUG)
+from lark import Lark, logger
+
+logger.setLevel(logging.DEBUG)

collision_grammar = '''
start: as as

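Since the hunk above is cut off mid-example, here is how the updated snippet reads in full once this change is applied; the remaining grammar rules and the `Lark(...)` call are taken from `tests/test_logger.py` later in this commit, so nothing here is new beyond assembling them:

```python
import logging
from lark import Lark, logger

logger.setLevel(logging.DEBUG)

# Grammar with a deliberate Shift/Reduce conflict (same rules as in tests/test_logger.py)
collision_grammar = '''
start: as as
as: a*
a: "a"
'''

# With debug=True, the LALR analyzer reports the conflict through the 'lark' logger
Lark(collision_grammar, parser='lalr', debug=True)
```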

lark/__init__.py (+1, -0)

@@ -1,3 +1,4 @@
+from .utils import logger
from .tree import Tree
from .visitors import Transformer, Visitor, v_args, Discard
from .visitors import InlineTransformer, inline_args  # XXX Deprecated


lark/exceptions.py (+5, -6)

@@ -1,7 +1,6 @@
-from .utils import STRING_TYPE
+from .utils import STRING_TYPE, logger

###{standalone
-import logging


class LarkError(Exception):
@@ -62,24 +61,24 @@ class UnexpectedInput(LarkError):
                except UnexpectedInput as ut:
                    if ut.state == self.state:
                        if use_accepts and ut.accepts != self.accepts:
-                            logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
+                            logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                (self.state, self.accepts, ut.accepts, i, j))
                            continue
                        try:
                            if ut.token == self.token:  # Try exact match first
-                                logging.debug("Exact Match at example [%s][%s]" % (i, j))
+                                logger.debug("Exact Match at example [%s][%s]" % (i, j))
                                return label

                            if token_type_match_fallback:
                                # Fallback to token types match
                                if (ut.token.type == self.token.type) and not candidate[-1]:
-                                    logging.debug("Token Type Fallback at example [%s][%s]" % (i, j))
+                                    logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
                                    candidate = label, True

                        except AttributeError:
                            pass
                        if not candidate[0]:
-                            logging.debug("Same State match at example [%s][%s]" % (i, j))
+                            logger.debug("Same State match at example [%s][%s]" % (i, j))
                            candidate = label, False

        return candidate[0]

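The debug messages changed above are emitted by `UnexpectedInput.match_examples`, which replays a dictionary of known-bad inputs through the parser to find the one that best matches the current error. A rough, hypothetical sketch of the calling pattern; the grammar, labels, and inputs below are invented for illustration and are not part of this commit:

```python
import logging
from lark import Lark, UnexpectedInput, logger

logger.setLevel(logging.DEBUG)

# lexer='standard' so a wrong token surfaces as UnexpectedToken at parse time
parser = Lark(r'start: "a" "b"', parser='lalr', lexer='standard')

# Hypothetical mapping of error labels to malformed example inputs
examples = {
    'second item is missing': ['a'],
    'second item is wrong': ['aa'],
}

try:
    parser.parse('aa')
except UnexpectedInput as u:
    # match_examples re-parses each example; the logger.debug calls patched above
    # report exact matches, token-type fallbacks, and same-state matches.
    print(u.match_examples(parser.parse, examples))  # expected to print 'second item is wrong'
```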

lark/lark.py (+4, -4)

@@ -1,10 +1,10 @@
from __future__ import absolute_import

-import sys, os, pickle, hashlib, logging
+import sys, os, pickle, hashlib
from io import open


-from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii
+from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar
from .tree import Tree
from .common import LexerConf, ParserConf
@@ -214,7 +214,7 @@ class Lark(Serialize):
            cache_fn = '.lark_cache_%s.tmp' % md5

            if FS.exists(cache_fn):
-                logging.debug('Loading grammar from cache: %s', cache_fn)
+                logger.debug('Loading grammar from cache: %s', cache_fn)
                with FS.open(cache_fn, 'rb') as f:
                    self._load(f, self.options.transformer, self.options.postlex)
                return
@@ -291,7 +291,7 @@ class Lark(Serialize):
            self.lexer = self._build_lexer()

        if cache_fn:
-            logging.debug('Saving grammar to cache: %s', cache_fn)
+            logger.debug('Saving grammar to cache: %s', cache_fn)
            with FS.open(cache_fn, 'wb') as f:
                self.save(f)


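For context, the two messages above fire when grammar caching is enabled. A small sketch of triggering them, assuming the public `cache=True` keyword drives this code path (the grammar is illustrative):

```python
import logging
from lark import Lark, logger

logger.setLevel(logging.DEBUG)

grammar = 'start: "a"+'

# First construction builds the parser and logs 'Saving grammar to cache: ...'
Lark(grammar, parser='lalr', cache=True)

# An identical construction finds the '.lark_cache_<md5>.tmp' file and logs
# 'Loading grammar from cache: ...' instead of rebuilding the parser tables.
Lark(grammar, parser='lalr', cache=True)
```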


lark/parsers/earley.py (+2, -2)

@@ -10,11 +10,11 @@ is better documented here:
http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
"""

-import logging
from collections import deque

from ..visitors import Transformer_InPlace, v_args
from ..exceptions import UnexpectedEOF, UnexpectedToken
+from ..utils import logger
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item, TransitiveItem
@@ -301,7 +301,7 @@ class Parser:
            try:
                debug_walker = ForestToPyDotVisitor()
            except ImportError:
-                logging.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
+                logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
            else:
                debug_walker.visit(solutions[0], "sppf.png")




lark/parsers/lalr_analysis.py (+4, -5)

@@ -6,10 +6,9 @@ For now, shift/reduce conflicts are automatically resolved as shifts.
# Author: Erez Shinan (2017)
# Email : erezshin@gmail.com

-import logging
-from collections import defaultdict, deque
+from collections import defaultdict

-from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
+from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger
from ..exceptions import GrammarError

from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
@@ -256,8 +255,8 @@ class LALR_Analyzer(GrammarAnalyzer):
                    raise GrammarError('Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ])))
                if la in actions:
                    if self.debug:
-                        logging.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
-                        logging.warning(' * %s', list(rules)[0])
+                        logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
+                        logger.warning(' * %s', list(rules)[0])
                else:
                    actions[la] = (Reduce, list(rules)[0])
            m[state] = { k.name: v for k, v in actions.items() }


lark/utils.py (+50, -46)

@@ -4,51 +4,15 @@ from functools import reduce
from ast import literal_eval
from collections import deque

-class fzset(frozenset):
-    def __repr__(self):
-        return '{%s}' % ', '.join(map(repr, self))
-
-
-def classify_bool(seq, pred):
-    true_elems = []
-    false_elems = []
-
-    for elem in seq:
-        if pred(elem):
-            true_elems.append(elem)
-        else:
-            false_elems.append(elem)
-
-    return true_elems, false_elems
-
-
-def bfs(initial, expand):
-    open_q = deque(list(initial))
-    visited = set(open_q)
-    while open_q:
-        node = open_q.popleft()
-        yield node
-        for next_node in expand(node):
-            if next_node not in visited:
-                visited.add(next_node)
-                open_q.append(next_node)
-
-
-def _serialize(value, memo):
-    if isinstance(value, Serialize):
-        return value.serialize(memo)
-    elif isinstance(value, list):
-        return [_serialize(elem, memo) for elem in value]
-    elif isinstance(value, frozenset):
-        return list(value)  # TODO reversible?
-    elif isinstance(value, dict):
-        return {key:_serialize(elem, memo) for key, elem in value.items()}
-    return value
-
-###{standalone
+###{standalone
+import logging
+logger = logging.getLogger("lark")
+logger.addHandler(logging.StreamHandler())
+# Set to highest level, since we have some warnings amongst the code
+# By default, we should not output any log messages
+logger.setLevel(logging.CRITICAL)
+
+
def classify(seq, key=None, value=None):
    d = {}
    for item in seq:
@@ -302,13 +266,11 @@ def combine_alternatives(lists):
    return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)

-

class FS:
    open = open
    exists = os.path.exists

-

def isascii(s):
    """ str.isascii only exists in python3.7+ """
    try:
@@ -318,4 +280,46 @@ def isascii(s):
            s.encode('ascii')
            return True
        except (UnicodeDecodeError, UnicodeEncodeError):
            return False
+
+
+class fzset(frozenset):
+    def __repr__(self):
+        return '{%s}' % ', '.join(map(repr, self))
+
+
+def classify_bool(seq, pred):
+    true_elems = []
+    false_elems = []
+
+    for elem in seq:
+        if pred(elem):
+            true_elems.append(elem)
+        else:
+            false_elems.append(elem)
+
+    return true_elems, false_elems
+
+
+def bfs(initial, expand):
+    open_q = deque(list(initial))
+    visited = set(open_q)
+    while open_q:
+        node = open_q.popleft()
+        yield node
+        for next_node in expand(node):
+            if next_node not in visited:
+                visited.add(next_node)
+                open_q.append(next_node)
+
+
+def _serialize(value, memo):
+    if isinstance(value, Serialize):
+        return value.serialize(memo)
+    elif isinstance(value, list):
+        return [_serialize(elem, memo) for elem in value]
+    elif isinstance(value, frozenset):
+        return list(value)  # TODO reversible?
+    elif isinstance(value, dict):
+        return {key:_serialize(elem, memo) for key, elem in value.items()}
+    return value

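The new standalone block above introduces the shared `lark` logger: it gets a plain `StreamHandler` and is silenced by default (`CRITICAL`), so applications have to opt in explicitly. A minimal sketch of routing it into an application's own logging setup; the handler and format below are illustrative choices, not part of the commit:

```python
import logging
from lark import logger

# Lower the level from the library default of logging.CRITICAL
logger.setLevel(logging.DEBUG)

# Optionally replace the default StreamHandler with an application handler
handler = logging.FileHandler('lark.log')  # hypothetical destination
handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
logger.handlers[:] = [handler]
```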
tests/__main__.py (+5, -2)

@@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function

import unittest
import logging
+from lark import logger

from .test_trees import TestTrees
from .test_tools import TestStandalone
@@ -11,11 +12,13 @@ from .test_reconstructor import TestReconstructor
try:
    from .test_nearley.test_nearley import TestNearley
except ImportError:
-    logging.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)")
+    logger.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)")

# from .test_selectors import TestSelectors
# from .test_grammars import TestPythonG, TestConfigG

+from .test_logger import Testlogger
+
from .test_parser import (
        TestLalrStandard,
        TestEarleyStandard,
@@ -31,7 +34,7 @@ from .test_parser import (
        TestParsers,
        )

-logging.basicConfig(level=logging.INFO)
+logger.setLevel(logging.INFO)

if __name__ == '__main__':
    unittest.main()

tests/test_logger.py (+65, -0, new file)

@@ -0,0 +1,65 @@
+import logging
+from contextlib import contextmanager
+from lark import Lark, logger
+from unittest import TestCase, main
+
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
+
+@contextmanager
+def capture_log():
+    stream = StringIO()
+    orig_handler = logger.handlers[0]
+    del logger.handlers[:]
+    logger.addHandler(logging.StreamHandler(stream))
+    yield stream
+    del logger.handlers[:]
+    logger.addHandler(orig_handler)
+
+class Testlogger(TestCase):
+
+    def test_debug(self):
+        logger.setLevel(logging.DEBUG)
+        collision_grammar = '''
+        start: as as
+        as: a*
+        a: "a"
+        '''
+        with capture_log() as log:
+            Lark(collision_grammar, parser='lalr', debug=True)
+
+        log = log.getvalue()
+        # since there are conflicts about A
+        # symbol A should appear in the log message for hint
+        self.assertIn("A", log)
+
+    def test_non_debug(self):
+        logger.setLevel(logging.DEBUG)
+        collision_grammar = '''
+        start: as as
+        as: a*
+        a: "a"
+        '''
+        with capture_log() as log:
+            Lark(collision_grammar, parser='lalr', debug=False)
+        log = log.getvalue()
+        # no log messge
+        self.assertEqual(len(log), 0)
+
+    def test_loglevel_higher(self):
+        logger.setLevel(logging.ERROR)
+        collision_grammar = '''
+        start: as as
+        as: a*
+        a: "a"
+        '''
+        with capture_log() as log:
+            Lark(collision_grammar, parser='lalr', debug=True)
+        log = log.getvalue()
+        # no log messge
+        self.assertEqual(len(log), 0)
+
+if __name__ == '__main__':
+    main()

tests/test_nearley/test_nearley.py (+4, -3)

@@ -6,16 +6,17 @@ import logging
import os
import codecs

-logging.basicConfig(level=logging.INFO)
-
+from lark import logger
from lark.tools.nearley import create_code_for_nearley_grammar, main as nearley_tool_main

+logger.setLevel(logging.INFO)
+
TEST_PATH = os.path.abspath(os.path.dirname(__file__))
NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley')
BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin')

if not os.path.exists(NEARLEY_PATH):
    -logging.warn("Nearley not installed. Skipping Nearley tests!")
    +logger.warn("Nearley not installed. Skipping Nearley tests!")
    raise ImportError("Skipping Nearley tests!")

import js2py  # Ensures that js2py exists, to avoid failing tests


tests/test_parser.py (+2, -1)

@@ -23,13 +23,13 @@ from io import (
        open,
    )

-logging.basicConfig(level=logging.INFO)

try:
    import regex
except ImportError:
    regex = None

+from lark import logger
from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
@@ -37,6 +37,7 @@ from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

+logger.setLevel(logging.INFO)


__path__ = os.path.dirname(__file__)

