Browse Source

Merge branch 'pwwang-master'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
Erez Sh 4 years ago
parent
commit
7010f96825
11 changed files with 146 additions and 72 deletions
  1. +4
    -3
      docs/how_to_use.md
  2. +1
    -0
      lark/__init__.py
  3. +5
    -6
      lark/exceptions.py
  4. +4
    -4
      lark/lark.py
  5. +2
    -2
      lark/parsers/earley.py
  6. +4
    -5
      lark/parsers/lalr_analysis.py
  7. +50
    -46
      lark/utils.py
  8. +5
    -2
      tests/__main__.py
  9. +65
    -0
      tests/test_logger.py
  10. +4
    -3
      tests/test_nearley/test_nearley.py
  11. +2
    -1
      tests/test_parser.py

+ 4
- 3
docs/how_to_use.md View File

@@ -30,12 +30,13 @@ Use the reference pages for more in-depth explanations. (links in the [main page

## LALR usage

By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure `logging` framework beforehand. For example:
By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure the `logger` beforehand. For example:

```python
from lark import Lark
import logging
logging.basicConfig(level=logging.DEBUG)
from lark import Lark, logger

logger.setLevel(logging.DEBUG)

collision_grammar = '''
start: as as


+ 1
- 0
lark/__init__.py View File

@@ -1,3 +1,4 @@
from .utils import logger
from .tree import Tree
from .visitors import Transformer, Visitor, v_args, Discard
from .visitors import InlineTransformer, inline_args # XXX Deprecated


+ 5
- 6
lark/exceptions.py View File

@@ -1,7 +1,6 @@
from .utils import STRING_TYPE
from .utils import STRING_TYPE, logger

###{standalone
import logging


class LarkError(Exception):
@@ -62,24 +61,24 @@ class UnexpectedInput(LarkError):
except UnexpectedInput as ut:
if ut.state == self.state:
if use_accepts and ut.accepts != self.accepts:
logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
(self.state, self.accepts, ut.accepts, i, j))
continue
try:
if ut.token == self.token: # Try exact match first
logging.debug("Exact Match at example [%s][%s]" % (i, j))
logger.debug("Exact Match at example [%s][%s]" % (i, j))
return label

if token_type_match_fallback:
# Fallback to token types match
if (ut.token.type == self.token.type) and not candidate[-1]:
logging.debug("Token Type Fallback at example [%s][%s]" % (i, j))
logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
candidate = label, True

except AttributeError:
pass
if not candidate[0]:
logging.debug("Same State match at example [%s][%s]" % (i, j))
logger.debug("Same State match at example [%s][%s]" % (i, j))
candidate = label, False

return candidate[0]


+ 4
- 4
lark/lark.py View File

@@ -1,10 +1,10 @@
from __future__ import absolute_import

import sys, os, pickle, hashlib, logging
import sys, os, pickle, hashlib
from io import open


from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii
from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar
from .tree import Tree
from .common import LexerConf, ParserConf
@@ -214,7 +214,7 @@ class Lark(Serialize):
cache_fn = '.lark_cache_%s.tmp' % md5

if FS.exists(cache_fn):
logging.debug('Loading grammar from cache: %s', cache_fn)
logger.debug('Loading grammar from cache: %s', cache_fn)
with FS.open(cache_fn, 'rb') as f:
self._load(f, self.options.transformer, self.options.postlex)
return
@@ -291,7 +291,7 @@ class Lark(Serialize):
self.lexer = self._build_lexer()

if cache_fn:
logging.debug('Saving grammar to cache: %s', cache_fn)
logger.debug('Saving grammar to cache: %s', cache_fn)
with FS.open(cache_fn, 'wb') as f:
self.save(f)



+ 2
- 2
lark/parsers/earley.py View File

@@ -10,11 +10,11 @@ is better documented here:
http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
"""

import logging
from collections import deque

from ..visitors import Transformer_InPlace, v_args
from ..exceptions import UnexpectedEOF, UnexpectedToken
from ..utils import logger
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item, TransitiveItem
@@ -301,7 +301,7 @@ class Parser:
try:
debug_walker = ForestToPyDotVisitor()
except ImportError:
logging.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
else:
debug_walker.visit(solutions[0], "sppf.png")



+ 4
- 5
lark/parsers/lalr_analysis.py View File

@@ -6,10 +6,9 @@ For now, shift/reduce conflicts are automatically resolved as shifts.
# Author: Erez Shinan (2017)
# Email : erezshin@gmail.com

import logging
from collections import defaultdict, deque
from collections import defaultdict

from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger
from ..exceptions import GrammarError

from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
@@ -256,8 +255,8 @@ class LALR_Analyzer(GrammarAnalyzer):
raise GrammarError('Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ])))
if la in actions:
if self.debug:
logging.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
logging.warning(' * %s', list(rules)[0])
logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
logger.warning(' * %s', list(rules)[0])
else:
actions[la] = (Reduce, list(rules)[0])
m[state] = { k.name: v for k, v in actions.items() }


+ 50
- 46
lark/utils.py View File

@@ -4,51 +4,15 @@ from functools import reduce
from ast import literal_eval
from collections import deque

class fzset(frozenset):
def __repr__(self):
return '{%s}' % ', '.join(map(repr, self))


def classify_bool(seq, pred):
true_elems = []
false_elems = []

for elem in seq:
if pred(elem):
true_elems.append(elem)
else:
false_elems.append(elem)

return true_elems, false_elems



def bfs(initial, expand):
open_q = deque(list(initial))
visited = set(open_q)
while open_q:
node = open_q.popleft()
yield node
for next_node in expand(node):
if next_node not in visited:
visited.add(next_node)
open_q.append(next_node)


###{standalone
import logging
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)


def _serialize(value, memo):
if isinstance(value, Serialize):
return value.serialize(memo)
elif isinstance(value, list):
return [_serialize(elem, memo) for elem in value]
elif isinstance(value, frozenset):
return list(value) # TODO reversible?
elif isinstance(value, dict):
return {key:_serialize(elem, memo) for key, elem in value.items()}
return value

###{standalone
def classify(seq, key=None, value=None):
d = {}
for item in seq:
@@ -302,13 +266,11 @@ def combine_alternatives(lists):
return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)



class FS:
open = open
exists = os.path.exists



def isascii(s):
""" str.isascii only exists in python3.7+ """
try:
@@ -318,4 +280,46 @@ def isascii(s):
s.encode('ascii')
return True
except (UnicodeDecodeError, UnicodeEncodeError):
return False
return False


class fzset(frozenset):
def __repr__(self):
return '{%s}' % ', '.join(map(repr, self))


def classify_bool(seq, pred):
true_elems = []
false_elems = []

for elem in seq:
if pred(elem):
true_elems.append(elem)
else:
false_elems.append(elem)

return true_elems, false_elems


def bfs(initial, expand):
open_q = deque(list(initial))
visited = set(open_q)
while open_q:
node = open_q.popleft()
yield node
for next_node in expand(node):
if next_node not in visited:
visited.add(next_node)
open_q.append(next_node)


def _serialize(value, memo):
if isinstance(value, Serialize):
return value.serialize(memo)
elif isinstance(value, list):
return [_serialize(elem, memo) for elem in value]
elif isinstance(value, frozenset):
return list(value) # TODO reversible?
elif isinstance(value, dict):
return {key:_serialize(elem, memo) for key, elem in value.items()}
return value

+ 5
- 2
tests/__main__.py View File

@@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function

import unittest
import logging
from lark import logger

from .test_trees import TestTrees
from .test_tools import TestStandalone
@@ -11,11 +12,13 @@ from .test_reconstructor import TestReconstructor
try:
from .test_nearley.test_nearley import TestNearley
except ImportError:
logging.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)")
logger.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)")

# from .test_selectors import TestSelectors
# from .test_grammars import TestPythonG, TestConfigG

from .test_logger import Testlogger

from .test_parser import (
TestLalrStandard,
TestEarleyStandard,
@@ -31,7 +34,7 @@ from .test_parser import (
TestParsers,
)

logging.basicConfig(level=logging.INFO)
logger.setLevel(logging.INFO)

if __name__ == '__main__':
unittest.main()

+ 65
- 0
tests/test_logger.py View File

@@ -0,0 +1,65 @@
import logging
from contextlib import contextmanager
from lark import Lark, logger
from unittest import TestCase, main

try:
from StringIO import StringIO
except ImportError:
from io import StringIO

@contextmanager
def capture_log():
stream = StringIO()
orig_handler = logger.handlers[0]
del logger.handlers[:]
logger.addHandler(logging.StreamHandler(stream))
yield stream
del logger.handlers[:]
logger.addHandler(orig_handler)

class Testlogger(TestCase):

def test_debug(self):
logger.setLevel(logging.DEBUG)
collision_grammar = '''
start: as as
as: a*
a: "a"
'''
with capture_log() as log:
Lark(collision_grammar, parser='lalr', debug=True)

log = log.getvalue()
# since there are conflicts about A
# symbol A should appear in the log message for hint
self.assertIn("A", log)

def test_non_debug(self):
logger.setLevel(logging.DEBUG)
collision_grammar = '''
start: as as
as: a*
a: "a"
'''
with capture_log() as log:
Lark(collision_grammar, parser='lalr', debug=False)
log = log.getvalue()
# no log message
self.assertEqual(len(log), 0)

def test_loglevel_higher(self):
logger.setLevel(logging.ERROR)
collision_grammar = '''
start: as as
as: a*
a: "a"
'''
with capture_log() as log:
Lark(collision_grammar, parser='lalr', debug=True)
log = log.getvalue()
# no log message
self.assertEqual(len(log), 0)

if __name__ == '__main__':
main()

+ 4
- 3
tests/test_nearley/test_nearley.py View File

@@ -6,16 +6,17 @@ import logging
import os
import codecs

logging.basicConfig(level=logging.INFO)

from lark import logger
from lark.tools.nearley import create_code_for_nearley_grammar, main as nearley_tool_main

logger.setLevel(logging.INFO)

TEST_PATH = os.path.abspath(os.path.dirname(__file__))
NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley')
BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin')

if not os.path.exists(NEARLEY_PATH):
logging.warn("Nearley not installed. Skipping Nearley tests!")
logger.warn("Nearley not installed. Skipping Nearley tests!")
raise ImportError("Skipping Nearley tests!")

import js2py # Ensures that js2py exists, to avoid failing tests


+ 2
- 1
tests/test_parser.py View File

@@ -23,13 +23,13 @@ from io import (
open,
)

logging.basicConfig(level=logging.INFO)

try:
import regex
except ImportError:
regex = None

from lark import logger
from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
@@ -37,6 +37,7 @@ from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

logger.setLevel(logging.INFO)


__path__ = os.path.dirname(__file__)


Loading…
Cancel
Save