Add UTF-8 regression tests for the Nearley converter.

Adds a one-line grammar fixture containing a non-ASCII literal, plus two
tests: test_utf8 compiles that grammar from an inline unicode string, and
test_utf8_2 loads it from the fixture file decoded as UTF-8. Both then call
the generated parse() on the matching input. Neither test asserts on the
return value; presumably parse() raises on failure (confirm).

diff --git a/tests/test_nearley/grammars/unicode.ne b/tests/test_nearley/grammars/unicode.ne
new file mode 100644
index 0000000..4fd5158
--- /dev/null
+++ b/tests/test_nearley/grammars/unicode.ne
@@ -0,0 +1 @@
+main -> "±a"
diff --git a/tests/test_nearley/test_nearley.py b/tests/test_nearley/test_nearley.py
index 0fbe239..450f7b9 100644
--- a/tests/test_nearley/test_nearley.py
+++ b/tests/test_nearley/test_nearley.py
@@ -1,14 +1,17 @@
+# -*- coding: utf-8 -*-
 from __future__ import absolute_import
 
 import unittest
 import logging
 import os
+import codecs
 
 logging.basicConfig(level=logging.INFO)
 
 from lark.tools.nearley import create_code_for_nearley_grammar
 
-NEARLEY_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), 'nearley'))
+TEST_PATH = os.path.dirname(__file__)
+NEARLEY_PATH = os.path.abspath(os.path.join(TEST_PATH, 'nearley'))
 BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin')
 
 class TestNearley(unittest.TestCase):
@@ -59,6 +62,26 @@ class TestNearley(unittest.TestCase):
         parse('b')
         parse('c')
 
+    def test_utf8(self):
+        grammar = u'main -> "±a"'
+        code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './')
+        d = {}
+        exec (code, d)
+        parse = d['parse']
+
+        parse(u'±a')
+
+    def test_utf8_2(self):
+        fn = os.path.join(TEST_PATH, 'grammars/unicode.ne')
+        with codecs.open(fn, encoding='utf8') as f:
+            grammar = f.read()
+        code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './')
+        d = {}
+        exec (code, d)
+        parse = d['parse']
+
+        parse(u'±a')
+
 
 if __name__ == '__main__':
     unittest.main()