From 25399b4a218a478c997282d75392af7e769d5f19 Mon Sep 17 00:00:00 2001 From: Kaspar Emanuel Date: Mon, 16 Oct 2017 14:53:42 +0100 Subject: [PATCH 1/4] Add tests for utf8 with nearley --- tests/test_nearley/grammars/unicode.ne | 1 + tests/test_nearley/test_nearley.py | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 tests/test_nearley/grammars/unicode.ne diff --git a/tests/test_nearley/grammars/unicode.ne b/tests/test_nearley/grammars/unicode.ne new file mode 100644 index 0000000..4fd5158 --- /dev/null +++ b/tests/test_nearley/grammars/unicode.ne @@ -0,0 +1 @@ +main -> "±a" diff --git a/tests/test_nearley/test_nearley.py b/tests/test_nearley/test_nearley.py index 0fbe239..450f7b9 100644 --- a/tests/test_nearley/test_nearley.py +++ b/tests/test_nearley/test_nearley.py @@ -1,14 +1,17 @@ +# -*- coding: utf-8 -*- from __future__ import absolute_import import unittest import logging import os +import codecs logging.basicConfig(level=logging.INFO) from lark.tools.nearley import create_code_for_nearley_grammar -NEARLEY_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), 'nearley')) +TEST_PATH = os.path.dirname(__file__) +NEARLEY_PATH = os.path.abspath(os.path.join(TEST_PATH, 'nearley')) BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin') class TestNearley(unittest.TestCase): @@ -59,6 +62,26 @@ class TestNearley(unittest.TestCase): parse('b') parse('c') + def test_utf8(self): + grammar = u'main -> "±a"' + code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './') + d = {} + exec (code, d) + parse = d['parse'] + + parse(u'±a') + + def test_utf8_2(self): + fn = os.path.join(TEST_PATH, 'grammars/unicode.ne') + with codecs.open(fn, encoding='utf8') as f: + grammar = f.read() + code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './') + d = {} + exec (code, d) + parse = d['parse'] + + parse(u'±a') + if __name__ == '__main__': unittest.main() From 809ac8c9ead43201c728dc241382615da996b108 Mon Sep 17 00:00:00 2001 From: Kaspar Emanuel Date: Mon, 16 Oct 2017 14:55:13 +0100 Subject: [PATCH 2/4] Switch to codecs.open for nearley tool --- lark/tools/nearley.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lark/tools/nearley.py b/lark/tools/nearley.py index 6db2dd9..892fbf9 100644 --- a/lark/tools/nearley.py +++ b/lark/tools/nearley.py @@ -2,6 +2,7 @@ import os.path import sys +import codecs from lark import Lark, InlineTransformer, Transformer @@ -113,7 +114,7 @@ def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes): path = os.path.join(folder, arg[1:-1]) if path not in includes: includes.add(path) - with open(path) as f: + with codecs.open(path, encoding='utf8') as f: text = f.read() rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes) else: @@ -175,7 +176,7 @@ def main(): return fn, start, nearley_lib = sys.argv[1:] - with open(fn) as f: + with codecs.open(fn, encoding='utf8') as f: grammar = f.read() print(create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)))) From b491138d9a38e95ca7c41a31c03bfb4646babf5d Mon Sep 17 00:00:00 2001 From: Kaspar Emanuel Date: Mon, 16 Oct 2017 16:10:21 +0100 Subject: [PATCH 3/4] Make nearley tool main testable and test it --- lark/tools/nearley.py | 17 +++++++++-------- tests/test_nearley/test_nearley.py | 15 ++++----------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/lark/tools/nearley.py b/lark/tools/nearley.py index 892fbf9..4adac3a 100644 --- a/lark/tools/nearley.py +++ b/lark/tools/nearley.py @@ -169,17 +169,18 @@ def create_code_for_nearley_grammar(g, start, builtin_path, folder_path): return ''.join(emit_code) -def main(): - if len(sys.argv) < 3: - print("Reads Nearley grammar (with js functions) outputs an equivalent lark parser.") - print("Usage: %s " % sys.argv[0]) - return - - fn, start, nearley_lib = sys.argv[1:] +def main(fn, start, nearley_lib): with codecs.open(fn, encoding='utf8') as f: grammar = f.read() print(create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)))) if __name__ == '__main__': - main() + if len(sys.argv) < 4: + print("Reads Nearley grammar (with js functions) outputs an equivalent lark parser.") + print("Usage: %s " % sys.argv[0]) + sys.exit(1) + + fn, start, nearley_lib = sys.argv[1:] + + main(fn, start, nearley_lib) diff --git a/tests/test_nearley/test_nearley.py b/tests/test_nearley/test_nearley.py index 450f7b9..a8d7d19 100644 --- a/tests/test_nearley/test_nearley.py +++ b/tests/test_nearley/test_nearley.py @@ -8,10 +8,10 @@ import codecs logging.basicConfig(level=logging.INFO) -from lark.tools.nearley import create_code_for_nearley_grammar +from lark.tools.nearley import create_code_for_nearley_grammar, main as nearley_tool_main -TEST_PATH = os.path.dirname(__file__) -NEARLEY_PATH = os.path.abspath(os.path.join(TEST_PATH, 'nearley')) +TEST_PATH = os.path.abspath(os.path.dirname(__file__)) +NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley') BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin') class TestNearley(unittest.TestCase): @@ -73,14 +73,7 @@ class TestNearley(unittest.TestCase): def test_utf8_2(self): fn = os.path.join(TEST_PATH, 'grammars/unicode.ne') - with codecs.open(fn, encoding='utf8') as f: - grammar = f.read() - code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './') - d = {} - exec (code, d) - parse = d['parse'] - - parse(u'±a') + nearley_tool_main(fn, 'main', NEARLEY_PATH) if __name__ == '__main__': From e05c62ca061d8b2a0fe5accf5efac33924dded07 Mon Sep 17 00:00:00 2001 From: Kaspar Emanuel Date: Mon, 16 Oct 2017 16:17:40 +0100 Subject: [PATCH 4/4] Add test for nearley include of unicode --- tests/test_nearley/grammars/include_unicode.ne | 3 +++ tests/test_nearley/grammars/unicode.ne | 2 +- tests/test_nearley/test_nearley.py | 4 ++++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/test_nearley/grammars/include_unicode.ne diff --git a/tests/test_nearley/grammars/include_unicode.ne b/tests/test_nearley/grammars/include_unicode.ne new file mode 100644 index 0000000..b04c2a9 --- /dev/null +++ b/tests/test_nearley/grammars/include_unicode.ne @@ -0,0 +1,3 @@ +@include "unicode.ne" + +main -> x diff --git a/tests/test_nearley/grammars/unicode.ne b/tests/test_nearley/grammars/unicode.ne index 4fd5158..c930830 100644 --- a/tests/test_nearley/grammars/unicode.ne +++ b/tests/test_nearley/grammars/unicode.ne @@ -1 +1 @@ -main -> "±a" +x -> "±a" diff --git a/tests/test_nearley/test_nearley.py b/tests/test_nearley/test_nearley.py index a8d7d19..e980f9f 100644 --- a/tests/test_nearley/test_nearley.py +++ b/tests/test_nearley/test_nearley.py @@ -73,6 +73,10 @@ class TestNearley(unittest.TestCase): def test_utf8_2(self): fn = os.path.join(TEST_PATH, 'grammars/unicode.ne') + nearley_tool_main(fn, 'x', NEARLEY_PATH) + + def test_include_utf8(self): + fn = os.path.join(TEST_PATH, 'grammars/include_unicode.ne') nearley_tool_main(fn, 'main', NEARLEY_PATH)