Browse Source

Merge pull request #491 from ElectronicBabylonianLiterature/fix-utf8-import

Open imported grammars with UTF-8 encoding
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.0
Erez Shinan 5 years ago
committed by GitHub
parent
commit
8792c99f39
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 13 additions and 2 deletions
  1. lark/load_grammar.py (+3, -2)
  2. tests/grammars/test_unicode.lark (+1, -0)
  3. tests/test_parser.py (+6, -0)
  4. tests/test_relative_import_unicode.lark (+3, -0)

+ 3
- 2
lark/load_grammar.py View File

@@ -4,6 +4,7 @@ import os.path
import sys import sys
from ast import literal_eval from ast import literal_eval
from copy import copy, deepcopy from copy import copy, deepcopy
from io import open


from .utils import bfs from .utils import bfs
from .lexer import Token, TerminalDef, PatternStr, PatternRE from .lexer import Token, TerminalDef, PatternStr, PatternRE
@@ -580,13 +581,13 @@ def import_grammar(grammar_path, base_paths=[]):
for import_path in import_paths: for import_path in import_paths:
with suppress(IOError): with suppress(IOError):
joined_path = os.path.join(import_path, grammar_path) joined_path = os.path.join(import_path, grammar_path)
with open(joined_path) as f:
with open(joined_path, encoding='utf8') as f:
text = f.read() text = f.read()
grammar = load_grammar(text, joined_path) grammar = load_grammar(text, joined_path)
_imported_grammars[grammar_path] = grammar _imported_grammars[grammar_path] = grammar
break break
else: else:
open(grammar_path)
open(grammar_path, encoding='utf8')
assert False assert False


return _imported_grammars[grammar_path] return _imported_grammars[grammar_path]


+ 1
- 0
tests/grammars/test_unicode.lark View File

@@ -0,0 +1 @@
UNICODE : /[a-zØ-öø-ÿ]/

+ 6
- 0
tests/test_parser.py View File

@@ -1126,6 +1126,12 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(x.children, ['12', 'lions']) self.assertEqual(x.children, ['12', 'lions'])




def test_relative_import_unicode(self):
l = _Lark_open('test_relative_import_unicode.lark', rel_to=__file__)
x = l.parse(u'Ø')
self.assertEqual(x.children, [u'Ø'])


def test_relative_import_rename(self): def test_relative_import_rename(self):
l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__) l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
x = l.parse('12 lions') x = l.parse('12 lions')


+ 3
- 0
tests/test_relative_import_unicode.lark View File

@@ -0,0 +1,3 @@
start: UNICODE

%import .grammars.test_unicode.UNICODE

Loading…
Cancel
Save