Browse Source

Merge pull request #491 from ElectronicBabylonianLiterature/fix-utf8-import

Open imported grammars with UTF-8 encoding
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.0
Erez Shinan 5 years ago
committed by GitHub
parent
commit
8792c99f39
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 13 additions and 2 deletions
  1. +3
    -2
      lark/load_grammar.py
  2. +1
    -0
      tests/grammars/test_unicode.lark
  3. +6
    -0
      tests/test_parser.py
  4. +3
    -0
      tests/test_relative_import_unicode.lark

+ 3
- 2
lark/load_grammar.py View File

@@ -4,6 +4,7 @@ import os.path
import sys
from ast import literal_eval
from copy import copy, deepcopy
+ from io import open

from .utils import bfs
from .lexer import Token, TerminalDef, PatternStr, PatternRE
@@ -580,13 +581,13 @@ def import_grammar(grammar_path, base_paths=[]):
for import_path in import_paths:
with suppress(IOError):
joined_path = os.path.join(import_path, grammar_path)
- with open(joined_path) as f:
+ with open(joined_path, encoding='utf8') as f:
text = f.read()
grammar = load_grammar(text, joined_path)
_imported_grammars[grammar_path] = grammar
break
else:
- open(grammar_path)
+ open(grammar_path, encoding='utf8')
assert False

return _imported_grammars[grammar_path]


+ 1
- 0
tests/grammars/test_unicode.lark View File

@@ -0,0 +1 @@
UNICODE : /[a-zØ-öø-ÿ]/

+ 6
- 0
tests/test_parser.py View File

@@ -1126,6 +1126,12 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(x.children, ['12', 'lions'])


def test_relative_import_unicode(self):
l = _Lark_open('test_relative_import_unicode.lark', rel_to=__file__)
x = l.parse(u'Ø')
self.assertEqual(x.children, [u'Ø'])


def test_relative_import_rename(self):
l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
x = l.parse('12 lions')


+ 3
- 0
tests/test_relative_import_unicode.lark View File

@@ -0,0 +1,3 @@
start: UNICODE

%import .grammars.test_unicode.UNICODE

Loading…
Cancel
Save