Browse Source

Merge pull request #33 from kasbah/fix-nearley-utf8

Fix Unicode issues in Nearley transformation
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
committed by GitHub
parent
commit
358d54adad
4 changed files with 38 additions and 12 deletions
  1. +12
    -10
      lark/tools/nearley.py
  2. +3
    -0
      tests/test_nearley/grammars/include_unicode.ne
  3. +1
    -0
      tests/test_nearley/grammars/unicode.ne
  4. +22
    -2
      tests/test_nearley/test_nearley.py

+ 12
- 10
lark/tools/nearley.py View File

@@ -2,6 +2,7 @@


import os.path import os.path
import sys import sys
import codecs




from lark import Lark, InlineTransformer, Transformer from lark import Lark, InlineTransformer, Transformer
@@ -113,7 +114,7 @@ def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
path = os.path.join(folder, arg[1:-1]) path = os.path.join(folder, arg[1:-1])
if path not in includes: if path not in includes:
includes.add(path) includes.add(path)
with open(path) as f:
with codecs.open(path, encoding='utf8') as f:
text = f.read() text = f.read()
rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes) rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
else: else:
@@ -168,17 +169,18 @@ def create_code_for_nearley_grammar(g, start, builtin_path, folder_path):


return ''.join(emit_code) return ''.join(emit_code)


def main():
if len(sys.argv) < 3:
print("Reads Nearley grammar (with js functions) outputs an equivalent lark parser.")
print("Usage: %s <nearley_grammar_path> <start_rule> <nearley_lib_path>" % sys.argv[0])
return

fn, start, nearley_lib = sys.argv[1:]
with open(fn) as f:
def main(fn, start, nearley_lib):
with codecs.open(fn, encoding='utf8') as f:
grammar = f.read() grammar = f.read()
print(create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)))) print(create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn))))




if __name__ == '__main__': if __name__ == '__main__':
main()
if len(sys.argv) < 4:
print("Reads Nearley grammar (with js functions) outputs an equivalent lark parser.")
print("Usage: %s <nearley_grammar_path> <start_rule> <nearley_lib_path>" % sys.argv[0])
sys.exit(1)

fn, start, nearley_lib = sys.argv[1:]

main(fn, start, nearley_lib)

+ 3
- 0
tests/test_nearley/grammars/include_unicode.ne View File

@@ -0,0 +1,3 @@
@include "unicode.ne"

main -> x

+ 1
- 0
tests/test_nearley/grammars/unicode.ne View File

@@ -0,0 +1 @@
x -> "±a"

+ 22
- 2
tests/test_nearley/test_nearley.py View File

@@ -1,14 +1,17 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import from __future__ import absolute_import


import unittest import unittest
import logging import logging
import os import os
import codecs


logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)


from lark.tools.nearley import create_code_for_nearley_grammar
from lark.tools.nearley import create_code_for_nearley_grammar, main as nearley_tool_main


NEARLEY_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), 'nearley'))
TEST_PATH = os.path.abspath(os.path.dirname(__file__))
NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley')
BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin') BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin')


class TestNearley(unittest.TestCase): class TestNearley(unittest.TestCase):
@@ -59,6 +62,23 @@ class TestNearley(unittest.TestCase):
parse('b') parse('b')
parse('c') parse('c')


def test_utf8(self):
grammar = u'main -> "±a"'
code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './')
d = {}
exec (code, d)
parse = d['parse']

parse(u'±a')

def test_utf8_2(self):
fn = os.path.join(TEST_PATH, 'grammars/unicode.ne')
nearley_tool_main(fn, 'x', NEARLEY_PATH)

def test_include_utf8(self):
fn = os.path.join(TEST_PATH, 'grammars/include_unicode.ne')
nearley_tool_main(fn, 'main', NEARLEY_PATH)



if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

Loading…
Cancel
Save