Browse Source

Added more terminals to grammars/ folder

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
Erez Sh 3 years ago
parent
commit
bc3923aed8
4 changed files with 36 additions and 16 deletions
  1. +5
    -13
      examples/advanced/python3.lark
  2. +10
    -1
      lark/grammars/common.lark
  3. +19
    -0
      lark/grammars/python.lark
  4. +2
    -2
      lark/lark.py

+ 5
- 13
examples/advanced/python3.lark View File

@@ -163,22 +163,14 @@ yield_arg: "from" test | testlist

number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER
string: STRING | LONG_STRING
// Tokens

NAME: /[a-zA-Z_]\w*/
COMMENT: /#[^\n]*/
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+

// Import terminals from standard library (grammars/python.lark)
%import python (NAME, COMMENT, STRING, LONG_STRING)
%import python (DEC_NUMBER, HEX_NUMBER, OCT_NUMBER, BIN_NUMBER, FLOAT_NUMBER, IMAG_NUMBER)

STRING : /[ubf]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
LONG_STRING: /[ubf]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is
// Other terminals

DEC_NUMBER: /0|[1-9]\d*/i
HEX_NUMBER.2: /0x[\da-f]*/i
OCT_NUMBER.2: /0o[0-7]*/i
BIN_NUMBER.2 : /0b[0-1]*/i
FLOAT_NUMBER.2: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i
IMAG_NUMBER.2: /\d+j/i | FLOAT_NUMBER "j"i
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+

%ignore /[\t \f]+/ // WS
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT


+ 10
- 1
lark/grammars/common.lark View File

@@ -1,3 +1,6 @@
// Basic terminals for common use


//
// Numbers
//
@@ -21,7 +24,7 @@ SIGNED_NUMBER: ["+"|"-"] NUMBER
// Strings
//
_STRING_INNER: /.*?/
_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/
_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/

ESCAPED_STRING : "\"" _STRING_ESC_INNER "\""

@@ -48,3 +51,9 @@ CR : /\r/
LF : /\n/
NEWLINE: (CR? LF)+


// Comments
SH_COMMENT: /#[^\n]*/
CPP_COMMENT: /\/\/[^\n]*/
C_COMMENT: "/*" /.*?/s "*/"
SQL_COMMENT: /--[^\n]*/

+ 19
- 0
lark/grammars/python.lark View File

@@ -0,0 +1,19 @@
// Python terminals

NAME: /[a-zA-Z_]\w*/
COMMENT: /#[^\n]*/

STRING : /[ubf]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
LONG_STRING: /[ubf]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is

DEC_NUMBER: /0|[1-9]\d*/i
HEX_NUMBER.2: /0x[\da-f]*/i
OCT_NUMBER.2: /0o[0-7]*/i
BIN_NUMBER.2 : /0b[0-1]*/i
FLOAT_NUMBER.2: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i
IMAG_NUMBER.2: /\d+j/i | FLOAT_NUMBER "j"i


// Comma-separated list (with an optional trailing comma)
cs_list{item}: item ("," item)* ","?
_cs_list{item}: item ("," item)* ","?

+ 2
- 2
lark/lark.py View File

@@ -294,8 +294,8 @@ class Lark(Serialize):
if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))

# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, self.source, self.options.import_paths, self.options.keep_all_tokens)
# Parse the grammar file and compose the grammars
self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)

if self.options.postlex is not None:
terminals_to_keep = set(self.options.postlex.always_accept)


Loading…
Cancel
Save