Browse Source

Added support for all RE flags.

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 6 years ago
parent
commit
ad30c324f5
4 changed files with 9 additions and 7 deletions
  1. +2
    -2
      examples/python2.g
  2. +1
    -1
      examples/python3.g
  3. +5
    -3
      lark/load_grammar.py
  4. +1
    -1
      lark/tools/nearley.py

+ 2
- 2
examples/python2.g View File

@@ -145,11 +145,11 @@ number: DEC_NUMBER | HEX_NUMBER | OCT_NUMBER | FLOAT | IMAG_NUMBER
string: STRING | LONG_STRING
// Tokens

-COMMENT: /\#[^\n]*/
+COMMENT: /#[^\n]*/
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+

%ignore /[\t \f]+/ // WS
-%ignore /\\\\[\t \f]*\r?\n/ // LINE_CONT
+%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
%ignore COMMENT




+ 1
- 1
examples/python3.g View File

@@ -170,7 +170,7 @@ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+


%ignore /[\t \f]+/ // WS
-%ignore /\\\\[\t \f]*\r?\n/ // LINE_CONT
+%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
%ignore COMMENT




+ 5
- 3
lark/load_grammar.py View File

@@ -18,6 +18,8 @@ from .tree import Tree as T, Transformer, InlineTransformer, Visitor
__path__ = os.path.dirname(__file__)
IMPORT_PATHS = [os.path.join(__path__, 'grammars')]

+_RE_FLAGS = 'imslux'
+
_TOKEN_NAMES = {
'.' : 'DOT',
',' : 'COMMA',
@@ -70,7 +72,7 @@ TOKENS = {
'RULE': '!?[_?]?[a-z][_a-z0-9]*',
'TOKEN': '_?[A-Z][_A-Z0-9]*',
'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
-'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/i?',
+'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]?' % _RE_FLAGS,
'_NL': r'(\r?\n)+\s*',
'WS': r'[ \t]+',
'COMMENT': r'//[^\n]*',
@@ -287,7 +289,7 @@ class ExtractAnonTokens(InlineTransformer):

def _literal_to_pattern(literal):
v = literal.value
-if v[-1] in 'i':
+if v[-1] in _RE_FLAGS:
flags = v[-1]
v = v[:-1]
else:
@@ -295,7 +297,7 @@ def _literal_to_pattern(literal):

assert v[0] == v[-1] and v[0] in '"/'
x = v[1:-1]
-x = re.sub(r'(\\[wd/]|\\\[|\\\])', r'\\\1', x)
+x = re.sub(r'(\\[wd/ ]|\\\[|\\\])', r'\\\1', x)
x = x.replace("'", r"\'")
s = literal_eval("u'''%s'''" % x)
return { 'STRING': PatternStr,


+ 1
- 1
lark/tools/nearley.py View File

@@ -26,7 +26,7 @@ nearley_grammar = r"""
rule: NAME
string: STRING
regexp: REGEXP
-JS: /(?s){%.*?%}/
+JS: /{%.*?%}/s
js: JS?

NAME: /[a-zA-Z_$]\w*/


Loading…
Cancel
Save