
Merge remote-tracking branch 'origin/v1.0' into 1.0b

Erez Sh · 3 years ago · commit 554835f19e
16 changed files with 333 additions and 104 deletions
  1. README.md (+1 / -1)
  2. docs/classes.rst (+2 / -0)
  3. docs/index.rst (+1 / -1)
  4. docs/visitors.rst (+5 / -0)
  5. examples/advanced/python3.lark (+88 / -53)
  6. examples/standalone/json_parser_main.py (+3 / -1)
  7. lark/ast_utils.py (+2 / -2)
  8. lark/exceptions.py (+23 / -7)
  9. lark/lark.py (+7 / -1)
  10. lark/lexer.py (+0 / -1)
  11. lark/load_grammar.py (+123 / -12)
  12. lark/parser_frontends.py (+1 / -1)
  13. lark/parsers/lalr_interactive_parser.py (+1 / -1)
  14. lark/utils.py (+28 / -2)
  15. tests/test_grammar.py (+48 / -1)
  16. tests/test_parser.py (+0 / -20)

README.md (+1 / -1)

@@ -26,7 +26,7 @@ Most importantly, Lark will save you time and prevent you from getting parsing h

- [Documentation @readthedocs](https://lark-parser.readthedocs.io/)
- [Cheatsheet (PDF)](/docs/_static/lark_cheatsheet.pdf)
- [Online IDE (very basic)](https://lark-parser.github.io/lark/ide/app.html)
- [Online IDE](https://lark-parser.github.io/ide)
- [Tutorial](/docs/json_tutorial.md) for writing a JSON parser.
- Blog post: [How to write a DSL with Lark](http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/)
- [Gitter chat](https://gitter.im/lark-parser/Lobby)


docs/classes.rst (+2 / -0)

@@ -66,6 +66,8 @@ UnexpectedInput

.. autoclass:: lark.exceptions.UnexpectedCharacters

.. autoclass:: lark.exceptions.UnexpectedEOF

InteractiveParser
-----------------



docs/index.rst (+1 / -1)

@@ -113,7 +113,7 @@ Resources

.. _Examples: https://github.com/lark-parser/lark/tree/master/examples
.. _Third-party examples: https://github.com/ligurio/lark-grammars
.. _Online IDE: https://lark-parser.github.io/lark/ide/app.html
.. _Online IDE: https://lark-parser.github.io/ide
.. _How to write a DSL: http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/
.. _Program Synthesis is Possible: https://www.cs.cornell.edu/~asampson/blog/minisynth.html
.. _Cheatsheet (PDF): _static/lark_cheatsheet.pdf


docs/visitors.rst (+5 / -0)

@@ -107,3 +107,8 @@ Discard
-------

.. autoclass:: lark.visitors.Discard

VisitError
----------

.. autoclass:: lark.exceptions.VisitError
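
For context, ``VisitError`` wraps any exception raised inside a visitor or transformer callback. A minimal sketch of catching it (the grammar and the ValueError are illustrative):

from lark import Lark, Transformer
from lark.exceptions import VisitError

class T(Transformer):
    def start(self, children):
        raise ValueError("boom")

try:
    T().transform(Lark('start: "a"').parse("a"))
except VisitError as e:
    # .rule, .obj and .orig_exc are documented in lark/exceptions.py below
    print(e.rule, type(e.orig_exc).__name__)  # -> start ValueError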

examples/advanced/python3.lark (+88 / -53)

@@ -21,7 +21,7 @@ decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)

async_funcdef: "async" funcdef
funcdef: "def" NAME "(" parameters? ")" ["->" test] ":" suite
funcdef: "def" NAME "(" [parameters] ")" ["->" test] ":" suite

parameters: paramvalue ("," paramvalue)* ["," SLASH] ["," [starparams | kwparams]]
| starparams
@@ -29,25 +29,36 @@ parameters: paramvalue ("," paramvalue)* ["," SLASH] ["," [starparams | kwparams

SLASH: "/" // Otherwise the it will completely disappear and it will be undisguisable in the result
starparams: "*" typedparam? ("," paramvalue)* ["," kwparams]
kwparams: "**" typedparam
kwparams: "**" typedparam ","?

?paramvalue: typedparam ["=" test]
?typedparam: NAME [":" test]
?paramvalue: typedparam ("=" test)?
?typedparam: NAME (":" test)?

varargslist: (vfpdef ["=" test] ("," vfpdef ["=" test])* ["," [ "*" [vfpdef] ("," vfpdef ["=" test])* ["," ["**" vfpdef [","]]] | "**" vfpdef [","]]]
| "*" [vfpdef] ("," vfpdef ["=" test])* ["," ["**" vfpdef [","]]]
| "**" vfpdef [","])

vfpdef: NAME
lambdef: "lambda" [lambda_params] ":" test
lambdef_nocond: "lambda" [lambda_params] ":" test_nocond
lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]]
| lambda_starparams
| lambda_kwparams
?lambda_paramvalue: NAME ("=" test)?
lambda_starparams: "*" [NAME] ("," lambda_paramvalue)* ["," [lambda_kwparams]]
lambda_kwparams: "**" NAME ","?


?stmt: simple_stmt | compound_stmt
?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE
?small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
?expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist)
| ("=" (yield_expr|testlist_star_expr))*)
annassign: ":" test ["=" test]
?testlist_star_expr: (test|star_expr) ("," (test|star_expr))* [","]
!augassign: ("+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=")
?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr
assign_stmt: annassign | augassign | assign

annassign: testlist_star_expr ":" test ["=" test]
assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+
augassign: testlist_star_expr augassign_op (yield_expr|testlist)
!augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//="
?testlist_star_expr: test_or_star_expr
| test_or_star_expr ("," test_or_star_expr)+ ","? -> tuple
| test_or_star_expr "," -> tuple

// For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: "del" exprlist
pass_stmt: "pass"
@@ -71,43 +82,52 @@ global_stmt: "global" NAME ("," NAME)*
nonlocal_stmt: "nonlocal" NAME ("," NAME)*
assert_stmt: "assert" test ["," test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: "async" (funcdef | with_stmt | for_stmt)
if_stmt: "if" test ":" suite ("elif" test ":" suite)* ["else" ":" suite]
if_stmt: "if" test ":" suite elifs ["else" ":" suite]
elifs: elif_*
elif_: "elif" test ":" suite
while_stmt: "while" test ":" suite ["else" ":" suite]
for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite]
try_stmt: ("try" ":" suite ((except_clause ":" suite)+ ["else" ":" suite] ["finally" ":" suite] | "finally" ":" suite))
with_stmt: "with" with_item ("," with_item)* ":" suite
try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally]
| "try" ":" suite finally -> try_finally
finally: "finally" ":" suite
except_clauses: except_clause+
except_clause: "except" [test ["as" NAME]] ":" suite

with_stmt: "with" with_items ":" suite
with_items: with_item ("," with_item)*
with_item: test ["as" expr]
// NB compile.c makes sure that the default except clause is last
except_clause: "except" [test ["as" NAME]]
suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT

?test: or_test ("if" or_test "else" test)? | lambdef
?test: or_test ("if" or_test "else" test)?
| lambdef
?test_nocond: or_test | lambdef_nocond
lambdef: "lambda" [varargslist] ":" test
lambdef_nocond: "lambda" [varargslist] ":" test_nocond

?or_test: and_test ("or" and_test)*
?and_test: not_test ("and" not_test)*
?not_test: "not" not_test -> not
?not_test: "not" not_test -> not_test
| comparison
?comparison: expr (_comp_op expr)*
?comparison: expr (comp_op expr)*
star_expr: "*" expr
?expr: xor_expr ("|" xor_expr)*

?expr: or_expr
?or_expr: xor_expr ("|" xor_expr)*
?xor_expr: and_expr ("^" and_expr)*
?and_expr: shift_expr ("&" shift_expr)*
?shift_expr: arith_expr (_shift_op arith_expr)*
?arith_expr: term (_add_op term)*
?term: factor (_mul_op factor)*
?factor: _factor_op factor | power
?factor: _unary_op factor | power

!_factor_op: "+"|"-"|"~"
!_unary_op: "+"|"-"|"~"
!_add_op: "+"|"-"
!_shift_op: "<<"|">>"
!_mul_op: "*"|"@"|"/"|"%"|"//"
// <> isn't actually a valid comparison operator in Python. It's here for the
// sake of a __future__ import described in PEP 401 (which really works :-)
!_comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not"
!comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not"

?power: await_expr ("**" factor)?
?await_expr: AWAIT? atom_expr
@@ -118,61 +138,75 @@ AWAIT: "await"
| atom_expr "." NAME -> getattr
| atom

?atom: "(" [yield_expr|tuplelist_comp] ")" -> tuple
| "[" [testlist_comp] "]" -> list
| "{" [dict_comp] "}" -> dict
| "{" set_comp "}" -> set
?atom: "(" yield_expr ")"
| "(" _tuple_inner? ")" -> tuple
| "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension
| "[" _testlist_comp? "]" -> list
| "[" comprehension{test_or_star_expr} "]" -> list_comprehension
| "{" _dict_exprlist? "}" -> dict
| "{" comprehension{key_value} "}" -> dict_comprehension
| "{" _set_exprlist "}" -> set
| "{" comprehension{test} "}" -> set_comprehension
| NAME -> var
| number | string+
| number
| string_concat
| "(" test ")"
| "..." -> ellipsis
| "None" -> const_none
| "True" -> const_true
| "False" -> const_false

?testlist_comp: test | tuplelist_comp
tuplelist_comp: (test|star_expr) (comp_for | ("," (test|star_expr))+ [","] | ",")

?string_concat: string+

_testlist_comp: test | _tuple_inner
_tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",")

?test_or_star_expr: test
| star_expr

?subscriptlist: subscript
| subscript (("," subscript)+ [","] | ",") -> subscript_tuple
subscript: test | ([test] ":" [test] [sliceop]) -> slice
?subscript: test | ([test] ":" [test] [sliceop]) -> slice
sliceop: ":" [test]
exprlist: (expr|star_expr)
| (expr|star_expr) (("," (expr|star_expr))+ [","]|",") -> exprlist_tuple
testlist: test | testlist_tuple
?exprlist: (expr|star_expr)
| (expr|star_expr) (("," (expr|star_expr))+ [","]|",")
?testlist: test | testlist_tuple
testlist_tuple: test (("," test)+ [","] | ",")
dict_comp: key_value comp_for
| (key_value | "**" expr) ("," (key_value | "**" expr))* [","]
_dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","]

key_value: test ":" test

set_comp: test comp_for
| (test|star_expr) ("," (test | star_expr))* [","]
_set_exprlist: test_or_star_expr ("," test_or_star_expr)* [","]

classdef: "class" NAME ["(" [arguments] ")"] ":" suite



arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])?
| starargs
| kwargs
| test comp_for
| comprehension{test}

starargs: "*" test ("," "*" test)* ("," argvalue)* ["," kwargs]
starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs]
stararg: "*" test
kwargs: "**" test

?argvalue: test ("=" test)?


comp_iter: comp_for | comp_if | async_for
async_for: "async" "for" exprlist "in" or_test [comp_iter]
comp_for: "for" exprlist "in" or_test [comp_iter]
comp_if: "if" test_nocond [comp_iter]
comprehension{comp_result}: comp_result comp_fors [comp_if]
comp_fors: comp_for+
comp_for: [ASYNC] "for" exprlist "in" or_test
ASYNC: "async"
?comp_if: "if" test_nocond

// not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

yield_expr: "yield" [yield_arg]
yield_arg: "from" test | testlist

yield_expr: "yield" [testlist]
| "yield" "from" test -> yield_from

number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER
string: STRING | LONG_STRING
@@ -181,6 +215,7 @@ string: STRING | LONG_STRING
%import python (NAME, COMMENT, STRING, LONG_STRING)
%import python (DEC_NUMBER, HEX_NUMBER, OCT_NUMBER, BIN_NUMBER, FLOAT_NUMBER, IMAG_NUMBER)


// Other terminals

_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+
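
Note: the rewritten grammar relies on Lark's template rules (e.g. ``comprehension{test}``). A minimal standalone sketch of the feature, using an illustrative ``_separated`` template:

from lark import Lark

grammar = r"""
start: "[" _separated{NUMBER, ","} "]"

_separated{x, sep}: x (sep x)*

%import common.NUMBER
%import common.WS
%ignore WS
"""

print(Lark(grammar, parser='lalr').parse("[1, 2, 3]").pretty())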


examples/standalone/json_parser_main.py (+3 / -1)

@@ -10,7 +10,9 @@ Standalone Parser

import sys

from json_parser import Lark_StandAlone, Transformer, inline_args
from json_parser import Lark_StandAlone, Transformer, v_args

inline_args = v_args(inline=True)

class TreeToJson(Transformer):
@inline_args
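
Note: ``inline_args`` is no longer importable from the generated standalone module; it is now derived from ``v_args``. A minimal sketch of the same pattern outside the standalone tool (the ``string`` method is illustrative):

from lark import Transformer, v_args

inline_args = v_args(inline=True)

class TreeToJson(Transformer):
    @inline_args
    def string(self, s):
        # children arrive as positional arguments instead of a single list
        return s[1:-1].replace('\\"', '"')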


lark/ast_utils.py (+2 / -2)

@@ -38,8 +38,8 @@ def create_transformer(ast_module: types.ModuleType, transformer: Optional[Trans
Classes starting with an underscore (`_`) will be skipped.

Parameters:
ast_module - A Python module containing all the subclasses of `ast_utils.Ast`
transformer (Optional[Transformer]) - An initial transformer. Its attributes may be overwritten.
ast_module: A Python module containing all the subclasses of ``ast_utils.Ast``
transformer (Optional[Transformer]): An initial transformer. Its attributes may be overwritten.
"""
t = transformer or Transformer()
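
A minimal sketch of the documented API (the module scan and class names are illustrative, following examples/advanced/create_ast.py):

import sys
from dataclasses import dataclass
from lark import ast_utils, Transformer

class _Ast(ast_utils.Ast):
    pass                # classes starting with '_' are skipped

@dataclass
class Name(_Ast):       # becomes the callback for the 'name' rule
    name: str

transformer = ast_utils.create_transformer(sys.modules[__name__], Transformer())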



lark/exceptions.py (+23 / -7)

@@ -40,8 +40,9 @@ class UnexpectedInput(LarkError):

Used as a base class for the following exceptions:

- ``UnexpectedToken``: The parser received an unexpected token
- ``UnexpectedCharacters``: The lexer encountered an unexpected string
- ``UnexpectedToken``: The parser received an unexpected token
- ``UnexpectedEOF``: The parser expected a token, but the input ended

After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
"""
@@ -135,7 +136,8 @@ class UnexpectedInput(LarkError):


class UnexpectedEOF(ParseError, UnexpectedInput):

"""An exception that is raised by the parser, when the input ends while it still expects a token.
"""
expected: 'List[Token]'

def __init__(self, expected, state=None, terminals_by_name=None):
@@ -158,6 +160,9 @@ class UnexpectedEOF(ParseError, UnexpectedInput):


class UnexpectedCharacters(LexError, UnexpectedInput):
"""An exception that is raised by the lexer, when it cannot match the next
string of characters to any of its terminals.
"""

allowed: Set[str]
considered_tokens: Set[Any]
@@ -199,10 +204,15 @@ class UnexpectedToken(ParseError, UnexpectedInput):
"""An exception that is raised by the parser, when the token it received
doesn't match any valid step forward.

The parser provides an interactive instance through `interactive_parser`,
which is initialized to the point of failture, and can be used for debugging and error handling.
Parameters:
token: The mismatched token
expected: The set of expected tokens
considered_rules: Which rules were considered, to deduce the expected tokens
state: A value representing the parser state. Do not rely on its value or type.
interactive_parser: An instance of ``InteractiveParser`` that is initialized to the point of failure,
and can be used for debugging and error handling.

see: ``InteractiveParser``.
Note: These parameters are available as attributes of the instance.
"""

expected: Set[str]
@@ -247,8 +257,13 @@ class VisitError(LarkError):
"""VisitError is raised when visitors are interrupted by an exception

It provides the following attributes for inspection:
- obj: the tree node or token it was processing when the exception was raised
- orig_exc: the exception that cause it to fail

Parameters:
rule: the name of the visit rule that failed
obj: the tree-node or token that was being processed
orig_exc: the exception that caused it to fail

Note: These parameters are available as attributes
"""

obj: 'Union[Tree, Token]'
@@ -258,6 +273,7 @@ class VisitError(LarkError):
message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
super(VisitError, self).__init__(message)

self.rule = rule
self.obj = obj
self.orig_exc = orig_exc
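
As the updated docstrings state, all three exceptions derive from ``UnexpectedInput`` and can be caught together; a minimal sketch (the grammar is illustrative):

from lark import Lark, UnexpectedInput

parser = Lark('start: "a" "b"', parser='lalr')
try:
    parser.parse("ax")
except UnexpectedInput as e:
    # works for UnexpectedToken, UnexpectedCharacters and UnexpectedEOF alike
    print(type(e).__name__)
    print(e.get_context("ax"))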



lark/lark.py (+7 / -1)

@@ -137,7 +137,7 @@ class LarkOptions(Serialize):
A List of either paths or loader functions to specify from where grammars are imported
source_path
Override the source of from where the grammar was loaded. Useful for relative imports and unconventional grammar loading
**=== End Options ===**
**=== End of Options ===**
"""
if __doc__:
__doc__ += OPTIONS_DOC
@@ -560,6 +560,8 @@ class Lark(Serialize):
"""Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'

When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.

:raises UnexpectedCharacters: In case the lexer cannot find a suitable match.
"""
if not hasattr(self, 'lexer') or dont_ignore:
lexer = self._build_lexer(dont_ignore)
@@ -602,6 +604,10 @@ class Lark(Serialize):
If a transformer is supplied to ``__init__``, returns whatever is the
result of the transformation. Otherwise, returns a Tree instance.

:raises UnexpectedInput: On a parse error, one of these sub-exceptions will arise:
``UnexpectedCharacters``, ``UnexpectedToken``, or ``UnexpectedEOF``.
For convenience, these sub-exceptions also inherit from ``ParseError`` and ``LexError``.

"""
return self.parser.parse(text, start=start, on_error=on_error)
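
For the ``lex`` docstring above, a minimal sketch of lexing without parsing (the grammar is illustrative; lexer='standard' as the docstring requires):

from lark import Lark

parser = Lark(r"""
start: WORD+
%import common.WORD
%ignore " "
""", parser='lalr', lexer='standard')

for tok in parser.lex("hello world"):
    print(tok.type, repr(tok))  # e.g. WORD Token('WORD', 'hello')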



lark/lexer.py (+0 / -1)

@@ -281,7 +281,6 @@ def _create_unless(terminals, g_regex_flags, re_, use_bytes):
return new_terminals, callback



class Scanner:
def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False):
self.terminals = terminals


lark/load_grammar.py (+123 / -12)

@@ -10,7 +10,7 @@ from numbers import Integral
from contextlib import suppress
from typing import List, Tuple, Union, Callable, Dict, Optional

from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique
from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors
from .lexer import Token, TerminalDef, PatternStr, PatternRE

from .parse_tree_builder import ParseTreeBuilder
@@ -176,27 +176,136 @@ RULES = {
}


# Value 5 keeps the number of states in the LALR parser somewhat minimal.
# It isn't optimal, but close to it. See PR #949
SMALL_FACTOR_THRESHOLD = 5
# The threshold above which repeats via ~ are split up into different rules.
# 50 is chosen since it keeps the number of states low, and therefore LALR analysis time low,
# while not being too aggressive and unnecessarily creating rules that might cause shift/reduce conflicts.
# (See PR #949)
REPEAT_BREAK_THRESHOLD = 50


@inline_args
class EBNF_to_BNF(Transformer_InPlace):
def __init__(self):
self.new_rules = []
self.rules_by_expr = {}
self.rules_cache = {}
self.prefix = 'anon'
self.i = 0
self.rule_options = None

def _add_recurse_rule(self, type_, expr):
if expr in self.rules_by_expr:
return self.rules_by_expr[expr]

new_name = '__%s_%s_%d' % (self.prefix, type_, self.i)
def _name_rule(self, inner):
new_name = '__%s_%s_%d' % (self.prefix, inner, self.i)
self.i += 1
t = NonTerminal(new_name)
tree = ST('expansions', [ST('expansion', [expr]), ST('expansion', [t, expr])])
self.new_rules.append((new_name, tree, self.rule_options))
self.rules_by_expr[expr] = t
return new_name

def _add_rule(self, key, name, expansions):
t = NonTerminal(name)
self.new_rules.append((name, expansions, self.rule_options))
self.rules_cache[key] = t
return t

def _add_recurse_rule(self, type_, expr):
try:
return self.rules_cache[expr]
except KeyError:
new_name = self._name_rule(type_)
t = NonTerminal(new_name)
tree = ST('expansions', [
ST('expansion', [expr]),
ST('expansion', [t, expr])
])
return self._add_rule(expr, new_name, tree)

def _add_repeat_rule(self, a, b, target, atom):
"""Generate a rule that repeats target ``a`` times, and repeats atom ``b`` times.

When called recursively (into target), it repeats atom for x(n) times, where:
x(0) = 1
x(n) = a(n) * x(n-1) + b

Example rule when a=3, b=4:

new_rule: target target target atom atom atom atom

"""
key = (a, b, target, atom)
try:
return self.rules_cache[key]
except KeyError:
new_name = self._name_rule('repeat_a%d_b%d' % (a, b))
tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
return self._add_rule(key, new_name, tree)

def _add_repeat_opt_rule(self, a, b, target, target_opt, atom):
"""Creates a rule that matches atom 0 to (a*n+b)-1 times.

Assuming target matches atom exactly n times, and target_opt matches atom 0 to n-1 times,

First we generate target * i followed by target_opt, for i from 0 to a-1
These match 0 to n*a - 1 times atom

Then we generate target * a followed by atom * i, for i from 0 to b-1
These match n*a to n*a + b-1 times atom

The created rule will not have any shift/reduce conflicts so that it can be used with lalr

Example rule when a=3, b=4:

new_rule: target_opt
| target target_opt
| target target target_opt

| target target target
| target target target atom
| target target target atom atom
| target target target atom atom atom

"""
key = (a, b, target, atom, "opt")
try:
return self.rules_cache[key]
except KeyError:
new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b))
tree = ST('expansions', [
ST('expansion', [target]*i + [target_opt]) for i in range(a)
] + [
ST('expansion', [target]*a + [atom]*i) for i in range(b)
])
return self._add_rule(key, new_name, tree)

def _generate_repeats(self, rule, mn, mx):
"""Generates a rule tree that repeats ``rule`` exactly between ``mn`` to ``mx`` times.
"""
# For a small number of repeats, we can take the naive approach
if mx < REPEAT_BREAK_THRESHOLD:
return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])

# For large repeat values, we break the repetition into sub-rules.
# We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``.
# We then use small_factors to split mn and diff up into values [(a, b), ...]
# These values are used with the help of _add_repeat_rule and _add_repeat_opt_rule
# to generate a complete rule/expression that matches the corresponding number of repeats
mn_target = rule
for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD):
mn_target = self._add_repeat_rule(a, b, mn_target, rule)
if mx == mn:
return mn_target

diff = mx - mn + 1 # We add one because _add_repeat_opt_rule generates rules that match one less
diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD)
diff_target = rule # Match rule 1 times
diff_opt_target = ST('expansion', []) # match rule 0 times (i.e. up to 1-1 times)
for a, b in diff_factors[:-1]:
diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
diff_target = self._add_repeat_rule(a, b, diff_target, rule)

a, b = diff_factors[-1]
diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)

return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])])

def expr(self, rule, op, *args):
if op.value == '?':
empty = ST('expansion', [])
@@ -221,7 +330,9 @@ class EBNF_to_BNF(Transformer_InPlace):
mn, mx = map(int, args)
if mx < mn or mn < 0:
raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))
return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx+1)])

return self._generate_repeats(rule, mn, mx)

assert False, op

def maybe(self, rule):
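
Taken together, these changes let grammars use large ranged repeats without exploding the LALR table; a small sketch that mirrors the new tests:

from lark import Lark

# "A"~8191 used to expand into a single 8191-symbol expansion; it is now
# broken into logarithmically many sub-rules via small_factors (lark/utils.py)
parser = Lark('start: "A"~8191', parser='lalr')
parser.parse("A" * 8191)      # succeeds
assert len(parser.rules) > 1  # extra generated rules, as the tests check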


lark/parser_frontends.py (+1 / -1)

@@ -238,4 +238,4 @@ class MakeParsingFrontend(MakeParsingFrontend):
assert isinstance(parser_conf, ParserConf)
parser_conf.parser_type = self.parser_type
lexer_conf.lexer_type = self.lexer_type
return ParsingFrontend(lexer_conf, parser_conf, options)
return ParsingFrontend(lexer_conf, parser_conf, options)

lark/parsers/lalr_interactive_parser.py (+1 / -1)

@@ -65,7 +65,7 @@ class InteractiveParser(object):
"""Print the output of ``choices()`` in a way that's easier to read."""
out = ["Parser choices:"]
for k, v in self.choices().items():
out.append('\t- %s -> %s' % (k, v))
out.append('\t- %s -> %r' % (k, v))
out.append('stack size: %s' % len(self.parser_state.state_stack))
return '\n'.join(out)
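
For context, a minimal sketch of reaching the interactive parser whose choices are printed here (the grammar is illustrative):

from lark import Lark, UnexpectedToken

parser = Lark('start: "a" "b" "c"', parser='lalr')
try:
    parser.parse("ac")
except UnexpectedToken as e:
    # frozen at the failure point, as documented in lark/exceptions.py above
    print(e.interactive_parser.accepts())  # e.g. {'B'}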



lark/utils.py (+28 / -2)

@@ -163,7 +163,7 @@ def get_regexp_width(expr):
return 1, sre_constants.MAXREPEAT
else:
return 0, sre_constants.MAXREPEAT
###}


@@ -245,7 +245,7 @@ except ImportError:

class FS:
exists = os.path.exists
@staticmethod
def open(name, mode="r", **kwargs):
if atomicwrites and "w" in mode:
@@ -316,3 +316,29 @@ def _serialize(value, memo):
return {key:_serialize(elem, memo) for key, elem in value.items()}
# assert value is None or isinstance(value, (int, float, str, tuple)), value
return value




def small_factors(n, max_factor):
"""
Splits n up into smaller factors and summands <= max_factor.
Returns a list of [(a, b), ...]
so that the following code returns n:

n = 1
for a, b in values:
n = n * a + b

Currently, we also keep a + b <= max_factor, but that might change
"""
assert n >= 0
assert max_factor > 2
if n <= max_factor:
return [(n, 0)]

for a in range(max_factor, 1, -1):
r, b = divmod(n, a)
if a + b <= max_factor:
return small_factors(r, max_factor) + [(a, b)]
assert False, "Failed to factorize %s" % n
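
A quick sanity check of the invariant stated in the docstring (assuming the helper is importable as lark.utils.small_factors, per this diff):

from lark.utils import small_factors

def rebuild(pairs):
    n = 1
    for a, b in pairs:
        n = n * a + b
    return n

for n in (0, 1, 5, 50, 8191):
    assert rebuild(small_factors(n, 5)) == n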

tests/test_grammar.py (+48 / -1)

@@ -3,7 +3,7 @@ from __future__ import absolute_import
import sys
from unittest import TestCase, main

from lark import Lark, Token, Tree
from lark import Lark, Token, Tree, ParseError, UnexpectedInput
from lark.load_grammar import GrammarError, GRAMMAR_ERRORS, find_grammar_errors
from lark.load_grammar import FromPackageLoader

@@ -198,6 +198,53 @@ class TestGrammar(TestCase):
x = find_grammar_errors(text)
assert [e.line for e, _s in find_grammar_errors(text)] == [2, 6]

def test_ranged_repeat_terms(self):
g = u"""!start: AAA
AAA: "A"~3
"""
l = Lark(g, parser='lalr')
self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

g = u"""!start: AABB CC
AABB: "A"~0..2 "B"~2
CC: "C"~1..2
"""
l = Lark(g, parser='lalr')
self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

def test_ranged_repeat_large(self):
g = u"""!start: "A"~60
"""
l = Lark(g, parser='lalr')
self.assertGreater(len(l.rules), 1, "Expected that more than one rule will be generated")
self.assertEqual(l.parse(u'A' * 60), Tree('start', ["A"] * 60))
self.assertRaises(ParseError, l.parse, u'A' * 59)
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'A' * 61)

g = u"""!start: "A"~15..100
"""
l = Lark(g, parser='lalr')
for i in range(0, 110):
if 15 <= i <= 100:
self.assertEqual(l.parse(u'A' * i), Tree('start', ['A']*i))
else:
self.assertRaises(UnexpectedInput, l.parse, u'A' * i)

# 8191 is a Mersenne prime
g = u"""start: "A"~8191
"""
l = Lark(g, parser='lalr')
self.assertEqual(l.parse(u'A' * 8191), Tree('start', []))
self.assertRaises(UnexpectedInput, l.parse, u'A' * 8190)
self.assertRaises(UnexpectedInput, l.parse, u'A' * 8192)


if __name__ == '__main__':


tests/test_parser.py (+0 / -20)

@@ -2203,27 +2203,7 @@ def _make_parser_test(LEXER, PARSER):
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')


def test_ranged_repeat_terms(self):
g = u"""!start: AAA
AAA: "A"~3
"""
l = _Lark(g)
self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

g = u"""!start: AABB CC
AABB: "A"~0..2 "B"~2
CC: "C"~1..2
"""
l = _Lark(g)
self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

@unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
def test_priority_vs_embedded(self):

