From eabb86860dfbbf9b20a6f67c316411227849a2d8 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 11 Feb 2017 20:00:35 +0200 Subject: [PATCH] Added parsimonious to benchmarks --- README.md | 4 ++-- docs/json_tutorial.md | 6 ++++-- lark/parsers/lalr_analysis.py | 6 +++++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1e5d655..4586ee0 100644 --- a/README.md +++ b/README.md @@ -75,13 +75,13 @@ These features are planned to be implemented in the near future: This is a feature comparison. For benchmarks vs pyparsing, check out the [JSON tutorial](/docs/json_tutorial.md#conclusion). -| Library | Algorithm | LOC | Grammar | Builds AST +| Library | Algorithm | LOC | Grammar | Builds tree? |:--------|:----------|:----|:--------|:------------ | Lark | Earley/LALR(1) | 0.5K | EBNF+ | Yes! | | [PLY](http://www.dabeaz.com/ply/) | LALR(1) | 4.6K | Yacc-like BNF | No | | [PyParsing](http://pyparsing.wikispaces.com/) | PEG | 5.7K | Parser combinators | No | | [Parsley](https://pypi.python.org/pypi/Parsley) | PEG | 3.3K | EBNF-like | No | -| [funcparserlib](https://github.com/vlasovskikh/funcparserlib) | Recursive-Descent | 0.5K | Parser combinators | No | +| [funcparserlib](https://github.com/vlasovskikh/funcparserlib) | Recursive-Descent | 0.5K | Parser combinators | No | [Parsimonious](https://github.com/erikrose/parsimonious) | PEG | ? 
| EBNF | Yes | (*LOC measures lines of code of the parsing algorithm(s), without accompanying files*) diff --git a/docs/json_tutorial.md b/docs/json_tutorial.md index bd7d0bf..be475ba 100644 --- a/docs/json_tutorial.md +++ b/docs/json_tutorial.md @@ -407,10 +407,12 @@ I measured memory consumption using a little script called [memusg](https://gist | Lark - Earley | 36s | 4.3s | 6.2M | 1.2M | | Lark - LALR(1) | 7s | 1.3s | 0.6M | 0.3M | | Lark - LALR(1) tree-less | 4.2s | 1.1s | 0.4M | 0.3M | +|:-----|:-------------|:------------|:----------|:--------- | PyParsing ([Parser](http://pyparsing.wikispaces.com/file/view/jsonParser.py)) | 32s | 4.1s | 0.4M | 0.2M | -| funcparselibr ([Parser](https://github.com/vlasovskikh/funcparserlib/blob/master/funcparserlib/tests/json.py)) | 11s | 1.9s | 0.5M | 0.3M | +| funcparserlib ([Parser](https://github.com/vlasovskikh/funcparserlib/blob/master/funcparserlib/tests/json.py)) | 11s | 1.9s | 0.5M | 0.3M | +| Parsimonious ([Parser](https://gist.githubusercontent.com/reclosedev/5222560/raw/5e97cf7eb62c3a3671885ec170577285e891f7d5/parsimonious_json.py)) | ? | 7s | ? | 1.4M | -I added PyParsing and funcparselib for comparison. They fair pretty well in their memory usage (they don't build a tree), but they can't compete with the run-time speed of LALR(1). +I added a few other parsers for comparison. PyParsing and funcparserlib fare pretty well in their memory usage (they don't build a tree), but they can't compete with the run-time speed of LALR(1). These benchmarks are for Lark's alpha version. I already have several optimizations planned that will significantly improve run-time speed. 
diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 569c6f3..6ed7b4c 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -1,3 +1,4 @@ +import logging from collections import defaultdict, deque from ..utils import classify, classify_bool, bfs, fzset @@ -57,8 +58,9 @@ def update_set(set1, set2): return set1 != copy class GrammarAnalyzer(object): - def __init__(self, rule_tuples, start_symbol): + def __init__(self, rule_tuples, start_symbol, debug=False): self.start_symbol = start_symbol + self.debug = debug rule_tuples = list(rule_tuples) rule_tuples.append(('$root', [start_symbol, '$end'])) rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples] @@ -177,6 +179,8 @@ class GrammarAnalyzer(object): for k, v in lookahead.items(): if len(v) > 1: + if self.debug: + logging.warning("Shift/reduce conflict for %s: %s. Resolving as shift.", k, v) for x in v: # XXX resolving shift/reduce into shift, like PLY # Give a proper warning