@@ -75,13 +75,13 @@ These features are planned to be implemented in the near future: | |||||
This is a feature comparison. For benchmarks vs pyparsing, check out the [JSON tutorial](/docs/json_tutorial.md#conclusion). | This is a feature comparison. For benchmarks vs pyparsing, check out the [JSON tutorial](/docs/json_tutorial.md#conclusion). | ||||
| Library | Algorithm | LOC | Grammar | Builds AST | |||||
| Library | Algorithm | LOC | Grammar | Builds tree? | |||||
|:--------|:----------|:----|:--------|:------------ | |:--------|:----------|:----|:--------|:------------ | ||||
| Lark | Earley/LALR(1) | 0.5K | EBNF+ | Yes! | | | Lark | Earley/LALR(1) | 0.5K | EBNF+ | Yes! | | ||||
| [PLY](http://www.dabeaz.com/ply/) | LALR(1) | 4.6K | Yacc-like BNF | No | | | [PLY](http://www.dabeaz.com/ply/) | LALR(1) | 4.6K | Yacc-like BNF | No | | ||||
| [PyParsing](http://pyparsing.wikispaces.com/) | PEG | 5.7K | Parser combinators | No | | | [PyParsing](http://pyparsing.wikispaces.com/) | PEG | 5.7K | Parser combinators | No | | ||||
| [Parsley](https://pypi.python.org/pypi/Parsley) | PEG | 3.3K | EBNF-like | No | | | [Parsley](https://pypi.python.org/pypi/Parsley) | PEG | 3.3K | EBNF-like | No | | ||||
| [funcparserlib](https://github.com/vlasovskikh/funcparserlib) | Recursive-Descent | 0.5K | Parser combinators | No | | |||||
| [funcparserlib](https://github.com/vlasovskikh/funcparserlib) | Recursive-Descent | 0.5K | Parser combinators | No | [Parsimonious](https://github.com/erikrose/parsimonious) | PEG | ? | EBNF | Yes | | |||||
(*LOC measures lines of code of the parsing algorithm(s), without accompanying files*) | (*LOC measures lines of code of the parsing algorithm(s), without accompanying files*) | ||||
@@ -407,10 +407,12 @@ I measured memory consumption using a little script called [memusg](https://gist | |||||
| Lark - Earley | 36s | 4.3s | 6.2M | 1.2M | | | Lark - Earley | 36s | 4.3s | 6.2M | 1.2M | | ||||
| Lark - LALR(1) | 7s | 1.3s | 0.6M | 0.3M | | | Lark - LALR(1) | 7s | 1.3s | 0.6M | 0.3M | | ||||
| Lark - LALR(1) tree-less | 4.2s | 1.1s | 0.4M | 0.3M | | | Lark - LALR(1) tree-less | 4.2s | 1.1s | 0.4M | 0.3M | | ||||
|:-----|:-------------|:------------|:----------|:--------- | |||||
| PyParsing ([Parser](http://pyparsing.wikispaces.com/file/view/jsonParser.py)) | 32s | 4.1s | 0.4M | 0.2M | | | PyParsing ([Parser](http://pyparsing.wikispaces.com/file/view/jsonParser.py)) | 32s | 4.1s | 0.4M | 0.2M | | ||||
| funcparselibr ([Parser](https://github.com/vlasovskikh/funcparserlib/blob/master/funcparserlib/tests/json.py)) | 11s | 1.9s | 0.5M | 0.3M | | |||||
| funcparserlib ([Parser](https://github.com/vlasovskikh/funcparserlib/blob/master/funcparserlib/tests/json.py)) | 11s | 1.9s | 0.5M | 0.3M | | |||||
| Parsimonious ([Parser](https://gist.githubusercontent.com/reclosedev/5222560/raw/5e97cf7eb62c3a3671885ec170577285e891f7d5/parsimonious_json.py)) | ? | 7s | ? | 1.4M | | |||||
I added PyParsing and funcparselib for comparison. They fair pretty well in their memory usage (they don't build a tree), but they can't compete with the run-time speed of LALR(1). | |||||
I added a few other parsers for comparison. PyParsing and funcparserlib fare pretty well in their memory usage (they don't build a tree), but they can't compete with the run-time speed of LALR(1). | |||||
These benchmarks are for Lark's alpha version. I already have several optimizations planned that will significantly improve run-time speed. | These benchmarks are for Lark's alpha version. I already have several optimizations planned that will significantly improve run-time speed. | ||||
@@ -1,3 +1,4 @@ | |||||
import logging | |||||
from collections import defaultdict, deque | from collections import defaultdict, deque | ||||
from ..utils import classify, classify_bool, bfs, fzset | from ..utils import classify, classify_bool, bfs, fzset | ||||
@@ -57,8 +58,9 @@ def update_set(set1, set2): | |||||
return set1 != copy | return set1 != copy | ||||
class GrammarAnalyzer(object): | class GrammarAnalyzer(object): | ||||
def __init__(self, rule_tuples, start_symbol): | |||||
def __init__(self, rule_tuples, start_symbol, debug=False): | |||||
self.start_symbol = start_symbol | self.start_symbol = start_symbol | ||||
self.debug = debug | |||||
rule_tuples = list(rule_tuples) | rule_tuples = list(rule_tuples) | ||||
rule_tuples.append(('$root', [start_symbol, '$end'])) | rule_tuples.append(('$root', [start_symbol, '$end'])) | ||||
rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples] | rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples] | ||||
@@ -177,6 +179,8 @@ class GrammarAnalyzer(object): | |||||
for k, v in lookahead.items(): | for k, v in lookahead.items(): | ||||
if len(v) > 1: | if len(v) > 1: | ||||
if self.debug: | |||||
logging.warn("Shift/reduce conflict for %s: %s. Resolving as shift.", k, v) | |||||
for x in v: | for x in v: | ||||
# XXX resolving shift/reduce into shift, like PLY | # XXX resolving shift/reduce into shift, like PLY | ||||
# Give a proper warning | # Give a proper warning | ||||