From e201beab5fbdd0730c80cfda20c6afbda1df0a3a Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Sat, 11 Feb 2017 00:50:48 +0200
Subject: [PATCH] Fixed lexer newline counter and other stuff

---
 docs/json_tutorial.md       | 2 ++
 docs/reference.md           | 6 +++---
 examples/json_parser.py     | 2 +-
 lark/indenter.py            | 8 +++++---
 lark/lexer.py               | 9 ++++++++-
 lark/parsers/lalr_parser.py | 7 +++++--
 6 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/docs/json_tutorial.md b/docs/json_tutorial.md
index bf740cc..734a70b 100644
--- a/docs/json_tutorial.md
+++ b/docs/json_tutorial.md
@@ -411,6 +411,8 @@ I measured memory consumption using a little script called [memusg](https://gist
 I added PyParsing for comparison. It fares pretty well in its memory usage, but it can't compete with the run-time speed of LALR(1).
 
+These benchmarks are for Lark's alpha version. I already have several optimizations planned that will significantly improve run-time speed.
+
 Once again, shout-out to PyPy for being so effective.
 
 ## Afterword
diff --git a/docs/reference.md b/docs/reference.md
index fc2b7af..1c57766 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -101,7 +101,7 @@ Tokens that *will* appear in the tree are:
 
 ## Shaping the tree
 
-1. Rules whose name begins with an underscore will be inlined into their containing rule.
+a. Rules whose name begins with an underscore will be inlined into their containing rule.
 
 Example:
 
@@ -115,7 +115,7 @@ Lark will parse "(hello world)" as:
         "world"
 
-2. Rules that receive a question mark (?) at the beginning of their definition will be inlined if they have a single child.
+b. Rules that receive a question mark (?) at the beginning of their definition will be inlined if they have a single child.
 
 Example:
 
@@ -131,7 +131,7 @@ Lark will parse "hello world (planet)" as:
         "world"
         "planet"
 
-3. Aliases - options in a rule can receive an alias. It will then be used as the branch name for the option.
+c. Aliases - options in a rule can receive an alias. It will then be used as the branch name for the option.
 
 Example:
 
diff --git a/examples/json_parser.py b/examples/json_parser.py
index 485ea52..9e4b952 100644
--- a/examples/json_parser.py
+++ b/examples/json_parser.py
@@ -20,7 +20,7 @@ json_grammar = r"""
     number : /-?\d+(\.\d+)?([eE][+-]?\d+)?/
 
     string : /".*?(?<!\\)"/
diff --git a/lark/indenter.py b/lark/indenter.py
--- a/lark/indenter.py
+++ b/lark/indenter.py
@@ -9,6 +9,8 @@ class Indenter:
     def handle_NL(self, token):
         if self.paren_level > 0:
             return
 
+        yield token
+
         indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces
         indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
 
@@ -20,18 +22,18 @@ class Indenter:
         else:
             while indent < self.indent_level[-1]:
                 self.indent_level.pop()
-                yield Token(self.DEDENT_type, indent_str)
+                yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
 
         assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1])
 
     def process(self, stream):
         for token in stream:
-            yield token
-
             if token.type == self.NL_type:
                 for t in self.handle_NL(token):
                     yield t
+            else:
+                yield token
 
             if token.type in self.OPEN_PAREN_types:
                 self.paren_level += 1
 
diff --git a/lark/lexer.py b/lark/lexer.py
index 1fc6a1c..cd32117 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -15,6 +15,13 @@ class Token(Str):
         inst.value = value
         return inst
 
+    @classmethod
+    def new_borrow_pos(cls, type, value, borrow_t):
+        inst = cls(type, value, borrow_t.pos_in_stream)
+        inst.line = borrow_t.line
+        inst.column = borrow_t.column
+        return inst
+
     def __repr__(self):
         return 'Token(%s, %s)' % (self.type, self.value)
 
@@ -46,7 +53,7 @@ class Lexer(object):
         self.token_types = list(token_names)
         self.type_index = {name:i for i,name in enumerate(self.token_types)}
 
-        self.newline_types = [self.type_index[t[0]] for t in tokens if '\n' in t[1] or '\\n' in t[1]]
+        self.newline_types = [self.type_index[t[0]] for t in tokens if '\n' in t[1] or '\\n' in t[1] or '(?s)' in t[1]]
         self.ignore_types = [self.type_index[t] for t in ignore]
 
         self.mres = self._build_mres(tokens, len(tokens))
diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py
index 57bc5ae..2827925 100644
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -26,9 +26,12 @@ class Parser(object):
             except IndexError:
                 assert key == '$end'
                 token = seq[-1]
-            raise ParseError("Unexpected input %r at line %d, column %d.\n"
+            raise ParseError("Unexpected input %r at line %s, column %s.\n"
                              "Expected: %s\n"
-                             "Context: %s" % (token.value, token.line, token.column, expected, context))
+                             "Context: %s" % (token.value,
+                                              getattr(token, 'line', '?'),
+                                              getattr(token, 'column', '?'),
+                                              expected, context))
 
         def reduce(rule):
             if rule.expansion:
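
A note on the lexer fix, with a minimal sketch (an illustration, not part
of the patch): the lexer advances its line counter only for token types
whose pattern can match a newline, previously detected by looking for
'\n' or '\\n' in the pattern string. A pattern using Python's inline
DOTALL flag '(?s)' lets '.' consume newlines without containing either
literal, so such tokens silently broke the counter; the newline_types
check now looks for '(?s)' as well. Using only the standard re module:

    import re

    text = '"a\n b"'

    # By default '.' does not match a newline, so this pattern cannot
    # span lines:
    assert re.match(r'".*?(?<!\\)"', text) is None

    # With the inline DOTALL flag the same pattern swallows the newline:
    m = re.match(r'(?s)".*?(?<!\\)"', text)
    assert m is not None

    # If the lexer doesn't count the newlines inside such a token, every
    # token after it is reported on the wrong line:
    line = 1 + m.group().count('\n')
    assert line == 2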
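
On Token.new_borrow_pos: DEDENT tokens are synthesized by the indenter
rather than matched from the source text, so they carry no position of
their own. Borrowing line/column from the NEWLINE token that triggered
them gives error messages a real location, and the getattr(..., '?')
fallback in lalr_parser.py covers tokens, like $end, that still have
none. A self-contained sketch; the Token class here is a simplified
stand-in, not lark's actual implementation:

    class Token(str):
        def __new__(cls, type, value, pos_in_stream=None):
            inst = str.__new__(cls, value)
            inst.type = type
            inst.pos_in_stream = pos_in_stream
            return inst

        @classmethod
        def new_borrow_pos(cls, type, value, borrow_t):
            # A synthesized token never appears in the text; reuse the
            # position of the real token that caused it to be emitted.
            inst = cls(type, value, borrow_t.pos_in_stream)
            inst.line = borrow_t.line
            inst.column = borrow_t.column
            return inst

    nl = Token('_NEWLINE', '\n', pos_in_stream=42)
    nl.line, nl.column = 7, 0

    dedent = Token.new_borrow_pos('_DEDENT', '', nl)
    assert (dedent.line, dedent.column) == (7, 0)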
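
The indenter change is also behavioral, not just a refactor: process()
used to yield every token before calling handle_NL, so a NEWLINE inside
parentheses still reached the parser. Now the NEWLINE is yielded from
inside handle_NL, after the paren_level check, so newlines between open
parentheses are swallowed, giving implicit line joining. A rough sketch
of the new control flow (simplified; the real handle_NL also emits
INDENT/DEDENT tokens):

    def process(tokens, NL='_NEWLINE'):
        paren_level = 0
        for type_, value in tokens:
            if type_ == NL:
                if paren_level == 0:
                    yield (type_, value)  # handle_NL yields the NEWLINE itself
                # inside parens the NEWLINE is dropped entirely
            else:
                yield (type_, value)
            if value == '(':
                paren_level += 1
            elif value == ')':
                paren_level -= 1

    toks = [('LPAR', '('), ('_NEWLINE', '\n'), ('RPAR', ')'),
            ('_NEWLINE', '\n')]
    assert [t for t, _ in process(toks)].count('_NEWLINE') == 1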