diff parse.py @ 1:20424d7e99e4

handle sheet names more carefully
author Henry S. Thompson <ht@markup.co.uk>
date Fri, 24 Mar 2017 22:41:28 +0000
parents ddd5f7539abc
children 263a1935d37d
line wrap: on
line diff
--- a/parse.py	Fri Mar 24 20:27:15 2017 +0000
+++ b/parse.py	Fri Mar 24 22:41:28 2017 +0000
@@ -7,7 +7,7 @@
 
 import sys,re
 
-cw = "[^=\-+*/();:,.$<>^]"
+cw = "=\-+*/();:,.$<>^!"
 #cWW = "[=\-+*/();:,.$<>^]"
 
 sListSeparator = ","
@@ -18,24 +18,26 @@
 #   in Array. (Semi-colon is row separator)
 # See https://www.ablebits.com/office-addins-blog/2015/02/25/array-formulas-functions-excel/
 
-pats=["\"[^\"]*\"|'[^']*'", ##      "'[^']*'!",
+pats=["\"[^\"]*\"",
       "\{[^}]+}",
       sListSeparator,
-      cw + "+(?:\." + cw + "+)*\\(",
+      "[^" + cw + "]+(?:\.[^" + cw + "]+)*\\(",
       "\\)",
       "^=|\\(",
+      "'[^']+'!",
       "."]
 
 ## They perform the following tasks, in order:
-## 1.    Represents a text (delimited by double quotes) or a sheet
-##        name (delimited by single quotes)
+## 1.    Represents a text string (delimited by double quotes)
 ## 2.    Represents a constant matrix
 ## 3.    Represents a list (function parameter) separator
 ## 4.    Represents a function name followed by an opening parenthesis
 ## 5.    Represents a closing parenthesis
 ## 6.    Represents the beginning of the formula or an opening
 ##        parenthesis (not part of a function)
-## 7.    Each characters not matched by the previous patterns
+## 7.    Represents a sheet name (delimited by single quotes) followed by a !
+## 8.    Each character not matched by the previous patterns
+
 
 tokPat=re.compile("("+(")|(".join(pats))+")",re.IGNORECASE)
 
@@ -65,13 +67,24 @@
 
 def mergeMatches(l):
   res=""
+  rtype=None
   cur=""
-  for txt,cm,sep,ofun,close,opn,misc in l:
+  for txt,cm,sep,ofun,close,opn,sheet,misc in l:
     if txt is not '':
+      if res is not '' and rtype is not 1:
+        yield res
+        rtype=1
+        res=''
       res+=txt
+      rtype=1
       continue
-    if misc is not '':
-      res+=misc
+    if misc is not '' or sheet is not '':
+      if res is not '' and rtype is not 2:
+        yield res
+        rtype=2
+        res=''
+      res+=misc if misc is not '' else sheet
+      rtype=2
       continue
     if cm is not '':
       cur=cm
@@ -85,6 +98,7 @@
       cur=opn
     if res is not '':
       yield res
+      rtype=None
       res=''
     yield cur