changeset 2:263a1935d37d

more aggressive - (non-intl) splitting of refs and names, leave sheet ! for later
author Henry S. Thompson <ht@markup.co.uk>
date Sat, 25 Mar 2017 10:02:37 +0000
parents 20424d7e99e4
children 2c115aefde6b
files parse.py
diffstat 1 files changed, 19 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/parse.py	Fri Mar 24 22:41:28 2017 +0000
+++ b/parse.py	Sat Mar 25 10:02:37 2017 +0000
@@ -24,7 +24,9 @@
       "[^" + cw + "]+(?:\.[^" + cw + "]+)*\\(",
       "\\)",
       "^=|\\(",
-      "'[^']+'!",
+      "'[^']+'",
+      "\$?[A-Z]+\$?[0-9]+",
+      "[a-zA-Z_\\\\][a-zA-Z0-9._]*",
       "."]
 
 ## They perform the following tasks, in order:
@@ -36,8 +38,13 @@
 ## 6.    Represents the beginning of the formula or an opening
 ##        parenthesis (not part of a function)
 ## 7.    A sheet name (delimited by single quotes) and a !
-## 8.    Each characters not matched by the previous patterns
+## 8.    A cell reference
+## 9.    A name
+## 10.   Each character not matched by the previous patterns
 
+# As it stands, this combines operators with the refs or vars that follow them.
+# Should we try ref="\$?[A-Z]+\$?[0-9]+" and var="[a-zA-Z_\\][a-zA-Z0-9._]*"?
+#  (suitably internationalised)
 
 tokPat=re.compile("("+(")|(".join(pats))+")",re.IGNORECASE)
 
@@ -69,7 +76,7 @@
   res=""
   rtype=None
   cur=""
-  for txt,cm,sep,ofun,close,opn,sheet,misc in l:
+  for txt,cm,sep,ofun,close,opn,sheet,ref,var,misc in l:
     if txt is not '':
       if res is not '' and rtype is not 1:
         yield res
@@ -78,12 +85,12 @@
       res+=txt
       rtype=1
       continue
-    if misc is not '' or sheet is not '':
+    if misc is not '':
       if res is not '' and rtype is not 2:
         yield res
         rtype=2
         res=''
-      res+=misc if misc is not '' else sheet
+      res+=misc
       rtype=2
       continue
     if cm is not '':
@@ -96,6 +103,12 @@
       cur=close
     elif opn is not '':
       cur=opn
+    elif sheet is not '':
+      cur=sheet
+    elif ref is not '':
+      cur=ref
+    elif var is not '':
+      cur=var
     if res is not '':
       yield res
       rtype=None
@@ -103,3 +116,4 @@
     yield cur
 
 toks=list(mergeMatches(l))
+print (''.join(toks))==f