# HG changeset patch # User Henry S. Thompson # Date 1490395288 0 # Node ID 20424d7e99e417d4e91957d2d9782e78311ea222 # Parent ddd5f7539abc984624e3a97733437534d8ab553e handle sheet names more carefully diff -r ddd5f7539abc -r 20424d7e99e4 parse.py --- a/parse.py Fri Mar 24 20:27:15 2017 +0000 +++ b/parse.py Fri Mar 24 22:41:28 2017 +0000 @@ -7,7 +7,7 @@ import sys,re -cw = "[^=\-+*/();:,.$<>^]" +cw = "=\-+*/();:,.$<>^!" #cWW = "[=\-+*/();:,.$<>^]" sListSeparator = "," @@ -18,24 +18,26 @@ # in Array. (Semi-colon is row separator) # See https://www.ablebits.com/office-addins-blog/2015/02/25/array-formulas-functions-excel/ -pats=["\"[^\"]*\"|'[^']*'", ## "'[^']*'!", +pats=["\"[^\"]*\"", "\{[^}]+}", sListSeparator, - cw + "+(?:\." + cw + "+)*\\(", + "[^" + cw + "]+(?:\.[^" + cw + "]+)*\\(", "\\)", "^=|\\(", + "'[^']+'!", "."] ## They perform the following tasks, in order: -## 1. Represents a text (delimited by double quotes) or a sheet -## name (delimited by single quotes) +## 1. Represents a text (delimited by double quotes) ## 2. Represents a constant matrix ## 3. Represents a list (function parameter) separator ## 4. Represents a function name followed by an opening parenthesis ## 5. Represents a closing parenthesis ## 6. Represents the beginning of the formula or an opening ## parenthesis (not part of a function) -## 7. Each characters not matched by the previous patterns +## 7. A sheet name (delimited by single quotes) and a ! +## 8. Each characters not matched by the previous patterns + tokPat=re.compile("("+(")|(".join(pats))+")",re.IGNORECASE) @@ -65,13 +67,24 @@ def mergeMatches(l): res="" + rtype=None cur="" - for txt,cm,sep,ofun,close,opn,misc in l: + for txt,cm,sep,ofun,close,opn,sheet,misc in l: if txt is not '': + if res is not '' and rtype is not 1: + yield res + rtype=1 + res='' res+=txt + rtype=1 continue - if misc is not '': - res+=misc + if misc is not '' or sheet is not '': + if res is not '' and rtype is not 2: + yield res + rtype=2 + res='' + res+=misc if misc is not '' else sheet + rtype=2 continue if cm is not '': cur=cm @@ -85,6 +98,7 @@ cur=opn if res is not '': yield res + rtype=None res='' yield cur