Mercurial > hg > ooxml
diff parse.py @ 1:20424d7e99e4
handle sheet names more carefully
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Fri, 24 Mar 2017 22:41:28 +0000 |
parents | ddd5f7539abc |
children | 263a1935d37d |
line wrap: on
line diff
--- a/parse.py Fri Mar 24 20:27:15 2017 +0000 +++ b/parse.py Fri Mar 24 22:41:28 2017 +0000 @@ -7,7 +7,7 @@ import sys,re -cw = "[^=\-+*/();:,.$<>^]" +cw = "=\-+*/();:,.$<>^!" #cWW = "[=\-+*/();:,.$<>^]" sListSeparator = "," @@ -18,24 +18,26 @@ # in Array. (Semi-colon is row separator) # See https://www.ablebits.com/office-addins-blog/2015/02/25/array-formulas-functions-excel/ -pats=["\"[^\"]*\"|'[^']*'", ## "'[^']*'!", +pats=["\"[^\"]*\"", "\{[^}]+}", sListSeparator, - cw + "+(?:\." + cw + "+)*\\(", + "[^" + cw + "]+(?:\.[^" + cw + "]+)*\\(", "\\)", "^=|\\(", + "'[^']+'!", "."] ## They perform the following tasks, in order: -## 1. Represents a text (delimited by double quotes) or a sheet -## name (delimited by single quotes) +## 1. Represents a text (delimited by double quotes) ## 2. Represents a constant matrix ## 3. Represents a list (function parameter) separator ## 4. Represents a function name followed by an opening parenthesis ## 5. Represents a closing parenthesis ## 6. Represents the beginning of the formula or an opening ## parenthesis (not part of a function) -## 7. Each characters not matched by the previous patterns +## 7. A sheet name (delimited by single quotes) and a ! +## 8. Each characters not matched by the previous patterns + tokPat=re.compile("("+(")|(".join(pats))+")",re.IGNORECASE) @@ -65,13 +67,24 @@ def mergeMatches(l): res="" + rtype=None cur="" - for txt,cm,sep,ofun,close,opn,misc in l: + for txt,cm,sep,ofun,close,opn,sheet,misc in l: if txt is not '': + if res is not '' and rtype is not 1: + yield res + rtype=1 + res='' res+=txt + rtype=1 continue - if misc is not '': - res+=misc + if misc is not '' or sheet is not '': + if res is not '' and rtype is not 2: + yield res + rtype=2 + res='' + res+=misc if misc is not '' else sheet + rtype=2 continue if cm is not '': cur=cm @@ -85,6 +98,7 @@ cur=opn if res is not '': yield res + rtype=None res='' yield cur