Mercurial > hg > ooxml
changeset 2:263a1935d37d
more aggressive - (non-intl) splitting of refs and names,
leave sheet ! for later
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Sat, 25 Mar 2017 10:02:37 +0000 |
parents | 20424d7e99e4 |
children | 2c115aefde6b |
files | parse.py |
diffstat | 1 files changed, 19 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/parse.py Fri Mar 24 22:41:28 2017 +0000
+++ b/parse.py Sat Mar 25 10:02:37 2017 +0000
@@ -24,7 +24,9 @@
       "[^" + cw + "]+(?:\.[^" + cw + "]+)*\\(",
       "\\)",
       "^=|\\(",
-      "'[^']+'!",
+      "'[^']+'",
+      "\$?[A-Z]+\$?[0-9]+",
+      "[a-zA-Z_\\\\][a-zA-Z0-9._]*",
       "."]
 
 ## They perform the following tasks, in order:
@@ -36,8 +38,13 @@
 ## 6. Represents the beginning of the formula or an opening
 ## parenthesis (not part of a function)
 ## 7. A sheet name (delimited by single quotes) and a !
-## 8. Each characters not matched by the previous patterns
+## 8. A cell reference
+## 9. A name
+## 10. Each characters not matched by the previous patterns
 
+# As it stands this combines operators with following refs or vars
+# Should we try ref="\$?[A-Z]+\$?[0-9]+" and var=[a-zA-Z_\\][a-zA-Z0-9._]* ?
+# (suitably int'lised)
 tokPat=re.compile("("+(")|(".join(pats))+")",re.IGNORECASE)
 
 
@@ -69,7 +76,7 @@
   res=""
   rtype=None
   cur=""
-  for txt,cm,sep,ofun,close,opn,sheet,misc in l:
+  for txt,cm,sep,ofun,close,opn,sheet,ref,var,misc in l:
     if txt is not '':
       if res is not '' and rtype is not 1:
         yield res
@@ -78,12 +85,12 @@
       res+=txt
       rtype=1
       continue
-    if misc is not '' or sheet is not '':
+    if misc is not '':
       if res is not '' and rtype is not 2:
         yield res
         rtype=2
         res=''
-      res+=misc if misc is not '' else sheet
+      res+=misc
       rtype=2
       continue
     if cm is not '':
@@ -96,6 +103,12 @@
       cur=close
     elif opn is not '':
       cur=opn
+    elif sheet is not '':
+      cur=sheet
+    elif ref is not '':
+      cur=ref
+    elif var is not '':
+      cur=var
     if res is not '':
       yield res
       rtype=None
@@ -103,3 +116,4 @@
     yield cur
 
 toks=list(mergeMatches(l))
+print (''.join(toks))==f