Mercurial > hg > ooxml
comparison parse.py @ 0:ddd5f7539abc
reconstruct the original more or less
author | Henry Thompson <ht@markup.co.uk> |
---|---|
date | Fri, 24 Mar 2017 20:27:15 +0000 |
parents | |
children | 20424d7e99e4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ddd5f7539abc |
---|---|
1 ''' Tokenise Excel formulae | |
2 Starting from regexps and concat rules in | |
3 https://sites.google.com/site/e90e50/random-topics/tool-for-parsing-formulas-in-excel | |
4 and | |
5 parser_formule_with_textbox_v01_2003.xla | |
6 linked to therein''' | |
7 | |
8 import sys,re | |
9 | |
10 cw = "[^=\-+*/();:,.$<>^]" | |
11 #cWW = "[=\-+*/();:,.$<>^]" | |
12 | |
13 sListSeparator = "," | |
14 sRowSeparator = ";" | |
15 | |
16 # If comma is decimal symbol, then \ is used in place of comma in Array. | |
17 # If semi-colon is decimal symbol, then \ is used in place of semi-colon | |
18 # in Array. (Semi-colon is row separator) | |
19 # See https://www.ablebits.com/office-addins-blog/2015/02/25/array-formulas-functions-excel/ | |
20 | |
21 pats=["\"[^\"]*\"|'[^']*'", ## "'[^']*'!", | |
22 "\{[^}]+}", | |
23 sListSeparator, | |
24 cw + "+(?:\." + cw + "+)*\\(", | |
25 "\\)", | |
26 "^=|\\(", | |
27 "."] | |
28 | |
29 ## They perform the following tasks, in order: | |
30 ## 1. Represents a text (delimited by double quotes) or a sheet | |
31 ## name (delimited by single quotes) | |
32 ## 2. Represents a constant matrix | |
33 ## 3. Represents a list (function parameter) separator | |
34 ## 4. Represents a function name followed by an opening parenthesis | |
35 ## 5. Represents a closing parenthesis | |
36 ## 6. Represents the beginning of the formula or an opening | |
37 ## parenthesis (not part of a function) | |
38 ## 7. Each characters not matched by the previous patterns | |
39 | |
40 tokPat=re.compile("("+(")|(".join(pats))+")",re.IGNORECASE) | |
41 | |
42 f="""=IF(I$1>$C$2,VLOOKUP($C52,GP_input!$C$187:$CM$196,VLOOKUP($C$9,lists!$A$34:$B$39,2,FALSE)+BC$4,FALSE),SUMIF('BEX2011'!$C$32:$C$5000,$B$9&"sl"&$B52&$C52&$B53,'BEX2001'!Q$32))""" | |
43 | |
44 l=tokPat.findall(f) | |
45 | |
46 ## Visual basic code: | |
47 ## set M = RE.Execute(s) | |
48 ## s = "" | |
49 ## For Each SM In M | |
50 ## Set SB = SM.SubMatches | |
51 ## If Len(SB(0) & SB(6)) Then | |
52 ## t = SB(0) & SB(6) [HST doesn't understand why atoms | |
53 ## and strings are concatenated] | |
54 ## ElseIf Len(SB(1)) Then | |
55 ## t = Array_Const_Wrap(SB(1), sRowSeparator) & vbCr | |
56 ## ElseIf Len(SB(2) & SB(5)) Then | |
57 ## t = SB(2) & SB(5) & vbCr | |
58 ## ElseIf Len(SB(3)) Then | |
59 ## t = vbCr & SB(3) & vbCr | |
60 ## ElseIf Len(SB(4)) Then | |
61 ## t = vbCr & SB(4) | |
62 ## End If | |
63 ## s = s & t | |
64 ## Next | |
65 | |
66 def mergeMatches(l): | |
67 res="" | |
68 cur="" | |
69 for txt,cm,sep,ofun,close,opn,misc in l: | |
70 if txt is not '': | |
71 res+=txt | |
72 continue | |
73 if misc is not '': | |
74 res+=misc | |
75 continue | |
76 if cm is not '': | |
77 cur=cm | |
78 elif sep is not '': | |
79 cur=sep | |
80 elif ofun is not '': | |
81 cur=ofun | |
82 elif close is not '': | |
83 cur=close | |
84 elif opn is not '': | |
85 cur=opn | |
86 if res is not '': | |
87 yield res | |
88 res='' | |
89 yield cur | |
90 | |
91 toks=list(mergeMatches(l)) |