comparison parse.py @ 0:ddd5f7539abc

reconstruct the original more or less
author Henry Thompson <ht@markup.co.uk>
date Fri, 24 Mar 2017 20:27:15 +0000
parents
children 20424d7e99e4
comparison
equal deleted inserted replaced
-1:000000000000 0:ddd5f7539abc
1 ''' Tokenise Excel formulae
2 Starting from regexps and concat rules in
3 https://sites.google.com/site/e90e50/random-topics/tool-for-parsing-formulas-in-excel
4 and
5 parser_formule_with_textbox_v01_2003.xla
6 linked to therein'''
7
8 import sys,re
9
10 cw = "[^=\-+*/();:,.$<>^]"
11 #cWW = "[=\-+*/();:,.$<>^]"
12
13 sListSeparator = ","
14 sRowSeparator = ";"
15
16 # If comma is decimal symbol, then \ is used in place of comma in Array.
17 # If semi-colon is decimal symbol, then \ is used in place of semi-colon
18 # in Array. (Semi-colon is row separator)
19 # See https://www.ablebits.com/office-addins-blog/2015/02/25/array-formulas-functions-excel/
20
21 pats=["\"[^\"]*\"|'[^']*'", ## "'[^']*'!",
22 "\{[^}]+}",
23 sListSeparator,
24 cw + "+(?:\." + cw + "+)*\\(",
25 "\\)",
26 "^=|\\(",
27 "."]
28
29 ## They perform the following tasks, in order:
30 ## 1. Represents a text (delimited by double quotes) or a sheet
31 ## name (delimited by single quotes)
32 ## 2. Represents a constant matrix
33 ## 3. Represents a list (function parameter) separator
34 ## 4. Represents a function name followed by an opening parenthesis
35 ## 5. Represents a closing parenthesis
36 ## 6. Represents the beginning of the formula or an opening
37 ## parenthesis (not part of a function)
38 ## 7. Each characters not matched by the previous patterns
39
40 tokPat=re.compile("("+(")|(".join(pats))+")",re.IGNORECASE)
41
42 f="""=IF(I$1>$C$2,VLOOKUP($C52,GP_input!$C$187:$CM$196,VLOOKUP($C$9,lists!$A$34:$B$39,2,FALSE)+BC$4,FALSE),SUMIF('BEX2011'!$C$32:$C$5000,$B$9&"sl"&$B52&$C52&$B53,'BEX2001'!Q$32))"""
43
44 l=tokPat.findall(f)
45
46 ## Visual basic code:
47 ## set M = RE.Execute(s)
48 ## s = ""
49 ## For Each SM In M
50 ## Set SB = SM.SubMatches
51 ## If Len(SB(0) & SB(6)) Then
52 ## t = SB(0) & SB(6) [HST doesn't understand why atoms
53 ## and strings are concatenated]
54 ## ElseIf Len(SB(1)) Then
55 ## t = Array_Const_Wrap(SB(1), sRowSeparator) & vbCr
56 ## ElseIf Len(SB(2) & SB(5)) Then
57 ## t = SB(2) & SB(5) & vbCr
58 ## ElseIf Len(SB(3)) Then
59 ## t = vbCr & SB(3) & vbCr
60 ## ElseIf Len(SB(4)) Then
61 ## t = vbCr & SB(4)
62 ## End If
63 ## s = s & t
64 ## Next
65
66 def mergeMatches(l):
67 res=""
68 cur=""
69 for txt,cm,sep,ofun,close,opn,misc in l:
70 if txt is not '':
71 res+=txt
72 continue
73 if misc is not '':
74 res+=misc
75 continue
76 if cm is not '':
77 cur=cm
78 elif sep is not '':
79 cur=sep
80 elif ofun is not '':
81 cur=ofun
82 elif close is not '':
83 cur=close
84 elif opn is not '':
85 cur=opn
86 if res is not '':
87 yield res
88 res=''
89 yield cur
90
91 toks=list(mergeMatches(l))