Mercurial > hg > ooxml
annotate excel.py @ 74:7827e686be75 default tip
refactoring again...
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Wed, 05 Jul 2017 18:26:27 +0100 |
parents | 4bd5de7ac247 |
children |
rev | line source |
---|---|
70
0003fe7b6b67
beginning work on class structure for excel annotation
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
1 #!/usr/bin/python3 |
0003fe7b6b67
beginning work on class structure for excel annotation
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
2 '''Class model for analysis of Excel spreadsheets''' |
0003fe7b6b67
beginning work on class structure for excel annotation
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
3 from jsonweb.encode import to_object, dumper |
71
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
4 from jsonweb.decode import from_object, loader |
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
5 import json |
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
6 import eDecoder |
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
7 |
73 | 8 import re |
9 | |
70
0003fe7b6b67
beginning work on class structure for excel annotation
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
# Python 2/3 compatibility shim: `string_types` is the type used by the
# isinstance() checks below to recognise strings.
try:
    basestring
except NameError:
    # No basestring builtin (Python 3): plain str is the string type.
    string_types = str
else:
    string_types = basestring
0003fe7b6b67
beginning work on class structure for excel annotation
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
14 |
71
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
@from_object()
@to_object(exclude_nulls=True)
class Book(object):
    '''Top-level container: a source string, a list of Sheets and a list
    of formats.'''

    def __init__(self, source, sheets=[], formats=[]):
        '''Build a book.

        source  -- a string; stored unchanged as self.source
        sheets  -- iterable of Sheet instances, copied into a new list
        formats -- iterable, copied into a new list (elements unchecked)
        '''
        assert isinstance(source, string_types)
        self.source = source
        # Copy first, so the shared default lists are never stored or mutated.
        sheet_list = list(sheets)
        assert all(isinstance(entry, Sheet) for entry in sheet_list)
        self.sheets = sheet_list
        format_list = list(formats)
        self.formats = format_list

    def addSheet(self, sheet):
        '''Append one Sheet to self.sheets.'''
        assert isinstance(sheet, Sheet)
        self.sheets.append(sheet)
0003fe7b6b67
beginning work on class structure for excel annotation
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
30 |
71
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
@from_object()
@to_object(exclude_nulls=True, suppress=["book"])
class Sheet(object):
    '''A named sheet holding three lists of regions: tables, docs and misc.

    The back-reference to the owning Book is kept in self.book and is
    suppressed from the encoded output.
    '''

    def __init__(self, name, book=None, tables=[], docs=[], misc=[]):
        '''Build a sheet and, if a book is given, register with it.

        name   -- a string; stored as self.name
        book   -- optional Book; when given, book.addSheet(self) is called
        tables -- iterable of Table instances, copied into a new list
        docs   -- iterable of Region instances, copied into a new list
        misc   -- iterable of Region instances, copied into a new list
        '''
        assert isinstance(name, string_types)
        self.name = name
        tables = list(tables)
        assert all(isinstance(s, Table) for s in tables)
        self.tables = tables
        docs = list(docs)
        assert all(isinstance(s, Region) for s in docs)
        self.docs = docs
        misc = list(misc)
        assert all(isinstance(s, Region) for s in misc)
        self.misc = misc
        if book is not None:
            assert isinstance(book, Book)
            book.addSheet(self)
        self.book = book

    def addTable(self, table):
        '''Append a region to self.tables.'''
        assert isinstance(table, Region)
        self.tables.append(table)

    def addRegion(self, region):
        '''Attach a region that names this sheet as its parent.

        Region.__init__ accepts a Sheet as parent and then calls
        parent.addRegion(self); without this method that call raised
        AttributeError for every region or table created on a sheet.
        '''
        assert isinstance(region, Region)
        if isinstance(region, Table):
            self.addTable(region)
        else:
            # NOTE(review): nothing visible says whether a non-Table region
            # belongs in `docs` or `misc`; it is filed under `misc` here --
            # confirm the intended destination.
            self.misc.append(region)
0003fe7b6b67
beginning work on class structure for excel annotation
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
54 |
71
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
@from_object()
@to_object(exclude_nulls=True, suppress=["parent"])
class Region(object):
    '''A named area whose extent is given by a range string.

    The range string is parsed into a Range and kept as self.bbox; the
    optional parent back-reference is suppressed from the encoded output.
    '''

    def __init__(self, name, bbox, parent=None):
        '''Build a region and register it with its parent, if any.

        name   -- a string; stored as self.name
        bbox   -- a string handed to Range(); raises ValueError if Range
                  rejects it
        parent -- optional Region or Sheet; its addRegion(self) is called
        '''
        assert isinstance(name, string_types)
        assert_bbox_is_string = isinstance(bbox, string_types)
        self.name = name
        assert assert_bbox_is_string
        self.bbox = Range(bbox)
        if parent is None:
            # Nothing to register with.
            self.parent = None
            return
        assert isinstance(parent, (Region, Sheet))
        self.parent = parent
        parent.addRegion(self)
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
68 |
class CompoundRegion(Region):
    '''A Region that additionally keeps a list of child Regions in
    self.content.'''

    def __init__(self, name, bbox, content, parent=None):
        '''Initialise the Region part first, then store a copy of `content`.

        content -- iterable of Region instances
        '''
        Region.__init__(self, name, bbox, parent)
        children = list(content)
        assert all(isinstance(child, Region) for child in children)
        self.content = children

    def addRegion(self, content):
        '''Append one Region to self.content.'''
        assert isinstance(content, Region)
        self.content.append(content)
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
79 |
class SimpleRegion(Region):
    '''A Region with no child list, carrying a hasNulls flag.'''

    def __init__(self, name, bbox, hasNulls=False, parent=None):
        '''Initialise the Region part and record `hasNulls` unchanged.'''
        Region.__init__(self, name=name, bbox=bbox, parent=parent)
        self.hasNulls = hasNulls
84 | |
# One cell reference, four groups: optional '$', column letters,
# optional '$', row number without a leading zero.
cell = r'(\$?)([A-Z]+)(\$?)([1-9][0-9]*)'
# A cell, optionally followed by ':' and a second cell.  The optional part
# is itself a group, so the second cell's four groups come after it.
RANGE = re.compile('%s(:%s)?' % (cell, cell))
# Indices used to pick apart the nested pairs stored by Range.
C, R = 0, 1  # Column, Row
F, V = 0, 1  # Fixed ($), Value
@to_object()
class Range(object):
    '''A parsed cell or cell-range string such as "A1" or "$A$1:B7".

    Attributes:
      tl       -- ((fixed, column letters), (fixed, row int)) for the first
                  cell; `fixed` is True when the part was prefixed with '$'
      br       -- same shape for the second cell, or None for a single cell
      dim      -- 0 for a single cell, 1 when both cells share a column or
                  a row, 2 otherwise
      vertical -- True when the cells share a column, False when they share
                  only a row, None when dim is 0 or 2
    '''

    def __init__(self, estr):
        '''Parse `estr`; raise ValueError if it is not exactly a range.'''
        m = RANGE.match(estr)
        # RANGE.match only anchors at the start, so also require that the
        # match consumed the whole string; otherwise trailing text (e.g.
        # "A1:B2junk" or "A1:") would be silently dropped.
        if m is None or m.end() != len(estr):
            raise ValueError("string %s doesn't represent a range" % estr)
        rgrps = m.groups()
        self.tl = ((rgrps[0] == '$', rgrps[1]), (rgrps[2] == '$', int(rgrps[3])))
        if rgrps[4] is None:
            # No ":second-cell" part: a single cell.
            self.br = None
            self.dim = 0
            self.vertical = None
        else:
            self.br = ((rgrps[5] == '$', rgrps[6]), (rgrps[7] == '$', int(rgrps[8])))
            if self.tl[C][V] == self.br[C][V]:
                # Same column letters: one column wide.
                self.dim = 1
                self.vertical = True
            elif self.tl[R][V] == self.br[R][V]:
                # Same row number: one row high.
                self.dim = 1
                self.vertical = False
            else:
                self.dim = 2
                self.vertical = None

    def __str__(self):
        '''Rebuild the range string, including any '$' markers.'''
        tls = _cellStr(self.tl[C]) + _cellStr(self.tl[R])
        if self.br is None:
            return tls
        return '%s:%s' % (tls, _cellStr(self.br[C]) + _cellStr(self.br[R]))
121 | |
def _cellStr(dvp):
    '''Format one (fixed, value) pair: the value as text, with a leading
    '$' when `fixed` is true.'''
    prefix = ''
    if dvp[0]:
        prefix = '$'
    return prefix + str(dvp[1])
125 | |
@from_object()
# "parent" is suppressed as well as "sheet": the sheet passed to __init__ is
# stored by Region.__init__ under the attribute name `parent`, and the sheet
# in turn lists this table, so leaving it unsuppressed would make the
# encoder follow a cycle.
@to_object(exclude_nulls=True, suppress=["sheet", "parent"])
class Table(CompoundRegion):
    '''A compound region whose children are split into labels and data.

    self.content (from CompoundRegion) holds every child; self.labels and
    self.data hold the same children partitioned by kind.
    '''

    def __init__(self, name, bbox, data, shape='mixed', sheet=None, labels=[]):
        '''Build a table.

        name, bbox -- passed to CompoundRegion / Region
        data       -- iterable of Regions; becomes self.data and seeds
                      self.content
        shape      -- one of 'columns', 'rows', 'mixed'
        sheet      -- optional Sheet, passed on as the region's parent
        labels     -- iterable of Label instances, or None; each label is
                      added to both self.labels and self.content
        '''
        data = list(data)
        if sheet is not None:
            assert isinstance(sheet, Sheet)
        CompoundRegion.__init__(self, name, bbox, data, sheet)
        assert shape in ('columns', 'rows', 'mixed')
        self.shape = shape  # if columns or rows, that's what correspond to DB columns
        self.data = data
        if labels is None:
            self.labels = None
        else:
            labels = list(labels)
            assert all(isinstance(s, Label) for s in labels)
            # self.labels must exist, and be a list distinct from the one
            # being iterated, before addLabel appends to it.
            self.labels = []
            for label in labels:
                self.addLabel(label)

    def addLabel(self, label):
        '''Record `label` in self.labels and in self.content.'''
        assert isinstance(label, Label)
        if self.labels is None:
            # Table was built with labels=None; start the list lazily.
            self.labels = []
        self.labels.append(label)
        # Call the base implementation directly: Table.addRegion dispatches
        # back to addLabel and would recurse.
        CompoundRegion.addRegion(self, label)

    def addData(self, data):
        '''Record `data` in self.data and in self.content.'''
        # NOTE(review): the original asserted isinstance(data, Data), but no
        # `Data` class is defined or imported in the visible module; any
        # Region that is not a Label is accepted as data -- confirm.
        assert isinstance(data, Region) and not isinstance(data, Label)
        self.data.append(data)
        CompoundRegion.addRegion(self, data)

    def addRegion(self, region):
        '''Dispatch a child to addLabel or addData according to its type.'''
        assert isinstance(region, Region)
        if isinstance(region, Label):
            self.addLabel(region)
        else:
            self.addData(region)
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
160 |
class Label(Region):
    '''Abstract base class for labels; adds nothing to Region itself.

    See {Compound,Simple}Label for the classes that are instantiated.
    '''
164 | |
@from_object()
@to_object(exclude_nulls=True, suppress=["parent"])
class CompoundLabel(Label, CompoundRegion):
    '''A label made up of other labels, held in self.content.'''

    def __init__(self, name, bbox, content, parent=None):
        '''Build a compound label.

        content -- iterable of Label instances
        '''
        # Materialise once: checking a one-shot iterable with all() would
        # exhaust it and CompoundRegion would then receive nothing.
        content = list(content)
        assert all(isinstance(s, Label) for s in content)
        CompoundRegion.__init__(self, name, bbox, content, parent)
171 | |
@from_object()
@to_object(exclude_nulls=True, suppress=["parent"])
class SimpleLabel(Label, SimpleRegion):
    '''A label with no children, carrying a `type` string.'''

    def __init__(self, name, bbox, type='string', parent=None):
        '''Build a simple label.

        type   -- a string; stored unchanged as self.type
        parent -- optional parent, forwarded to SimpleRegion
        '''
        # Pass parent by keyword: SimpleRegion's third positional parameter
        # is hasNulls, so a positional parent landed in the wrong slot and
        # the label was never attached to its parent.
        SimpleRegion.__init__(self, name, bbox, parent=parent)
        assert isinstance(type, string_types)
        self.type = type
179 | |
71
54bb53434887
begin work on decoder that allows identifiers as keys and values
Henry S. Thompson <ht@markup.co.uk>
parents:
70
diff
changeset
|
def lt(filename):
    '''Read `filename` as text and decode it with jsonweb's loader, using
    eDecoder.eDecoder as the decoder class; return the decoded result.'''
    with open(filename, 'r') as stream:
        text = stream.read()
    return loader(text, cls=eDecoder.eDecoder)