annotate xldiff.py @ 51:44fea514ca45

foo
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Sun, 19 Feb 2023 16:44:06 +0000
parents 27d73fcbd781
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
21
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/usr/bin/python3
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 '''Compare two excel books
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 Courtesy of https://kanoki.org/2019/02/26/compare-two-excel-files-for-difference-using-python/'''
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4 import pandas as pd
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5 import numpy as np
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6 import math, sys
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7 def usage():
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8 print('''Usage: xldiff.py f1 f2''',file=sys.stderr)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10 if len(sys.argv)==3:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11 f1=sys.argv[1]
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12 f2=sys.argv[2]
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
13 try:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
14 with open(f1) as _:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
15 pass
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
16 except:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
17 print("Can't open %s"%f1,file=sys.stderr)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
18 exit(2)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
19 try:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
20 with open(f2) as _:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
21 pass
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
22 except:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
23 print("Can't open %s"%f2,file=sys.stderr)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
24 exit(2)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
25 else:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
26 usage()
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
27 exit(1)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
28
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
29 def check(s1,s2):
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
30 lv=(s1.columns==s2.columns)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
31 ii=np.where(lv==False)[0]
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
32 for i in ii:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
33 if not(pd.isnull(s1.columns[i]) and
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
34 pd.isnull(s2.columns[i])):
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
35 print('',"0,%s: |%s| ~= |%s|"%(i,
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
36 s1.columns[i],
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
37 s2.columns[i]))
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
38 cv=(s1.values==s2.values)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
39 rows,cols=np.where(cv==False)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
40 for i,j in zip(rows,cols):
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
41 if not(pd.isnull(s1.iloc[i,j]) and
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
42 pd.isnull(s2.iloc[i,j])):
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
43 print('',"%s,%s: |%s| ~= |%s|"%(i+1,j,
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
44 s1.iloc[i,j],
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
45 s2.iloc[i,j]))
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
46
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
47 b1=pd.read_excel(f1,None)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
48 b2=pd.read_excel(f2,None)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
49
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
50 keys=set(b1.keys()).union(set(b2.keys()))
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
51 for k in keys:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
52 if k not in b2:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
53 print("Sheet %s in %s but not in %s"%(k,f1,f2))
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
54 continue
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
55 if k not in b1:
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
56 print("Sheet %s in %s but not in %s"%(k,f2,f1))
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
57 continue
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
58 s1=b1[k]
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
59 s2=b2[k]
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
60 print(k)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
61 if not s1.equals(s2):
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
62 print(" (Sheet not of equal 'shape', may be false positive)")
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
63 check(s1,s2)
27d73fcbd781 simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
64