comparison xldiff.py @ 21:27d73fcbd781

simple cell-by-cell value comparison of xl spreadsheets
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Sun, 31 Jan 2021 19:36:31 +0000
parents
children
comparison
equal deleted inserted replaced
20:b1ec44d254c6 21:27d73fcbd781
1 #!/usr/bin/python3
2 '''Compare two excel books
3 Courtesy of https://kanoki.org/2019/02/26/compare-two-excel-files-for-difference-using-python/'''
4 import pandas as pd
5 import numpy as np
6 import math, sys
7 def usage():
8 print('''Usage: xldiff.py f1 f2''',file=sys.stderr)
9
10 if len(sys.argv)==3:
11 f1=sys.argv[1]
12 f2=sys.argv[2]
13 try:
14 with open(f1) as _:
15 pass
16 except:
17 print("Can't open %s"%f1,file=sys.stderr)
18 exit(2)
19 try:
20 with open(f2) as _:
21 pass
22 except:
23 print("Can't open %s"%f2,file=sys.stderr)
24 exit(2)
25 else:
26 usage()
27 exit(1)
28
29 def check(s1,s2):
30 lv=(s1.columns==s2.columns)
31 ii=np.where(lv==False)[0]
32 for i in ii:
33 if not(pd.isnull(s1.columns[i]) and
34 pd.isnull(s2.columns[i])):
35 print('',"0,%s: |%s| ~= |%s|"%(i,
36 s1.columns[i],
37 s2.columns[i]))
38 cv=(s1.values==s2.values)
39 rows,cols=np.where(cv==False)
40 for i,j in zip(rows,cols):
41 if not(pd.isnull(s1.iloc[i,j]) and
42 pd.isnull(s2.iloc[i,j])):
43 print('',"%s,%s: |%s| ~= |%s|"%(i+1,j,
44 s1.iloc[i,j],
45 s2.iloc[i,j]))
46
47 b1=pd.read_excel(f1,None)
48 b2=pd.read_excel(f2,None)
49
50 keys=set(b1.keys()).union(set(b2.keys()))
51 for k in keys:
52 if k not in b2:
53 print("Sheet %s in %s but not in %s"%(k,f1,f2))
54 continue
55 if k not in b1:
56 print("Sheet %s in %s but not in %s"%(k,f2,f1))
57 continue
58 s1=b1[k]
59 s2=b2[k]
60 print(k)
61 if not s1.equals(s2):
62 print(" (Sheet not of equal 'shape', may be false positive)")
63 check(s1,s2)
64