Mercurial > hg > python
comparison xldiff.py @ 21:27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Sun, 31 Jan 2021 19:36:31 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
20:b1ec44d254c6 | 21:27d73fcbd781 |
---|---|
1 #!/usr/bin/python3 | |
2 '''Compare two excel books | |
3 Courtesy of https://kanoki.org/2019/02/26/compare-two-excel-files-for-difference-using-python/''' | |
4 import pandas as pd | |
5 import numpy as np | |
6 import math, sys | |
7 def usage(): | |
8 print('''Usage: xldiff.py f1 f2''',file=sys.stderr) | |
9 | |
10 if len(sys.argv)==3: | |
11 f1=sys.argv[1] | |
12 f2=sys.argv[2] | |
13 try: | |
14 with open(f1) as _: | |
15 pass | |
16 except: | |
17 print("Can't open %s"%f1,file=sys.stderr) | |
18 exit(2) | |
19 try: | |
20 with open(f2) as _: | |
21 pass | |
22 except: | |
23 print("Can't open %s"%f2,file=sys.stderr) | |
24 exit(2) | |
25 else: | |
26 usage() | |
27 exit(1) | |
28 | |
29 def check(s1,s2): | |
30 lv=(s1.columns==s2.columns) | |
31 ii=np.where(lv==False)[0] | |
32 for i in ii: | |
33 if not(pd.isnull(s1.columns[i]) and | |
34 pd.isnull(s2.columns[i])): | |
35 print('',"0,%s: |%s| ~= |%s|"%(i, | |
36 s1.columns[i], | |
37 s2.columns[i])) | |
38 cv=(s1.values==s2.values) | |
39 rows,cols=np.where(cv==False) | |
40 for i,j in zip(rows,cols): | |
41 if not(pd.isnull(s1.iloc[i,j]) and | |
42 pd.isnull(s2.iloc[i,j])): | |
43 print('',"%s,%s: |%s| ~= |%s|"%(i+1,j, | |
44 s1.iloc[i,j], | |
45 s2.iloc[i,j])) | |
46 | |
47 b1=pd.read_excel(f1,None) | |
48 b2=pd.read_excel(f2,None) | |
49 | |
50 keys=set(b1.keys()).union(set(b2.keys())) | |
51 for k in keys: | |
52 if k not in b2: | |
53 print("Sheet %s in %s but not in %s"%(k,f1,f2)) | |
54 continue | |
55 if k not in b1: | |
56 print("Sheet %s in %s but not in %s"%(k,f2,f1)) | |
57 continue | |
58 s1=b1[k] | |
59 s2=b2[k] | |
60 print(k) | |
61 if not s1.equals(s2): | |
62 print(" (Sheet not of equal 'shape', may be false positive)") | |
63 check(s1,s2) | |
64 |