Mercurial > hg > python
annotate xldiff.py @ 69:157f012ffab7 default tip
from local
author | Henry S Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 17 Jan 2025 15:45:26 +0000 |
parents | 27d73fcbd781 |
children |
rev | line source |
---|---|
21
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
1 #!/usr/bin/python3 |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
2 '''Compare two excel books |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
3 Courtesy of https://kanoki.org/2019/02/26/compare-two-excel-files-for-difference-using-python/''' |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
4 import pandas as pd |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
5 import numpy as np |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
6 import math, sys |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
7 def usage(): |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
8 print('''Usage: xldiff.py f1 f2''',file=sys.stderr) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
9 |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
10 if len(sys.argv)==3: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
11 f1=sys.argv[1] |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
12 f2=sys.argv[2] |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
13 try: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
14 with open(f1) as _: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
15 pass |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
16 except: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
17 print("Can't open %s"%f1,file=sys.stderr) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
18 exit(2) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
19 try: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
20 with open(f2) as _: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
21 pass |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
22 except: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
23 print("Can't open %s"%f2,file=sys.stderr) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
24 exit(2) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
25 else: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
26 usage() |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
27 exit(1) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
28 |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
29 def check(s1,s2): |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
30 lv=(s1.columns==s2.columns) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
31 ii=np.where(lv==False)[0] |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
32 for i in ii: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
33 if not(pd.isnull(s1.columns[i]) and |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
34 pd.isnull(s2.columns[i])): |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
35 print('',"0,%s: |%s| ~= |%s|"%(i, |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
36 s1.columns[i], |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
37 s2.columns[i])) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
38 cv=(s1.values==s2.values) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
39 rows,cols=np.where(cv==False) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
40 for i,j in zip(rows,cols): |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
41 if not(pd.isnull(s1.iloc[i,j]) and |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
42 pd.isnull(s2.iloc[i,j])): |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
43 print('',"%s,%s: |%s| ~= |%s|"%(i+1,j, |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
44 s1.iloc[i,j], |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
45 s2.iloc[i,j])) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
46 |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
47 b1=pd.read_excel(f1,None) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
48 b2=pd.read_excel(f2,None) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
49 |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
50 keys=set(b1.keys()).union(set(b2.keys())) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
51 for k in keys: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
52 if k not in b2: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
53 print("Sheet %s in %s but not in %s"%(k,f1,f2)) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
54 continue |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
55 if k not in b1: |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
56 print("Sheet %s in %s but not in %s"%(k,f2,f1)) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
57 continue |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
58 s1=b1[k] |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
59 s2=b2[k] |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
60 print(k) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
61 if not s1.equals(s2): |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
62 print(" (Sheet not of equal 'shape', may be false positive)") |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
63 check(s1,s2) |
27d73fcbd781
simple cell-by-cell value comparison of xl spreadsheets
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
64 |