changeset 21:27d73fcbd781

simple cell-by-cell value comparison of xl spreadsheets
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Sun, 31 Jan 2021 19:36:31 +0000
parents b1ec44d254c6
children 69a494ef1a58
files xldiff.py
diffstat 1 files changed, 64 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xldiff.py	Sun Jan 31 19:36:31 2021 +0000
@@ -0,0 +1,64 @@
+#!/usr/bin/python3
+'''Compare two excel books
+   Courtesy of https://kanoki.org/2019/02/26/compare-two-excel-files-for-difference-using-python/'''
+import pandas as pd
+import numpy as np
+import math, sys
+def usage():
+  print('''Usage: xldiff.py f1 f2''',file=sys.stderr)
+
+if len(sys.argv)==3:
+  f1=sys.argv[1]
+  f2=sys.argv[2]
+  try:
+    with open(f1) as _:
+      pass
+  except:
+    print("Can't open %s"%f1,file=sys.stderr)
+    exit(2)
+  try:
+    with open(f2) as _:
+      pass
+  except:
+    print("Can't open %s"%f2,file=sys.stderr)
+    exit(2)
+else:
+  usage()
+  exit(1)
+
+def check(s1,s2):
+  lv=(s1.columns==s2.columns)
+  ii=np.where(lv==False)[0]
+  for i in ii:
+    if not(pd.isnull(s1.columns[i]) and
+           pd.isnull(s2.columns[i])):
+      print('',"0,%s: |%s| ~= |%s|"%(i,
+                                   s1.columns[i],
+                                   s2.columns[i]))
+  cv=(s1.values==s2.values)
+  rows,cols=np.where(cv==False)
+  for i,j in zip(rows,cols):
+    if not(pd.isnull(s1.iloc[i,j]) and
+           pd.isnull(s2.iloc[i,j])):
+      print('',"%s,%s: |%s| ~= |%s|"%(i+1,j,
+                                      s1.iloc[i,j],
+                                      s2.iloc[i,j]))
+
+b1=pd.read_excel(f1,None)
+b2=pd.read_excel(f2,None)
+
+keys=set(b1.keys()).union(set(b2.keys()))
+for k in keys:
+  if k not in b2:
+    print("Sheet %s in %s but not in %s"%(k,f1,f2))
+    continue
+  if k not in b1:
+    print("Sheet %s in %s but not in %s"%(k,f2,f1))
+    continue
+  s1=b1[k]
+  s2=b2[k]
+  print(k)
+  if not s1.equals(s2):
+    print(" (Sheet not of equal 'shape', may be false positive)")
+  check(s1,s2)
+