view xldiff.py @ 31:1252f8100c6a

printouts to aid further testing
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 29 Jul 2021 12:44:00 +0100
parents 27d73fcbd781
children
line wrap: on
line source

#!/usr/bin/python3
'''Compare two excel books
   Courtesy of https://kanoki.org/2019/02/26/compare-two-excel-files-for-difference-using-python/'''
import pandas as pd
import numpy as np
import math, sys
def usage():
  print('''Usage: xldiff.py f1 f2''',file=sys.stderr)

if len(sys.argv)==3:
  f1=sys.argv[1]
  f2=sys.argv[2]
  try:
    with open(f1) as _:
      pass
  except:
    print("Can't open %s"%f1,file=sys.stderr)
    exit(2)
  try:
    with open(f2) as _:
      pass
  except:
    print("Can't open %s"%f2,file=sys.stderr)
    exit(2)
else:
  usage()
  exit(1)

def check(s1,s2):
  lv=(s1.columns==s2.columns)
  ii=np.where(lv==False)[0]
  for i in ii:
    if not(pd.isnull(s1.columns[i]) and
           pd.isnull(s2.columns[i])):
      print('',"0,%s: |%s| ~= |%s|"%(i,
                                   s1.columns[i],
                                   s2.columns[i]))
  cv=(s1.values==s2.values)
  rows,cols=np.where(cv==False)
  for i,j in zip(rows,cols):
    if not(pd.isnull(s1.iloc[i,j]) and
           pd.isnull(s2.iloc[i,j])):
      print('',"%s,%s: |%s| ~= |%s|"%(i+1,j,
                                      s1.iloc[i,j],
                                      s2.iloc[i,j]))

b1=pd.read_excel(f1,None)
b2=pd.read_excel(f2,None)

keys=set(b1.keys()).union(set(b2.keys()))
for k in keys:
  if k not in b2:
    print("Sheet %s in %s but not in %s"%(k,f1,f2))
    continue
  if k not in b1:
    print("Sheet %s in %s but not in %s"%(k,f2,f1))
    continue
  s1=b1[k]
  s2=b2[k]
  print(k)
  if not s1.equals(s2):
    print(" (Sheet not of equal 'shape', may be false positive)")
  check(s1,s2)