Mercurial > hg > cc > azure
changeset 66:b04870ab3035
don't over-count duplicate URIs in multiple properties, produce composite keys instead
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 04 Jun 2020 16:10:55 +0000 |
parents | e1f61f94b196 |
children | 13182e98a1ab |
files | master/src/wecu/sac_schemes.py |
diffstat | 1 files changed, 37 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/master/src/wecu/sac_schemes.py Thu Jun 04 12:08:29 2020 +0000 +++ b/master/src/wecu/sac_schemes.py Thu Jun 04 16:10:55 2020 +0000 @@ -9,7 +9,6 @@ 2: one dictionary indexed by ".".join(keys)''' import sys, json, regex -from collections.abc import Iterable if len(sys.argv)>1 and sys.argv[1]=='-d': sys.argv.pop(1) @@ -28,13 +27,15 @@ EMPTY='' +D={} + def walk(o,f,r,path=None): - '''Apply f to every key+leaf of a json object in region r''' + '''Apply f to every key+leaf of a json object reached via p in region r''' if isinstance(o,dict): for k,v in o.items(): if isinstance(v,dict): walk(v,f,r,(path,k)) - elif isinstance(v,Iterable): + elif isinstance(v,(list,tuple)): walked=False for i in v: if isinstance(i,dict): @@ -47,13 +48,25 @@ if not walked: f(v,k,path,r) else: - f(v,k,path,r) - elif isinstance(o,Iterable): + kk=f(v,k,path,r,o) + if kk is not None: + #print(v,D,kk,file=sys.stderr) + if v in D: + (rr,pp,jj,ss)=D[v] + D[v]=(rr,pp,(jj,k),ss) + else: + D[v]=kk + if D: + for kk in D.values(): + res[kk]=res.get(kk,0)+1 + D.clear() + elif isinstance(o,(list,tuple)): for i in o: walk(i,f,r,path) -def pp(v,k,p,r): - '''Uses nested dictionaries''' +def pp(v,k,p,r,parent=None): + '''Handle a leaf value v, with key k in parent, under path p from r + Uses nested dictionaries''' if isinstance(v,str): m=SCHEME.match(v) if m is not None: @@ -70,8 +83,9 @@ d=d.setdefault(k,dict()) d[s]=d.get(s,0)+1 -def pp_tuple(v,k,p,r): - '''Uses one dict and 4-tuple''' +def pp_tuple(v,k,p,r,parent=None): + '''Handle a leaf value v, with key k in parent, under path p from r + Uses one dict and 4-tuple''' if isinstance(v,str): m=SCHEME.match(v) if m is not None: @@ -84,14 +98,18 @@ if p is not None: assert p[0] is None p=p[1] - k=(r,p,k,s) - res[k]=res.get(k,0)+1 + if parent is None: + res[kk]=res.get(kk,0)+1 + else: + return (r,p,k,s) + SEP='\x00' DOT='.' -def pp_concat(v,k,p,r): - '''Uses one dict and one string''' +def pp_concat(v,k,p,r,parent=None): + '''Handle a leaf value v, with key k in parent, under path p from r + Uses one dict and one string''' if isinstance(v,str): m=SCHEME.match(v) if m is not None: @@ -133,6 +151,9 @@ print(EMPTY,end='\t') else: print(DOT,p,sep=EMPTY,end='\t') + while isinstance(k,tuple): + print(k[1],end='&') + k=k[0] print(k,end='\t') print(s,c,sep='\t') @@ -152,12 +173,15 @@ if len(sys.argv)==2: res=dict() if sys.argv[1]=='1': + print('using tuple',file=sys.stderr) pp=pp_tuple dump=dump_tuple else: + print('using concat',file=sys.stderr) pp=pp_concat dump=dump_concat else: + print('using nested',file=sys.stderr) res=dict((r,dict()) for r in PATHS.keys()) def main():