# HG changeset patch
# User Henry S. Thompson
# Date 1657126853 -3600
# Node ID f5e2211b50bd3174bc15f7c6df469c98712a0d18
# Parent cac9586291ad919a1a62f5aa0735c02201087e2c
do whole line

diff -r cac9586291ad -r f5e2211b50bd bin/uniq_merge.py
--- a/bin/uniq_merge.py Mon Jul 04 18:14:41 2022 +0100
+++ b/bin/uniq_merge.py Wed Jul 06 18:00:53 2022 +0100
@@ -1,16 +1,13 @@
 #!/usr/bin/env python3
-# Merge counts by key from the output of "uniq -c" and sort in descending order
+# Merge counts by key from the output of "uniq -c" (or sus) and sort in descending order
 # An alternative to sus when the scale is too big for the initial sort, or if uniq -c already does a lot
 # of the work
 # Usage: ... | uniq -c | uniq-merge.py
 import sys
-s={}
+from collections import defaultdict
+s=defaultdict(int)
 for l in sys.stdin:
-  (i,d)=l.split()
-  i=int(i)
-  if d in s:
-    s[d]+=i
-  else:
-    s[d]=i
+  (i,d)=l.split(maxsplit=1)
+  s[d]+=int(i)
 for (d,n) in sorted(s.items(),key=lambda j:j[1],reverse=True):
-  print('%5d\t%s'%(n,d))
+  sys.stdout.write('%5d\t%s'%(n,d))