annotate bin/uniq_merge.py @ 143:ddff993994be
too clever by half, keys won't work in parallel for e.g. media types
author |
Henry S. Thompson <ht@inf.ed.ac.uk> |
date |
Wed, 20 Oct 2021 15:47:55 +0000 |
parents |
464d2dfb99c9 |
children |
f5e2211b50bd |
rev |
line source |
88
|
#!/usr/bin/env python3
"""Merge counts by key from the output of "uniq -c", sorted by descending count.

An alternative to sus when the scale is too big for the initial sort, or if
uniq -c already does a lot of the work.

Usage: ... | uniq -c | uniq_merge.py
"""
import sys


def merge_counts(lines):
    """Sum the leading counts of "uniq -c"-style lines, grouped by key.

    Each line is expected to look like "<count> <key>", with optional
    leading/trailing whitespace.  Only the first whitespace run after the
    count separates count from key, so keys that themselves contain
    whitespace (e.g. "text/html; charset=utf-8") are kept intact instead of
    breaking the unpacking.

    Args:
        lines: iterable of text lines (e.g. sys.stdin).

    Returns:
        dict mapping key -> total count, in first-seen key order.

    Raises:
        ValueError: if the first field of a non-blank line is not an integer.
    """
    totals = {}
    for line in lines:
        # maxsplit=1: peel off the count only, leave the key untouched.
        fields = line.strip().split(None, 1)
        if not fields:
            # Entirely blank line: carries no count, nothing to merge.
            continue
        count = int(fields[0])
        # "uniq -c" emits a bare count for empty input lines; use '' as key.
        key = fields[1] if len(fields) > 1 else ''
        totals[key] = totals.get(key, 0) + count
    return totals


def format_counts(totals):
    """Return "count<TAB>key" report lines, largest count first.

    Ties keep the order in which the keys appear in *totals*, because
    sorted() is stable even with reverse=True.
    """
    ranked = sorted(totals.items(), key=lambda item: item[1], reverse=True)
    return ['%5d\t%s' % (count, key) for (key, count) in ranked]


def main():
    """Read "uniq -c" output from stdin and print the merged, sorted counts."""
    for row in format_counts(merge_counts(sys.stdin)):
        print(row)


if __name__ == '__main__':
    main()
|