annotate bin/uniq_merge.py @ 158:f5e2211b50bd

do whole line
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 06 Jul 2022 18:00:53 +0100
parents 464d2dfb99c9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
88
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/usr/bin/env python3
158
f5e2211b50bd do whole line
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 88
diff changeset
2 # Merge counts by key from the output of "uniq -c" (or sus) and sort in descending order
88
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 # An alternative to sus when the scale is too big for the initial sort, or if uniq -c already does a lot
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4 # of the work
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5 # Usage: ... | uniq -c | uniq_merge.py
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6 import sys
158
f5e2211b50bd do whole line
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 88
diff changeset
7 from collections import defaultdict
f5e2211b50bd do whole line
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 88
diff changeset
def merge_counts(lines):
    """Sum the leading integer count of each line, grouped by the rest of it.

    Each line is expected to look like ``<count> <key>``, as produced by
    "uniq -c": a whitespace-separated integer followed by the key text.
    Whitespace before the count and between the count and the key is
    discarded; the key keeps any internal and trailing blanks.

    A line holding only a count (what "uniq -c" prints for an empty input
    line) is tallied under the empty key instead of raising on unpacking.
    Lines that are entirely blank carry no count and are skipped.

    Returns a mapping from key (without its line terminator) to total count,
    in first-seen key order.

    Raises ValueError if the first field of a line is not an integer.
    """
    totals = defaultdict(int)
    for line in lines:
        fields = line.split(maxsplit=1)
        if not fields:
            # Nothing but whitespace: there is no count to add.
            continue
        # Drop the terminator so that a final line lacking a newline merges
        # with identical keys seen earlier; format_counts() puts it back.
        key = fields[1].rstrip('\n') if len(fields) == 2 else ''
        totals[key] += int(fields[0])
    return totals


def format_counts(totals):
    """Yield one ``'%5d\\t%s\\n'`` line per key, largest count first.

    The sort is stable, so keys with equal counts keep the order in which
    *totals* iterates over them.
    """
    ranked = sorted(totals.items(), key=lambda item: item[1], reverse=True)
    for key, count in ranked:
        yield '%5d\t%s\n' % (count, key)


def main():
    """Merge the counts read from stdin and write the ranking to stdout."""
    sys.stdout.writelines(format_counts(merge_counts(sys.stdin)))


if __name__ == '__main__':
    main()