annotate char_hist.py @ 63:721bd7a04efb

bigger regexp building
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 14 Dec 2023 10:16:28 +0000
parents 44fea514ca45
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
51
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/usr/bin/python3
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 import sys
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# Module-level tallies that main() reads and updates.
h = {}        # character -> number of occurrences seen on stdin
mc = 0        # highest code point seen so far
nonAscii = 0  # number of input lines containing a code point above 127

# Code points to watch for: main() echoes to stderr every input line that
# contains at least one of them.  The command line is only interpreted when
# this file is run as a script, so importing the module never fails with a
# ValueError on the (unrelated) arguments of the importing program.
if __name__ == '__main__' and len(sys.argv) > 1:
    showMe = [int(a) for a in sys.argv[1:]]
else:
    showMe = []
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
def main():
    """Print a character histogram of the text read from standard input.

    Every character of every input line is tallied in the module-level
    dict ``h``; ``mc`` tracks the highest code point seen and ``nonAscii``
    the number of lines containing at least one code point above 127.
    Lines containing any code point listed in ``showMe`` are echoed to
    standard error as they are read.

    After the input is exhausted the counts for code points 0-127 are
    printed (one ``code char count`` line each, zero counts included),
    followed by a separator.  If any line held non-ASCII characters, the
    number of such lines is printed, followed by one line per non-ASCII
    character actually seen, in code point order.
    """
    global mc, nonAscii
    # Set membership instead of scanning the showMe list for every character.
    wanted = frozenset(showMe)
    for line in sys.stdin:
        for ch in line:
            h[ch] = h.get(ch, 0) + 1
        # The per-line checks only need the distinct code points of the line.
        codes = {ord(ch) for ch in line}
        if codes:
            mc = max(mc, max(codes))
        if any(code > 127 for code in codes):
            nonAscii += 1
        if not wanted.isdisjoint(codes):
            sys.stderr.write(line)

    for i in range(128):
        print(i, chr(i), h.get(chr(i), 0))
    print('-------')
    if nonAscii > 0:
        print("%s lines with one or more non-ascii characters" % nonAscii)
        # Visit only the characters that occurred rather than every code
        # point between 128 and mc; sorting strings of length one orders
        # them by code point, so the output order is unchanged.
        for ch in sorted(c for c in h if ord(c) > 127):
            print(ord(ch), ch, h[ch])
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
39
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# Script entry point: build and print the histogram only when executed
# directly, not when the module is imported.
if "__main__" == __name__:
    main()
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
42
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
43