Mercurial > hg > cc > cirrus_work
annotate bin/fix_key.py @ 86:3a2ae6057242
handle double .www, more keep-me chars
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 28 Aug 2023 21:07:43 +0100 |
parents | |
children | df231c95e4aa |
rev | line source |
---|---|
86
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
1 #!/usr/bin/python3 |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
2 from percent_encode import percent_encode |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
3 from urllib.parse import quote, unquote |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
4 import sys |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
5 |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
6 # From RFC-3986: |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
7 # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
8 # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
9 # / "*" / "+" / "," / ";" / "=" |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
10 # But # _is_ escaped in Java surt results |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
11 # and additionally " \ : < = > ? \ ^ _ ` { | } are not escaped |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
12 |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
13 # Also, Java surt strips _all_ leading 'www.', |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
14 # where python3 surt only strips the first one. |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
15 |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
# Characters that quote() must leave unescaped when a key is re-encoded.
# This is the RFC-3986 gen-delims/sub-delims set minus '#', plus the extra
# characters that, per the notes at the top of this file, Java surt does
# not escape.
KEEP_UNESCAPED = '!"$&\'()*+,/:;<=>?@[\\]^_`{|}'


def strip_leading_www(line):
    """Return *line* with every ',www' directly before the first ')' removed.

    Java surt strips all leading 'www.' labels where python3 surt strips
    only the first, so any that remain show up as one or more ',www'
    immediately before the ')' that closes the host part of the key.

    A line that contains no ')' is returned unchanged (the original code
    raised ValueError from str.index in that case).
    """
    ploc = line.find(')')
    if ploc < 0:
        return line
    while line.endswith(',www', 0, ploc):
        line = line[:ploc - 4] + line[ploc:]
        # Nothing before ploc was a ')', so the first ')' has simply moved
        # four characters to the left.
        ploc -= 4
    return line


def fix_line(line):
    """Return one index line with its key normalised.

    Steps, in order:
      1. strip repeated ',www' labels (see strip_leading_www);
      2. if the line contains '%', percent-decode the key (the text before
         the first tab), re-encode it leaving KEEP_UNESCAPED alone, and
         lower-case the result.

    Everything after the first tab is passed through verbatim, so the
    usual key<TAB>wt<TAB>ts lines produce exactly what they did before,
    and lines with a different number of fields no longer raise.
    Lines without '%' are returned unchanged apart from step 1.
    """
    line = strip_leading_www(line)
    if '%' not in line:
        return line
    key, tab, rest = line.partition('\t')
    if not tab:
        # Key-only line: keep the line terminator out of the re-encoding,
        # otherwise quote() would turn the newline into '%0A'.
        key = line.rstrip('\r\n')
        rest = line[len(key):]
    # NOTE(review): 'percent' is not a built-in codec error handler;
    # presumably the percent_encode import at the top of the file
    # registers it -- confirm.  It is only consulted when the decoded
    # bytes are not valid UTF-8.
    decoded = unquote(key, errors='percent')
    return quote(decoded, safe=KEEP_UNESCAPED).lower() + tab + rest


def main(argv=None):
    """Filter the file named by the first argument onto standard output.

    argv -- argument list without the program name; defaults to
            sys.argv[1:].  Arguments after the first are ignored, as
            before.

    Returns 0 on success, or 2 (after a usage message on stderr) when no
    file name was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.stderr.write('usage: fix_key.py FILE\n')
        return 2
    with open(args[0], "r") as f:
        sys.stdout.writelines(fix_line(line) for line in f)
    return 0


if __name__ == '__main__':
    sys.exit(main())
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
30 |