annotate bin/fix_key.py @ 111:ab3d547f3e76

one uncommitted fix from quentin
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 22 Sep 2023 15:27:28 +0100
parents df231c95e4aa
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
86
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/usr/bin/python3
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 from percent_encode import percent_encode
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 from urllib.parse import quote, unquote
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4 import sys
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6 # From RFC-3986:
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7 # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8 # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9 # / "*" / "+" / "," / ";" / "="
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10 # But # _is_ escaped in Java surt results
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11 # and additionally " : < = > ? \ ^ _ ` { | } are not escaped
87
df231c95e4aa final keystroke fixes,
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 86
diff changeset
12 # Note also that quote never quotes - . _ ~ (and leaves / alone only while / is in safe, which is the default)
86
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
13
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
14 # Also, Java surt strips _all_ leading 'www.',
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
15 # where python3 surt only strips the first one.
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
16
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
17 with open(sys.argv[1],"r") as f:
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
18 for l in f:
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
19 while l.endswith(',www',0,ploc:=l.index(')')):
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
20 l=l[:ploc-4]+l[ploc:]
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
21 if '%' in l:
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
22 (key,wt,ts)=l.split('\t')
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
23 sys.stdout.write(quote(unquote(key,errors='percent'),
87
df231c95e4aa final keystroke fixes,
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 86
diff changeset
24 safe='!"$&\'()*+,:;<=>?@[\\]^`{|}').lower())
86
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
25 sys.stdout.write('\t')
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
26 sys.stdout.write(wt)
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
27 sys.stdout.write('\t')
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
28 sys.stdout.write(ts)
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
29 else:
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
30 sys.stdout.write(l)
3a2ae6057242 handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
31