Mercurial > hg > cc > cirrus_work
annotate bin/fix_key.py @ 111:ab3d547f3e76
one uncommitted fix from quentin
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 22 Sep 2023 15:27:28 +0100 |
parents | df231c95e4aa |
children |
rev | line source |
---|---|
86
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
1 #!/usr/bin/python3 |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
2 from percent_encode import percent_encode |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
3 from urllib.parse import quote, unquote |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
4 import sys |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
5 |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
6 # From RFC-3986: |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
7 # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
8 # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
9 # / "*" / "+" / "," / ";" / "=" |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
10 # But # _is_ escaped in Java surt results |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
11 # and additionally " : < = > ? \ ^ _ ` { | } are not escaped |
87 | 12 # Note also that quote never quotes - . _ ~ but leaves / alone only when / is in safe (its default)
86
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
13 |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
14 # Also, Java surt strips _all_ leading 'www.', |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
15 # where python3 surt only strips the first one. |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
16 |
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
# Characters quote() must leave unescaped, in addition to its always-safe
# set (ASCII letters, digits and - . _ ~).  '/' has to be listed here:
# passing safe= REPLACES quote()'s default of '/', so leaving it out would
# rewrite every path separator as %2f.  '#' is intentionally not listed,
# so it stays escaped.
KEEP_CHARS = '!"$&\'()*+,/:;<=>?@[\\]^`{|}'


def strip_www(line):
    """Drop every ',www' that immediately precedes the first ')' in line.

    Repeats until the text before the first ')' no longer ends in ',www',
    so doubled labels (',www,www)') are removed completely.  A line with
    no ')' is returned unchanged instead of raising ValueError.
    """
    ploc = line.find(')')
    while ploc >= 0 and line.endswith(',www', 0, ploc):
        line = line[:ploc - 4] + line[ploc:]
        # Exactly four characters were removed before the ')'.
        ploc -= 4
    return line


def fix_key(key):
    """Return key percent-decoded, re-encoded with KEEP_CHARS, lowercased.

    NOTE(review): 'percent' is not a built-in codec error handler; it is
    presumably registered by the percent_encode import at the top of the
    file -- confirm.  It is only looked up when decoding actually fails.
    """
    return quote(unquote(key, errors='percent'), safe=KEEP_CHARS).lower()


def fix_line(line):
    """Return one input line with its key normalised.

    The ',www' stripping is always applied.  If the resulting line
    contains a '%' anywhere, the first tab-separated field (the key) is
    passed through fix_key(); all remaining fields, including the line
    terminator, are copied through untouched.  Lines without a '%' or
    without any tab are returned as they are after ',www' stripping.
    """
    line = strip_www(line)
    if '%' not in line:
        return line
    key, tab, rest = line.partition('\t')
    if not tab:
        # No separate key field to rewrite (and key would include the
        # newline), so leave the line alone rather than fail.
        return line
    return fix_key(key) + tab + rest


def main():
    """Rewrite the file named by sys.argv[1], line by line, to stdout."""
    with open(sys.argv[1], "r") as f:
        sys.stdout.writelines(fix_line(line) for line in f)


if __name__ == "__main__":
    main()
3a2ae6057242
handle double .www, more keep-me chars
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
31 |