comparison bin/sort_date.py @ 83:d92bd8527718

use surt instead of trying to create index term by hand
author Henry Thompson <ht@markup.co.uk>
date Sat, 19 Aug 2023 16:33:23 -0400
parents 7bbb14f6e394
children c18c307cc325
comparison
equal deleted inserted replaced
82:7bbb14f6e394 83:d92bd8527718
3 # Recommended to also sed '/GMT$/s/\([^ ]\)GMT$/\1 GMT/' 3 # Recommended to also sed '/GMT$/s/\([^ ]\)GMT$/\1 GMT/'
4 # to fix a common 'bad' timestamp (~ .2% of inputs) 4 # to fix a common 'bad' timestamp (~ .2% of inputs)
5 import email.utils 5 import email.utils
6 import sys 6 import sys
7 from urllib.parse import urlsplit, unquote 7 from urllib.parse import urlsplit, unquote
8 from surt import surt
8 import re 9 import re
9 # Thanks to https://stackoverflow.com/a/8776871 10 # Thanks to https://stackoverflow.com/a/8776871
10 import locale 11 import locale
11 from functools import cmp_to_key 12 from functools import cmp_to_key
12 13
35 try: 36 try:
36 try: 37 try:
37 epoch = email.utils.parsedate_to_datetime(dateTime).timestamp() 38 epoch = email.utils.parsedate_to_datetime(dateTime).timestamp()
38 except OverflowError: 39 except OverflowError:
39 epoch = 32535215999.0 40 epoch = 32535215999.0
41 return (surt(uri),epoch)
40 parts = urlsplit(uri) 42 parts = urlsplit(uri)
41 nl = parts.netloc 43 nl = parts.netloc
42 pq = '?%s'%parts.query if parts.query else ''; 44 pq = '?%s'%parts.query if parts.query else '';
43 #print('nl',nl,file=sys.stderr) 45 #print('nl',nl,file=sys.stderr)
44 if ':' in nl: 46 if ':' in nl: