annotate bin/merge_date.py @ 94:009e633eb804

last version before giving up on approach based only on key and datestamp
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 07 Sep 2023 18:03:55 +0100
parents 25bd398a8035
children 18446a7eeb9e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
89
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/usr/bin/python3
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 '''Add timestamps from Last-Modified-dated (ks.tsv) files into
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 that year's index
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5 Usage: merge_date.py ksvstream cdx-dir outdir
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7 ksvstream consists of tab-separated key, CC date and Unix timestamp
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8 ''' # '
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9
91
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
10 import sys, io, os, os.path, time, re
89
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11 from isal import igzip
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12
91
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
# An optional leading -d turns on debugging output.  It is removed from
# sys.argv so the positional arguments keep the indices used below.
# The length check avoids an IndexError when there are no arguments at all.
DEBUG = len(sys.argv) > 1 and sys.argv[1] == '-d'
if DEBUG:
    sys.argv.pop(1)
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
18
90
c1a70532444c flip loops
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 89
diff changeset
# Filename templates for the gzipped input index files (in the cdx-dir
# argument) and the uncompressed output files (in the outdir argument).
# The directory is filled in here; the remaining %0.3d takes the file
# number when a file is opened.
XPATH = f"{sys.argv[2]}/cdx-00%0.3d.gz"
NPATH = f"{sys.argv[3]}/cdx-00%0.3d"
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
21
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
# Matches the "filename" property of an index record whose file lives in
# a segment's crawldiagnostics/ or robotstxt/ directory.
# Raw bytes literals throughout, so regex escapes are never interpreted
# as (invalid) string escapes.
RorDPAT = re.compile(rb'", "filename": "crawl-data/[A-Z0-9-]*/segments/[0-9.]*/'
                     rb'(crawldiagnostics|robotstxt)/')
# Matches a key whose query part *begins* with a session-id parameter.
# Group 1 is everything up to and including the '?', group 2 is the
# session parameter itself (name=value, or cfid=...&cftoken=...).
SESSION = re.compile(rb'([^?]*\?)((cfid=[^&]*&cftoken|'
                     rb'sid|jsessionid|phpsessid|aspsessionid[a-z]*)'
                     rb'=[^&]*)')
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
27
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
28 # Above based on this from fixed Java code:
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
29 #(?:jsessionid=[0-9a-zA-Z]{32})(?:&(.*))?$", "jsessionid=", 1, 2),
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
30 #(?:phpsessid=[0-9a-zA-Z]{32})(?:&(.*))?$", "phpsessid=", 1, 2),
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
31 #(?:sid=[0-9a-zA-Z]{32})(?:&(.*))?$", "sid=", 1, 2),
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
32 #(?:ASPSESSIONID[a-zA-Z]{8}=[a-zA-Z]{24})(?:&(.*))?$", "aspsessionid", 1, 2),
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
33 #(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$",
89
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
34
90
c1a70532444c flip loops
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 89
diff changeset
35 #print(sys.argv[3],NPATH,file=sys.stderr)
89
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
36
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# The output directory must exist before the first output file is opened
os.makedirs(sys.argv[3], exist_ok=True)

# Merge state, shared with (and updated by) nextLine
FN = 0    # number of the index file currently being processed
XCNT = 0  # lines read so far from the current index file
DCNT = 0  # ks lines read; nextLine resets it whenever FN changes

# Open the first input/output pair; NN remembers the output file's name
XF = igzip.IGzipFile(filename=XPATH % 0)
NN = NPATH % 0
NF = open(NN, 'wb')
89
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
46
90
c1a70532444c flip loops
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 89
diff changeset
def nextLine():
    '''Return the next line of the index, moving on to the next index
    file when the current one has run out.

    When the current input is exhausted, both it and the current output
    file are closed and the output file's name is printed on stdout.
    If there is a next-numbered input file, it and a matching output
    file are opened and the new input's first line is returned.
    Otherwise None is returned.'''
    global FN, NF, NPATH, NN, XF, XPATH, XCNT, DCNT
    line = XF.readline()
    XCNT += 1
    if line != b'':
        return line

    # Current index file is finished: close this input/output pair
    FN += 1
    DCNT = 0  # this is relative to FN
    XF.close()
    NF.close()
    print(NN, flush=True)  # so we can compress it
    time.sleep(0.1)  # so it flushes?

    next_index = XPATH % FN
    if not os.path.exists(next_index):
        # no more index files
        return None

    XF = igzip.IGzipFile(filename=next_index)
    NN = NPATH % FN
    NF = open(NN, 'wb')
    XCNT = 1
    return XF.readline()
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
69
460f0599e8cd mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 90
diff changeset
def keys(key):
    '''Work around the failure of the 2019-35-vintage Java fixup to
    detect session ids at the very start of the parameter part.

    Returns a triple (messy, key1, key2):
      (False, key, None) if key has no parameter-initial session id;
      (True, prefix-without-its-'?', None) if the session id was the
        only thing after the '?';
      (True, prefix-with-'?', key-with-the-session-id-cut-out)
        otherwise.'''
    found = SESSION.match(key)
    if found is None:
        return False, key, None
    head = found.group(1)
    start, end = found.span(2)
    # the key with just the session parameter removed
    stripped = key[:start] + key[end:]
    if stripped != head:
        return True, head, stripped
    # nothing but the session id followed the '?', so drop the '?' too
    return True, head[:-1], None
89
a62580816f1c merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
83
92
e56a7aad9ce9 attempt at reordering if necessary
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 91
diff changeset
dfq = []  # ks lines set aside during reordering, retried for later keys

# Merge loop: walk the index files line by line (via nextLine) alongside
# the ks stream.  An index line that matches the current ks line on key
# and date is rewritten with an added "lastmod" property and the ks
# stream advances; any other index line is copied through unchanged.
with open(sys.argv[1], 'rb') as df:
    dl = df.readline()
    DCNT = 1
    dkey, ddate, dtime = dl.split(b'\t')

    while (xl := nextLine()) is not None:
        xkey, xdate, xprops = xl.split(b' ', maxsplit=2)
        messy, xkey1, xkey2 = keys(xkey)
        if messy:
            # The index key starts its parameters with a session id, so
            # the ks lines sharing its prefix may be in a different
            # order.  Skip over (and set aside in dfq) those that do not
            # match this index line, first retrying the ones set aside
            # for the previous messy key.
            stale = dfq
            dfq = []
            while (dkey.startswith(xkey1) and
                   (ddate != xdate or
                    (xkey2 is not None and dkey != xkey2))):
                dfq.append(dl)
                if stale:
                    dl = stale.pop(0)
                else:
                    dl = df.readline()
                    DCNT += 1
                if dl == b'':
                    # ks stream ran out while reordering: nothing more
                    # can match (previously this crashed in the unpack)
                    dkey = ddate = b''
                    break
                dkey, ddate, dtime = dl.split(b'\t')
        if (ddate != xdate or
                not dkey.startswith(xkey1) or
                (xkey2 is not None and dkey != xkey2)):
            # No match: copy the index line through unchanged
            if DEBUG and dkey and xkey.decode('ascii') > (dkey.decode('ascii')):
                # The index has got ahead of the ks stream
                print("Fail: xkey: %s\n"
                      " dkey: %s\n"
                      " xdate: %s\n"
                      " ddate: %s\n"
                      "dfq: %s\n"
                      "k1, k2: |%s|%s|\n"
                      "FN: %s XCNT: %s DCNT: %s\n"
                      "xl: %s" % (xkey, dkey, xdate, ddate,
                                  (b'\n '.join(dfq)).decode('ascii'),
                                  xkey1, xkey2, FN, XCNT, DCNT, xl),
                      file=sys.stderr)
                # NOTE(review): assumed to belong to the DEBUG branch;
                # the source's indentation was not recoverable -- confirm
                raise ValueError
            NF.write(xl)
            continue
        # Match: rewrite the line, replacing the final two bytes of the
        # properties with a "lastmod" property (dtime minus its last
        # three bytes) and a fresh closing brace and newline
        NF.write(xkey)
        NF.write(b' ')
        NF.write(xdate)
        NF.write(b' ')
        NF.write(xprops[:-2])
        NF.write(b', "lastmod": "%d"}\n' % int(dtime[:-3]))
        dl = df.readline()
        # df is opened in binary mode, so end of file is b'', not ''
        if dl == b'':
            if dfq:
                if DEBUG:
                    breakpoint()
            # write out the last of the last index file, if any:
            # with empty (bytes) dkey/ddate nothing further will match
            dkey = ddate = b''
        else:
            DCNT += 1
            dkey, ddate, dtime = dl.split(b'\t')