Mercurial > hg > cc > cirrus_work
annotate bin/merge_date.py @ 94:009e633eb804
last version before giving up on approach based only on key and datestamp
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 07 Sep 2023 18:03:55 +0100 |
parents | 25bd398a8035 |
children | 18446a7eeb9e |
rev | line source |
---|---|
89
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
1 #!/usr/bin/python3 |
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
2 '''Add timestamps from Last-Modified-dated (ks.tsv) files into |
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
3 that year's index |
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
4 |
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
5 Usage: merge_date.py ksvstream cdx-dir outdir |
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
6 |
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
7 ksvstream consists of tab-separated key, CC date and Unix timestamp |
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
8 ''' # ' |
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
9 |
91
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
10 import sys, io, os, os.path, time, re |
89
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
11 from isal import igzip |
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
12 |
91
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
# An optional leading "-d" switches on debug diagnostics.  It is removed
# from sys.argv so the positional arguments keep the same indices
# whether or not the flag was given.
DEBUG = len(sys.argv) > 1 and sys.argv[1] == '-d'
if DEBUG:
    sys.argv.pop(1)

# Fail with a usage message rather than a bare IndexError traceback when
# a positional argument is missing.
if len(sys.argv) < 4:
    sys.exit("Usage: merge_date.py [-d] ksvstream cdx-dir outdir")

# Filename templates, completed later with an index-file number:
# "%0.3d" zero-pads that number to three digits, giving cdx-00NNN(.gz).
# XPATH names the gzipped input index files under cdx-dir (argv[2]),
# NPATH the uncompressed output files under outdir (argv[3]).
XPATH = "%s/cdx-00%%0.3d.gz" % sys.argv[2]
NPATH = "%s/cdx-00%%0.3d" % sys.argv[3]
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
21 |
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
# Raw bytes literals are used so that the regex escape "\?" is not an
# invalid *string* escape (a SyntaxWarning from Python 3.12 on); the
# compiled patterns are unchanged.

# Matches the "filename" property of an index line whose file lives in a
# crawldiagnostics or robotstxt subdirectory of a segment.
RorDPAT = re.compile(rb'", "filename": "crawl-data/[A-Z0-9-]*/segments/[0-9.]*/'
                     rb'(crawldiagnostics|robotstxt)/')

# Matches a key whose query part *begins* with a session-id parameter.
#   group 1: everything up to and including the first '?'
#   group 2: the session parameter itself (cfid=...&cftoken=..., sid=...,
#            jsessionid=..., phpsessid=... or aspsessionid<letters>=...)
SESSION = re.compile(rb'([^?]*\?)((cfid=[^&]*&cftoken|'
                     rb'sid|jsessionid|phpsessid|aspsessionid[a-z]*)'
                     rb'=[^&]*)')
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
27 |
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
28 # Above based on this from fixed Java code: |
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
29 #(?:jsessionid=[0-9a-zA-Z]{32})(?:&(.*))?$", "jsessionid=", 1, 2), |
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
30 #(?:phpsessid=[0-9a-zA-Z]{32})(?:&(.*))?$", "phpsessid=", 1, 2), |
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
31 #(?:sid=[0-9a-zA-Z]{32})(?:&(.*))?$", "sid=", 1, 2), |
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
32 #(?:ASPSESSIONID[a-zA-Z]{8}=[a-zA-Z]{24})(?:&(.*))?$", "aspsessionid", 1, 2), |
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
33 #(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", |
89
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
34 |
90 | 35 #print(sys.argv[3],NPATH,file=sys.stderr) |
89
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
36 |
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
# The output directory must exist before the first output file is opened.
os.makedirs(sys.argv[3], exist_ok=True)

# Module-level state, updated by nextLine() as the index files are consumed.
FN = 0    # number of the index file currently being processed
XCNT = 0  # lines read so far from the current index file
DCNT = 0  # ks lines read, counted relative to the current index file

# Start with index file 0: gzipped input, plain (uncompressed) output.
XF = igzip.IGzipFile(filename=XPATH % 0)
NN = NPATH % 0  # name of the output file currently being written
NF = open(NN, 'wb')
89
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
46 |
def nextLine():
    '''Return the next line from the index files, moving on to the next
    index file whenever the current one has run out.

    When an input file is exhausted, it and the matching output file are
    closed, the name of the finished output file is printed on stdout,
    and input/output files for the next file number are opened.  Index
    files that exist but are empty are stepped over in the same way.

    Returns the line as bytes, or None when there is no further index
    file.
    '''
    global FN, NF, NN, XF, XCNT, DCNT
    while True:
        xl = XF.readline()
        XCNT += 1
        if xl != b'':
            return xl
        # need to move to next index file
        FN += 1
        DCNT = 0 # this is relative to FN
        XF.close()
        NF.close()
        print(NN, flush=True) # so we can compress it
        time.sleep(0.1) # so it flushes?
        XN = XPATH % FN
        if not os.path.exists(XN):
            return None
        XF = igzip.IGzipFile(filename=XN)
        NF = open((NN := NPATH % FN), 'wb')
        # Restart the per-file line count; going round the loop again
        # (rather than returning the first read unchecked) means an empty
        # file never hands b'' back to the caller.
        XCNT = 0
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
69 |
460f0599e8cd
mostly working, but need to reorder in case of cfid and friends
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
90
diff
changeset
|
def keys(key):
    '''Deal with failure of 2019-35-vintage Java fixup to detect
    parameter-part-initial session ids.

    key is an index key (bytes).  Returns a triple (messy, k1, k2):

      (False, key, None)    -- SESSION does not match key
      (True, prefix, None)  -- the session parameter was the whole query
                               part; prefix is the text before the '?'
      (True, prefix, fixed) -- otherwise; prefix runs up to and including
                               the '?', fixed is key with the session
                               parameter cut out
    '''
    found = SESSION.match(key)
    if found is None:
        return False, key, None
    prefix = found[1]
    start, end = found.span(2)
    # Splice the session parameter out of the key
    fixed = key[:start] + key[end:]
    if fixed == prefix:
        # Nothing followed the session parameter: drop the dangling '?'
        return True, prefix[:-1], None
    return True, prefix, fixed
89
a62580816f1c
merge a stream of ks files with a set of cdx files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
83 |
92
e56a7aad9ce9
attempt at reordering if necessary
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
91
diff
changeset
|
def _split_ks(dl):
    '''Split one ks line into its key, date and time fields; at end of
    file (dl == b'') all three fields are b''.'''
    if dl == b'':
        return b'', b'', b''
    return dl.split(b'\t')

dfq = [] # for reordering if needed

# Merge loop: walk the index lines and the ks stream in step.  An index
# line that matches the current ks line (same date, compatible key) is
# rewritten with a "lastmod" property added; any other index line is
# copied through unchanged.
with open(sys.argv[1], 'rb') as df:
    dl = df.readline()
    DCNT = 1
    # An empty ks stream yields empty fields here, so every index line is
    # simply copied through.
    dkey, ddate, dtime = _split_ks(dl)

    while (xl := nextLine()) is not None:
        xkey, xdate, xprops = xl.split(b' ', maxsplit=2)
        messy, xkey1, xkey2 = keys(xkey)
        if messy:
            # Key begins its query with a session id: ks lines sharing the
            # prefix may be out of order, so queue the non-matching ones
            # in dfq, re-examining those queued last time (stale) before
            # reading any further.
            stale = dfq
            dfq = []
            # "dl and" stops the scan at end of the ks stream
            while (dl and dkey.startswith(xkey1) and
                   (ddate != xdate or
                    (xkey2 is not None and dkey != xkey2))):
                dfq.append(dl)
                if stale:
                    dl = stale.pop(0)
                else:
                    dl = df.readline()
                    DCNT += 1
                dkey, ddate, dtime = _split_ks(dl)
        if (ddate != xdate or
                not dkey.startswith(xkey1) or
                (xkey2 is not None and dkey != xkey2)):
            # No ks entry for this index line
            if DEBUG and dkey and xkey.decode('ascii') > dkey.decode('ascii'):
                # The index has moved past the pending ks key, so that ks
                # line can never be matched: report and give up
                print("Fail: xkey: %s\n"
                      " dkey: %s\n"
                      " xdate: %s\n"
                      " ddate: %s\n"
                      "dfq: %s\n"
                      "k1, k2: |%s|%s|\n"
                      "FN: %s XCNT: %s DCNT: %s\n"
                      "xl: %s" % (xkey, dkey, xdate, ddate,
                                  (b'\n '.join(dfq)).decode('ascii'),
                                  xkey1, xkey2, FN, XCNT, DCNT, xl),
                      file=sys.stderr)
                raise ValueError
            NF.write(xl)
            continue
        # Matched: re-emit the index line with "lastmod" appended.
        # xprops[:-2] drops the last two bytes of the line (its closing
        # '}' and newline) and dtime[:-3] the last three bytes of the
        # time field.
        NF.write(xkey)
        NF.write(b' ')
        NF.write(xdate)
        NF.write(b' ')
        NF.write(xprops[:-2])
        NF.write(b', "lastmod": "%d"}\n' % int(dtime[:-3]))
        dl = df.readline()
        if dl == b'':
            # End of the ks stream (df is opened in binary mode, so the
            # EOF marker is b'', not '')
            if dfq:
                if DEBUG:
                    breakpoint()
            # write out the last of the last index file, if any
            dkey = ddate = b''
        else:
            DCNT += 1
            dkey, ddate, dtime = dl.split(b'\t')