comparison bin/getcc_multi.aws @ 171:143d2c6d56da

cross-language confusion :-) (replace Python's `pass` with the shell no-op `:` in the `ls *.warc.gz || ...` fallback)
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 15 Nov 2023 09:36:23 +0000
parents 348f4a31228f
children 10c87f5c704d
comparison
equal deleted inserted replaced
170:4870e14ec237 171:143d2c6d56da
35 # max_concurrent_requests = 1 35 # max_concurrent_requests = 1
36 # multipart_chunksize = 32MB 36 # multipart_chunksize = 32MB
37 37
38 s=$(grep -Eow "[0-9]*\.$SEG" $wf | head -1) 38 s=$(grep -Eow "[0-9]*\.$SEG" $wf | head -1)
39 mkdir -p $s/orig/warc 39 mkdir -p $s/orig/warc
40 fgrep -v -f <(cd $s/orig/warc && ls *.warc.gz || pass) <(fgrep -w $s warc.paths) > /tmp/hst/$s 40 fgrep -v -f <(cd $s/orig/warc && ls *.warc.gz || :) <(fgrep -w $s warc.paths) > /tmp/hst/$s
41 41
42 split -a 1 --numeric-suffixes=1 -n l/$nthreads /tmp/hst/$s /tmp/hst/${s}_ 42 split -a 1 --numeric-suffixes=1 -n l/$nthreads /tmp/hst/$s /tmp/hst/${s}_
43 43
44 seq 1 $nthreads | while read i 44 seq 1 $nthreads | while read i
45 do 45 do