changeset 210:6faed9e5d9c9

Use 2-digit suffixes for the split files; in the outer loop, wait only for `aws s3` processes whose command line matches the current $s.
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 11 Jan 2024 16:43:16 +0000
parents b6669d78a5d9
children 0ffa655efc21
files bin/getcc_multi.aws
diffstat 1 files changed, 3 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/bin/getcc_multi.aws	Fri Dec 08 10:32:07 2023 +0000
+++ b/bin/getcc_multi.aws	Thu Jan 11 16:43:16 2024 +0000
@@ -40,9 +40,8 @@
 mkdir -p $s/orig/warc
 fgrep -v -f <(cd $s/orig/warc && ls *.warc.gz || :) <(fgrep -w $s warc.paths) > /tmp/hst/$s
 
-split -a 1 --numeric-suffixes=1 -n l/$nthreads /tmp/hst/$s /tmp/hst/${s}_
-
-seq 1 $nthreads | while read i
+split -a 2 --numeric-suffixes=1 -n l/$nthreads /tmp/hst/$s /tmp/hst/${s}_
+printf "%02d\n" $(seq 1 $nthreads) | while read i
 do
   cat /tmp/hst/${s}_$i | {
   printf "thread\t%s\t%s\t%s\n" $i $$ $(ps -o pgid= -p "$$") >> errlog_${SEG}_$i
@@ -57,5 +56,5 @@
   sleep 30
   }
 done
-while pgrep -a aws |grep -c s3; do sleep 60; done
+while pgrep -a aws |grep -c "aws s3.*${s}"; do sleep 60; done
 echo $(date) end $SEG