Mercurial > hg > cc > cirrus_work
changeset 3:668579197bec
oops, 1.1 was half-modified, bogus
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 19 Jul 2022 11:02:41 +0100 |
parents | b4801f5696b2 |
children | f27061e8a9da |
files | bin/_ex1.sh |
diffstat | 1 files changed, 5 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/bin/_ex1.sh Mon Jul 18 19:22:42 2022 +0100
+++ b/bin/_ex1.sh Tue Jul 19 11:02:41 2022 +0100
@@ -1,6 +1,6 @@
 #!/bin/bash
 # This runs on the compute nodes...
-# count top 21 solitary languages in parallel, taking input directly from /work/dc007/dc007/hst/results/$1/cdx_counts/xxx.tsv
+# count http vs. https vs. english, chinese in parallel, taking input directly from /work/dc007/dc007/hst/results/$1/cdx_counts/xxx.tsv
 N=$SLURM_JOB_NUM_NODES
 n=$SLURM_NTASKS
@@ -16,9 +16,8 @@
 cc=$1
 resdir=$W/$USER/results/$cc/$2
 srcdir=$W/hst/results/$cc/cdx_counts
-langs=$3
-s1=$4
-sn=$5
+s1=$3
+sn=$4
 echo $(date) task $n.$task on $nodename:$N.$node start
@@ -26,8 +25,8 @@
 doit () {
   echo $(date) start $1 $task $PARALLEL_SEQ
-  fgrep ' w ' $srcdir/$1.tsv | awk 'BEGIN { | uniq -c | \
-    $W/shared/bin/uniq_merge.py > $resdir/${langs}_$1.tsv
+  fgrep ' w ' $srcdir/$1.tsv | cut -f 3,4 | uniq -c | \
+    $W/shared/bin/uniq_merge.py > $resdir/ex1_$1.tsv
   echo $(date) end $1 $task $PARALLEL_SEQ
 }
```