changeset 3:668579197bec

Oops: 1.1 was left half-modified and was bogus (doit had an unterminated awk stage); this completes the edit.
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 19 Jul 2022 11:02:41 +0100
parents b4801f5696b2
children f27061e8a9da
files bin/_ex1.sh
diffstat 1 files changed, 5 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/bin/_ex1.sh	Mon Jul 18 19:22:42 2022 +0100
+++ b/bin/_ex1.sh	Tue Jul 19 11:02:41 2022 +0100
@@ -1,6 +1,6 @@
 #!/bin/bash
 # This runs on the compute nodes...
-# count top 21 solitary languages in parallel, taking input directly from /work/dc007/dc007/hst/results/$1/cdx_counts/xxx.tsv
+# count http vs. https vs. english, chinese in parallel, taking input directly from /work/dc007/dc007/hst/results/$1/cdx_counts/xxx.tsv
 
 N=$SLURM_JOB_NUM_NODES
 n=$SLURM_NTASKS
@@ -16,9 +16,8 @@
 cc=$1
 resdir=$W/$USER/results/$cc/$2
 srcdir=$W/hst/results/$cc/cdx_counts
-langs=$3
-s1=$4
-sn=$5
+s1=$3
+sn=$4
 
 echo $(date) task $n.$task on $nodename:$N.$node start
 
@@ -26,8 +25,8 @@
 
 doit () {
  echo $(date) start $1 $task $PARALLEL_SEQ
- fgrep '	w	' $srcdir/$1.tsv | awk 'BEGIN { | uniq -c | \
-    $W/shared/bin/uniq_merge.py > $resdir/${langs}_$1.tsv
+ fgrep '	w	' $srcdir/$1.tsv | cut -f 3,4 | uniq -c | \
+    $W/shared/bin/uniq_merge.py > $resdir/ex1_$1.tsv
  echo $(date) end $1 $task $PARALLEL_SEQ 
 }