changeset 17:b976a7449d41

sic
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 27 Feb 2020 13:24:19 +0000
parents 47ef882acbec
children 6662a353379a
files bin/hist bin/plinksRedo.sh bin/plinksRedoMaster.sh plinksRedoJob.sh
diffstat 4 files changed, 116 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/hist	Thu Feb 27 13:24:19 2020 +0000
@@ -0,0 +1,122 @@
+#!/bin/sh
+# hist - print a text histogram of the numbers in a file (one per line).
+#
+# Usage: hist file bins [-max n] [-min n] [-ll n] [-t title] [-ntile n]
+#   file      data file, one number per line
+#   bins      number of histogram bins (must be greater than 0)
+#   -max n    upper bound; larger values are only counted, on a final ">" line
+#   -min n    lower bound; smaller values are only counted, on a first "<" line
+#   -ll n     line length the bars are scaled to (default 80)
+#   -t title  heading (default: the file name)
+#   -ntile n  mark with a leading "-" each bin where the running count
+#             reaches the next 1/n share of the in-range values
+# If -max or -min is missing the bound is read from the data, which needs
+# a numerically sorted temporary copy of the file.
+# max, min, ll, title and ntile are never reset here, so values already in
+# the environment act as defaults.
+if [ $# -lt 2 ]
+ then
+  echo "Usage: hist file bins [-max n] [-min n] [-ll n] [-t title] [-ntile n]" 1>&2
+  exit 1
+fi
+file=$1
+shift
+nb=$1
+shift
+# The case statement is the loop condition: it eats an option name and
+# stores the value that follows; the loop body then shifts that value away.
+while
+  case "x$1" in
+   x-max) shift; max=$1 ;;
+   x-ll) shift; ll=$1 ;;
+   x-min) shift; min=$1 ;;
+   x-t) shift; title=$1 ;;
+   x-ntile) shift; ntile=$1 ;;
+   x) break ;;
+   *) echo hunh 1>&2; exit 1 ;;
+  esac
+ do
+  shift
+done
+# Only sort when a bound has to be read from the data.  The copy lives in a
+# mktemp file (not a guessable /tmp name) and is removed however we exit.
+rfile=
+if [ -z "$max" ] || [ -z "$min" ]
+ then
+  rfile=$(mktemp "${TMPDIR:-/tmp}/hist.XXXXXX") || exit 1
+  trap 'rm -f "$rfile"' 0
+  trap 'exit 1' 1 2 15
+  sort -n "$file" > "$rfile" || exit 1
+  [ -n "$max" ] || max=$(tail -n 1 "$rfile")
+  [ -n "$min" ] || min=$(head -n 1 "$rfile")
+fi
+# Everything is handed to perl as arguments after "--" (so a negative bound
+# is not taken for a perl switch) instead of being pasted into the program
+# text, where a quote, "$" or "@" in the title or file name broke the script.
+perl -e '
+($max,$min,$nb,$ll,$title,$ntile)=splice(@ARGV,0,6);
+# Numify once, so that e.g. an ntile of "0.0" counts as false.
+$_+=0 foreach ($max,$min,$nb,$ll,$ntile);
+die "hist: bins must be a positive number\n" unless ($nb>0);
+$#bins=$nb;
+$range=$max-$min;
+while (<>) {
+  if ($_<$min) {
+    $minn+=1;
+  }
+  elsif ($_>$max) {
+    $maxn+=1;
+  }
+  else {
+    $n+=1;
+    # A zero range means every in-range value equals $min: use bin 0
+    # rather than dividing by zero.
+    $i=$range ? (($_-$min)/$range)*$nb : 0;
+    # A value equal to $max belongs to the last bin, not one past it.
+    if ($i==$nb) {$bins[$i-1]+=1} else {$bins[int($i)]+=1};
+  };
+};
+$start=$min+$range/(2*$nb);
+$step=$range/$nb;
+foreach $i (0..$nb-1) {
+  $mb=$bins[$i] if ($bins[$i]>$mb);
+};
+# Scale so the fullest bin fits in the line length less 16 columns of
+# labels; never stretch bars, and do not divide by zero when -ll is 16.
+$bscale=($ll>16) ? $mb/($ll-16) : 1;
+$bscale=1 if ($bscale<1);
+print "$title ",`date`,"\n";
+print "    n     min    max    width  maxcnt bscale";
+if ($ntile) {
+  print "  ntiles  tilew";
+  # Keep the requested count: $ntile itself is used up as a countdown.
+  $tiles=$ntile;
+  $tile=$tstep=($n/$ntile);
+  $ntile--;
+}
+print "\n";
+printf("%7d%7.3f %7.3f%7.3f%6d  %7.3f",$n,$min,$max,$step,$mb,$bscale);
+printf("%5d   %7.2f",$tiles,$tstep) if ($tiles);
+print "\n\n";
+if ($minn) {printf("<%7.3f%7d\n",$min,$minn)};
+foreach $i (0..$nb-1) {
+  $n=$bins[$i];
+  if ($ntile) {
+    $cum+=$n;
+    if ($cum>=$tile) {
+      print "-"; $tile+=$tstep; $ntile--;
+    }
+    else {
+      print " ";
+    };
+  }
+  else {
+    print " ";
+  };
+  printf("%7.3f%7d ",$start+($i*$step),$bins[$i]);
+  $nx=$bins[$i]/$bscale;
+  $nx=1 if (($nx<1) && ($bins[$i]>0));
+  print "*" x $nx; print "\n";
+};
+if ($maxn) {printf(">%7.3f%7d\n",$max,$maxn)};' -- "$max" "$min" "$nb" \
+  "${ll:-80}" "${title:-$file}" "${ntile:-0}" "${rfile:-$file}"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/plinksRedo.sh	Thu Feb 27 13:24:19 2020 +0000
@@ -0,0 +1,18 @@
+#!/usr/bin/bash
+# Worker for the plinks redo pass: hands each line of this node's redo list
+# to bin/doPlinks.sh through GNU parallel, at most 48 jobs at a time.
+# bin/doPlinks.sh is a relative path, so run this from the directory above bin/.
+module load miniconda/python3
+echo $(date) $(hostname)
+h=$(hostname)
+# Keep what follows the last "n" of the host name (r1i5n1 gives 1); it
+# picks the redo_<n> list to read.
+hn=${h##*n}
+# doPlinks.sh receives: node number, parallel job sequence number, input line.
+# Reading the list by redirection means a missing list shows up in rc.
+parallel --will-cite -j 48 -N 1 bin/doPlinks.sh "${hn}" '{#}' '{}' \
+  < "/beegfs/common_crawl/CC-MAIN-2019-35/pdfs/links/redo_${hn}"
+# Save the status straight away: the command substitutions on the echo line
+# would replace $? with the status of hostname before it is printed.
+rc=$?
+echo $(date) $(hostname) $rc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/plinksRedoMaster.sh	Thu Feb 27 13:24:19 2020 +0000
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Runs on one machine and starts bin/plinksRedo.sh on the two worker machines.
+
+# Print the arguments behind a timestamp.  $(date) is left unquoted on
+# purpose so the output is word-for-word what a plain echo would give.
+stamp() {
+  echo $(date) "$@"
+}
+
+workers=(-S r1i5n0 -S r1i5n1)
+
+stamp Launching plinks workers for redo
+# --nonall: run the command once on every listed host, taking no input.
+parallel --will-cite --nonall "${workers[@]}" bin/plinksRedo.sh
+stamp Workers done
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plinksRedoJob.sh	Thu Feb 27 13:24:19 2020 +0000
@@ -0,0 +1,21 @@
+#!/bin/bash
+# PBS job wrapper for the plinks redo pass.
+# Usage: qsub plinksRedoJob.sh
+# NOTE(review): the usage line here used to read "qsub -v t1=1stTar,tn=numTars
+# plinksJob.sh"; this script never reads t1 or tn - confirm they are not needed.
+# Asks for two 36-cpu chunks on exclusively held hosts for 8 hours, then runs
+# bin/plinksRedoMaster.sh from the directory named by PBS_O_WORKDIR.
+#PBS -l select=2:ncpus=36
+#PBS -l place=exclhost
+#PBS -l walltime=08:00:00
+#PBS -V
+#PBS -A dc007
+#PBS -N plr
+
+#module load mpt
+
+# Stop if that directory cannot be entered: bin/ is a relative path and
+# would otherwise be looked up in the wrong place.
+cd "${PBS_O_WORKDIR}" || exit 1
+
+bin/plinksRedoMaster.sh