annotate bin/doCLM.sh @ 166:afd7879181c9

old style
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 18 Jul 2022 19:15:20 +0100
parents ef961d91eea5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
163
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/usr/bin/bash
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 mkdir -p /dev/shm/hst
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4 c=$1
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5 i=$2
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6 f=$(printf 'cdx-%05.0f.gz' $i)
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8 unpigz -dp 1 -c data/$c/cdx/warc/$f | parallel --willcite --pipe -N 50000 -j10 "ix.py -x -h -c '/lustre/home/dc007/hst/bin/clm.sh /dev/shm/hst/'$i'.lmh_{#}.txt' 2>/dev/shm/hst/$i.lmh_{#}.errs"
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10 cd /dev/shm/hst
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11 tar -czf $HOME/results/${i}.lmh.tar.gz ${i}.lmh_*.{txt,errs}
ef961d91eea5 previous approach to lang/field extraction
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12