comparison bin/doCLM.sh @ 163:ef961d91eea5

previous approach to lang/field extraction
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 18 Jul 2022 18:16:27 +0100
parents
children
comparison
equal deleted inserted replaced
162:e82981075b4a 163:ef961d91eea5
#!/usr/bin/bash
# doCLM.sh -- stream one compressed cdx file through ix.py/clm.sh in
# parallel and bundle the per-chunk results into a single tarball.
#
# Usage: doCLM.sh DATASET INDEX
#   DATASET  name of the directory under data/ to read from; the path is
#            relative, so the script must be started from the directory
#            that contains data/ (presumably a crawl id -- confirm)
#   INDEX    number of the cdx file; data/DATASET/cdx/warc/cdx-NNNNN.gz is
#            read, where NNNNN is INDEX zero-padded to five digits
#
# Output: $HOME/results/INDEX.lmh.tar.gz, containing every
#         INDEX.lmh_<job>.txt and INDEX.lmh_<job>.errs left in /dev/shm/hst.
#
# NOTE(review): a failing pipeline stage does not stop the script; the
# archive is still built from whatever files exist, so the .errs files are
# kept for inspection.  That best-effort behaviour is intentional here.

if [[ $# -lt 2 ]]; then
  printf 'Usage: %s DATASET INDEX\n' "${0##*/}" >&2
  exit 2
fi

c=$1
i=$2

# Scratch directory for the per-job output and error files.
readonly work_dir=/dev/shm/hst
mkdir -p "$work_dir" || exit 1

# %05.0f (rather than %05d) parses the index as a decimal number, so a
# value with leading zeros such as 08 is not rejected as invalid octal.
f=$(printf 'cdx-%05.0f.gz' "$i")

# Decompress on a single thread and hand the stream to GNU parallel, which
# splits it into chunks of 50000 records (--pipe -N) and runs up to 10 jobs
# at once.  {#} is parallel's job sequence number, so each job gets its own
# .txt path (passed to clm.sh as its argument) and its own .errs file for
# stderr.  The single quotes are literal inside the double-quoted string:
# they are for the shell that parallel starts, so that ix.py receives the
# whole "clm.sh <path>" command as the one argument to -c.
unpigz -dp 1 -c "data/$c/cdx/warc/$f" \
  | parallel --willcite --pipe -N 50000 -j10 \
      "ix.py -x -h -c '/lustre/home/dc007/hst/bin/clm.sh ${work_dir}/'${i}'.lmh_{#}.txt' 2>${work_dir}/${i}.lmh_{#}.errs"

# The archive members are named relative to the scratch directory, so tar
# must run there; bail out instead of globbing in the wrong directory.
cd "$work_dir" || exit 1
tar -czf "$HOME/results/${i}.lmh.tar.gz" "${i}".lmh_*.{txt,errs}