comparison master/wecu/run_hadoop_equivalent.sh @ 57:ac1a20e627a9

from lukasz git repo 2020-05-26 (see ~/src/wecu), then editted, sac not quite working yet
author Henry S. Thompson <ht@markup.co.uk>
date Wed, 27 May 2020 20:54:34 +0000
parents
children
comparison
equal deleted inserted replaced
56:8ce6a81e2bb4 57:ac1a20e627a9
1 # This file can be used to run MapReduce jobs using Hadoop in order to compare performance with wecu
2
3 hadoop fs -rm -r /output
4
5 yarn jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-streaming.jar \
6 -D fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider \
7 -files mapper.py,reducer.py \
8 -input s3a://commoncrawl/crawl-data/CC-MAIN-2019-35/segments/1566027312025.20/warc/CC-MAIN-20190817203056-20190817225056-000* \
9 -input s3a://commoncrawl/crawl-data/CC-MAIN-2019-35/segments/1566027312025.20/warc/CC-MAIN-20190817203056-20190817225056-001* \
10 -input s3a://commoncrawl/crawl-data/CC-MAIN-2019-35/segments/1566027312025.20/warc/CC-MAIN-20190817203056-20190817225056-002* \
11 -input s3a://commoncrawl/crawl-data/CC-MAIN-2019-35/segments/1566027312025.20/warc/CC-MAIN-20190817203056-20190817225056-003* \
12 -input s3a://commoncrawl/crawl-data/CC-MAIN-2019-35/segments/1566027312025.20/warc/CC-MAIN-20190817203056-20190817225056-004* \
13 -input s3a://commoncrawl/crawl-data/CC-MAIN-2019-35/segments/1566027312025.20/warc/CC-MAIN-20190817203056-20190817225056-005* \
14 -input s3a://commoncrawl/crawl-data/CC-MAIN-2019-35/segments/1566027312128.3/warc/CC-MAIN-20190817102624-20190817124624-000* \
15 -input s3a://commoncrawl/crawl-data/CC-MAIN-2019-35/segments/1566027312128.3/warc/CC-MAIN-20190817102624-20190817124624-001* \
16 -input s3a://commoncrawl/crawl-data/CC-MAIN-2019-35/segments/1566027312128.3/warc/CC-MAIN-20190817102624-20190817124624-002* \
17 -input s3a://commoncrawl/crawl-data/CC-MAIN-2019-35/segments/1566027312128.3/warc/CC-MAIN-20190817102624-20190817124624-003* \
18 -output /output \
19 -mapper mapper.py \
20 -reducer reducer.py