Mercurial > hg > cc > cirrus_home
changeset 194:1845222b3d73
move DummyContext out
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 25 Sep 2024 17:45:52 +0100 |
parents | fff248a65e39 |
children | 5f3c36e4fd6d |
files | src/nutch-cc/src/test/org/commoncrawl/util/DummyContext.java src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java |
diffstat | 2 files changed, 300 insertions(+), 287 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/nutch-cc/src/test/org/commoncrawl/util/DummyContext.java Wed Sep 25 17:45:52 2024 +0100
@@ -0,0 +1,300 @@
+package org.commoncrawl.util;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configuration.IntegerRanges;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.Counters.Counter;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.security.Credentials;
+
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.crawl.CrawlDbReducer;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
+/** Test stand-in for Reducer.Context: write() collects values in a list; most other accessors return null, 0 or false, or throw. */
+public class DummyContext extends Reducer<Text, CrawlDatum, Text, CrawlDatum>.Context {
+
+  private Configuration conf;
+  final static CrawlDbReducer reducer; // shared enclosing Reducer instance, assigned in the static block below
+  static {
+    reducer = new CrawlDbReducer();
+  }
+
+  public DummyContext() {
+    reducer.super(); // qualified super call: Context is an inner class, so it needs an enclosing Reducer
+    conf = new Configuration();
+  }
+
+  private List<CrawlDatum> values = new ArrayList<CrawlDatum>();
+
+  @Override
+  public void write(Text key, CrawlDatum value) throws IOException, InterruptedException {
+    values.add(value); // the key is not recorded, only the value
+  }
+
+  /** Returns the live (not copied) list of values passed to write(), in call order. */
+  public List<CrawlDatum> getValues() {
+    return values;
+  }
+
+  /** Not supported by this dummy context: always throws UnsupportedOperationException. */
+  @Override
+  public CrawlDatum getCurrentValue() throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Dummy context");
+  }
+
+  /** Not supported by this dummy context: always throws UnsupportedOperationException. */
+  @Override
+  public Text getCurrentKey() throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Dummy context with no keys");
+  }
+
+  private Counters dummyCounters = new Counters();
+  private String status;
+
+  public void progress() {
+  }
+
+  public Counter getCounter(Enum<?> arg0) {
+    return dummyCounters.getGroup("dummy").getCounterForName("dummy"); // arg0 is ignored
+  }
+
+  public Counter getCounter(String arg0, String arg1) {
+    return dummyCounters.getGroup("dummy").getCounterForName("dummy"); // arg0 and arg1 are ignored
+  }
+
+  public void setStatus(String arg0) throws UnsupportedOperationException {
+    // The status is stored instead of rejected; nothing in this class reads it back (getStatus() always throws).
+    status = arg0;
+  }
+
+  @Override
+  public String getStatus() throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Dummy context with no status");
+  }
+
+  public float getProgress() {
+    return 1f;
+  }
+
+  public OutputCommitter getOutputCommitter() {
+    throw new UnsupportedOperationException("Dummy context without committer");
+  }
+
+  public boolean nextKey(){
+    return false;
+  }
+
+  @Override
+  public boolean nextKeyValue(){
+    return false;
+  }
+
+  @Override
+  public TaskAttemptID getTaskAttemptID() throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Dummy context without TaskAttemptID");
+  }
+
+  @Override
+  public Path[] getArchiveClassPaths() {
+    return null;
+  }
+
+  @Override
+  public String[] getArchiveTimestamps() {
+    return null;
+  }
+
+  @Override
+  public URI[] getCacheArchives() throws IOException {
+    return null;
+  }
+
+  @Override
+  public URI[] getCacheFiles() throws IOException {
+    return null;
+  }
+
+  @Override
+  public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+  public RawComparator<?> getCombinerKeyGroupingComparator() {
+    return null;
+  }
+
+  @Override
+  public Configuration getConfiguration() {
+    return conf; // the Configuration created in the constructor
+  }
+
+  @Override
+  public Credentials getCredentials() {
+    return null;
+  }
+
+  @Override
+  public Path[] getFileClassPaths() {
+    return null;
+  }
+
+  @Override
+  public String[] getFileTimestamps() {
+    return null;
+  }
+
+  @Override
+  public RawComparator<?> getGroupingComparator() {
+    return null;
+  }
+
+  @Override
+  public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+  public String getJar() {
+    return null;
+  }
+
+  @Override
+  public JobID getJobID() {
+    return null;
+  }
+
+  @Override
+  public String getJobName() {
+    return null;
+  }
+
+  @Override
+  public boolean getJobSetupCleanupNeeded() {
+    return false;
+  }
+
+  @Override
+  @Deprecated
+  public Path[] getLocalCacheArchives() throws IOException {
+    return null;
+  }
+
+  @Override
+  @Deprecated
+  public Path[] getLocalCacheFiles() throws IOException {
+    return null;
+  }
+
+  @Override
+  public Class<?> getMapOutputKeyClass() {
+    return null;
+  }
+
+  @Override
+  public Class<?> getMapOutputValueClass() {
+    return null;
+  }
+
+  @Override
+  public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+  public int getMaxMapAttempts() {
+    return 0;
+  }
+
+  @Override
+  public int getMaxReduceAttempts() {
+    return 0;
+  }
+
+  @Override
+  public int getNumReduceTasks() {
+    return 0;
+  }
+
+  @Override
+  public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+  public Class<?> getOutputKeyClass() {
+    return null;
+  }
+
+  @Override
+  public Class<?> getOutputValueClass() {
+    return null;
+  }
+
+  @Override
+  public Class<? extends Partitioner<?, ?>> getPartitionerClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+  public boolean getProfileEnabled() {
+    return false;
+  }
+
+  @Override
+  public String getProfileParams() {
+    return null;
+  }
+
+  @Override
+  public IntegerRanges getProfileTaskRange(boolean arg0) {
+    return null;
+  }
+
+  @Override
+  public Class<? extends Reducer<?, ?, ?, ?>> getReducerClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+  public RawComparator<?> getSortComparator() {
+    return null;
+  }
+
+  @Override
+  @Deprecated
+  public boolean getSymlink() {
+    return false;
+  }
+
+  @Override
+  public boolean getTaskCleanupNeeded() {
+    return false;
+  }
+
+  @Override
+  public String getUser() {
+    return null;
+  }
+
+  @Override
+  public Path getWorkingDirectory() throws IOException {
+    return null;
+  }
+
+}
--- a/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java Wed Sep 25 13:52:42 2024 +0100 +++ b/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java Wed Sep 25 17:45:52 2024 +0100 @@ -21,27 +21,13 @@ import static org.junit.Assert.assertTrue; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configuration.IntegerRanges; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.MapWritable; -import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.Counters; -import org.apache.hadoop.mapred.Counters.Counter; -import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.Partitioner; -import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.security.Credentials; import org.apache.nutch.crawl.CrawlDatum; -import org.apache.nutch.crawl.CrawlDbReducer; import org.apache.nutch.util.NutchConfiguration; import org.apache.nutch.util.NutchJob; import org.apache.nutch.metadata.Metadata; @@ -56,286 +42,13 @@ import java.io.OutputStream; import java.io.DataOutputStream; import java.io.DataInputStream; -import java.io.UnsupportedEncodingException; import java.net.URI; -import java.util.ArrayList; import java.util.List; import org.junit.Test; public class TestWarcCdxWriter { - private static CrawlDbReducer reducer = new CrawlDbReducer(); - - private static class DummyContext extends Reducer<Text, CrawlDatum, Text, CrawlDatum>.Context { - - private Configuration conf; - - private DummyContext() { - reducer.super(); - conf = new Configuration(); - } - - private List<CrawlDatum> values = new 
ArrayList<CrawlDatum>(); - - @Override - public void write(Text key, CrawlDatum value) throws IOException, InterruptedException { - values.add(value); - } - - /** collected values as List */ - public List<CrawlDatum> getValues() { - return values; - } - - /** Obtain current collected value from List */ - @Override - public CrawlDatum getCurrentValue() throws UnsupportedOperationException { - throw new UnsupportedOperationException("Dummy context"); - } - - /** Obtain current collected key from List */ - @Override - public Text getCurrentKey() throws UnsupportedOperationException { - throw new UnsupportedOperationException("Dummy context with no keys"); - } - - private Counters dummyCounters = new Counters(); - private String status; - - public void progress() { - } - - public Counter getCounter(Enum<?> arg0) { - return dummyCounters.getGroup("dummy").getCounterForName("dummy"); - } - - public Counter getCounter(String arg0, String arg1) { - return dummyCounters.getGroup("dummy").getCounterForName("dummy"); - } - - public void setStatus(String arg0) throws UnsupportedOperationException { - //throw new UnsupportedOperationException("Dummy context with no status"); - status = arg0; - } - - @Override - public String getStatus() throws UnsupportedOperationException { - throw new UnsupportedOperationException("Dummy context with no status"); - } - - public float getProgress() { - return 1f; - } - - public OutputCommitter getOutputCommitter() { - throw new UnsupportedOperationException("Dummy context without committer"); - } - - public boolean nextKey(){ - return false; - } - - @Override - public boolean nextKeyValue(){ - return false; - } - - @Override - public TaskAttemptID getTaskAttemptID() throws UnsupportedOperationException { - throw new UnsupportedOperationException("Dummy context without TaskAttemptID"); - } - - @Override - public Path[] getArchiveClassPaths() { - return null; - } - - @Override - public String[] getArchiveTimestamps() { - return null; - } - - 
@Override - public URI[] getCacheArchives() throws IOException { - return null; - } - - @Override - public URI[] getCacheFiles() throws IOException { - return null; - } - - @Override - public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass() throws ClassNotFoundException { - return null; - } - - @Override - public RawComparator<?> getCombinerKeyGroupingComparator() { - return null; - } - - @Override - public Configuration getConfiguration() { - return conf; - } - - @Override - public Credentials getCredentials() { - return null; - } - - @Override - public Path[] getFileClassPaths() { - return null; - } - - @Override - public String[] getFileTimestamps() { - return null; - } - - @Override - public RawComparator<?> getGroupingComparator() { - return null; - } - - @Override - public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException { - return null; - } - - @Override - public String getJar() { - return null; - } - - @Override - public JobID getJobID() { - return null; - } - - @Override - public String getJobName() { - return null; - } - - @Override - public boolean getJobSetupCleanupNeeded() { - return false; - } - - @Override - @Deprecated - public Path[] getLocalCacheArchives() throws IOException { - return null; - } - - @Override - @Deprecated - public Path[] getLocalCacheFiles() throws IOException { - return null; - } - - @Override - public Class<?> getMapOutputKeyClass() { - return null; - } - - @Override - public Class<?> getMapOutputValueClass() { - return null; - } - - @Override - public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException { - return null; - } - - @Override - public int getMaxMapAttempts() { - return 0; - } - - @Override - public int getMaxReduceAttempts() { - return 0; - } - - @Override - public int getNumReduceTasks() { - return 0; - } - - @Override - public Class<? 
extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException { - return null; - } - - @Override - public Class<?> getOutputKeyClass() { - return null; - } - - @Override - public Class<?> getOutputValueClass() { - return null; - } - - @Override - public Class<? extends Partitioner<?, ?>> getPartitionerClass() throws ClassNotFoundException { - return null; - } - - @Override - public boolean getProfileEnabled() { - return false; - } - - @Override - public String getProfileParams() { - return null; - } - - @Override - public IntegerRanges getProfileTaskRange(boolean arg0) { - return null; - } - - @Override - public Class<? extends Reducer<?, ?, ?, ?>> getReducerClass() throws ClassNotFoundException { - return null; - } - - @Override - public RawComparator<?> getSortComparator() { - return null; - } - - @Override - @Deprecated - public boolean getSymlink() { - return false; - } - - @Override - public boolean getTaskCleanupNeeded() { - return false; - } - - @Override - public String getUser() { - return null; - } - - @Override - public Path getWorkingDirectory() throws IOException { - return null; - } - - } - public final static String statusLine1 = "HTTP/1.1 200 OK"; public final static String testHeaders1[] = { // "Content-Type", "text/html", //