# HG changeset patch # User Henry S. Thompson # Date 1727282752 -3600 # Node ID 1845222b3d7386c80d9c391d6fa94859475189a1 # Parent fff248a65e39046383c5e0d82170d1ca670f43d1 move DummyContext out diff -r fff248a65e39 -r 1845222b3d73 src/nutch-cc/src/test/org/commoncrawl/util/DummyContext.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/nutch-cc/src/test/org/commoncrawl/util/DummyContext.java Wed Sep 25 17:45:52 2024 +0100 @@ -0,0 +1,300 @@ +package org.commoncrawl.util; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configuration.IntegerRanges; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.RawComparator; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.Counters; +import org.apache.hadoop.mapred.Counters.Counter; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.Partitioner; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.security.Credentials; + +import org.apache.nutch.crawl.CrawlDatum; +import org.apache.nutch.crawl.CrawlDbReducer; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.util.ArrayList; +import java.util.List; + +public class DummyContext extends Reducer.Context { + + private Configuration conf; + final static CrawlDbReducer reducer; + static { + reducer = new CrawlDbReducer(); + } + + public DummyContext() { + reducer.super(); + conf = new Configuration(); + } + + private List values = new ArrayList(); + + @Override + public void write(Text key, CrawlDatum value) throws IOException, InterruptedException { + values.add(value); + } + + /** collected values as List */ + public List getValues() { + return values; + } + + /** 
Not supported by this dummy context: always throws UnsupportedOperationException */ + @Override + public CrawlDatum getCurrentValue() throws UnsupportedOperationException { + throw new UnsupportedOperationException("Dummy context"); + } + + /** Not supported by this dummy context (it holds no keys): always throws UnsupportedOperationException */ + @Override + public Text getCurrentKey() throws UnsupportedOperationException { + throw new UnsupportedOperationException("Dummy context with no keys"); + } + + private Counters dummyCounters = new Counters(); + private String status; + + public void progress() { + } + + public Counter getCounter(Enum arg0) { + return dummyCounters.getGroup("dummy").getCounterForName("dummy"); + } + + public Counter getCounter(String arg0, String arg1) { + return dummyCounters.getGroup("dummy").getCounterForName("dummy"); + } + + public void setStatus(String arg0) throws UnsupportedOperationException { + //throw new UnsupportedOperationException("Dummy context with no status"); + status = arg0; + } + + @Override + public String getStatus() throws UnsupportedOperationException { + throw new UnsupportedOperationException("Dummy context with no status"); + } + + public float getProgress() { + return 1f; + } + + public OutputCommitter getOutputCommitter() { + throw new UnsupportedOperationException("Dummy context without committer"); + } + + public boolean nextKey(){ + return false; + } + + @Override + public boolean nextKeyValue(){ + return false; + } + + @Override + public TaskAttemptID getTaskAttemptID() throws UnsupportedOperationException { + throw new UnsupportedOperationException("Dummy context without TaskAttemptID"); + } + + @Override + public Path[] getArchiveClassPaths() { + return null; + } + + @Override + public String[] getArchiveTimestamps() { + return null; + } + + @Override + public URI[] getCacheArchives() throws IOException { + return null; + } + + @Override + public URI[] getCacheFiles() throws IOException { + return null; + } + + @Override + public Class> getCombinerClass() throws ClassNotFoundException { + return null; + 
} + + @Override + public RawComparator getCombinerKeyGroupingComparator() { + return null; + } + + @Override + public Configuration getConfiguration() { + return conf; + } + + @Override + public Credentials getCredentials() { + return null; + } + + @Override + public Path[] getFileClassPaths() { + return null; + } + + @Override + public String[] getFileTimestamps() { + return null; + } + + @Override + public RawComparator getGroupingComparator() { + return null; + } + + @Override + public Class> getInputFormatClass() throws ClassNotFoundException { + return null; + } + + @Override + public String getJar() { + return null; + } + + @Override + public JobID getJobID() { + return null; + } + + @Override + public String getJobName() { + return null; + } + + @Override + public boolean getJobSetupCleanupNeeded() { + return false; + } + + @Override + @Deprecated + public Path[] getLocalCacheArchives() throws IOException { + return null; + } + + @Override + @Deprecated + public Path[] getLocalCacheFiles() throws IOException { + return null; + } + + @Override + public Class getMapOutputKeyClass() { + return null; + } + + @Override + public Class getMapOutputValueClass() { + return null; + } + + @Override + public Class> getMapperClass() throws ClassNotFoundException { + return null; + } + + @Override + public int getMaxMapAttempts() { + return 0; + } + + @Override + public int getMaxReduceAttempts() { + return 0; + } + + @Override + public int getNumReduceTasks() { + return 0; + } + + @Override + public Class> getOutputFormatClass() throws ClassNotFoundException { + return null; + } + + @Override + public Class getOutputKeyClass() { + return null; + } + + @Override + public Class getOutputValueClass() { + return null; + } + + @Override + public Class> getPartitionerClass() throws ClassNotFoundException { + return null; + } + + @Override + public boolean getProfileEnabled() { + return false; + } + + @Override + public String getProfileParams() { + return null; + } + + 
@Override + public IntegerRanges getProfileTaskRange(boolean arg0) { + return null; + } + + @Override + public Class> getReducerClass() throws ClassNotFoundException { + return null; + } + + @Override + public RawComparator getSortComparator() { + return null; + } + + @Override + @Deprecated + public boolean getSymlink() { + return false; + } + + @Override + public boolean getTaskCleanupNeeded() { + return false; + } + + @Override + public String getUser() { + return null; + } + + @Override + public Path getWorkingDirectory() throws IOException { + return null; + } + +} diff -r fff248a65e39 -r 1845222b3d73 src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java --- a/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java Wed Sep 25 13:52:42 2024 +0100 +++ b/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java Wed Sep 25 17:45:52 2024 +0100 @@ -21,27 +21,13 @@ import static org.junit.Assert.assertTrue; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configuration.IntegerRanges; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.MapWritable; -import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.Counters; -import org.apache.hadoop.mapred.Counters.Counter; -import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.Partitioner; -import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.security.Credentials; import org.apache.nutch.crawl.CrawlDatum; -import org.apache.nutch.crawl.CrawlDbReducer; import org.apache.nutch.util.NutchConfiguration; import 
org.apache.nutch.util.NutchJob; import org.apache.nutch.metadata.Metadata; @@ -56,286 +42,13 @@ import java.io.OutputStream; import java.io.DataOutputStream; import java.io.DataInputStream; -import java.io.UnsupportedEncodingException; import java.net.URI; -import java.util.ArrayList; import java.util.List; import org.junit.Test; public class TestWarcCdxWriter { - private static CrawlDbReducer reducer = new CrawlDbReducer(); - - private static class DummyContext extends Reducer.Context { - - private Configuration conf; - - private DummyContext() { - reducer.super(); - conf = new Configuration(); - } - - private List values = new ArrayList(); - - @Override - public void write(Text key, CrawlDatum value) throws IOException, InterruptedException { - values.add(value); - } - - /** collected values as List */ - public List getValues() { - return values; - } - - /** Obtain current collected value from List */ - @Override - public CrawlDatum getCurrentValue() throws UnsupportedOperationException { - throw new UnsupportedOperationException("Dummy context"); - } - - /** Obtain current collected key from List */ - @Override - public Text getCurrentKey() throws UnsupportedOperationException { - throw new UnsupportedOperationException("Dummy context with no keys"); - } - - private Counters dummyCounters = new Counters(); - private String status; - - public void progress() { - } - - public Counter getCounter(Enum arg0) { - return dummyCounters.getGroup("dummy").getCounterForName("dummy"); - } - - public Counter getCounter(String arg0, String arg1) { - return dummyCounters.getGroup("dummy").getCounterForName("dummy"); - } - - public void setStatus(String arg0) throws UnsupportedOperationException { - //throw new UnsupportedOperationException("Dummy context with no status"); - status = arg0; - } - - @Override - public String getStatus() throws UnsupportedOperationException { - throw new UnsupportedOperationException("Dummy context with no status"); - } - - public float 
getProgress() { - return 1f; - } - - public OutputCommitter getOutputCommitter() { - throw new UnsupportedOperationException("Dummy context without committer"); - } - - public boolean nextKey(){ - return false; - } - - @Override - public boolean nextKeyValue(){ - return false; - } - - @Override - public TaskAttemptID getTaskAttemptID() throws UnsupportedOperationException { - throw new UnsupportedOperationException("Dummy context without TaskAttemptID"); - } - - @Override - public Path[] getArchiveClassPaths() { - return null; - } - - @Override - public String[] getArchiveTimestamps() { - return null; - } - - @Override - public URI[] getCacheArchives() throws IOException { - return null; - } - - @Override - public URI[] getCacheFiles() throws IOException { - return null; - } - - @Override - public Class> getCombinerClass() throws ClassNotFoundException { - return null; - } - - @Override - public RawComparator getCombinerKeyGroupingComparator() { - return null; - } - - @Override - public Configuration getConfiguration() { - return conf; - } - - @Override - public Credentials getCredentials() { - return null; - } - - @Override - public Path[] getFileClassPaths() { - return null; - } - - @Override - public String[] getFileTimestamps() { - return null; - } - - @Override - public RawComparator getGroupingComparator() { - return null; - } - - @Override - public Class> getInputFormatClass() throws ClassNotFoundException { - return null; - } - - @Override - public String getJar() { - return null; - } - - @Override - public JobID getJobID() { - return null; - } - - @Override - public String getJobName() { - return null; - } - - @Override - public boolean getJobSetupCleanupNeeded() { - return false; - } - - @Override - @Deprecated - public Path[] getLocalCacheArchives() throws IOException { - return null; - } - - @Override - @Deprecated - public Path[] getLocalCacheFiles() throws IOException { - return null; - } - - @Override - public Class getMapOutputKeyClass() { - return 
null; - } - - @Override - public Class getMapOutputValueClass() { - return null; - } - - @Override - public Class> getMapperClass() throws ClassNotFoundException { - return null; - } - - @Override - public int getMaxMapAttempts() { - return 0; - } - - @Override - public int getMaxReduceAttempts() { - return 0; - } - - @Override - public int getNumReduceTasks() { - return 0; - } - - @Override - public Class> getOutputFormatClass() throws ClassNotFoundException { - return null; - } - - @Override - public Class getOutputKeyClass() { - return null; - } - - @Override - public Class getOutputValueClass() { - return null; - } - - @Override - public Class> getPartitionerClass() throws ClassNotFoundException { - return null; - } - - @Override - public boolean getProfileEnabled() { - return false; - } - - @Override - public String getProfileParams() { - return null; - } - - @Override - public IntegerRanges getProfileTaskRange(boolean arg0) { - return null; - } - - @Override - public Class> getReducerClass() throws ClassNotFoundException { - return null; - } - - @Override - public RawComparator getSortComparator() { - return null; - } - - @Override - @Deprecated - public boolean getSymlink() { - return false; - } - - @Override - public boolean getTaskCleanupNeeded() { - return false; - } - - @Override - public String getUser() { - return null; - } - - @Override - public Path getWorkingDirectory() throws IOException { - return null; - } - - } - public final static String statusLine1 = "HTTP/1.1 200 OK"; public final static String testHeaders1[] = { // "Content-Type", "text/html", //