changeset 194:1845222b3d73

move DummyContext out
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 25 Sep 2024 17:45:52 +0100
parents fff248a65e39
children 5f3c36e4fd6d
files src/nutch-cc/src/test/org/commoncrawl/util/DummyContext.java src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java
diffstat 2 files changed, 300 insertions(+), 287 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/nutch-cc/src/test/org/commoncrawl/util/DummyContext.java	Wed Sep 25 17:45:52 2024 +0100
@@ -0,0 +1,300 @@
+package org.commoncrawl.util;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configuration.IntegerRanges;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.Counters.Counter;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.security.Credentials;
+
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.crawl.CrawlDbReducer;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
+
+public class DummyContext extends Reducer<Text, CrawlDatum, Text, CrawlDatum>.Context {
+
+  private Configuration conf;
+  final static CrawlDbReducer reducer;
+  static {
+    reducer = new CrawlDbReducer();
+  }
+
+  public DummyContext() {
+    reducer.super();
+    conf = new Configuration();
+  }
+
+  private List<CrawlDatum> values = new ArrayList<CrawlDatum>();
+
+  @Override
+    public void write(Text key, CrawlDatum value) throws IOException, InterruptedException {
+    values.add(value);
+  }
+
+  /** Returns the live list of values collected by {@code write}, in call order. */
+  public List<CrawlDatum> getValues() {
+    return values;
+  }
+
+  /** Always throws: this dummy context has no current value to return. */
+  @Override
+    public CrawlDatum getCurrentValue() throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Dummy context");
+  }
+
+  /** Always throws: this dummy context has no current key to return. */
+  @Override
+    public Text getCurrentKey() throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Dummy context with no keys");
+  }
+
+  private Counters dummyCounters = new Counters();
+  private String status;
+
+  public void progress() {
+  }
+
+  public Counter getCounter(Enum<?> arg0) {
+    return dummyCounters.getGroup("dummy").getCounterForName("dummy");
+  }
+
+  public Counter getCounter(String arg0, String arg1) {
+    return dummyCounters.getGroup("dummy").getCounterForName("dummy");
+  }
+
+  public void setStatus(String arg0) throws UnsupportedOperationException {
+    // Accept and store the status rather than throwing; getStatus() still throws.
+    status = arg0;
+  }
+
+  @Override
+    public String getStatus() throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Dummy context with no status");
+  }
+
+  public float getProgress() {
+    return 1f;
+  }
+
+  public OutputCommitter getOutputCommitter() {
+    throw new UnsupportedOperationException("Dummy context without committer");
+  }
+
+  public boolean nextKey(){
+    return false;
+  }
+
+  @Override
+    public boolean nextKeyValue(){
+    return false;
+  }
+
+  @Override
+    public TaskAttemptID getTaskAttemptID() throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Dummy context without TaskAttemptID");
+  }
+
+  @Override
+    public Path[] getArchiveClassPaths() {
+    return null;
+  }
+
+  @Override
+    public String[] getArchiveTimestamps() {
+    return null;
+  }
+
+  @Override
+    public URI[] getCacheArchives() throws IOException {
+    return null;
+  }
+
+  @Override
+    public URI[] getCacheFiles() throws IOException {
+    return null;
+  }
+
+  @Override
+    public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+    public RawComparator<?> getCombinerKeyGroupingComparator() {
+    return null;
+  }
+
+  @Override
+    public Configuration getConfiguration() {
+    return conf;
+  }
+
+  @Override
+    public Credentials getCredentials() {
+    return null;
+  }
+
+  @Override
+    public Path[] getFileClassPaths() {
+    return null;
+  }
+
+  @Override
+    public String[] getFileTimestamps() {
+    return null;
+  }
+
+  @Override
+    public RawComparator<?> getGroupingComparator() {
+    return null;
+  }
+
+  @Override
+    public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+    public String getJar() {
+    return null;
+  }
+
+  @Override
+    public JobID getJobID() {
+    return null;
+  }
+
+  @Override
+    public String getJobName() {
+    return null;
+  }
+
+  @Override
+    public boolean getJobSetupCleanupNeeded() {
+    return false;
+  }
+
+  @Override
+    @Deprecated
+    public Path[] getLocalCacheArchives() throws IOException {
+    return null;
+  }
+
+  @Override
+    @Deprecated
+    public Path[] getLocalCacheFiles() throws IOException {
+    return null;
+  }
+
+  @Override
+    public Class<?> getMapOutputKeyClass() {
+    return null;
+  }
+
+  @Override
+    public Class<?> getMapOutputValueClass() {
+    return null;
+  }
+
+  @Override
+    public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+    public int getMaxMapAttempts() {
+    return 0;
+  }
+
+  @Override
+    public int getMaxReduceAttempts() {
+    return 0;
+  }
+
+  @Override
+    public int getNumReduceTasks() {
+    return 0;
+  }
+
+  @Override
+    public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+    public Class<?> getOutputKeyClass() {
+    return null;
+  }
+
+  @Override
+    public Class<?> getOutputValueClass() {
+    return null;
+  }
+
+  @Override
+    public Class<? extends Partitioner<?, ?>> getPartitionerClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+    public boolean getProfileEnabled() {
+    return false;
+  }
+
+  @Override
+    public String getProfileParams() {
+    return null;
+  }
+
+  @Override
+    public IntegerRanges getProfileTaskRange(boolean arg0) {
+    return null;
+  }
+
+  @Override
+    public Class<? extends Reducer<?, ?, ?, ?>> getReducerClass() throws ClassNotFoundException {
+    return null;
+  }
+
+  @Override
+    public RawComparator<?> getSortComparator() {
+    return null;
+  }
+
+  @Override
+    @Deprecated
+    public boolean getSymlink() {
+    return false;
+  }
+
+  @Override
+    public boolean getTaskCleanupNeeded() {
+    return false;
+  }
+
+  @Override
+    public String getUser() {
+    return null;
+  }
+
+  @Override
+    public Path getWorkingDirectory() throws IOException {
+    return null;
+  }
+
+}
--- a/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java	Wed Sep 25 13:52:42 2024 +0100
+++ b/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java	Wed Sep 25 17:45:52 2024 +0100
@@ -21,27 +21,13 @@
 import static org.junit.Assert.assertTrue;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configuration.IntegerRanges;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.MapWritable;
-import org.apache.hadoop.io.RawComparator;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.Counters;
-import org.apache.hadoop.mapred.Counters.Counter;
-import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobID;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.Partitioner;
-import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.security.Credentials;
 
 import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.crawl.CrawlDbReducer;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
 import org.apache.nutch.metadata.Metadata;
@@ -56,286 +42,13 @@
 import java.io.OutputStream;
 import java.io.DataOutputStream;
 import java.io.DataInputStream;
-import java.io.UnsupportedEncodingException;
 import java.net.URI;
-import java.util.ArrayList;
 import java.util.List;
 
 import org.junit.Test;
 
 public class TestWarcCdxWriter {
 
-  private static CrawlDbReducer reducer = new CrawlDbReducer();
-
-  private static class DummyContext extends Reducer<Text, CrawlDatum, Text, CrawlDatum>.Context {
-
-    private Configuration conf;
-
-    private DummyContext() {
-      reducer.super();
-      conf = new Configuration();
-    }
-
-    private List<CrawlDatum> values = new ArrayList<CrawlDatum>();
-
-    @Override
-    public void write(Text key, CrawlDatum value) throws IOException, InterruptedException {
-      values.add(value);
-    }
-
-    /** collected values as List */
-    public List<CrawlDatum> getValues() {
-      return values;
-    }
-
-    /** Obtain current collected value from List */
-    @Override
-    public CrawlDatum getCurrentValue() throws UnsupportedOperationException {
-      throw new UnsupportedOperationException("Dummy context");
-    }
-
-    /** Obtain current collected key from List */
-    @Override
-    public Text getCurrentKey() throws UnsupportedOperationException {
-      throw new UnsupportedOperationException("Dummy context with no keys");
-    }
-
-    private Counters dummyCounters = new Counters();
-    private String status;
-
-    public void progress() {
-    }
-
-    public Counter getCounter(Enum<?> arg0) {
-      return dummyCounters.getGroup("dummy").getCounterForName("dummy");
-    }
-
-    public Counter getCounter(String arg0, String arg1) {
-      return dummyCounters.getGroup("dummy").getCounterForName("dummy");
-    }
-
-    public void setStatus(String arg0) throws UnsupportedOperationException {
-      //throw new UnsupportedOperationException("Dummy context with no status");
-      status = arg0;
-    }
-
-    @Override
-    public String getStatus() throws UnsupportedOperationException {
-      throw new UnsupportedOperationException("Dummy context with no status");
-    }
-
-    public float getProgress() {
-      return 1f;
-    }
-
-    public OutputCommitter getOutputCommitter() {
-      throw new UnsupportedOperationException("Dummy context without committer");
-    }
-
-    public boolean nextKey(){
-      return false;
-    }
-
-    @Override
-    public boolean nextKeyValue(){
-      return false;
-    }
-
-    @Override
-    public TaskAttemptID getTaskAttemptID() throws UnsupportedOperationException {
-      throw new UnsupportedOperationException("Dummy context without TaskAttemptID");
-    }
-
-    @Override
-    public Path[] getArchiveClassPaths() {
-      return null;
-    }
-
-    @Override
-    public String[] getArchiveTimestamps() {
-      return null;
-    }
-
-    @Override
-    public URI[] getCacheArchives() throws IOException {
-      return null;
-    }
-
-    @Override
-    public URI[] getCacheFiles() throws IOException {
-      return null;
-    }
-
-    @Override
-    public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass() throws ClassNotFoundException {
-      return null;
-    }
-
-    @Override
-    public RawComparator<?> getCombinerKeyGroupingComparator() {
-      return null;
-    }
-  
-    @Override
-    public Configuration getConfiguration() {
-      return conf;
-    }
-
-    @Override
-    public Credentials getCredentials() {
-      return null;
-    }
-
-    @Override
-    public Path[] getFileClassPaths() {
-      return null;
-    }
-
-    @Override
-    public String[] getFileTimestamps() {
-      return null;
-    }
-
-    @Override
-    public RawComparator<?> getGroupingComparator() {
-      return null;
-    }
-
-    @Override
-    public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException {
-      return null;
-    }
-
-    @Override
-    public String getJar() {
-      return null;
-    }
-
-    @Override
-    public JobID getJobID() {
-      return null;
-    }
-
-    @Override
-    public String getJobName() {
-      return null;
-    }
-
-    @Override
-    public boolean getJobSetupCleanupNeeded() {
-      return false;
-    }
-
-    @Override
-    @Deprecated
-    public Path[] getLocalCacheArchives() throws IOException {
-      return null;
-    }
-
-    @Override
-    @Deprecated
-    public Path[] getLocalCacheFiles() throws IOException {
-      return null;
-    }
-
-    @Override
-    public Class<?> getMapOutputKeyClass() {
-      return null;
-    }
-
-    @Override
-    public Class<?> getMapOutputValueClass() {
-      return null;
-    }
-
-    @Override
-    public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException {
-      return null;
-    }
-
-    @Override
-    public int getMaxMapAttempts() {
-      return 0;
-    }
-
-    @Override
-    public int getMaxReduceAttempts() {
-      return 0;
-    }
-
-    @Override
-    public int getNumReduceTasks() {
-      return 0;
-    }
-
-    @Override
-    public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException {
-      return null;
-    }
-
-    @Override
-    public Class<?> getOutputKeyClass() {
-      return null;
-    }
-
-    @Override
-    public Class<?> getOutputValueClass() {
-      return null;
-    }
-
-    @Override
-    public Class<? extends Partitioner<?, ?>> getPartitionerClass() throws ClassNotFoundException {
-      return null;
-    }
-    
-    @Override
-    public boolean getProfileEnabled() {
-      return false;
-    }
-
-    @Override
-    public String getProfileParams() {
-      return null;
-    }
-
-    @Override
-    public IntegerRanges getProfileTaskRange(boolean arg0) {
-      return null;
-    }
-
-    @Override
-    public Class<? extends Reducer<?, ?, ?, ?>> getReducerClass() throws ClassNotFoundException {
-      return null;
-    }
-
-    @Override
-    public RawComparator<?> getSortComparator() {
-      return null;
-    }
-
-    @Override
-    @Deprecated
-    public boolean getSymlink() {
-      return false;
-    }
-
-    @Override
-    public boolean getTaskCleanupNeeded() {
-      return false;
-    }
-
-   @Override
-    public String getUser() {
-      return null;
-    }
-
-    @Override
-    public Path getWorkingDirectory() throws IOException {
-      return null;
-    }
-
-  }
-
   public final static String statusLine1 = "HTTP/1.1 200 OK";
   public final static String testHeaders1[] = { //
       "Content-Type", "text/html", //