changeset 188:0c5422df3a67

Test now runs, but produces no CDX output yet, presumably because there is no value.content
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 23 Sep 2024 19:18:36 +0100
parents 9805323d9969
children 1cc12a5a070b
files src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java
diffstat 1 files changed, 6 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java	Mon Sep 23 16:35:22 2024 +0100
+++ b/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java	Mon Sep 23 19:18:36 2024 +0100
@@ -47,6 +47,7 @@
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.io.DataOutputStream;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.List;
@@ -354,13 +355,13 @@
     Job job = NutchJob.getInstance(config);
     TaskAttemptContext context = new DummyContext();
     config = job.getConfiguration();
-    OutputStream devnull = OutputStream.nullOutputStream();
     config.setBoolean("warc.export.cdx", true);
     WarcRecordWriter wrw = new WarcRecordWriter(config, new Path("/tmp"),
-						123, context); 
-
+						123, context);
+    System.err.print("testing...");
+    //DataOutputStream devnull = new DataOutputStream(OutputStream.nullOutputStream());
     //ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    //WarcCdxWriter cdx = new WarcCdxWriter(devnull, baos, new Path("/tmp"));
-    
+    wrw.write(new Text("someKey"),new WarcCapture());
+    System.err.println("done");
   }
 }