changeset 189:1cc12a5a070b

compiles with content, but fails with EOF -- need blank lines?
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 24 Sep 2024 12:34:51 +0100
parents 0c5422df3a67
children f2bf736c2d40
files src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java
diffstat 1 files changed, 17 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java	Mon Sep 23 19:18:36 2024 +0100
+++ b/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java	Tue Sep 24 12:34:51 2024 +0100
@@ -43,11 +43,15 @@
 import org.apache.nutch.crawl.CrawlDbReducer;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.protocol.Content;
 
 import java.io.ByteArrayOutputStream;
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.DataOutputStream;
+import java.io.DataInputStream;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.List;
@@ -361,7 +365,19 @@
     System.err.print("testing...");
     //DataOutputStream devnull = new DataOutputStream(OutputStream.nullOutputStream());
     //ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    wrw.write(new Text("someKey"),new WarcCapture());
+    Metadata meta = new Metadata();
+    meta.readFields(new DataInputStream(
+		    new ByteArrayInputStream(testHeaderString1.getBytes())));
+    wrw.write(new Text("someKey"),
+	      new WarcCapture(new Text("https://www.w3.org/1999/xhtml"),
+			      new CrawlDatum(CrawlDatum.STATUS_FETCH_SUCCESS,
+					     100),
+			      new Content("https://www.w3.org/1999/xhtml",
+					  "https://www.w3.org/1999/xhtml",
+					  new byte[0],
+					  "text/xml",
+					  meta,
+					  config)));
     System.err.println("done");
   }
 }