# HG changeset patch # User Henry S. Thompson # Date 1727177691 -3600 # Node ID 1cc12a5a070b720f2bdd70d99274a0016c1455ab # Parent 0c5422df3a67ba3f6d1ce48f225d0fa74b97b96b compiles with content, but fails with EOF -- need blank lines? diff -r 0c5422df3a67 -r 1cc12a5a070b src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java --- a/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java Mon Sep 23 19:18:36 2024 +0100 +++ b/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java Tue Sep 24 12:34:51 2024 +0100 @@ -43,11 +43,15 @@ import org.apache.nutch.crawl.CrawlDbReducer; import org.apache.nutch.util.NutchConfiguration; import org.apache.nutch.util.NutchJob; +import org.apache.nutch.metadata.Metadata; +import org.apache.nutch.protocol.Content; import java.io.ByteArrayOutputStream; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.OutputStream; import java.io.DataOutputStream; +import java.io.DataInputStream; import java.net.URI; import java.util.ArrayList; import java.util.List; @@ -361,7 +365,19 @@ System.err.print("testing..."); //DataOutputStream devnull = new DataOutputStream(OutputStream.nullOutputStream()); //ByteArrayOutputStream baos = new ByteArrayOutputStream(); - wrw.write(new Text("someKey"),new WarcCapture()); + Metadata meta = new Metadata(); + meta.readFields(new DataInputStream( + new ByteArrayInputStream(testHeaderString1.getBytes()))); + wrw.write(new Text("someKey"), + new WarcCapture(new Text("https://www.w3.org/1999/xhtml"), + new CrawlDatum(CrawlDatum.STATUS_FETCH_SUCCESS, + 100), + new Content("https://www.w3.org/1999/xhtml", + "https://www.w3.org/1999/xhtml", + new byte[0], + "text/xml", + meta, + config))); System.err.println("done"); } }