Mercurial > hg > cc > cirrus_home
changeset 189:1cc12a5a070b
compiles with content, but fails with EOF -- need blank lines?
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 24 Sep 2024 12:34:51 +0100 |
parents | 0c5422df3a67 |
children | f2bf736c2d40 |
files | src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java |
diffstat | 1 files changed, 17 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java Mon Sep 23 19:18:36 2024 +0100
+++ b/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java Tue Sep 24 12:34:51 2024 +0100
@@ -43,11 +43,15 @@
 import org.apache.nutch.crawl.CrawlDbReducer;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.protocol.Content;
 
 import java.io.ByteArrayOutputStream;
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.DataOutputStream;
+import java.io.DataInputStream;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.List;
@@ -361,7 +365,19 @@
     System.err.print("testing...");
     //DataOutputStream devnull = new DataOutputStream(OutputStream.nullOutputStream());
     //ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    wrw.write(new Text("someKey"),new WarcCapture());
+    Metadata meta = new Metadata();
+    meta.readFields(new DataInputStream(
+        new ByteArrayInputStream(testHeaderString1.getBytes())));
+    wrw.write(new Text("someKey"),
+        new WarcCapture(new Text("https://www.w3.org/1999/xhtml"),
+            new CrawlDatum(CrawlDatum.STATUS_FETCH_SUCCESS,
+                100),
+            new Content("https://www.w3.org/1999/xhtml",
+                "https://www.w3.org/1999/xhtml",
+                new byte[0],
+                "text/xml",
+                meta,
+                config)));
     System.err.println("done");
   }
 }
```