Mercurial > hg > cc > cirrus_home
changeset 188:0c5422df3a67
runs, but no cdx yet, because no value.content I presume
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 23 Sep 2024 19:18:36 +0100 |
parents | 9805323d9969 |
children | 1cc12a5a070b |
files | src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java |
diffstat | 1 files changed, 6 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java	Mon Sep 23 16:35:22 2024 +0100
+++ b/src/nutch-cc/src/test/org/commoncrawl/util/TestWarcCdxWriter.java	Mon Sep 23 19:18:36 2024 +0100
@@ -47,6 +47,7 @@
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.io.DataOutputStream;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.List;
@@ -354,13 +355,13 @@
     Job job = NutchJob.getInstance(config);
     TaskAttemptContext context = new DummyContext();
     config = job.getConfiguration();
-    OutputStream devnull = OutputStream.nullOutputStream();
     config.setBoolean("warc.export.cdx", true);
     WarcRecordWriter wrw = new WarcRecordWriter(config, new Path("/tmp"),
-                                                123, context);
-
+                                                123, context);
+    System.err.print("testing...");
+    //DataOutputStream devnull = new DataOutputStream(OutputStream.nullOutputStream());
     //ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    //WarcCdxWriter cdx = new WarcCdxWriter(devnull, baos, new Path("/tmp"));
-
+    wrw.write(new Text("someKey"),new WarcCapture());
+    System.err.println("done");
   }
 }
```