# HG changeset patch # User Henry S. Thompson # Date 1588771413 -3600 # Node ID b7daa4f8767c5d31bc27415ae2f119727e454683 # Parent 0780445a0840a523345b62055c04b3333cf78696 works for big files with Hadoop 3.4.0 diff -r 0780445a0840 -r b7daa4f8767c src/nutch-cc/conf/nutch-site.xml --- a/src/nutch-cc/conf/nutch-site.xml Wed May 06 14:22:48 2020 +0100 +++ b/src/nutch-cc/conf/nutch-site.xml Wed May 06 14:23:33 2020 +0100 @@ -5,14 +5,14 @@ - http.content.limit500000000 + http.content.limit-1 http.store.responsetimetrue store.ip.addresstrue store.http.requesttrue store.http.headerstrue http.accept.languageen-US,en;q=0.5 http.accept.charset - http.time.limit600 + http.time.limit1200 http.timeout45000 http.redirect.max3 http.redirect.max.skipfalse @@ -32,6 +32,18 @@ fetcher.signaturetrue fetcher.redirect.dedupcache.seconds5400 fetcher.redirect.dedupcache.size6000 + fetcher.threads.timeout.divisor1 + mapreduce.fileoutputcommitter.marksuccessfuljobs true + mapreduce.task.timeout1200000 + + plugin.includesprotocol-okhttp