changeset 75:1c5dab2e1cb3

use local .m2/repository for Hadoop 3.4.0
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 06 May 2020 14:24:42 +0100
parents b7daa4f8767c
children 6cf3dc7ff022
files src/nutch-cc/ivy/ivy.xml src/nutch-cc/ivy/ivysettings.xml
diffstat 2 files changed, 273 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/nutch-cc/ivy/ivy.xml	Wed May 06 14:24:42 2020 +0100
@@ -0,0 +1,172 @@
+<?xml version="1.0" ?>
+
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	You under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+
+<ivy-module version="1.0" xmlns:maven="http://ant.apache.org/ivy/maven">
+	<!-- Ivy module descriptor of Nutch: module identity, published artifact and library dependencies. -->
+	<info organisation="org.apache.nutch" module="nutch">
+		<license name="Apache 2.0" url="https://www.apache.org/licenses/LICENSE-2.0.txt" />
+		<ivyauthor name="Apache Nutch Team" url="https://nutch.apache.org/" />
+		<description homepage="https://nutch.apache.org/">Nutch is an open source web-search
+			software. It builds on Hadoop, Tika and Solr, adding web-specifics,
+			such as a crawler, a link-graph database etc.
+		</description>
+	</info>
+
+	<configurations>
+		<include file="${basedir}/ivy/ivy-configurations.xml" />
+	</configurations>
+
+	<publications>
+		<!--get the artifact from our module name -->
+		<artifact conf="master" />
+	</publications>
+
+	<dependencies>
+		<dependency org="org.slf4j" name="slf4j-api" rev="1.7.25" conf="*->master" />
+		<dependency org="org.slf4j" name="slf4j-log4j12" rev="1.7.25" conf="*->master" />
+
+		<!--dependency org="log4j" name="log4j" rev="1.2.15" conf="*->default">
+			<exclude org="javax.jms" name="jms" />
+			<exclude org="com.sun.jdmk" name="jmxtools" />
+			<exclude org="com.sun.jmx" name="jmxri" />
+		</dependency-->
+
+		<dependency org="org.apache.commons" name="commons-lang3" rev="3.8.1" conf="*->default" />
+		<dependency org="org.apache.commons" name="commons-collections4" rev="4.2" conf="*->master" />
+		<dependency org="org.apache.httpcomponents" name="httpclient" rev="4.5.6" conf="*->master" />
+		<dependency org="commons-codec" name="commons-codec" rev="1.11" conf="*->default" />
+		<dependency org="org.apache.commons" name="commons-compress" rev="1.18" conf="*->default" />
+		<dependency org="org.apache.commons" name="commons-jexl" rev="2.1.1" />
+		<dependency org="com.tdunning" name="t-digest" rev="3.2" />
+		<dependency org="org.codehaus.jettison" name="jettison" rev="1.4.0"/>
+
+		<!-- Hadoop Dependencies (a SNAPSHOT revision, so a snapshot or local repository has to provide it) -->
+		<dependency org="org.apache.hadoop" name="hadoop-client" rev="3.4.0-SNAPSHOT" conf="*->default">
+			<exclude org="hsqldb" name="hsqldb" />
+			<exclude org="net.sf.kosmosfs" name="kfs" />
+			<exclude org="net.java.dev.jets3t" name="jets3t" />
+			<exclude org="org.eclipse.jdt" name="core" />
+			<exclude org="org.mortbay.jetty" name="jsp-*" />
+			<exclude org="org.mortbay.jetty" name="jetty-util" />
+			<exclude org="com.squareup.okhttp" name="*" />
+			<exclude org="ant" name="ant" />
+		</dependency>
+		<!-- End of Hadoop Dependencies -->
+
+		<dependency org="org.apache.tika" name="tika-core" rev="1.22" />
+		<!-- tika-parsers (without transitive dependencies) is used to detect the charset in text and HTML documents -->
+		<dependency org="org.apache.tika" name="tika-parsers" rev="1.22" transitive="false" />
+
+		<!-- language detection -->
+		<dependency org="org.commoncrawl" name="language-detection-cld2" rev="0.1-SNAPSHOT" />
+		<dependency org="net.java.dev.jna" name="jna" rev="4.5.2" />
+
+		<dependency org="xml-apis" name="xml-apis" rev="1.4.01"/><!-- force this version as it is required by Tika -->
+		<dependency org="xerces" name="xercesImpl" rev="2.12.0" />
+
+		<dependency org="com.ibm.icu" name="icu4j" rev="61.1" />
+
+		<dependency org="com.google.guava" name="guava" rev="25.0-jre" />
+
+		<dependency org="com.github.crawler-commons" name="crawler-commons" rev="1.1-SNAPSHOT" />
+
+		<!-- jetty is required by NutchServer -->
+		<dependency org="org.mortbay.jetty" name="jetty" rev="6.1.26" />
+
+		<dependency org="com.martinkl.warc" name="warc-hadoop" rev="0.1.0">
+			<exclude module="hadoop-client" />
+		</dependency>
+
+		<dependency org="org.apache.cxf" name="cxf-rt-frontend-jaxws" rev="3.3.3" conf="*->default"/>
+		<dependency org="org.apache.cxf" name="cxf-rt-frontend-jaxrs" rev="3.3.3" conf="*->default"/>
+		<dependency org="org.apache.cxf" name="cxf-rt-transports-http" rev="3.3.3" conf="*->default"/>
+		<dependency org="org.apache.cxf" name="cxf-rt-transports-http-jetty" rev="3.3.3" conf="*->default"/>
+		<dependency org="org.apache.cxf" name="cxf-rt-rs-client" rev="3.3.3" conf="test->default"/>
+		<dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.9.9" conf="*->default"/>
+		<dependency org="com.fasterxml.jackson.core" name="jackson-annotations" rev="2.9.9" conf="*->default"/>
+		<dependency org="com.fasterxml.jackson.dataformat" name="jackson-dataformat-cbor" rev="2.9.9" conf="*->default"/>
+		<dependency org="com.fasterxml.jackson.jaxrs" name="jackson-jaxrs-json-provider" rev="2.9.9" conf="*->default"/>
+
+		<!-- WARC artifacts needed -->
+		<dependency org="org.netpreserve.commons" name="webarchive-commons" rev="1.1.5" conf="*->default">
+			<exclude module="hadoop-core"/>
+			<exclude org="com.google.guava"/>
+			<exclude org="junit"/>
+			<!-- Exclude dependencies with incompatible license (see https://www.apache.org/legal/resolved.html#category-x) -->
+			<exclude org="org.json"/><!-- JSON License -->
+			<!--
+				Exclusion of the following dependencies disables support of WARC generation by
+				"bin/nutch commoncrawldump -warc ..."
+				Please remove these exclusions and recompile Nutch to generate WARC files using the tool "commoncrawldump".
+			-->
+			<exclude org="it.unimi.dsi" module="dsiutils"/><!-- LGPL 2.1 -->
+			<exclude org="org.gnu.inet" module="libidn"/><!-- LGPL 2.1 -->
+		</dependency>
+
+		<!--artifacts needed for testing -->
+		<dependency org="junit" name="junit" rev="4.11" conf="test->default" />
+		<dependency org="org.apache.mrunit" name="mrunit" rev="1.1.0" conf="test->default">
+			<artifact name="mrunit" maven:classifier="hadoop2" />
+			<exclude org="log4j" module="log4j" />
+		</dependency>
+		<dependency org="org.mortbay.jetty" name="jetty-client" rev="6.1.26" conf="test->default" />
+		<dependency org="org.mortbay.jetty" name="jetty" rev="6.1.26" conf="test->default" /><!-- jetty is also declared without conf for NutchServer -->
+		<dependency org="org.mortbay.jetty" name="jetty-util" rev="6.1.26" conf="test->default" />
+		<dependency org="tomcat" name="jasper-runtime" rev="5.5.23" conf="test->default" />
+		<dependency org="tomcat" name="jasper-compiler" rev="5.5.23" conf="test->default">
+			<exclude org="ant" name="ant" />
+		</dependency>
+		<!-- end of test artifacts -->
+
+		<!-- web app dependencies -->
+		<dependency org="org.mortbay.jetty" name="jetty" rev="6.1.26" /><!-- repeats the NutchServer declaration above -->
+
+		<dependency org="org.apache.commons" name="commons-collections4" rev="4.2" conf="*->default" /><!-- same rev as the *->master declaration above -->
+		<dependency org="org.springframework" name="spring-core" rev="4.0.9.RELEASE" conf="*->default" />
+		<dependency org="org.springframework" name="spring-context" rev="4.0.9.RELEASE" conf="*->default" />
+		<dependency org="org.springframework" name="spring-web" rev="4.0.9.RELEASE" conf="*->default" />
+
+		<dependency org="com.sun.jersey" name="jersey-client" rev="1.19.4" conf="*->default" />
+		<dependency org="com.sun.jersey" name="jersey-json" rev="1.19.4"/>
+
+		<dependency org="com.j256.ormlite" name="ormlite-jdbc" rev="5.1" conf="*->default" />
+		<dependency org="com.h2database" name="h2" rev="1.4.197" conf="*->default" />
+		<dependency org="org.eclipse.persistence" name="javax.persistence" rev="2.2.0" conf="*->default" />
+
+		<dependency org="org.apache.wicket" name="wicket-core" rev="6.17.0" conf="*->default" />
+		<dependency org="org.apache.wicket" name="wicket-spring" rev="6.17.0" conf="*->default" />
+		<dependency org="de.agilecoders.wicket" name="wicket-bootstrap-core" rev="0.9.2" conf="*->default" />
+		<dependency org="de.agilecoders.wicket" name="wicket-bootstrap-extensions" rev="0.9.2" conf="*->default">
+			<exclude org="org.json"/>
+		</dependency>
+
+		<!-- RabbitMQ dependencies -->
+		<dependency org="com.rabbitmq" name="amqp-client" rev="5.2.0" conf="*->default" />
+
+		<!--Added Because of Elasticsearch JEST client-->
+		<!--TODO refactor these to indexer-elastic-rest plugin somehow, currently doesn't resolve correctly-->
+		<dependency org="org.apache.httpcomponents" name="httpcore-nio" rev="4.4.9"/>
+		<dependency org="org.apache.httpcomponents" name="httpcore" rev="4.4.9"/>
+		<dependency org="org.apache.httpcomponents" name="httpclient" rev="4.5.6"/><!-- same rev as the *->master declaration above -->
+
+		<dependency org="de.vandermeer" name="asciitable" rev="0.3.2"/>
+
+		<!--global exclusion -->
+		<exclude module="jmxtools" />
+		<exclude module="jms" />
+		<exclude module="jmxri" />
+		<exclude org="com.thoughtworks.xstream"/>
+
+	</dependencies>
+
+</ivy-module>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/nutch-cc/ivy/ivysettings.xml	Wed May 06 14:24:42 2020 +0100
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ivysettings>
+
+ <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+  <!-- Ivy settings of the Nutch build: repository URLs, resolvers, resolver chains and per-module resolver selection. -->
+  <property name="oss.sonatype.org"
+    value="https://oss.sonatype.org/content/repositories/releases/"
+    override="false"/>
+  <property name="repo.maven.org"
+    value="https://repo1.maven.org/maven2/"
+    override="false"/>
+  <property name="repository.apache.org"
+    value="https://repository.apache.org/content/repositories/snapshots/"
+    override="false"/>
+  <property name="maven2.pattern"
+    value="[organisation]/[module]/[revision]/[module]-[revision](-[classifier])"/>
+  <property name="maven2.pattern.ext" value="${maven2.pattern}.[ext]"/>
+  <property name="maven2.pattern.local"
+    value="${user.home}/.m2/repository/[organisation]/[module]/[revision]/[module]-[revision](-[classifier]).[ext]"
+    override="false" /><!-- NOTE(review): not referenced by any resolver in this file -->
+  <!-- pull in the local repository -->
+  <include url="${ivy.default.conf.dir}/ivyconf-local.xml"/>
+  <settings defaultResolver="default"/>
+  <resolvers>
+    <ibiblio name="maven2"
+      root="${repo.maven.org}"
+      pattern="${maven2.pattern.ext}"
+      m2compatible="true"/>
+    <ibiblio name="apache-snapshot"
+      root="${repository.apache.org}"
+      m2compatible="true"
+      changingMatcher="regexp"
+      changingPattern=".*SNAPSHOT.*"
+      checkmodified="true"/>
+    <ibiblio name="sonatype"
+      root="${oss.sonatype.org}"
+      pattern="${maven2.pattern.ext}"
+      m2compatible="true"/>
+    <ibiblio name="cloudera-repos"
+      root="https://repository.cloudera.com/artifactory/cloudera-repos/"
+      pattern="${maven2.pattern.ext}"
+      m2compatible="true"/>
+    <!-- Local Maven repository (${user.home}/.m2/repository). SNAPSHOT revisions are treated as changing, as for
+         apache-snapshot, so that Ivy checks the last-modified date of a locally re-installed build. -->
+    <ibiblio name="maven2-local"
+      root="file://${user.home}/.m2/repository"
+      pattern="${maven2.pattern.ext}"
+      m2compatible="true"
+      changingMatcher="regexp" changingPattern=".*SNAPSHOT.*" checkmodified="true"/>
+    <chain name="default" dual="true">
+      <resolver ref="local"/>
+      <resolver ref="maven2"/>
+      <resolver ref="apache-snapshot"/>
+      <resolver ref="sonatype"/>
+      <resolver ref="cloudera-repos"/>
+      <resolver ref="maven2-local"/><!-- listed last here, but second in internal-and-maven -->
+    </chain>
+    <chain name="internal">
+      <resolver ref="local"/>
+    </chain>
+    <chain name="internal-and-maven">
+      <resolver ref="local"/>
+      <resolver ref="maven2-local"/>
+      <resolver ref="maven2"/>
+      <resolver ref="apache-snapshot"/>
+      <resolver ref="sonatype"/>
+      <resolver ref="cloudera-repos"/>
+    </chain>
+    <chain name="external">
+      <resolver ref="maven2"/>
+    </chain>
+    <chain name="external-and-snapshots">
+      <resolver ref="maven2"/>
+      <resolver ref="apache-snapshot"/>
+      <resolver ref="sonatype"/>
+    </chain>
+  </resolvers>
+  <modules>
+    <!-- Other Nutch artifacts must be built locally rather than looked for online ("internal").
+         org.commoncrawl modules and tika-core may also come from ~/.m2/repository or the
+         remote repositories ("internal-and-maven"). -->
+    <module organisation="org.apache.nutch" name=".*" resolver="internal"/>
+    <module organisation="org.commoncrawl" name=".*" resolver="internal-and-maven"/>
+    <module organisation="org.apache.tika" name="tika-core" resolver="internal-and-maven"/>
+  </modules>
+</ivysettings>