changeset 191:b5904d0bdfd4

orig, more or less
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 25 Sep 2024 09:49:12 +0100
parents f2bf736c2d40
children 4275eb6484da
files src/nutch-cc/build.xml
diffstat 1 files changed, 1204 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/nutch-cc/build.xml	Wed Sep 25 09:49:12 2024 +0100
@@ -0,0 +1,1204 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="${name}" default="runtime" xmlns:ivy="antlib:org.apache.ivy.ant" xmlns:artifact="antlib:org.apache.maven.artifact.ant">
+
+  <!-- Load all the default properties, and any the user wants to        -->
+  <!-- contribute (without having to type -D or edit this file).         -->
+  <!-- Ant properties cannot be changed once set, so the order matters:  -->
+  <!-- the first file that defines a property wins and                   -->
+  <!-- default.properties only supplies values that are still unset.     -->
+  <property file="${user.home}/build.properties" />
+  <property file="${basedir}/build.properties" />
+  <property file="${basedir}/default.properties" />
+  <!-- Directory for release artifacts; declared after the property      -->
+  <!-- files so that a value coming from any of them takes precedence.   -->
+  <property name="release.dir" value="${build.dir}/release"/>
+
+  <!-- Maven coordinates, the staging repository (URL and id) and the paths of
+       the three release jars, all of which are written to ${release.dir} -->
+  <property name="groupId"
+            value="org.apache.nutch" />
+  <property name="artifactId"
+            value="nutch" />
+  <property name="maven-repository-url"
+            value="https://repository.apache.org/service/local/staging/deploy/maven2" />
+  <property name="maven-repository-id"
+            value="apache.releases.https" />
+  <property name="maven-jar"
+            value="${release.dir}/${artifactId}-${version}.jar" />
+  <property name="maven-javadoc-jar"
+            value="${release.dir}/${artifactId}-${version}-javadoc.jar" />
+  <property name="maven-sources-jar"
+            value="${release.dir}/${artifactId}-${version}-sources.jar" />
+
+
+  <!-- Normal (compile) classpath: the compiled core classes followed by every
+       jar located directly in ${build.lib.dir} (sub-directories are not
+       searched) -->
+  <path id="classpath">
+    <pathelement location="${build.classes}"/>
+    <fileset dir="${build.lib.dir}" includes="*.jar"/>
+  </path>
+
+  <!-- Parent directory of ${build.plugins}; it becomes an entry of the unit
+       test classpath defined below -->
+  <dirname file="${build.plugins}" property="plugins.classpath.dir"/>
+
+  <!-- Unit test classpath: compiled test classes, the conf and test source
+       directories, the plugins parent directory, everything on the normal
+       classpath, the job file, and the jars of the main and test lib
+       directories -->
+  <path id="test.classpath">
+    <pathelement location="${test.build.classes}"/>
+    <pathelement location="${conf.dir}"/>
+    <pathelement location="${test.src.dir}"/>
+    <pathelement location="${plugins.classpath.dir}"/>
+    <path refid="classpath"/>
+    <pathelement location="${build.dir}/${final.name}.job"/>
+    <fileset dir="${build.lib.dir}" includes="*.jar"/>
+    <fileset dir="${test.build.lib.dir}" includes="*.jar"/>
+  </path>
+
+  <!-- Redefine <javac> for this build file so that every invocation defaults
+       to includeantruntime="false", i.e. Ant's own runtime libraries are not
+       added to the compile classpath -->
+  <presetdef name="javac">
+    <javac includeantruntime="false" />
+  </presetdef>
+
+  <!-- ====================================================== -->
+  <!-- Stuff needed by all targets                            -->
+  <!-- ====================================================== -->
+  <target name="init"
+          depends="ivy-init"
+          description="--> stuff required by all targets">
+    <!-- output directories of the main build and of the release -->
+    <mkdir dir="${build.dir}"/>
+    <mkdir dir="${build.classes}"/>
+    <mkdir dir="${release.dir}"/>
+
+    <!-- output directories of the test build -->
+    <mkdir dir="${test.build.dir}"/>
+    <mkdir dir="${test.build.classes}"/>
+    <mkdir dir="${test.build.lib.dir}"/>
+
+    <!-- Back-date all *.template files. <copy> does not overwrite by default
+         and only copies a file whose source is newer than the target, so
+         after this touch the copy below creates configuration files that are
+         missing and leaves the ones that already exist alone. -->
+    <touch datetime="01/25/1971 2:00 pm">
+      <fileset dir="${conf.dir}">
+        <include name="**/*.template"/>
+      </fileset>
+    </touch>
+
+    <!-- instantiate each template as a file of the same name without the
+         ".template" suffix, in the same directory -->
+    <copy todir="${conf.dir}" verbose="true">
+      <fileset dir="${conf.dir}">
+        <include name="**/*.template"/>
+      </fileset>
+      <globmapper from="*.template" to="*"/>
+    </copy>
+  </target>
+
+  <!-- ====================================================== -->
+  <!-- Compile the Java files                                 -->
+  <!-- ====================================================== -->
+  <!-- aggregate target: does nothing itself, it only pulls in both compile steps -->
+  <target name="compile"
+          depends="compile-core, compile-plugins"
+          description="--> compile all Java files"/>
+
+  <target name="compile-core"
+          depends="init, resolve-default"
+          description="--> compile core Java files only">
+    <!-- compile only the org.apache.nutch and org.commoncrawl source trees -->
+    <javac srcdir="${src.dir}"
+           destdir="${build.classes}"
+           includes="org/apache/nutch/**/*.java org/commoncrawl/**/*.java"
+           encoding="${build.encoding}"
+           source="${javac.version}"
+           target="${javac.version}"
+           debug="${javac.debug}"
+           optimize="${javac.optimize}"
+           deprecation="${javac.deprecation}">
+      <!-- switch off javac's "path" lint category -->
+      <compilerarg value="-Xlint:-path"/>
+      <classpath refid="classpath"/>
+    </javac>
+    <!-- non-Java resources that have to sit next to the compiled classes -->
+    <copy todir="${build.classes}">
+      <fileset dir="${src.dir}">
+        <include name="**/*.html"/>
+        <include name="**/*.css"/>
+        <include name="**/*.properties"/>
+      </fileset>
+    </copy>
+  </target>
+
+  <target name="compile-plugins"
+          depends="init, resolve-default"
+          description="--> compile plugins only">
+    <!-- delegate to the "deploy" target of the build file in src/plugin;
+         inheritAll="false" keeps this project's properties out of that build -->
+    <ant target="deploy" dir="src/plugin" inheritAll="false"/>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make nutch.jar                                                     -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="jar"
+          depends="compile-core"
+          description="--> make nutch.jar">
+    <!-- put the default and the site configuration into ${build.classes} so
+         that both end up in the jar -->
+    <copy todir="${build.classes}" file="${conf.dir}/nutch-default.xml"/>
+    <copy todir="${build.classes}" file="${conf.dir}/nutch-site.xml"/>
+    <!-- package everything below ${build.classes}; the manifest element is
+         intentionally empty -->
+    <jar basedir="${build.classes}"
+         jarfile="${build.dir}/${final.name}.jar">
+      <manifest/>
+    </jar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make Maven Central Release                                         -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="release"
+          depends="compile-core"
+          description="--> generate the release distribution">
+    <!-- put the default and the site configuration into ${build.classes} so
+         that both are packaged into the main artifact -->
+    <copy todir="${build.classes}" file="${conf.dir}/nutch-default.xml"/>
+    <copy todir="${build.classes}" file="${conf.dir}/nutch-site.xml"/>
+
+    <!-- build the main artifact -->
+    <jar jarfile="${maven-jar}" basedir="${build.classes}" />
+
+    <!-- stop before the Javadoc step when running on Java 1.7.0_10 to
+         1.7.0_24 (by pattern) or when Ant reports Java 1.5 or 1.6 -->
+    <fail message="Unsupported Java version: ${java.version}. Javadoc requires Java version 7u25 or greater. See https://issues.apache.org/jira/browse/NUTCH-1590">
+      <condition>
+        <or>
+          <matches string="${java.version}" pattern="1.7.0_2[01234].+" casesensitive="false" />
+          <matches string="${java.version}" pattern="1.7.0_1.+" casesensitive="false" />
+          <equals arg1="${ant.java.version}" arg2="1.6" />
+          <equals arg1="${ant.java.version}" arg2="1.5" />
+        </or>
+      </condition>
+    </fail>
+
+    <!-- build the javadoc artifact -->
+    <javadoc
+      overview="${src.dir}/overview.html"
+      destdir="${release.dir}/javadoc"
+      author="true"
+      version="true"
+      use="true"
+      windowtitle="${name} ${version} API"
+      doctitle="${name} ${version} API"
+      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      >
+      <!-- proxy options handed to the javadoc tool as they are; the two
+           properties are not defined in this file -->
+      <arg value="${javadoc.proxy.host}"/>
+      <arg value="${javadoc.proxy.port}"/>
+
+      <!-- sources: the core tree first, then one entry per plugin -->
+      <packageset dir="${src.dir}"/>
+      <packageset dir="${plugins.dir}/any23/src/java/" />
+      <packageset dir="${plugins.dir}/creativecommons/src/java"/>
+      <packageset dir="${plugins.dir}/feed/src/java"/>
+      <packageset dir="${plugins.dir}/headings/src/java"/>
+      <packageset dir="${plugins.dir}/exchange-jexl/src/java"/>
+      <packageset dir="${plugins.dir}/index-anchor/src/java"/>
+      <packageset dir="${plugins.dir}/index-basic/src/java"/>
+      <packageset dir="${plugins.dir}/index-geoip/src/java"/>
+      <packageset dir="${plugins.dir}/index-jexl-filter/src/java"/>
+      <packageset dir="${plugins.dir}/index-links/src/java"/>
+      <packageset dir="${plugins.dir}/index-metadata/src/java"/>
+      <packageset dir="${plugins.dir}/index-more/src/java"/>
+      <packageset dir="${plugins.dir}/index-replace/src/java"/>
+      <packageset dir="${plugins.dir}/index-static/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-cloudsearch/src/java/" />
+      <packageset dir="${plugins.dir}/indexer-csv/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-dummy/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-elastic-rest/src/java/"/>
+      <packageset dir="${plugins.dir}/indexer-elastic/src/java/" />
+      <packageset dir="${plugins.dir}/indexer-kafka/src/java/" />
+      <packageset dir="${plugins.dir}/indexer-rabbit/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-solr/src/java"/>
+      <packageset dir="${plugins.dir}/language-identifier/src/java"/>
+      <packageset dir="${plugins.dir}/lib-htmlunit/src/java"/>
+      <packageset dir="${plugins.dir}/lib-http/src/java"/>
+      <packageset dir="${plugins.dir}/lib-rabbitmq/src/java"/>
+      <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
+      <packageset dir="${plugins.dir}/lib-selenium/src/java"/>
+      <packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
+      <packageset dir="${plugins.dir}/mimetype-filter/src/java"/>
+      <packageset dir="${plugins.dir}/parse-ext/src/java"/>
+      <packageset dir="${plugins.dir}/parse-html/src/java"/>
+      <packageset dir="${plugins.dir}/parse-js/src/java"/>
+      <packageset dir="${plugins.dir}/parse-metatags/src/java"/>
+      <packageset dir="${plugins.dir}/parse-swf/src/java"/>
+      <packageset dir="${plugins.dir}/parse-tika/src/java"/>
+      <packageset dir="${plugins.dir}/parse-zip/src/java"/>
+      <packageset dir="${plugins.dir}/parsefilter-naivebayes/src/java"/>
+      <packageset dir="${plugins.dir}/parsefilter-regex/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-file/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-htmlunit/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-http/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-interactiveselenium/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-okhttp/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-selenium/src/java"/>
+      <packageset dir="${plugins.dir}/publish-rabbitmq/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-adaptive/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-depth/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-link/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-orphan/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-similarity/src/java"/>
+      <packageset dir="${plugins.dir}/subcollection/src/java"/>
+      <packageset dir="${plugins.dir}/tld/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-fast/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-ignoreexempt/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-suffix/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-validator/src/java"/>
+      <packageset dir="${plugins.dir}/urlmeta/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-ajax/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-host/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-protocol/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-querystring/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-slash/src/java"/>
+
+      <!-- external API documentation to link against -->
+      <link href="${javadoc.link.java}"/>
+      <link href="${javadoc.link.hadoop}"/>
+      <link href="${javadoc.link.lucene.core}"/>
+      <link href="${javadoc.link.lucene.analyzers-common}"/>
+      <link href="${javadoc.link.solr-solrj}"/>
+
+      <!-- the normal classpath plus every jar found anywhere below the
+           plugins directory -->
+      <classpath refid="classpath"/>
+      <classpath>
+        <fileset dir="${plugins.dir}" >
+          <include name="**/*.jar"/>
+        </fileset>
+      </classpath>
+
+      <!-- grouping of packages on the overview page; the plugins.* package
+           lists are properties that are not defined in this file -->
+      <group title="Core" packages="org.apache.nutch.*"/>
+      <group title="Plugins API" packages="${plugins.api}"/>
+      <group title="Protocol Plugins" packages="${plugins.protocol}"/>
+      <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
+      <group title="URL Normalizer Plugins" packages="${plugins.urlnormalizer}"/>
+      <group title="Scoring Plugins" packages="${plugins.scoring}"/>
+      <group title="Parse Plugins" packages="${plugins.parse}"/>
+      <group title="Parse Filter Plugins" packages="${plugins.parsefilter}"/>
+      <group title="Publisher Plugins" packages="${plugins.publisher}"/>
+      <group title="Exchange Plugins" packages="${plugins.exchange}"/>
+      <group title="Indexing Filter Plugins" packages="${plugins.index}"/>
+      <group title="Indexer Plugins" packages="${plugins.indexer}"/>
+      <group title="Misc. Plugins" packages="${plugins.misc}"/>
+    </javadoc>
+    <jar jarfile="${maven-javadoc-jar}">
+      <fileset dir="${release.dir}/javadoc" />
+    </jar>
+
+    <!-- build the sources artifact -->
+    <jar jarfile="${maven-sources-jar}">
+      <fileset dir="${src.dir}" />
+    </jar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Deploy to Apache Nexus                                             -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="deploy"
+          depends="release"
+          description="--> deploy to Apache Nexus">
+
+    <!-- generate a pom file from the Ivy file, mapping the Ivy configurations
+         "default" and "runtime" to the Maven scopes "compile" and "runtime" -->
+    <ivy:makepom ivyfile="${ivy.file}"
+                 pomfile="${basedir}/pom.xml"
+                 templatefile="ivy/mvn.template">
+      <mapping conf="default" scope="compile"/>
+      <mapping conf="runtime" scope="runtime"/>
+    </ivy:makepom>
+
+    <!-- The three Maven invocations below are identical except for the file
+         that is uploaded and its classifier (none for the main artifact). -->
+
+    <!-- sign and deploy the main artifact -->
+    <artifact:mvn>
+      <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.5:sign-and-deploy-file" />
+      <arg value="-Durl=${maven-repository-url}" />
+      <arg value="-DrepositoryId=${maven-repository-id}" />
+      <arg value="-DpomFile=pom.xml" />
+      <arg value="-Dfile=${maven-jar}" />
+      <arg value="-Papache-release" />
+    </artifact:mvn>
+
+    <!-- sign and deploy the sources artifact -->
+    <artifact:mvn>
+      <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.5:sign-and-deploy-file" />
+      <arg value="-Durl=${maven-repository-url}" />
+      <arg value="-DrepositoryId=${maven-repository-id}" />
+      <arg value="-DpomFile=pom.xml" />
+      <arg value="-Dfile=${maven-sources-jar}" />
+      <arg value="-Dclassifier=sources" />
+      <arg value="-Papache-release" />
+    </artifact:mvn>
+
+    <!-- sign and deploy the javadoc artifact -->
+    <artifact:mvn>
+      <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.5:sign-and-deploy-file" />
+      <arg value="-Durl=${maven-repository-url}" />
+      <arg value="-DrepositoryId=${maven-repository-id}" />
+      <arg value="-DpomFile=pom.xml" />
+      <arg value="-Dfile=${maven-javadoc-jar}" />
+      <arg value="-Dclassifier=javadoc" />
+      <arg value="-Papache-release" />
+    </artifact:mvn>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Generate REST API Documentation with Miredot                       -->
+  <!-- ================================================================== -->
+  <target name="restdocs"
+          description="--> generate REST API Documentation with Miredot">
+
+    <!-- generate a pom file from the Ivy file -->
+    <ivy:makepom ivyfile="${ivy.file}"
+                 pomfile="${basedir}/pom.xml"
+                 templatefile="ivy/mvn.template">
+      <mapping conf="default" scope="compile"/>
+      <mapping conf="runtime" scope="runtime"/>
+    </ivy:makepom>
+
+    <!--artifact:dependencies pathId="dependency.classpath">
+      <dependency groupId="log4j" artifactId="log4j" version="1.2.15" >
+        <exclusion groupId="javax.jms" artifactId="jms" />
+        <exclusion groupId="com.sun.jdmk" artifactId="jmxtools" />
+        <exclusion groupId="com.sun.jmx" artifactId="jmxri" />
+      </dependency>
+    </artifact:dependencies-->
+
+    <!-- run the Maven "test" phase with full error output (-e) -->
+    <artifact:mvn>
+      <arg value="test"/>
+      <arg value="-e"/>
+      <!-- run offline (-o): must not download dependencies as this is
+           done from http://repo1.maven.org/ hardwired in
+           maven-ant-tasks-2.1.3.jar, see NUTCH-2722.
+
+           Dependencies and plugins need to be resolved and cached locally
+           beforehand, after the pom.xml has been generated, by running
+             `mvn dependency:resolve`
+           and
+             `mvn dependency:resolve-plugins`
+           respectively. -->
+      <arg value="-o"/>
+    </artifact:mvn>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make job jar                                                       -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="job"
+          depends="compile"
+          description="--> make nutch.job jar">
+    <jar jarfile="${build.dir}/${final.name}.job">
+      <!-- ${build.classes} may already contain the nutch config files because
+           the "jar" target has run; exclude them here, the conf directory
+           supplies them. -->
+      <zipfileset dir="${build.classes}" excludes="nutch-default.xml,nutch-site.xml"/>
+      <!-- configuration directory without templates and hadoop*.* files -->
+      <zipfileset dir="${conf.dir}" excludes="*.template,hadoop*.*"/>
+      <!-- library jars are stored under lib/, leaving out the Hadoop, SLF4J
+           and Log4j jars -->
+      <zipfileset dir="${build.lib.dir}"
+                  prefix="lib"
+                  includes="**/*.jar"
+                  excludes="hadoop-*.jar,slf4j*.jar,log4j*.jar"/>
+      <!-- built plugins are stored under classes/plugins/ -->
+      <zipfileset dir="${build.plugins}" prefix="classes/plugins"/>
+    </jar>
+  </target>
+
+  <target name="runtime"
+          depends="jar, job"
+          description="--> default target for running Nutch">
+    <mkdir dir="${runtime.dir}"/>
+    <mkdir dir="${runtime.local}"/>
+    <mkdir dir="${runtime.deploy}"/>
+
+    <!-- deploy area: the job file plus the executable scripts -->
+    <copy todir="${runtime.deploy}" file="${build.dir}/${final.name}.job"/>
+    <copy todir="${runtime.deploy}/bin">
+      <fileset dir="src/bin"/>
+    </copy>
+    <chmod perm="ugo+x" type="file">
+      <fileset dir="${runtime.deploy}/bin"/>
+    </chmod>
+
+    <!-- local area: the nutch jar, native libraries, configuration (without
+         templates), executable scripts, library jars, plugins and the test
+         build output -->
+    <copy todir="${runtime.local}/lib" file="${build.dir}/${final.name}.jar"/>
+    <copy todir="${runtime.local}/lib/native">
+      <fileset dir="lib/native"/>
+    </copy>
+    <copy todir="${runtime.local}/conf">
+      <fileset dir="${conf.dir}" excludes="*.template"/>
+    </copy>
+    <copy todir="${runtime.local}/bin">
+      <fileset dir="src/bin"/>
+    </copy>
+    <chmod perm="ugo+x" type="file">
+      <fileset dir="${runtime.local}/bin"/>
+    </chmod>
+    <copy todir="${runtime.local}/lib">
+      <fileset dir="${build.dir}/lib"/>
+    </copy>
+    <copy todir="${runtime.local}/plugins">
+      <fileset dir="${build.dir}/plugins"/>
+    </copy>
+    <copy todir="${runtime.local}/test">
+      <fileset dir="${build.dir}/test"/>
+    </copy>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Compile test code                                                  -->
+  <!-- ================================================================== -->
+  <target name="compile-core-test"
+          depends="init, compile-core, resolve-test"
+          description="--> compile test code">
+    <!-- same compiler settings as compile-core, but reading the test source
+         tree, writing to the test classes directory and compiling against
+         the test classpath -->
+    <javac srcdir="${test.src.dir}"
+           destdir="${test.build.classes}"
+           includes="org/apache/nutch/**/*.java org/commoncrawl/**/*.java"
+           encoding="${build.encoding}"
+           source="${javac.version}"
+           target="${javac.version}"
+           debug="${javac.debug}"
+           optimize="${javac.optimize}"
+           deprecation="${javac.deprecation}">
+      <!-- switch off javac's "path" lint category -->
+      <compilerarg value="-Xlint:-path"/>
+      <classpath refid="test.classpath"/>
+    </javac>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Run Nutch proxy                                                    -->
+  <!-- ================================================================== -->
+
+  <target name="proxy"
+          depends="compile-core-test, job"
+          description="--> run nutch proxy">
+    <!-- launch ProxyTestbed in a separate JVM on the test classpath -->
+    <java fork="true" classname="org.apache.nutch.tools.proxy.ProxyTestbed">
+      <classpath refid="test.classpath"/>
+      <arg value="-fake"/>
+<!--
+      <arg value="-delay"/>
+      <arg value="-200"/>
+-->
+      <!-- select the JDK's built-in Xerces DocumentBuilderFactory -->
+      <jvmarg line="-Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
+    </java>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Run Nutch benchmarking analysis                                    -->
+  <!-- ================================================================== -->
+
+  <!-- Depends on compile-core-test and job, like the "proxy" target: the
+       benchmark is launched from test.classpath, whose entries (compiled
+       classes, library jars and ${build.dir}/${final.name}.job) only exist
+       after those targets have run. Without the dependency the target fails
+       on a clean checkout because nothing on the classpath has been built. -->
+  <target name="benchmark" depends="compile-core-test, job" description="--> run nutch benchmarking analysis">
+    <!-- run the Benchmark class in a separate JVM with a 512 MB heap and the
+         JDK's built-in Xerces DocumentBuilderFactory -->
+    <java classname="org.apache.nutch.tools.Benchmark" fork="true">
+      <classpath refid="test.classpath"/>
+      <jvmarg line="-Xmx512m -Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
+      <!-- benchmark parameters, given as option/value pairs -->
+      <arg value="-maxPerHost"/>
+      <arg value="10"/>
+      <arg value="-seeds"/>
+      <arg value="1"/>
+      <arg value="-depth"/>
+      <arg value="5"/>
+    </java>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Run unit tests                                                     -->
+  <!-- ================================================================== -->
+  <!-- aggregate target: core tests first, then the plugin tests -->
+  <target name="test"
+          depends="test-core, test-plugins"
+          description="--> run JUnit tests"/>
+
+  <target name="test-core"
+          depends="compile-core-test, job"
+          description="--> run core JUnit tests only">
+
+    <!-- start every run with an empty test data directory -->
+    <delete dir="${test.build.data}"/>
+    <mkdir dir="${test.build.data}"/>
+
+    <!-- copy resources needed in junit tests -->
+    <copy todir="${test.build.data}">
+      <fileset dir="src/testresources">
+        <include name="**/*"/>
+      </fileset>
+    </copy>
+
+    <!-- individual files placed in ${test.build.classes}, which is part of
+         test.classpath -->
+    <copy todir="${test.build.classes}" file="${test.src.dir}/log4j.properties"/>
+    <copy todir="${test.build.classes}" file="${test.src.dir}/crawl-tests.xml"/>
+    <copy todir="${test.build.classes}" file="${test.src.dir}/domain-urlfilter.txt"/>
+    <copy todir="${test.build.classes}" file="${test.src.dir}/filter-all.txt"/>
+
+    <!-- All tests run in a single forked JVM. A failure or an error does not
+         stop the run; it only sets tests.failed, which is checked once JUnit
+         has finished. -->
+    <junit fork="yes"
+           forkmode="once"
+           dir="${basedir}"
+           maxmemory="1000m"
+           printsummary="yes"
+           haltonfailure="no"
+           errorProperty="tests.failed"
+           failureProperty="tests.failed">
+      <sysproperty key="test.build.data" value="${test.build.data}"/>
+      <sysproperty key="test.src.dir" value="${test.src.dir}"/>
+      <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
+      <classpath refid="test.classpath"/>
+      <formatter type="${test.junit.output.format}" />
+      <!-- default: every Test*.java except the one named by test.exclude -->
+      <batchtest todir="${test.build.dir}" unless="testcase">
+        <fileset dir="${test.src.dir}"
+                 includes="**/Test*.java"
+                 excludes="**/${test.exclude}.java" />
+      </batchtest>
+      <!-- when the "testcase" property is set: only the class it names -->
+      <batchtest todir="${test.build.dir}" if="testcase">
+        <fileset dir="${test.src.dir}" includes="**/${testcase}.java"/>
+      </batchtest>
+    </junit>
+
+    <fail if="tests.failed">Tests failed!</fail>
+
+  </target>
+
+  <target name="test-plugins"
+          depends="resolve-test, compile"
+          description="--> run plugin JUnit tests only">
+    <!-- delegate to the "test" target of the build file in src/plugin -->
+    <ant target="test" dir="src/plugin" inheritAll="false"/>
+  </target>
+
+  <target name="test-plugin"
+          depends="resolve-test, compile"
+          description="--> run a single plugin's JUnit tests">
+    <!-- delegate to the "test-single" target of the build file in src/plugin;
+         how the plugin is selected is decided there, not in this file -->
+    <ant target="test-single" dir="src/plugin" inheritAll="false"/>
+  </target>
+
+  <!-- aggregate target without tasks of its own: runs all tests, then builds
+       the source archives through tar-src and zip-src -->
+  <target name="nightly"
+          depends="test, tar-src, zip-src"
+          description="--> run the nightly target build"/>
+
+  <!-- ================================================================== -->
+  <!-- Ivy targets                                                        -->
+  <!-- ================================================================== -->
+
+  <!-- target: resolve  ================================================= -->
+  <target name="resolve-default"
+          depends="clean-default-lib, init"
+          description="--> resolve and retrieve dependencies with ivy">
+    <!-- resolve the "default" Ivy configuration and place the artifacts in
+         ${build.lib.dir} as real copies (no symlinks) -->
+    <ivy:resolve conf="default" file="${ivy.file}" log="download-only"/>
+    <ivy:retrieve pattern="${build.lib.dir}/[artifact]-[revision].[ext]"
+                  symlink="false"
+                  log="quiet"/>
+    <!-- add the jars that are not managed by Ivy -->
+    <antcall target="copy-libs"/>
+  </target>
+
+  <target name="resolve-test"
+          depends="clean-test-lib, init"
+          description="--> resolve and retrieve dependencies with ivy">
+    <!-- resolve the "test" Ivy configuration and place the artifacts in
+         ${test.build.lib.dir} as real copies (no symlinks) -->
+    <ivy:resolve conf="test" file="${ivy.file}" log="download-only"/>
+    <ivy:retrieve pattern="${test.build.lib.dir}/[artifact]-[revision].[ext]"
+                  symlink="false"
+                  log="quiet"/>
+    <!-- add the jars that are not managed by Ivy -->
+    <antcall target="copy-libs"/>
+  </target>
+
+  <target name="copy-libs"
+          description="--> copy the libs in lib, which are not ivy enabled">
+    <!-- Copy the jars below ${lib.dir} that Ivy does not manage into the
+         build lib directory; failonerror="false" turns a failure of this
+         copy into a warning instead of a build failure. -->
+    <copy todir="${build.lib.dir}/" failonerror="false">
+      <fileset dir="${lib.dir}">
+        <include name="**/*.jar"/>
+      </fileset>
+    </copy>
+  </target>
+
+  <!-- target: publish-local  =========================================== -->
+  <target name="publish-local"
+          depends="jar"
+          description="--> publish this project in the local ivy repository">
+    <!-- publish the artifacts found in ${build.dir} through the "local"
+         resolver as an integration build, replacing an earlier publication
+         of the same version -->
+    <ivy:publish resolver="local"
+                 artifactspattern="${build.dir}/[artifact]-${version}.[ext]"
+                 pubrevision="${version}"
+                 pubdate="${now}"
+                 status="integration"
+                 forcedeliver="true"
+                 overwrite="true"/>
+    <echo message="project ${ant.project.name} published locally with version ${version}" />
+  </target>
+
+  <!-- target: report  ================================================== -->
+  <target name="report"
+          depends="resolve-test"
+          description="--> generates a report of dependencies">
+    <!-- write the Ivy dependency report, including its XML form, to ${build.dir} -->
+    <ivy:report xml="true" todir="${build.dir}"/>
+  </target>
+
+  <!-- target: 3rd-party licenses report  =============================== -->
+  <target name="report-licenses"
+          depends="resolve-default"
+          description="--> generates a report of licenses of dependencies">
+    <!-- render the report with the license stylesheet into a .tsv file in
+         ${build.dir}; XML and graph output are switched off -->
+    <ivy:report todir="${build.dir}"
+                xslfile="ivy/ivy-report-license.xsl"
+                outputpattern="[organisation]-[module]-[conf]-3rd-party-licenses.tsv"
+                xml="false"
+                graph="false"/>
+  </target>
+
+  <!-- target: ivy-init  ================================================ -->
+  <target name="ivy-init"
+          depends="ivy-probe-antlib, ivy-init-antlib"
+          description="--> initialise Ivy settings">
+    <!-- the probe runs first so that ivy-init-antlib can be skipped when the
+         Ivy tasks are already available -->
+    <ivy:settings file="${ivy.dir}/ivysettings.xml"/>
+  </target>
+
+  <!-- target: ivy-probe-antlib  ======================================== -->
+  <target name="ivy-probe-antlib"
+          description="--> probe the antlib library">
+    <!-- set ivy.found when the Ivy antlib is already loaded, detected through
+         the presence of its "cleancache" type -->
+    <condition property="ivy.found">
+      <typefound name="cleancache" uri="antlib:org.apache.ivy.ant"/>
+    </condition>
+  </target>
+
+  <!-- target: ivy-download  ============================================ -->
+  <target name="ivy-download"
+          description="--> download ivy">
+    <!-- record whether the Ivy jar is already on disk; the called target is
+         skipped when ivy.jar.found is set -->
+    <available property="ivy.jar.found" file="${ivy.jar}"/>
+    <antcall target="ivy-download-unchecked"/>
+  </target>
+
+  <!-- target: ivy-download-unchecked  ================================== -->
+  <target name="ivy-download-unchecked"
+          unless="ivy.jar.found"
+          description="--> fetch any ivy file">
+    <!-- download the Ivy jar; usetimestamp="true" makes the download
+         conditional on the remote file being newer than a local copy -->
+    <get dest="${ivy.jar}" src="${ivy.repo.url}" usetimestamp="true"/>
+  </target>
+
+  <!-- target: ivy-init-antlib  ========================================= -->
+  <target name="ivy-init-antlib"
+          depends="ivy-download"
+          unless="ivy.found"
+          description="--> attempt to use Ivy with Antlib">
+    <!-- load the Ivy antlib from the Ivy jar; a load error fails the build -->
+    <typedef uri="antlib:org.apache.ivy.ant"
+             loaderRef="ivyLoader"
+             onerror="fail">
+      <classpath>
+        <pathelement location="${ivy.jar}"/>
+      </classpath>
+    </typedef>
+    <!-- verify the load with the same probe as ivy-probe-antlib -->
+    <fail>
+      <condition>
+        <not>
+          <typefound uri="antlib:org.apache.ivy.ant" name="cleancache" />
+        </not>
+      </condition>
+      You need Apache Ivy 2.0 or later from http://ant.apache.org/
+      It could not be loaded from ${ivy.repo.url}
+    </fail>
+  </target>
+
+  <!-- Check dependencies for security vulnerabilities                                    -->
+  <!-- requires installation of OWASP dependency check tool, see                          -->
+  <!--   https://jeremylong.github.io/DependencyCheck/dependency-check-ant/index.html     -->
+  <!-- get http://dl.bintray.com/jeremy-long/owasp/dependency-check-ant-3.3.2-release.zip -->
+  <!-- and unzip in directory ./ivy/                                                      -->
+  <!-- Location of the unpacked dependency-check Ant distribution and the
+       classpath built from it: the task jar plus the jars in its lib
+       directory (a missing lib directory is tolerated). -->
+  <property name="dependency-check.home" value="${ivy.dir}/dependency-check-ant/"/>
+  <path id="dependency-check.path">
+    <pathelement location="${dependency-check.home}/dependency-check-ant.jar"/>
+    <fileset dir="${dependency-check.home}/lib"
+             includes="*.jar"
+             erroronmissingdir="false"/>
+  </path>
+  <!-- onerror="ignore": when the tool is not installed the task definitions
+       are skipped instead of breaking every other target of this build -->
+  <taskdef resource="dependency-check-taskdefs.properties" onerror="ignore">
+    <classpath refid="dependency-check.path"/>
+  </taskdef>
+  <target name="report-vulnerabilities"
+          description="--> check dependencies for security vulnerabilities">
+    <!-- scan the jars in ${build.dir}/lib and the jars one directory level
+         below ${build.dir}/plugins, writing reports in all formats to
+         ${build.dir} -->
+    <dependency-check projectname="${name}"
+                      reportformat="ALL"
+                      reportoutputdirectory="${build.dir}">
+      <suppressionfile path="${dependency-check.home}/dependency-check-suppressions.xml"/>
+      <retirejsFilter regex="copyright.*jeremy long"/>
+      <fileset dir="${build.dir}">
+        <include name="lib/*.jar"/>
+        <include name="plugins/*/*.jar"/>
+      </fileset>
+    </dependency-check>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Documentation                                                      -->
+  <!-- ================================================================== -->
+  <!--
+    Builds the API documentation for the core sources and all listed plugins
+    into ${build.javadoc}, then copies plugin.dtd into the doc-files folder of
+    the org.apache.nutch.plugin package.
+  -->
+  <target name="javadoc" depends="compile" description="--> generate Javadoc">
+    <fail message="Unsupported Java version: ${java.version}. Javadoc requires Java version 7u25 or greater. See https://issues.apache.org/jira/browse/NUTCH-1590">
+      <condition>
+        <or>
+          <!-- Java 7 GA and updates 00 to 24, optionally followed by a "-suffix".
+               The pattern is anchored and the update number is limited to two
+               digits, so later releases such as 1.7.0_101 or 1.7.0_201 are not
+               rejected by accident. "$$" is Ant's escape for a literal "$". -->
+          <matches string="${java.version}" pattern="^1\.7\.0(_([01][0-9]|2[0-4]))?(-.*)?$$" />
+          <equals arg1="${ant.java.version}" arg2="1.6" />
+          <equals arg1="${ant.java.version}" arg2="1.5" />
+        </or>
+      </condition>
+    </fail>
+    <mkdir dir="${build.javadoc}"/>
+    <javadoc
+      overview="${src.dir}/overview.html"
+      destdir="${build.javadoc}"
+      author="true"
+      version="true"
+      use="true"
+      windowtitle="${name} ${version} API"
+      doctitle="${name} ${version} API"
+      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      >
+      <!-- extra command line arguments taken from the javadoc.proxy.* properties -->
+      <arg value="${javadoc.proxy.host}"/>
+      <arg value="${javadoc.proxy.port}"/>
+
+      <!-- core sources followed by the Java sources of each plugin -->
+      <packageset dir="${src.dir}"/>
+      <packageset dir="${plugins.dir}/any23/src/java/" />
+      <packageset dir="${plugins.dir}/creativecommons/src/java"/>
+      <packageset dir="${plugins.dir}/feed/src/java"/>
+      <packageset dir="${plugins.dir}/headings/src/java"/>
+      <packageset dir="${plugins.dir}/exchange-jexl/src/java"/>
+      <packageset dir="${plugins.dir}/index-anchor/src/java"/>
+      <packageset dir="${plugins.dir}/index-basic/src/java"/>
+      <packageset dir="${plugins.dir}/index-geoip/src/java"/>
+      <packageset dir="${plugins.dir}/index-jexl-filter/src/java"/>
+      <packageset dir="${plugins.dir}/index-links/src/java"/>
+      <packageset dir="${plugins.dir}/index-metadata/src/java"/>
+      <packageset dir="${plugins.dir}/index-more/src/java"/>
+      <packageset dir="${plugins.dir}/index-replace/src/java"/>
+      <packageset dir="${plugins.dir}/index-static/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-cloudsearch/src/java/" />
+      <packageset dir="${plugins.dir}/indexer-csv/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-dummy/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-elastic-rest/src/java/"/>
+      <packageset dir="${plugins.dir}/indexer-elastic/src/java/" />
+      <packageset dir="${plugins.dir}/indexer-kafka/src/java/" />
+      <packageset dir="${plugins.dir}/indexer-rabbit/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-solr/src/java"/>
+      <packageset dir="${plugins.dir}/language-identifier/src/java"/>
+      <packageset dir="${plugins.dir}/lib-htmlunit/src/java"/>
+      <packageset dir="${plugins.dir}/lib-http/src/java"/>
+      <packageset dir="${plugins.dir}/lib-rabbitmq/src/java"/>
+      <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
+      <packageset dir="${plugins.dir}/lib-selenium/src/java"/>
+      <packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
+      <packageset dir="${plugins.dir}/mimetype-filter/src/java"/>
+      <packageset dir="${plugins.dir}/parse-ext/src/java"/>
+      <packageset dir="${plugins.dir}/parse-html/src/java"/>
+      <packageset dir="${plugins.dir}/parse-js/src/java"/>
+      <packageset dir="${plugins.dir}/parse-metatags/src/java"/>
+      <packageset dir="${plugins.dir}/parse-swf/src/java"/>
+      <packageset dir="${plugins.dir}/parse-tika/src/java"/>
+      <packageset dir="${plugins.dir}/parse-zip/src/java"/>
+      <packageset dir="${plugins.dir}/parsefilter-naivebayes/src/java"/>
+      <packageset dir="${plugins.dir}/parsefilter-regex/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-file/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-htmlunit/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-http/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-interactiveselenium/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-okhttp/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-selenium/src/java"/>
+      <packageset dir="${plugins.dir}/publish-rabbitmq/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-adaptive/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-depth/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-link/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-orphan/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-similarity/src/java"/>
+      <packageset dir="${plugins.dir}/subcollection/src/java"/>
+      <packageset dir="${plugins.dir}/tld/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-fast/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-ignoreexempt/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-suffix/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-validator/src/java"/>
+      <packageset dir="${plugins.dir}/urlmeta/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-ajax/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-host/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-protocol/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-querystring/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-slash/src/java"/>
+
+      <!-- external API documentation to link against -->
+      <link href="${javadoc.link.java}"/>
+      <link href="${javadoc.link.hadoop}"/>
+      <link href="${javadoc.link.lucene.core}"/>
+      <link href="${javadoc.link.lucene.analyzers-common}"/>
+      <link href="${javadoc.link.solr-solrj}"/>
+
+      <!-- project classpath plus every jar found below the plugins folder -->
+      <classpath refid="classpath"/>
+      <classpath>
+        <fileset dir="${plugins.dir}" >
+          <include name="**/*.jar"/>
+        </fileset>
+      </classpath>
+
+      <!-- package groups shown on the overview page -->
+      <group title="Core" packages="org.apache.nutch.*"/>
+      <group title="Plugins API" packages="${plugins.api}"/>
+      <group title="Protocol Plugins" packages="${plugins.protocol}"/>
+      <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
+      <group title="URL Normalizer Plugins" packages="${plugins.urlnormalizer}"/>
+      <group title="Scoring Plugins" packages="${plugins.scoring}"/>
+      <group title="Parse Plugins" packages="${plugins.parse}"/>
+      <group title="Parse Filter Plugins" packages="${plugins.parsefilter}"/>
+      <group title="Publisher Plugins" packages="${plugins.publisher}"/>
+      <group title="Exchange Plugins" packages="${plugins.exchange}"/>
+      <group title="Indexing Filter Plugins" packages="${plugins.index}"/>
+      <group title="Indexer Plugins" packages="${plugins.indexer}"/>
+      <group title="Misc. Plugins" packages="${plugins.misc}"/>
+    </javadoc>
+    <!-- Copy the plugin.dtd file to the plugin doc-files dir -->
+    <copy file="${plugins.dir}/plugin.dtd"
+          todir="${build.javadoc}/org/apache/nutch/plugin/doc-files"/>
+  </target>
+
+  <!--
+    Transforms nutch-default.xml from ${conf.dir} with the nutch-conf.xsl
+    stylesheet and writes the result to ${docs.dir}.
+  -->
+  <target name="default-doc" description="--> generate default Nutch documentation">
+    <!-- "xslt" is the current name of the task formerly called "style";
+         the old name is deprecated. The attributes are unchanged. -->
+    <xslt basedir="${conf.dir}" destdir="${docs.dir}"
+          includes="nutch-default.xml" style="conf/nutch-conf.xsl"/>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- D I S T R I B U T I O N                                            -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <!--
+    Assembles the source distribution tree below ${src.dist.version.dir}:
+    libraries, configuration, API docs, top-level text files, the src and ivy
+    folders, and the build descriptor files.
+  -->
+  <target name="package-src"
+          depends="runtime, javadoc"
+          description="--> generate source distribution package">
+    <!-- directory skeleton -->
+    <mkdir dir="${dist.dir}"/>
+    <mkdir dir="${src.dist.version.dir}"/>
+    <mkdir dir="${src.dist.version.dir}/lib"/>
+    <mkdir dir="${src.dist.version.dir}/docs"/>
+    <mkdir dir="${src.dist.version.dir}/docs/api"/>
+    <mkdir dir="${src.dist.version.dir}/ivy"/>
+
+    <!-- libraries; empty folders are left out -->
+    <copy includeEmptyDirs="false" todir="${src.dist.version.dir}/lib">
+      <fileset dir="lib"/>
+    </copy>
+
+    <!-- configuration, without the *.template files -->
+    <copy todir="${src.dist.version.dir}/conf">
+      <fileset dir="${conf.dir}">
+        <exclude name="**/*.template"/>
+      </fileset>
+    </copy>
+
+    <!-- generated API documentation -->
+    <copy todir="${src.dist.version.dir}/docs/api">
+      <fileset dir="${build.javadoc}"/>
+    </copy>
+
+    <!-- text files from the project root -->
+    <copy todir="${src.dist.version.dir}">
+      <fileset dir=".">
+        <include name="*.txt" />
+        <!--<include name="KEYS" />-->
+      </fileset>
+    </copy>
+
+    <!-- complete source and ivy trees, empty folders included -->
+    <copy includeEmptyDirs="true" todir="${src.dist.version.dir}/src">
+      <fileset dir="src"/>
+    </copy>
+
+    <copy includeEmptyDirs="true" todir="${src.dist.version.dir}/ivy">
+      <fileset dir="ivy"/>
+    </copy>
+
+    <!-- build descriptor and its default properties -->
+    <copy file="build.xml" todir="${src.dist.version.dir}/"/>
+    <copy file="default.properties" todir="${src.dist.version.dir}/"/>
+
+  </target>
+
+  <!--
+    Assembles the binary distribution tree below ${bin.dist.version.dir} from
+    the runtime/local folder, the generated API docs and the top-level text files.
+  -->
+  <target name="package-bin"
+          depends="runtime, javadoc"
+          description="--> generate binary distribution package">
+    <!-- directory skeleton -->
+    <mkdir dir="${dist.dir}"/>
+    <mkdir dir="${bin.dist.version.dir}"/>
+    <mkdir dir="${bin.dist.version.dir}/lib"/>
+    <mkdir dir="${bin.dist.version.dir}/bin"/>
+    <mkdir dir="${bin.dist.version.dir}/conf"/>
+    <mkdir dir="${bin.dist.version.dir}/docs"/>
+    <mkdir dir="${bin.dist.version.dir}/docs/api"/>
+    <mkdir dir="${bin.dist.version.dir}/plugins"/>
+
+    <!-- libraries; empty folders are left out -->
+    <copy includeEmptyDirs="false" todir="${bin.dist.version.dir}/lib">
+      <fileset dir="runtime/local/lib"/>
+    </copy>
+
+    <!-- scripts, made executable for everybody after copying -->
+    <copy todir="${bin.dist.version.dir}/bin">
+      <fileset dir="runtime/local/bin"/>
+    </copy>
+
+    <chmod type="file" perm="ugo+x">
+      <fileset dir="${bin.dist.version.dir}/bin"/>
+    </chmod>
+
+    <!-- configuration, without the *.template files -->
+    <copy todir="${bin.dist.version.dir}/conf">
+      <fileset dir="runtime/local/conf">
+        <exclude name="**/*.template"/>
+      </fileset>
+    </copy>
+
+    <!-- generated API documentation -->
+    <copy todir="${bin.dist.version.dir}/docs/api">
+      <fileset dir="${build.javadoc}"/>
+    </copy>
+
+    <!-- text files from the project root -->
+    <copy todir="${bin.dist.version.dir}">
+      <fileset dir="." includes="*.txt"/>
+    </copy>
+
+    <!-- plugins, empty folders included -->
+    <copy includeEmptyDirs="true" todir="${bin.dist.version.dir}/plugins">
+      <fileset dir="runtime/local/plugins"/>
+    </copy>
+
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make src release tarball                                               -->
+  <!-- ================================================================== -->
+  <!--
+    Packs ${src.dist.version.dir} into a gzipped tarball next to it. All entries
+    are placed below ${final.name}; files directly in src/bin get mode 755,
+    everything else mode 664.
+  -->
+  <target name="tar-src"
+          depends="package-src"
+          description="--> generate src.tar.gz distribution package">
+    <tar destfile="${src.dist.version.dir}.tar.gz" compression="gzip" longfile="gnu">
+      <tarfileset dir="${src.dist.version.dir}" prefix="${final.name}" mode="664"
+                  includes="**" excludes="src/bin/*"/>
+      <tarfileset dir="${src.dist.version.dir}" prefix="${final.name}" mode="755"
+                  includes="src/bin/*"/>
+    </tar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make bin release tarball                                               -->
+  <!-- ================================================================== -->
+  <!--
+    Packs ${bin.dist.version.dir} into a gzipped tarball next to it. All entries
+    are placed below ${final.name}; files directly in bin get mode 755,
+    everything else mode 664.
+  -->
+  <target name="tar-bin"
+          depends="package-bin"
+          description="--> generate bin.tar.gz distribution package">
+    <tar destfile="${bin.dist.version.dir}.tar.gz" compression="gzip" longfile="gnu">
+      <tarfileset dir="${bin.dist.version.dir}" prefix="${final.name}" mode="664"
+                  includes="**" excludes="bin/*"/>
+      <tarfileset dir="${bin.dist.version.dir}" prefix="${final.name}" mode="755"
+                  includes="bin/*"/>
+    </tar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make src release zip                                               -->
+  <!-- ================================================================== -->
+  <!--
+    Packs ${src.dist.version.dir} into a compressed zip archive next to it. All
+    entries are placed below ${final.name}; files directly in src/bin get file
+    mode 755, everything else file mode 664.
+  -->
+  <target name="zip-src"
+          depends="package-src"
+          description="--> generate src.zip distribution package">
+    <zip destfile="${src.dist.version.dir}.zip" compress="true" casesensitive="yes">
+      <zipfileset dir="${src.dist.version.dir}" prefix="${final.name}" filemode="664"
+                  includes="**" excludes="src/bin/*"/>
+      <zipfileset dir="${src.dist.version.dir}" prefix="${final.name}" filemode="755"
+                  includes="src/bin/*"/>
+    </zip>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make bin release zip                                               -->
+  <!-- ================================================================== -->
+  <!--
+    Packs ${bin.dist.version.dir} into a compressed zip archive next to it. All
+    entries are placed below ${final.name}; files directly in bin get file
+    mode 755, everything else file mode 664.
+  -->
+  <target name="zip-bin"
+          depends="package-bin"
+          description="--> generate bin.zip distribution package">
+    <zip destfile="${bin.dist.version.dir}.zip" compress="true" casesensitive="yes">
+      <zipfileset dir="${bin.dist.version.dir}" prefix="${final.name}" filemode="664"
+                  includes="**" excludes="bin/*"/>
+      <zipfileset dir="${bin.dist.version.dir}" prefix="${final.name}" filemode="755"
+                  includes="bin/*"/>
+    </zip>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Clean.  Delete the build files, and their directories              -->
+  <!-- ================================================================== -->
+
+  <!-- target: clean  =================================================== -->
+  <!-- Aggregate target: build output, libraries, dist files and the runtime area. -->
+  <target name="clean"
+          depends="clean-build, clean-lib, clean-dist, clean-runtime"
+          description="--> clean the project"/>
+
+  <!-- target: clean-eclipse ============================================ -->
+  <!-- Aggregate target: same as "clean" except that the runtime area is kept. -->
+  <target name="clean-eclipse"
+          depends="clean-build, clean-lib, clean-dist"
+          description="--> cleans the eclipse project"/>
+
+  <!-- target: clean-local  ============================================= -->
+  <!-- Deletes this module's folder (organisation/module) below the local Ivy repository root. -->
+  <target name="clean-local"
+          description="--> cleans the local repository for the current module">
+    <delete dir="${ivy.local.default.root}/${ivy.organisation}/${ivy.module}"/>
+  </target>
+
+  <!-- target: clean-lib  =============================================== -->
+  <!-- Aggregate target: has no tasks of its own, only the two dependencies. -->
+  <target name="clean-lib"
+          depends="clean-default-lib, clean-test-lib"
+          description="--> clean the project libraries directories (dependencies: default + test)"/>
+  <!-- target: clean-default-lib  =============================================== -->
+  <!-- Removes ${build.lib.dir} and everything in it. -->
+  <target name="clean-default-lib"
+          description="--> clean the project libraries directory (dependencies)">
+    <delete dir="${build.lib.dir}" includeemptydirs="true"/>
+  </target>
+  <!-- target: clean-test-lib  =============================================== -->
+  <!-- Removes ${test.build.lib.dir} and everything in it. -->
+  <target name="clean-test-lib"
+          description="--> clean the project test libraries directory (dependencies)">
+    <delete dir="${test.build.lib.dir}" includeemptydirs="true"/>
+  </target>
+
+  <!-- target: clean-build  ============================================= -->
+  <!-- Removes ${build.dir} and everything in it. -->
+  <target name="clean-build"
+          description="--> clean the project built files">
+    <delete dir="${build.dir}" includeemptydirs="true"/>
+  </target>
+
+  <!-- target: clean-dist   ============================================= -->
+  <!-- Removes ${dist.dir} and everything in it. -->
+  <target name="clean-dist"
+          description="--> clean the project dist files">
+    <delete dir="${dist.dir}" includeemptydirs="true"/>
+  </target>
+
+  <!-- target: clean-cache  ============================================= -->
+  <!-- Delegates to the ivy:cleancache task. -->
+  <target name="clean-cache"
+          description="--> delete ivy cache">
+    <ivy:cleancache/>
+  </target>
+
+  <!-- Removes ${runtime.dir} and everything in it. -->
+  <target name="clean-runtime"
+          description="--> clean the project runtime area">
+    <delete dir="${runtime.dir}" includeemptydirs="true"/>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- RAT targets                                                        -->
+  <!-- ================================================================== -->
+  <!-- Loads the RAT antlib from any rat*.jar found in the project base directory. -->
+  <target name="rat-sources-typedef"
+          description="--> run RAT antlib task">
+    <typedef resource="org/apache/rat/anttasks/antlib.xml">
+      <classpath>
+        <fileset dir=".">
+          <include name="rat*.jar"/>
+        </fileset>
+      </classpath>
+    </typedef>
+  </target>
+
+  <!-- Runs the RAT report over src/java and over the src folders of the plugins below src/plugin. -->
+  <target name="rat-sources"
+          depends="rat-sources-typedef"
+          description="--> runs RAT tasks over src/java">
+    <rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
+      <fileset dir="src" includes="java/**/*,plugin/**/src/**/*"/>
+    </rat:report>
+  </target>
+
+
+  <!-- ================================================================== -->
+  <!-- Eclipse targets                                                    -->
+  <!-- ================================================================== -->
+
+  <!-- classpath for generating eclipse project -->
+  <path id="eclipse.classpath">
+    <!-- build libraries, without the ant-eclipse task jar itself -->
+    <fileset dir="${build.lib.dir}" includes="*.jar" excludes="ant-eclipse-1.0-jvm1.2.jar"/>
+    <!-- every jar of the built plugins -->
+    <fileset dir="${build.plugins}" includes="**/*.jar"/>
+    <!-- test libraries -->
+    <fileset dir="${test.build.lib.dir}" includes="*.jar"/>
+  </path>
+
+  <!-- target: ant-eclipse-download   =================================== -->
+  <!--
+    Downloads the ant-eclipse 1.0 archive into ${build.dir}, extracts only
+    lib/ant-eclipse-1.0-jvm1.2.jar from it and deletes the archive again.
+    NOTE(review): the downloaded archive is not verified against a checksum.
+  -->
+  <target name="ant-eclipse-download" description="--> downloads the ant-eclipse binary.">
+    <!-- The download is written into ${build.dir}; create it so that the
+         target also works when invoked on its own, e.g. after a clean. -->
+    <mkdir dir="${build.dir}"/>
+
+    <get src="https://downloads.sourceforge.net/project/ant-eclipse/ant-eclipse/1.0/ant-eclipse-1.0.bin.tar.bz2"
+         dest="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" usetimestamp="false" />
+
+    <untar src="${build.dir}/ant-eclipse-1.0.bin.tar.bz2"
+           dest="${build.dir}" compression="bzip2">
+      <patternset>
+        <include name="lib/ant-eclipse-1.0-jvm1.2.jar"/>
+      </patternset>
+    </untar>
+
+    <delete file="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" />
+  </target>
+
+  <!-- target: eclipse   ================================================ -->
+  <!--
+    Generates Eclipse project files with the third-party ant-eclipse task.
+    Runs clean-eclipse, init, resolve-test, job and ant-eclipse-download first.
+  -->
+  <target name="eclipse"
+          depends="clean-eclipse,init,resolve-test,job,ant-eclipse-download"
+          description="--> create eclipse project files">
+
+    <!-- eclipse.project = last path segment of ${basedir};
+         "$$" is Ant's escape for a literal "$" (end-of-input anchor). -->
+    <pathconvert property="eclipse.project">
+      <path path="${basedir}"/>
+      <regexpmapper from="^.*/([^/]+)$$" to="\1" handledirsep="yes"/>
+    </pathconvert>
+
+    <!-- The task class is loaded from the jar that ant-eclipse-download
+         extracts into ${build.dir}/lib. -->
+    <taskdef name="eclipse"
+             classname="prantl.ant.eclipse.EclipseTask"
+             classpath="${build.dir}/lib/ant-eclipse-1.0-jvm1.2.jar" />
+    <eclipse updatealways="true">
+      <project name="${eclipse.project}" />
+      <classpath>
+        <!-- library entries: configuration folder, scripts folder, and all jars
+             referenced by the eclipse.classpath path -->
+        <library path="${conf.dir}" exported="false" />
+        <library path="${basedir}/src/bin" exported="false" />
+        <library pathref="eclipse.classpath" exported="false" />
+
+        <!-- core sources; the core tests get their own output folder -->
+        <source path="${basedir}/src/java/" />
+        <source path="${basedir}/src/test/" output="build/test/classes" />
+
+        <!-- plugin sources: src/java for every plugin, plus src/test where listed -->
+        <source path="${plugins.dir}/any23/src/java/" />
+        <source path="${plugins.dir}/any23/src/test/" />
+        <source path="${plugins.dir}/creativecommons/src/java/" />
+        <source path="${plugins.dir}/creativecommons/src/test/" />
+        <source path="${plugins.dir}/feed/src/java/" />
+        <source path="${plugins.dir}/feed/src/test/" />
+        <source path="${plugins.dir}/headings/src/java/" />
+        <source path="${plugins.dir}/headings/src/test/" />
+        <source path="${plugins.dir}/exchange-jexl/src/java/" />
+        <source path="${plugins.dir}/index-anchor/src/java/" />
+        <source path="${plugins.dir}/index-anchor/src/test/" />
+        <source path="${plugins.dir}/index-basic/src/java/" />
+        <source path="${plugins.dir}/index-basic/src/test/" />
+        <source path="${plugins.dir}/index-geoip/src/java/" />
+        <source path="${plugins.dir}/index-jexl-filter/src/java/" />
+        <source path="${plugins.dir}/index-jexl-filter/src/test/" />
+        <source path="${plugins.dir}/index-links/src/java/" />
+        <source path="${plugins.dir}/index-links/src/test/" />
+        <source path="${plugins.dir}/index-metadata/src/java/" />
+        <source path="${plugins.dir}/index-more/src/java/" />
+        <source path="${plugins.dir}/index-more/src/test/" />
+        <source path="${plugins.dir}/index-replace/src/java/" />
+        <source path="${plugins.dir}/index-replace/src/test/" />
+        <source path="${plugins.dir}/index-static/src/java/" />
+        <source path="${plugins.dir}/index-static/src/test/" />
+        <source path="${plugins.dir}/indexer-cloudsearch/src/java/" />
+        <source path="${plugins.dir}/indexer-csv/src/java"/>
+        <source path="${plugins.dir}/indexer-csv/src/test"/>
+        <source path="${plugins.dir}/indexer-dummy/src/java/" />
+        <source path="${plugins.dir}/indexer-elastic-rest/src/java/"/>
+        <source path="${plugins.dir}/indexer-elastic/src/java/" />
+        <source path="${plugins.dir}/indexer-kafka/src/java/" />
+        <source path="${plugins.dir}/indexer-rabbit/src/java/" />
+        <source path="${plugins.dir}/indexer-solr/src/java/" />
+        <source path="${plugins.dir}/language-identifier/src/java/" />
+        <source path="${plugins.dir}/language-identifier/src/test/" />
+        <source path="${plugins.dir}/lib-htmlunit/src/java/" />
+        <source path="${plugins.dir}/lib-http/src/java/" />
+        <source path="${plugins.dir}/lib-http/src/test/" />
+        <source path="${plugins.dir}/lib-rabbitmq/src/java/" />
+        <source path="${plugins.dir}/lib-regex-filter/src/java/" />
+        <source path="${plugins.dir}/lib-regex-filter/src/test/" />
+        <source path="${plugins.dir}/lib-selenium/src/java/" />
+        <source path="${plugins.dir}/microformats-reltag/src/java/" />
+        <source path="${plugins.dir}/mimetype-filter/src/java/" />
+        <source path="${plugins.dir}/mimetype-filter/src/test/" />
+        <source path="${plugins.dir}/parse-ext/src/java/" />
+        <source path="${plugins.dir}/parse-ext/src/test/" />
+        <source path="${plugins.dir}/parse-html/src/java/" />
+        <source path="${plugins.dir}/parse-html/src/test/" />
+        <source path="${plugins.dir}/parse-js/src/java/" />
+        <source path="${plugins.dir}/parse-js/src/test/" />
+        <source path="${plugins.dir}/parse-metatags/src/java/" />
+        <source path="${plugins.dir}/parse-metatags/src/test/" />
+        <source path="${plugins.dir}/parse-swf/src/java/" />
+        <source path="${plugins.dir}/parse-swf/src/test/" />
+        <source path="${plugins.dir}/parse-tika/src/java/" />
+        <source path="${plugins.dir}/parse-tika/src/test/" />
+        <source path="${plugins.dir}/parse-zip/src/java/" />
+        <source path="${plugins.dir}/parse-zip/src/test/" />
+        <source path="${plugins.dir}/parsefilter-naivebayes/src/java/" />
+        <source path="${plugins.dir}/parsefilter-regex/src/java/" />
+        <source path="${plugins.dir}/parsefilter-regex/src/test/" />
+        <source path="${plugins.dir}/protocol-file/src/java/" />
+        <source path="${plugins.dir}/protocol-file/src/test/" />
+        <source path="${plugins.dir}/protocol-ftp/src/java/" />
+        <source path="${plugins.dir}/protocol-htmlunit/src/java/" />
+        <source path="${plugins.dir}/protocol-http/src/java/" />
+        <source path="${plugins.dir}/protocol-http/src/test/" />
+        <source path="${plugins.dir}/protocol-httpclient/src/java/" />
+        <source path="${plugins.dir}/protocol-httpclient/src/test/" />
+        <source path="${plugins.dir}/protocol-interactiveselenium/src/java/" />
+        <source path="${plugins.dir}/protocol-okhttp/src/java/" />
+        <source path="${plugins.dir}/protocol-okhttp/src/test/" />
+        <source path="${plugins.dir}/protocol-selenium/src/java"/>
+        <source path="${plugins.dir}/publish-rabbitmq/src/java"/>
+        <source path="${plugins.dir}/scoring-adaptive/src/java"/>
+        <source path="${plugins.dir}/scoring-depth/src/java/" />
+        <source path="${plugins.dir}/scoring-link/src/java/" />
+        <source path="${plugins.dir}/scoring-opic/src/java/" />
+        <source path="${plugins.dir}/scoring-orphan/src/java"/>
+        <source path="${plugins.dir}/scoring-orphan/src/test"/>
+        <source path="${plugins.dir}/scoring-similarity/src/java/" />
+        <source path="${plugins.dir}/subcollection/src/java/" />
+        <source path="${plugins.dir}/subcollection/src/test/" />
+        <source path="${plugins.dir}/tld/src/java/" />
+        <source path="${plugins.dir}/urlfilter-automaton/src/java/" />
+        <source path="${plugins.dir}/urlfilter-automaton/src/test/" />
+        <source path="${plugins.dir}/urlfilter-domain/src/java/" />
+        <source path="${plugins.dir}/urlfilter-domain/src/test/" />
+        <source path="${plugins.dir}/urlfilter-domainblacklist/src/java/" />
+        <source path="${plugins.dir}/urlfilter-domainblacklist/src/test/" />
+        <source path="${plugins.dir}/urlfilter-fast/src/java/"/>
+        <source path="${plugins.dir}/urlfilter-fast/src/test/"/>
+        <source path="${plugins.dir}/urlfilter-ignoreexempt/src/java/" />
+        <source path="${plugins.dir}/urlfilter-prefix/src/java/" />
+        <source path="${plugins.dir}/urlfilter-prefix/src/test/" />
+        <source path="${plugins.dir}/urlfilter-regex/src/java/" />
+        <source path="${plugins.dir}/urlfilter-regex/src/test/" />
+        <source path="${plugins.dir}/urlfilter-suffix/src/java/" />
+        <source path="${plugins.dir}/urlfilter-suffix/src/test/" />
+        <source path="${plugins.dir}/urlfilter-validator/src/java/" />
+        <source path="${plugins.dir}/urlfilter-validator/src/test/" />
+        <source path="${plugins.dir}/urlmeta/src/java/" />
+        <source path="${plugins.dir}/urlnormalizer-ajax/src/java/" />
+        <source path="${plugins.dir}/urlnormalizer-ajax/src/test/" />
+        <source path="${plugins.dir}/urlnormalizer-basic/src/java/" />
+        <source path="${plugins.dir}/urlnormalizer-basic/src/test/" />
+        <source path="${plugins.dir}/urlnormalizer-host/src/java/" />
+        <source path="${plugins.dir}/urlnormalizer-host/src/test/" />
+        <source path="${plugins.dir}/urlnormalizer-pass/src/java/" />
+        <source path="${plugins.dir}/urlnormalizer-pass/src/test/" />
+        <source path="${plugins.dir}/urlnormalizer-protocol/src/java/" />
+        <source path="${plugins.dir}/urlnormalizer-protocol/src/test/" />
+        <source path="${plugins.dir}/urlnormalizer-querystring/src/java/" />
+        <source path="${plugins.dir}/urlnormalizer-querystring/src/test/" />
+        <source path="${plugins.dir}/urlnormalizer-regex/src/java/" />
+        <source path="${plugins.dir}/urlnormalizer-regex/src/test/" />
+        <source path="${plugins.dir}/urlnormalizer-slash/src/java/" />
+        <source path="${plugins.dir}/urlnormalizer-slash/src/test/" />
+
+        <!-- default output folder for compiled classes -->
+        <output path="${build.classes}" />
+      </classpath>
+    </eclipse>
+  </target>
+
+</project>