Patch summary (free text before the first "diff --git" line is skipped by patch tools):
  * base image: openjdk:11 -> openjdk:11-jre-slim
  * SPARK_VERSION 3.2.0 -> 3.3.2, HADOOP_VERSION 3.3.1 -> 3.3.4
  * new layer installing curl, procps and python3 with apt-get
  * Hadoop/Spark tarballs fetched with "curl -f" from https://dlcdn.apache.org
    instead of http://archive.apache.org/dist
  * Spark package and symlink target change from bin-hadoop2.7 to bin-hadoop3

NOTE(review): leading whitespace inside the hunk lines was reconstructed (4-space
  continuation indent assumed); if context does not match the target file, apply
  with "git apply --ignore-whitespace".
NOTE(review): dlcdn.apache.org is understood to carry only current releases, so the
  pinned versions may disappear from it once superseded; confirm, or keep
  archive.apache.org/dist as the source for pinned versions.
NOTE(review): "curl -f ... | tar" runs under the default shell without pipefail, so
  the pipeline status is tar's, not curl's; the apt-get layer also leaves the
  package lists in the image.

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 3eb556d..18eabd5 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,14 +1,16 @@
-# base image
-FROM openjdk:11
+FROM openjdk:11-jre-slim
 
 # define spark and hadoop versions
-ENV SPARK_VERSION=3.2.0
-ENV HADOOP_VERSION=3.3.1
+ENV SPARK_VERSION=3.3.2
+ENV HADOOP_VERSION=3.3.4
+
+RUN apt-get -qq update \
+    && apt-get -y install curl procps python3
 
 # download and install hadoop
 RUN mkdir -p /opt && \
     cd /opt && \
-    curl http://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | \
+    curl -f https://dlcdn.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | \
     tar -zx hadoop-${HADOOP_VERSION}/lib/native && \
     ln -s hadoop-${HADOOP_VERSION} hadoop && \
     echo Hadoop ${HADOOP_VERSION} native libraries installed in /opt/hadoop/lib/native
@@ -16,9 +18,9 @@ RUN mkdir -p /opt && \
 # download and install spark
 RUN mkdir -p /opt && \
     cd /opt && \
-    curl http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz | \
+    curl -f https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz | \
     tar -zx && \
-    ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 spark && \
+    ln -s spark-${SPARK_VERSION}-bin-hadoop3 spark && \
     echo Spark ${SPARK_VERSION} installed in /opt
 
 # add scripts and update spark default config