Skip to content

Commit 464d2e3

Browse files
committed
Spark and Hadoop versions updated, openjdk:11-jre-slim as base image, faster downloads
1 parent 75fcaa3 commit 464d2e3

File tree

1 file changed

+9
-7
lines changed

1 file changed

+9
-7
lines changed

docker/Dockerfile

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,26 @@
1-
# base image
2-
FROM openjdk:11
1+
FROM openjdk:11-jre-slim
3 2

4 3
# define spark and hadoop versions
5-
ENV SPARK_VERSION=3.2.0
6-
ENV HADOOP_VERSION=3.3.1
4+
ENV SPARK_VERSION=3.3.2
5+
ENV HADOOP_VERSION=3.3.4
6+
7+
RUN apt-get -qq update \
8+
&& apt-get -y install curl procps python3
7 9

8 10
# download and install hadoop
9 11
RUN mkdir -p /opt && \
10 12
cd /opt && \
11-
curl http://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | \
13+
curl -f https://dlcdn.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | \
12 14
tar -zx hadoop-${HADOOP_VERSION}/lib/native && \
13 15
ln -s hadoop-${HADOOP_VERSION} hadoop && \
14 16
echo Hadoop ${HADOOP_VERSION} native libraries installed in /opt/hadoop/lib/native
15 17

16 18
# download and install spark
17 19
RUN mkdir -p /opt && \
18 20
cd /opt && \
19-
curl http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz | \
21+
curl -f https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz | \
20 22
tar -zx && \
21-
ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 spark && \
23+
ln -s spark-${SPARK_VERSION}-bin-hadoop3 spark && \
22 24
echo Spark ${SPARK_VERSION} installed in /opt
23 25

24 26
# add scripts and update spark default config

0 commit comments

Comments (0)