# Tag of the docker-spark base image. It is declared before FROM so that it can be
# used in the FROM line; pass it with --build-arg FROM_TAG=<tag>.
ARG FROM_TAG
# When FROM_TAG is unset or empty, the ":-" expansion falls back to the "alpine-latest" tag.
FROM registry.gitlab.com/rychly-edu/docker/docker-spark:${FROM_TAG:-alpine-latest}

# Image author metadata (the MAINTAINER instruction is deprecated in favour of LABEL).
LABEL org.opencontainers.image.authors="Marek Rychly <marek.rychly@gmail.com>"

# Zeppelin release to install; the available releases are listed at
# https://zeppelin.apache.org/download.html
ARG ZEPPELIN_VERSION=0.8.1

# Optional directory holding pre-downloaded files (zeppelin.KEYS, the archive and its .asc signature).
# When it is empty or unset, the RUN step below downloads these files instead.
ARG DOWNLOAD_CACHE
# The www-eu.apache.org mirror stays disabled because it has only the latest release, not the old versions:
#ARG APACHE_ORIG="http://www-eu.apache.org/dist"
# APACHE_ORIG serves the KEYS file and the detached signature, APACHE_MIRROR serves the archive itself.
# Both use HTTPS: fetching the signing keys over plain HTTP would let them be replaced in transit,
# which would make the signature verification meaningless.
ARG APACHE_ORIG="https://archive.apache.org/dist"
ARG APACHE_MIRROR="https://archive.apache.org/dist"

# Root directory of the Zeppelin installation inside the image.
ENV ZEPPELIN_HOME="/opt/zeppelin"
# Configuration directory; kept in its own ENV instruction because it expands ZEPPELIN_HOME
# and is in turn expanded by the instruction that follows.
ENV ZEPPELIN_CONF_DIR="${ZEPPELIN_HOME}/conf"
# Paths of the two configuration files located in the configuration directory.
ENV ZEPPELIN_SITE_CONF="${ZEPPELIN_CONF_DIR}/zeppelin-site.xml" \
    ZEPPELIN_ENV_CONF="${ZEPPELIN_CONF_DIR}/zeppelin-env.sh"

# Copy the content of the build-context "scripts" directory into the filesystem root;
# the *.sh files placed in / are made executable by the RUN step below.
COPY scripts /

# Install Zeppelin in a single layer, so that the downloaded archive, the signing keys
# and the temporary GnuPG files are removed in the same layer that created them.
RUN true \
# make the scripts executable
&& chmod 755 /*.sh \
# bash: zeppelin shell scripts require BASH, they are not compatible with Busybox ASH/SH
# sed: scripts to set Zeppelin properties in files require GNU sed (the usage of busybox sed may result into incorrect outputs)
# gnupg: needed only to verify the download, removed again in the clean-up below
# attr: setfattr is used below to set the PaX flags
# (--no-cache already fetches a fresh package index, so no extra --update is needed)
&& apk add --no-cache gnupg attr sed bash \
\
# get the release signing keys (from the cache when DOWNLOAD_CACHE is set, otherwise, or when the copy fails, by download) and trust them
&& ( [ -n "${DOWNLOAD_CACHE}" ] && cp -v "${DOWNLOAD_CACHE}/zeppelin.KEYS" /tmp \
	|| wget -O /tmp/zeppelin.KEYS "${APACHE_ORIG}/zeppelin/KEYS" ) \
&& gpg --import /tmp/zeppelin.KEYS \
# "trust-model always" treats every imported key as trusted
&& echo "trust-model always" > ~/.gnupg/gpg.conf \
\
# get the package
&& ( [ -n "${DOWNLOAD_CACHE}" ] && cp -v "${DOWNLOAD_CACHE}/zeppelin-${ZEPPELIN_VERSION}-bin-netinst.tgz" /tmp \
	|| wget -O "/tmp/zeppelin-${ZEPPELIN_VERSION}-bin-netinst.tgz" "${APACHE_MIRROR}/zeppelin/zeppelin-${ZEPPELIN_VERSION}/zeppelin-${ZEPPELIN_VERSION}-bin-netinst.tgz" ) \
\
# get the detached signature and verify the package against it
&& ( [ -n "${DOWNLOAD_CACHE}" ] && cp -v "${DOWNLOAD_CACHE}/zeppelin-${ZEPPELIN_VERSION}-bin-netinst.tgz.asc" /tmp \
	|| wget -O "/tmp/zeppelin-${ZEPPELIN_VERSION}-bin-netinst.tgz.asc" "${APACHE_ORIG}/zeppelin/zeppelin-${ZEPPELIN_VERSION}/zeppelin-${ZEPPELIN_VERSION}-bin-netinst.tgz.asc" ) \
# abort on the first bad signature: a "for" loop returns only the status of its last iteration,
# so without the explicit exit an earlier failed verification would be ignored
&& for SIG in /tmp/*.asc; do gpg --verify "${SIG}" "${SIG%.asc}" || exit 1; done \
\
# extract the package and remove garbage (Windows *.cmd scripts)
&& mkdir -p "${ZEPPELIN_HOME}" \
&& tar -xzf "/tmp/zeppelin-${ZEPPELIN_VERSION}-bin-netinst.tgz" -C "${ZEPPELIN_HOME}" --strip-components 1 \
&& find "${ZEPPELIN_HOME}" -name '*.cmd' -delete \
\
# fix ISSUE: os::commit_memory failed; error=Operation not permitted
# (AUFS does not support xattr, so we need to set the flag once again after execution of the container in its entrypoint)
# https://en.wikibooks.org/wiki/Grsecurity/Application-specific_Settings#Java
&& setfattr -n user.pax.flags -v em "${JAVA_HOME}/bin/java" "${JAVA_HOME}/jre/bin/java" \
\
# create a PID directory
&& mkdir -p "${ZEPPELIN_HOME}/run" \
\
# integrate with Java: generate zeppelin-env.sh exporting the JAVA_HOME value known at build time
&& echo '#!/bin/sh' > "${ZEPPELIN_ENV_CONF}" \
&& echo "export JAVA_HOME=${JAVA_HOME}" >> "${ZEPPELIN_ENV_CONF}" \
&& chmod 755 "${ZEPPELIN_ENV_CONF}" \
\
# install all interpreters (a bash script, not compatible with a busybox shell)
&& "${ZEPPELIN_HOME}/bin/install-interpreter.sh" --all \
\
# set up permissions: a system group and user "zeppelin" owning the whole installation
&& addgroup -S zeppelin \
&& adduser -h "${ZEPPELIN_HOME}" -g "Apache Zeppelin" -s /bin/sh -G zeppelin -S -D -H zeppelin \
&& chown -R zeppelin:zeppelin "${ZEPPELIN_HOME}" \
\
# set path for the shell (\${PATH} is escaped so that it is expanded when the profile script runs, not at build time)
&& echo '#!/bin/sh' > /etc/profile.d/path-zeppelin.sh \
&& echo "export PATH=\"\${PATH}:${ZEPPELIN_HOME}/bin\"" >> /etc/profile.d/path-zeppelin.sh \
&& chmod 755 /etc/profile.d/path-zeppelin.sh \
\
# clean up, including the GnuPG home directory (imported keys and gpg.conf) that is useless once gnupg is removed
&& apk del gnupg \
&& rm -rf ~/.gnupg /tmp/* /var/tmp/* /var/cache/apk/*

# Start the container through /entrypoint.sh (exec form, so it is run directly without a wrapping shell).
# NOTE(review): the script is presumably delivered by "COPY scripts /" — confirm against the scripts directory.
ENTRYPOINT ["/entrypoint.sh"]

# Container health probe in shell form (run through the default shell); no options are given,
# so Docker's default interval, timeout and retries apply.
HEALTHCHECK CMD /healthcheck.sh
