# Base layers of the benchmark image: CUDA devel base, OS build tools,
# a conda (Mambaforge) installation under /opt/conda, PyTorch, the Python
# requirements, and an editable install of optimum-benchmark.
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04

# Build-time parameters; override any of them with --build-arg.
ARG PYTORCH_VERSION=2.4.0
ARG PYTHON_VERSION=3.9
# Used to select the pytorch-cuda conda package (major.minor only).
ARG CUDA_VERSION=12.1
ARG MAMBA_VERSION=24.3.0-0
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch
# Automatically set by buildx
ARG TARGETPLATFORM

#ENV HOME=/home/user \
#    PATH=/home/user/.local/bin:/opt/conda/bin:$PATH
# Put conda's binaries first so that `pip`/`python` below resolve to /opt/conda.
ENV PATH=/opt/conda/bin:$PATH

# I'm not sure how to allow later python files used here to write to .cache
# without making it world-writeable.
# The build runs with / as working directory (the later steps rely on it), so
# the cache directory is addressed by its absolute path. Creating it and
# opening its permissions happen in one layer.
RUN mkdir -p /.cache && \
    chmod -R 777 /.cache
#RUN mkdir -p data
#RUN chmod 777 -R data

# OS-level build tooling. The apt lists are removed in the same layer so they
# never reach the image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        curl \
        git \
        python3 \
        python3-pip && \
    rm -rf /var/lib/apt/lists/*

# Install conda
# translating Docker's TARGETPLATFORM into mamba arches
# Download, install and delete the installer in ONE layer: removing the file in
# a later RUN would still leave it stored in the earlier layer.
# Only -o is passed to curl; combining -o and -O for a single URL is redundant.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") MAMBA_ARCH=aarch64 ;; \
        *)             MAMBA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -v -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh

# Install pytorch
# On arm64 we exit with an error code
# The conda package cache is cleaned in the same layer that populated it.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") exit 1 ;; \
        *) /opt/conda/bin/conda update -y conda && \
           /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y \
               "python=${PYTHON_VERSION}" \
               "pytorch=${PYTORCH_VERSION}" \
               "pytorch-cuda=$(echo "${CUDA_VERSION}" | cut -d'.' -f 1-2)" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya

# Python dependencies. --no-cache-dir keeps pip's download cache out of the layer.
COPY ./requirements.txt /requirements.txt
RUN pip install --no-cache-dir -r /requirements.txt

# Editable install of the energy_star_dev branch of optimum-benchmark. The
# checkout must stay at /optimum-benchmark because the install points at it.
RUN git clone -b energy_star_dev https://github.com/huggingface/optimum-benchmark.git /optimum-benchmark && \
    pip install --no-cache-dir -e /optimum-benchmark
# Application scripts, run data and bookkeeping files, all placed at the
# filesystem root.
COPY ./check_h100.py /check_h100.py
COPY ./.cache /.cache
COPY ./entrypoint.sh /entrypoint.sh
COPY ./pause_space.py /pause_space.py
COPY ./parse_requests.py /parse_requests.py
COPY ./create_results.py /create_results.py
COPY ./failed_run.py /failed_run.py
COPY ./runs /runs
COPY ./upload_run_folder.py /upload_run_folder.py
COPY ./attempts.txt /attempts.txt
COPY ./failed_attempts.txt /failed_attempts.txt

# All permission changes happen in a single layer; every separate chmod RUN
# would record the touched files again in a layer of its own.
# The scripts were copied to /, so the glob is written as /*.py instead of
# depending on the current working directory.
RUN chmod 777 /*.py /attempts.txt /failed_attempts.txt && \
    chmod -R 777 /runs /.cache && \
    chmod +x /entrypoint.sh

# Expose the secret DEBUG at buildtime and use its value as git remote URL
# The command substitution is quoted so the value is passed to git as one
# argument, without word splitting or glob expansion.
# NOTE(review): `git remote add` stores the URL in the repository's
# .git/config, which becomes part of this image layer. If the URL embeds
# credentials they are readable by anyone who can pull the image; confirm
# this is intended.
RUN --mount=type=secret,id=DEBUG,mode=0444,required=true \
    git init && \
    git remote add origin "$(cat /run/secrets/DEBUG)"

ENTRYPOINT ["/entrypoint.sh"]