# syntax=docker/dockerfile:1.4
# The parser directive above is only honoured when it is the first line of the
# file, ahead of every other comment and instruction.

# Crawl4AI image, part 1: base image, native libraries, and an unprivileged
# user that owns the application tree.
#
# Build arguments:
#   PYTHON_VERSION  version prefix of the python:<version>-slim base image
#                   (default: 3.10)
#   INSTALL_TYPE    declared in the stage with default "basic"; read by the
#                   package-installation steps later in this file
#   ENABLE_GPU      "true" requests the CUDA toolkit; only acted on when the
#                   target platform is linux/amd64 (default: false)

ARG TARGETPLATFORM
ARG BUILDPLATFORM

# Other build arguments
ARG PYTHON_VERSION=3.10

# Base stage with system dependencies
FROM python:${PYTHON_VERSION}-slim AS base

# ARGs declared before FROM live in the global scope and are not visible to
# instructions inside a stage. TARGETPLATFORM must be re-declared here (without
# a value) or "$TARGETPLATFORM" expands to an empty string in the RUN below.
ARG TARGETPLATFORM
ARG INSTALL_TYPE=basic
ARG ENABLE_GPU=false

# Image metadata
LABEL maintainer="unclecode" \
      description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" \
      version="1.0"

# Environment setup: unbuffered Python output, no .pyc files, quiet and
# cache-free pip with a 100 s timeout, and non-interactive apt.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100 \
    DEBIAN_FRONTEND=noninteractive

# Install system dependencies as root: compiler toolchain, download tools and
# image-library headers. The apt package lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        wget \
        gnupg \
        git \
        cmake \
        pkg-config \
        python3-dev \
        libjpeg-dev \
        libpng-dev \
    && rm -rf /var/lib/apt/lists/*

# Playwright system dependencies for Linux (shared libraries for Chromium)
RUN apt-get update && apt-get install -y --no-install-recommends \
        libglib2.0-0 \
        libnss3 \
        libnspr4 \
        libatk1.0-0 \
        libatk-bridge2.0-0 \
        libcups2 \
        libdrm2 \
        libdbus-1-3 \
        libxcb1 \
        libxkbcommon0 \
        libx11-6 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxrandr2 \
        libgbm1 \
        libpango-1.0-0 \
        libcairo2 \
        libasound2 \
        libatspi2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# GPU support if enabled and architecture is supported.
# NOTE(review): nvidia-cuda-toolkit may not be available from the base image's
# default apt sources; confirm before building with ENABLE_GPU=true.
RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
        apt-get update && apt-get install -y --no-install-recommends \
            nvidia-cuda-toolkit \
        && rm -rf /var/lib/apt/lists/* ; \
    else \
        echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
    fi

# Add a non-root user (uid 1000) and, while still root, create /app owned by
# that user so later steps that write into the working directory do not depend
# on how the builder assigns ownership to a directory created by WORKDIR.
RUN useradd -m -u 1000 user \
    && mkdir -p /app \
    && chown user /app
USER user

# Per-user pip installs place their console scripts in ~/.local/bin.
ENV PATH="/home/user/.local/bin:$PATH"

# Set working directory
WORKDIR /app

# Copy the entire project with correct ownership
COPY --chown=user . .
# Crawl4AI image, part 2: Python dependencies, the package itself, the built
# documentation, the Chromium browser, and the server entrypoint.
# The steps below read requirements.txt and the project sources from the
# current working directory and branch on the INSTALL_TYPE build argument.

# Install base requirements
RUN pip install --no-cache-dir -r requirements.txt

# Install required libraries for the FastAPI server
RUN pip install fastapi uvicorn psutil

# For INSTALL_TYPE=all, pre-install the ML stack in its own layer and fetch
# the NLTK "punkt" and "stopwords" data sets. Other install types skip this.
RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
        pip install --no-cache-dir \
            torch \
            torchvision \
            torchaudio \
            scikit-learn \
            nltk \
            transformers \
            tokenizers && \
        python -m nltk.downloader punkt stopwords ; \
    fi

# Install the package with the extras selected by INSTALL_TYPE:
#   all, transformer -> matching extra, then run crawl4ai.model_loader
#   torch            -> ".[torch]" only
#   anything else    -> plain install without extras
RUN case "$INSTALL_TYPE" in \
        all|transformer) \
            pip install ".[$INSTALL_TYPE]" && \
            python -m crawl4ai.model_loader ;; \
        torch) \
            pip install ".[torch]" ;; \
        *) \
            pip install "." ;; \
    esac

# Install MkDocs and required plugins
RUN pip install --no-cache-dir \
        mkdocs \
        mkdocs-material \
        mkdocs-terminal \
        pymdown-extensions

# Build MkDocs documentation
RUN mkdocs build

# Install the Chromium browser for Playwright. This runs unconditionally: the
# former per-platform branches executed the same command for every platform
# they matched and were followed by this install anyway.
# NOTE(review): assumes the playwright package was installed by one of the pip
# steps above; confirm against requirements.txt / the package metadata.
RUN python -m playwright install chromium

# Expose ports
EXPOSE 8000 11235 9222 8080

# Start the FastAPI server
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]