# syntax=docker/dockerfile:1
FROM python:3.10-slim

# System dependencies.
# NOTE: wget is required by the model-download step below and is NOT present
# in -slim images — omitting it here breaks the build.
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential \
      git \
      wget \
    && rm -rf /var/lib/apt/lists/*

# Install llama-cpp-python WITH the [server] extra (pulls fastapi/uvicorn/etc.)
# so that `python -m llama_cpp.server` actually works, plus the Gradio app deps.
# Single layer keeps the image history simple; --no-cache-dir avoids baking
# the pip cache into the layer (DL3042).
RUN pip install --no-cache-dir \
      "llama-cpp-python[server] @ git+https://github.com/abetlen/llama-cpp-python.git" \
      gradio \
      requests

# Application code.
WORKDIR /app
COPY app.py .

# Download the GGUF model (replace the URL with your model).
# NOTE(review): baking a multi-GB model into the image makes it very large;
# consider a volume mount or a download-on-start entrypoint instead.
RUN mkdir -p /models && \
    wget -O /models/qwen3-14b-q4_k_xl.gguf \
      https://huggingface.co/unsloth/Qwen3-14B-GGUF/resolve/main/qwen3-14b-q4_k_xl.gguf

# Run as a non-root user; both ports are >1024 so no extra capabilities needed.
RUN useradd --system --create-home --home /app appuser && \
    chown -R appuser /app
USER appuser

# Documentation only (does not publish): 7860 = Gradio UI, 8000 = llama.cpp server.
EXPOSE 7860 8000

# Start the llama.cpp OpenAI-compatible server in the background, then the
# Gradio app in the foreground. NOTE(review): two processes in one container
# with no init/supervisor — if the background server dies the container keeps
# running; prefer separate containers or a supervisor (tini + wrapper script).
CMD ["bash", "-c", "python -m llama_cpp.server --model /models/qwen3-14b-q4_k_xl.gguf --host 0.0.0.0 --port 8000 & exec python app.py"]