# syntax=docker/dockerfile:1
FROM python:3.10-slim

# System dependencies.
# NOTE: wget is required by the model-download step below and is NOT present
# in -slim images — omitting it here breaks the build.
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential \
      git \
      wget \
    && rm -rf /var/lib/apt/lists/*

# Install llama-cpp-python WITH the [server] extra (pulls fastapi/uvicorn/etc.)
# so that `python -m llama_cpp.server` actually works, plus the Gradio app deps.
# Single layer keeps the image history simple; --no-cache-dir avoids baking
# the pip cache into the layer (DL3042).
RUN pip install --no-cache-dir \
      "llama-cpp-python[server] @ git+https://github.com/abetlen/llama-cpp-python.git" \
      gradio \
      requests

# Application code.
WORKDIR /app
COPY app.py .

# Download the GGUF model (replace the URL with your model).
# NOTE(review): baking a multi-GB model into the image makes it very large;
# consider a volume mount or a download-on-start entrypoint instead.
RUN mkdir -p /models && \
    wget -O /models/qwen3-14b-q4_k_xl.gguf \
      https://huggingface.co/unsloth/Qwen3-14B-GGUF/resolve/main/qwen3-14b-q4_k_xl.gguf

# Run as a non-root user; both ports are >1024 so no extra capabilities needed.
RUN useradd --system --create-home --home /app appuser && \
    chown -R appuser /app
USER appuser

# Documentation only (does not publish): 7860 = Gradio UI, 8000 = llama.cpp server.
EXPOSE 7860 8000

# Start the llama.cpp OpenAI-compatible server in the background, then the
# Gradio app in the foreground. NOTE(review): two processes in one container
# with no init/supervisor — if the background server dies the container keeps
# running; prefer separate containers or a supervisor (tini + wrapper script).
CMD ["bash", "-c", "python -m llama_cpp.server --model /models/qwen3-14b-q4_k_xl.gguf --host 0.0.0.0 --port 8000 & exec python app.py"]