File size: 921 Bytes
6455a8e 60ff592 c9f46e7 cb863c6 c519e63 f75aab0 c9f46e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
#!/bin/bash
#
# Provision and launch a llama.cpp inference server.
#
# Steps:
#   1. Download the prebuilt llama.cpp release archive into $WORK and unpack it.
#   2. Download the GGUF model to $WORK/model.gguf.
#   3. Replace this shell with llama-server listening on 0.0.0.0:8000.
#
# Every step is checked: a failed cd, download or unpack aborts immediately
# with a message on stderr instead of surfacing later as a confusing
# "file not found" / model-load error from a subsequent command.
set -euo pipefail

export WORK="/home/user/app"

# URLs are kept in quoted variables so that characters such as `?` are never
# subject to pathname expansion by the shell.
readonly LLAMA_CPP_URL="https://github.com/ggml-org/llama.cpp/releases/download/b6102/llama-b6102-bin-ubuntu-x64.zip"
readonly MODEL_URL="https://huggingface.co/lmstudio-community/Qwen3-4B-Thinking-2507-GGUF/resolve/main/Qwen3-4B-Thinking-2507-Q6_K.gguf?download=true"
# Alternative models (swap in for MODEL_URL above):
# readonly MODEL_URL="https://huggingface.co/lmstudio-community/Qwen3-4B-Instruct-2507-GGUF/resolve/main/Qwen3-4B-Instruct-2507-Q6_K.gguf?download=true"
# readonly MODEL_URL="https://huggingface.co/unsloth/gpt-oss-20b-GGUF/resolve/main/gpt-oss-20b-F16.gguf?download=true"

# Path of the server binary relative to $WORK once the archive is unpacked.
readonly SERVER_BIN="./build/bin/llama-server"

# Print an error message to stderr and abort the script.
die() {
  printf '❌ %s\n' "$*" >&2
  exit 1
}

cd "$WORK" || die "cannot cd to $WORK"

echo "✅ Downloading llamacpp..."
# -q keeps the log quiet on success; failures are reported through die.
wget -q -O llama_cpp.zip "$LLAMA_CPP_URL" \
  || die "failed to download llama.cpp from $LLAMA_CPP_URL"
# -o overwrites without prompting so a re-run in a populated $WORK does not
# block on (or fail at) unzip's interactive "replace?" question.
unzip -o -q llama_cpp.zip || die "failed to unpack llama_cpp.zip"
[[ -x "$SERVER_BIN" ]] || die "$SERVER_BIN not found or not executable after unpacking"

echo "✅ Downloading model..."
wget -q -O model.gguf "$MODEL_URL" \
  || die "failed to download model from $MODEL_URL"

# The status line is printed BEFORE launching: llama-server runs in the
# foreground, so anything placed after it would only execute once the server
# has already exited.
echo "✅ Booting up llama server on port 8000..."
# exec replaces the shell with the server so that signals sent to this PID
# (e.g. SIGTERM from a container runtime) are delivered to llama-server itself.
exec "$SERVER_BIN" -m model.gguf --port 8000 --host 0.0.0.0 --threads 2 --ctx-size 4096 --mlock --jinja