@echo off
:: ---------------------------------------------------------------------------
:: Downloads the five DeepSeek-V2-Chat Q2_K GGUF shards and the llama.cpp
:: b2961 Windows binaries into .\DeepSeek-V2-Chat.Q2_K.gguf, overlays the CUDA
:: build when an NVIDIA video controller is detected, then starts server.exe.
::
:: Re-running is safe: files that are already present are not downloaded
:: again, and a download is only given its final name after it completes.
:: Exit code: server.exe's exit code, or 1 if any setup step fails.
:: ---------------------------------------------------------------------------
setlocal

set "MODEL_DIR=DeepSeek-V2-Chat.Q2_K.gguf"
set "MODEL_PREFIX=DeepSeek-V2-Chat.Q2_K"
set "MODEL_URL=https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf"
set "LLAMA_URL=https://github.com/ggerganov/llama.cpp/releases/download/b2961"

:: Shared PowerShell preamble:
::  - stop on the first error so failures surface as a non-zero exit code;
::  - disable the progress bar, which slows Invoke-WebRequest down badly on
::    Windows PowerShell 5.1 for large files;
::  - enable TLS 1.2, which older Windows PowerShell setups do not use by default.
set "PS_PRELUDE=$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue'; [Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12;"

:: Create (if needed) and change to the working directory
if not exist "%MODEL_DIR%\" mkdir "%MODEL_DIR%" || goto :fail
cd /d "%MODEL_DIR%" || goto :fail

:: Download the GGUF shards
for %%i in (1 2 3 4 5) do (
    call :download "%MODEL_URL%/%MODEL_PREFIX%-0000%%i-of-00005.gguf?download=true" "%MODEL_PREFIX%-0000%%i-of-00005.gguf" || goto :fail
)

:: Download and unpack the llama.cpp CPU (AVX2) binaries
call :download "%LLAMA_URL%/llama-b2961-bin-win-avx2-x64.zip" "llama-b2961-bin-win-avx2-x64.zip" || goto :fail
call :unzip "llama-b2961-bin-win-avx2-x64.zip" || goto :fail

:: Check for an NVIDIA GPU. The whole PowerShell command must stay on a single
:: line with no inner double quotes: cmd.exe does not continue a quoted string
:: across lines. PowerShell exits 0 when a matching controller is found.
powershell -NoProfile -Command "if (Get-WmiObject Win32_VideoController | Where-Object { $_.Description -like 'NVIDIA*' }) { exit 0 } else { exit 1 }"
if errorlevel 1 goto :run_server

:: NVIDIA GPU present: fetch the CUDA runtime and CUDA build and unpack them
:: into the same directory (the unzip helper overwrites existing files).
call :download "%LLAMA_URL%/cudart-llama-bin-win-cu11.7.1-x64.zip" "cudart-llama-bin-win-cu11.7.1-x64.zip" || goto :fail
call :download "%LLAMA_URL%/llama-b2961-bin-win-cuda-cu11.7.1-x64.zip" "llama-b2961-bin-win-cuda-cu11.7.1-x64.zip" || goto :fail
call :unzip "cudart-llama-bin-win-cu11.7.1-x64.zip" || goto :fail
call :unzip "llama-b2961-bin-win-cuda-cu11.7.1-x64.zip" || goto :fail

:run_server
:: Execute the server command.
:: -m points at the first shard; this assumes the shards were produced by
:: llama.cpp's gguf-split (the NNNNN-of-NNNNN naming suggests so), in which
:: case the remaining shards are located automatically.
:: NOTE(review): -i is kept from the original command line; confirm that
:: server.exe in build b2961 accepts it.
server.exe ^
  -m %MODEL_PREFIX%-00001-of-00005.gguf ^
  -c 4096 ^
  -i ^
  --mlock ^
  --override-kv deepseek2.attention.q_lora_rank=int:1536 ^
  --override-kv deepseek2.attention.kv_lora_rank=int:512 ^
  --override-kv deepseek2.expert_shared_count=int:2 ^
  --override-kv deepseek2.expert_feed_forward_length=int:1536 ^
  --override-kv deepseek2.leading_dense_block_count=int:1
set "RC=%errorlevel%"
endlocal & exit /b %RC%

:fail
echo ERROR: setup failed; see the messages above. 1>&2
endlocal
exit /b 1

:: ---------------------------------------------------------------------------
:: :download  URL  FILE
::   Downloads URL to FILE in the current directory. Skips the download when
::   FILE already exists. Data is written to FILE.part first and renamed only
::   on success, so an interrupted transfer is never mistaken for a complete
::   file. Returns 0 on success, non-zero on failure.
:: ---------------------------------------------------------------------------
:download
if exist "%~2" goto :download_skip
echo Downloading %~2 ...
powershell -NoProfile -Command "%PS_PRELUDE% Invoke-WebRequest -UseBasicParsing -Uri '%~1' -OutFile '%~2.part'"
if errorlevel 1 goto :download_failed
move /y "%~2.part" "%~2" >nul
exit /b %errorlevel%

:download_skip
echo Skipping %~2 - already present.
exit /b 0

:download_failed
echo ERROR: download of %~2 failed. 1>&2
if exist "%~2.part" del "%~2.part"
exit /b 1

:: ---------------------------------------------------------------------------
:: :unzip  ZIPFILE
::   Extracts ZIPFILE into the current directory, overwriting existing files
::   (-Force) so that one archive can be unpacked over another.
::   Returns 0 on success, non-zero on failure.
:: ---------------------------------------------------------------------------
:unzip
echo Extracting %~1 ...
powershell -NoProfile -Command "$ErrorActionPreference = 'Stop'; Expand-Archive -Path '%~1' -DestinationPath . -Force"
exit /b %errorlevel%