anikifoss committed on
Commit
ad39954
·
verified ·
1 Parent(s): 4df8565

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +106 -3
README.md CHANGED
@@ -1,3 +1,106 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ quantized_by: anikifoss
3
+ pipeline_tag: text-generation
4
+ base_model: Qwen/Qwen3-235B-A22B-Thinking-2507
5
+ license: apache-2.0
6
+ license_link: LICENSE
7
+ base_model_relation: quantized
8
+ tags:
9
+ - conversational
10
+ ---
11
+
12
+ # Model Card
13
+
14
+ High-quality quantization of **Qwen3-235B-A22B-Thinking-2507** without using imatrix.
15
+
16
+ # Run
17
+
18
+ ## ik_llama.cpp
19
+
20
+ ```
21
+ ./build/bin/llama-server \
22
+ --alias anikifoss/Qwen3-235B-A22B-Thinking-2507-DQ4_K \
23
+ --model /mnt/data/Models/anikifoss/Qwen3-235B-A22B-Thinking-2507-DQ4_K/Qwen3-235B-A22B-Thinking-2507-DQ4_K-00001-of-00003.gguf \
24
+ --no-mmap -rtr \
25
+ --temp 0.5 --top-k 0 --top-p 1.0 --min-p 0.1 --repeat-penalty 1.0 \
26
+ --ctx-size 101000 \
27
+ -ctk f16 -ctv f16 \
28
+ -fa \
29
+ -b 1024 -ub 1024 \
30
+ -fmoe \
31
+ --n-gpu-layers 99 \
32
+ --override-tensor exps=CPU \
33
+ --parallel 1 \
34
+ --threads 32 \
35
+ --threads-batch 64 \
36
+ --host 127.0.0.1 \
37
+ --port 8090
38
+ ```
39
+
40
+ ## llama.cpp
41
+
42
+ ```
43
+ ./build/bin/llama-server \
44
+ --alias anikifoss/Qwen3-235B-A22B-Thinking-2507-DQ4_K \
45
+ --model /mnt/data/Models/anikifoss/Qwen3-235B-A22B-Thinking-2507-DQ4_K/Qwen3-235B-A22B-Thinking-2507-DQ4_K-00001-of-00003.gguf \
46
+ --no-mmap \
47
+ --temp 0.5 --top-k 0 --top-p 1.0 --min-p 0.1 --repeat-penalty 1.0 \
48
+ --ctx-size 101000 \
49
+ -ctk f16 -ctv f16 \
50
+ -fa \
51
+ -b 1024 -ub 1024 \
52
+ --n-gpu-layers 99 \
53
+ --override-tensor exps=CPU \
54
+ --parallel 1 \
55
+ --threads 32 \
56
+ --threads-batch 64 \
57
+ --host 127.0.0.1 \
58
+ --port 8090
59
+ ```
60
+
61
+ ## Quantization Recipe
62
+ Quantized with [ik_llama](https://github.com/ikawrakow/ik_llama.cpp), but should work with any GGUF-compatible inference framework.
63
+
64
+ ```bash
65
+ #!/usr/bin/env bash
66
+
67
+ custom="
68
+ # Token embedding and output tensors
69
+ output\.weight=bf16
70
+ output_norm\.weight=f32
71
+ token_embd\.weight=bf16
72
+
73
+ # Attention
74
+ blk\.[0-9]+\.attn_k\.weight=q8_0
75
+ blk\.[0-9]+\.attn_k_norm\.weight=f32
76
+ blk\.[0-9]+\.attn_norm\.weight=f32
77
+ blk\.[0-9]+\.attn_output\.weight=q8_0
78
+ blk\.[0-9]+\.attn_q\.weight=q8_0
79
+ blk\.[0-9]+\.attn_q_norm\.weight=f32
80
+ blk\.[0-9]+\.attn_v\.weight=q8_0
81
+
82
+ # MoE
83
+ blk\.[0-9]+\.ffn_down_exps\.weight=q6_K
84
+ blk\.[0-9]+\.ffn_gate_exps\.weight=q4_K
85
+ blk\.[0-9]+\.ffn_up_exps\.weight=q4_K
86
+
87
+ # Other
88
+ blk\.[0-9]+\.ffn_gate_inp\.weight=f32
89
+ blk\.[0-9]+\.ffn_norm\.weight=f32
90
+ "
91
+
92
+ custom=$(
93
+ echo "$custom" | grep -v '^#' | \
94
+ sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
95
+ )
96
+
97
+ echo "Running with: -custom-q $custom"
98
+
99
+ mkdir -p /mnt/data/Models/anikifoss/Qwen3-235B-A22B-Thinking-2507-DQ4_K
100
+ ./build/bin/llama-quantize \
101
+ --custom-q "$custom" \
102
+ /mnt/data/Models/Qwen/Qwen3-235B-A22B-Thinking-2507-GGUF/Qwen3-235B-A22B-Thinking-2507-BF16-00001-of-00010.gguf \
103
+ /mnt/data/Models/anikifoss/Qwen3-235B-A22B-Thinking-2507-DQ4_K/Qwen3-235B-A22B-Thinking-2507-DQ4_K.gguf \
104
+ Q4_K \
105
+ 32
106
+ ```