mntss mwhanna committed on
Commit
835f055
·
verified ·
0 Parent(s):

Duplicate from mwhanna/qwen3-4b-transcoders

Browse files

Co-authored-by: Michael Hanna <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
layer_0.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f57f1cc3292492a73733192adfdb122d7e361a5e797327f363646a352a95ff5
3
+ size 1678054736
layer_1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aafc41c7486aa31a700707b6a9bef3c9a2dfb158ccd8f69cd63c9d5ef0a94396
3
+ size 1678054736
layer_10.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0a8f99900efe0c7e5310fcea60a74655172e0cd3c5d29a8b78ca0468ea85bf8
3
+ size 1678054736
layer_11.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a82abfcbc1e915bf90303171590487e2e7f97bc110165c3aef5a6a64e31b50d
3
+ size 1678054736
layer_12.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4ae9f81f1f1985ca3a0b2b7341db2e686988a407fca04d3795c92975efeb584
3
+ size 1678054736
layer_13.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d632107b61559ded3306214b55bc6e77afc5fa570d8c8c31421b75ddde275a9b
3
+ size 1678054736
layer_14.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0655c96cf7b556398214c08cf18a3e72d3cb7cee3bcc24d43de6208e5433ec1
3
+ size 1678054736
layer_15.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2da70628fba42012d86df97714560957d03ca6ed40e053f1a728500a36a30ea
3
+ size 1678054736
layer_16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78285e291b0cc3df7a7eff18f2a32cca64630d8b5210318ac8e4774d694b7dd7
3
+ size 1678054736
layer_17.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ce85f9315a0ef68285d44ee5581527651cf358a97abc5e33c8d5148933929bb
3
+ size 1678054736
layer_18.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d970cd59bafcbace2efbcc5cb95e06b1bf5e57b906bd17d84a04ef7ad872db0
3
+ size 1678054736
layer_19.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dc12824ae21b2106d00e50b450bcf590fd75857498acff18813d98f947c6db9
3
+ size 1678054736
layer_2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc5818da3b1e855707b7de8430467e05662885f40fd8b895dbed8fdf28aaa3e
3
+ size 1678054736
layer_20.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:576119742b830846ec9cb7c53311f7dfc0f58adffb9dbcbeb0a501d2b1d8c2dd
3
+ size 1678054736
layer_21.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d013a31692a263e6beb7c574e79e8169b27b365a622d1efbb4082d0467353c93
3
+ size 1678054736
layer_22.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8102a91559b5334b08b7b4cb84dadbfea108a73890f78c0fdd7f95285b4f99
3
+ size 1678054736
layer_23.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1747f36f31a956f31e6ea03c70b1b4a9ac7efbee52643e87095dc92521ad217
3
+ size 1678054736
layer_24.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91a84bc9d36f6e3b21d596a98e30fab83dbb9011d736bc6a3206a942f4cc3289
3
+ size 1678054736
layer_25.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:371ba57f9e04bdc42f5e24a0840cdce8791562626028af49bf917f2ef868f2e3
3
+ size 1678054736
layer_26.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f51d80b017591d1c6b5b3c24e535888ab05c2524d911dc0a43fc69bfb6fc3667
3
+ size 1678054736
layer_27.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6229944269113980e8a4a5c58182ed81a601463591d9519d2fcb7cd5e44dc02
3
+ size 1678054736
layer_28.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:786dda24ee88348833e264de9a2109d4bed676b9184a39d01b0f402a76409418
3
+ size 1678054736
layer_29.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90818b782d6c49623889423fe0a5856b585cc577f31fabee13ff5ec77740b2f9
3
+ size 1678054736
layer_3.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78ceddf0e5524ecb11b73fbee9f831aaf8a1cca86f5c19ee874b8e0638404f9e
3
+ size 1678054736
layer_30.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd1d2801420e644323a79d36ab900e14912e44db0493b9ba34d69992f09d597a
3
+ size 1678054736
layer_31.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ff6fcf9ae067206049726a798ef16a29939f97843e95d465a654a0e1ba5b63e
3
+ size 1678054736
layer_32.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:367531b9152ed6ae75fc14f3414005f6a682f4d06bd9b4ca7fda338b60eb22e7
3
+ size 1678054736
layer_33.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:542899ee0098a278a2b8bb16a1dd6b45d2d65b688d2f505f9bb1e1eba5d026b5
3
+ size 1678054736
layer_34.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c66263fbfbc6eb447bada4311cf1b5cb3389f990894ae69a5ba8ba332c003c85
3
+ size 1678054736
layer_35.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72b36d6232c2e8816fd97a4f404faf77e9f393d55a4da882cfcf37bc59c3ca2f
3
+ size 1678054736
layer_4.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac83291a8bc07516288dbe2ec356dd99e79b0622e7ecc8f3df0c7d337b9f850e
3
+ size 1678054736
layer_5.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ba78a2c4633e91755bb247517476a2d578edd75021ca95d844063a575a7ec47
3
+ size 1678054736
layer_6.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73077054c45dad48872c1b84f3b148a64fc0a1be05d12e78c4b1ad118722f29a
3
+ size 1678054736
layer_7.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d2e1f2f348db31ed5b1684cd7399d1dc69be25c0c107fcd4a488906eeef600
3
+ size 1678054736
layer_8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18acd2e45e52e9616b518eeea4db986718868daec496f40619e1cd9118af63c4
3
+ size 1678054736
layer_9.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f29f08ee4835c28e907ca75ee92c3502bde8e5a829dcada8e51921642c5328fb
3
+ size 1678054736
wanb-config.yaml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.20.1
4
+ m:
5
+ - "1": gpu/memory_allocated_gb
6
+ "6":
7
+ - 3
8
+ "7": []
9
+ - "1": gpu/memory_reserved_gb
10
+ "6":
11
+ - 3
12
+ "7": []
13
+ - "1": gpu/max_memory_allocated_gb
14
+ "6":
15
+ - 3
16
+ "7": []
17
+ python_version: 3.11.10
18
+ t:
19
+ "1":
20
+ - 1
21
+ - 11
22
+ - 49
23
+ - 71
24
+ "2":
25
+ - 1
26
+ - 11
27
+ - 49
28
+ - 71
29
+ "3":
30
+ - 7
31
+ - 13
32
+ - 16
33
+ - 55
34
+ - 61
35
+ "4": 3.11.10
36
+ "5": 0.20.1
37
+ "6": 4.52.4
38
+ "12": 0.20.1
39
+ "13": linux-x86_64
40
+ act_fn:
41
+ value: relu
42
+ batch_size:
43
+ value: 8192
44
+ before_ln:
45
+ value: false
46
+ c_coeff:
47
+ value: 4
48
+ cooldown_start_frac:
49
+ value: 0.8
50
+ d_feature:
51
+ value: 163840
52
+ d_model:
53
+ value: 2560
54
+ device:
55
+ value: cuda:0
56
+ initial_lr:
57
+ value: 0.0002
58
+ layer_idx:
59
+ value: 0
60
+ lr:
61
+ value: 0.0002
62
+ min_lr_ratio:
63
+ value: 0
64
+ model_name:
65
+ value: Qwen/Qwen3-4B
66
+ model_type:
67
+ value: qwen
68
+ n_batches:
69
+ value: 152
70
+ n_grad_steps:
71
+ value: 4
72
+ n_steps:
73
+ value: 122070
74
+ preact_coeff:
75
+ value: 6e-05
76
+ skip_connections:
77
+ value: false
78
+ sparsity_coeff_final:
79
+ value: 8
80
+ x_scale:
81
+ value: 1
82
+ y_scale:
83
+ value: 1