JuncheolK committed on
Commit 68b9fcd · verified · 1 Parent(s): 8653339

Upload folder using huggingface_hub

Files changed (6)
  1. README.md +62 -0
  2. config.json +62 -0
  3. merges.txt +0 -0
  4. pytorch_model.bin +3 -0
  5. training.log +0 -0
  6. vocab.json +0 -0
README.md ADDED
@@ -0,0 +1,62 @@
+ ---
+ license: apache-2.0
+ base_model: "Qwen/Qwen3-0.6B"
+ tags:
+ - text-generation
+ - deepspeed
+ - fine-tuned
+ language:
+ - en
+ library_name: transformers
+ pipeline_tag: text-generation
+ ---
+
+ # Qwen3-0.6B-v0.1
+
+ A language model fine-tuned with DeepSpeed-Chat.
+
+ ## Model Details
+
+ This model was fine-tuned from Qwen/Qwen3-0.6B using DeepSpeed-Chat.
+
+ - **Base Model**: Qwen/Qwen3-0.6B
+ - **Fine-tuning Method**: DeepSpeed-Chat
+ - **Training Data**: Add training data information here
+
+ ## Usage
+
+ ```python
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ tokenizer = AutoTokenizer.from_pretrained("mncai/Qwen3-0.6B-v0.1")
+ model = AutoModelForCausalLM.from_pretrained("mncai/Qwen3-0.6B-v0.1")
+
+ # Generate text
+ input_text = "Your prompt here"
+ inputs = tokenizer(input_text, return_tensors="pt")
+ outputs = model.generate(**inputs, max_length=100)
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ ```
+
+ ## Training Details
+
+ - **Training Framework**: DeepSpeed
+ - **Training Script**: DeepSpeed-Chat Step 1 Supervised Fine-tuning
+ - **Upload Date**: N/A
+
+ ## Limitations and Biases
+
+ Add information about this model's limitations and biases here.
+
+ ## Citation
+
+ If you used DeepSpeed-Chat, please cite the following:
+
+ ```
+ @misc{deepspeed-chat,
+   title={DeepSpeed-Chat: Easy, Fast and Affordable RLHF Training of ChatGPT-like Models at All Scales},
+   author={Zhewei Yao et al.},
+   year={2023},
+   url={https://github.com/microsoft/DeepSpeed}
+ }
+ ```
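The Usage snippet above feeds a raw string to `generate()`. Since this checkpoint is a supervised fine-tune of Qwen/Qwen3-0.6B, chat-style prompting is likely the intended input; the sketch below is a non-authoritative variant that assumes the uploaded tokenizer still carries the base model's chat template (not confirmed by this commit).

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("mncai/Qwen3-0.6B-v0.1")
model = AutoModelForCausalLM.from_pretrained("mncai/Qwen3-0.6B-v0.1")

# Assumption: the tokenizer retains Qwen3's chat template from the base model.
messages = [{"role": "user", "content": "Your prompt here"}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=100)

# Decode only the newly generated tokens, not the echoed prompt.
response = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
)
print(response)
```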
config.json ADDED
@@ -0,0 +1,62 @@
+ {
+   "architectures": [
+     "Qwen3ForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "end_token_id": 151645,
+   "eos_token_id": 151645,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_types": [
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention"
+   ],
+   "max_position_embeddings": 40960,
+   "max_window_layers": 28,
+   "model_type": "qwen3",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 28,
+   "num_key_value_heads": 8,
+   "pad_token_id": 151645,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 1000000,
+   "sliding_window": null,
+   "tie_word_embeddings": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.53.0",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "vocab_size": 151672
+ }
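The config records the standard Qwen3-0.6B geometry: 28 full-attention layers, hidden size 1024, 16 query heads with 8 key/value heads (grouped-query attention), tied embeddings, and a 151,672-entry vocabulary. A minimal sketch, assuming the repo id from the README, that loads the published config with `AutoConfig` and checks it against the values committed here:

```python
from transformers import AutoConfig

# Repo id assumed from the README above.
config = AutoConfig.from_pretrained("mncai/Qwen3-0.6B-v0.1")

# Check the key architecture fields recorded in config.json.
assert config.model_type == "qwen3"
assert config.num_hidden_layers == 28
assert config.hidden_size == 1024
assert config.num_attention_heads == 16
assert config.num_key_value_heads == 8
assert config.vocab_size == 151672
assert config.tie_word_embeddings is True
print("config matches the values committed in config.json")
```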
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05568f391e8e2bb14dde542ba027cc2a37bbc80978ff922b7869b825e4d99358
+ size 1191623484
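pytorch_model.bin is stored through Git LFS; the pointer above records only the blob's SHA-256 and its size (1,191,623,484 bytes). A minimal sketch, assuming `huggingface_hub` is installed and the repo id from the README, that downloads the weight file and verifies it against the pointer:

```python
import hashlib

from huggingface_hub import hf_hub_download

# Values copied from the LFS pointer committed above.
EXPECTED_SHA256 = "05568f391e8e2bb14dde542ba027cc2a37bbc80978ff922b7869b825e4d99358"
EXPECTED_SIZE = 1191623484  # bytes

# Repo id assumed from the README above.
path = hf_hub_download(repo_id="mncai/Qwen3-0.6B-v0.1", filename="pytorch_model.bin")

# Hash the file in 1 MiB chunks to avoid loading ~1.19 GB into memory at once.
sha256 = hashlib.sha256()
size = 0
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: {size} bytes"
assert sha256.hexdigest() == EXPECTED_SHA256, "checksum mismatch"
print("pytorch_model.bin matches the LFS pointer")
```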
training.log ADDED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff