yujiepan commited on
Commit
e9d97d8
·
verified ·
1 Parent(s): 2f0b6fa

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "meta-llama/Llama-2-7b-hf",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 11008,
14
+ "max_position_embeddings": 4096,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 32,
17
+ "num_hidden_layers": 32,
18
+ "num_key_value_heads": 32,
19
+ "pretraining_tp": 1,
20
+ "rms_norm_eps": 1e-05,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000.0,
23
+ "tie_word_embeddings": false,
24
+ "torch_dtype": "float16",
25
+ "transformers_version": "4.39.3",
26
+ "use_cache": true,
27
+ "vocab_size": 32000
28
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "do_sample": true,
4
+ "eos_token_id": 2,
5
+ "max_length": 4096,
6
+ "pad_token_id": 0,
7
+ "temperature": 0.6,
8
+ "top_p": 0.9,
9
+ "transformers_version": "4.39.3"
10
+ }
log.log ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/62 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (8987 > 4096). Running this sequence through the model will result in indexing errors
 
1
  2%|▏ | 1/62 [00:06<06:11, 6.09s/it]
2
  3%|▎ | 2/62 [00:06<02:58, 2.98s/it]
3
  5%|▍ | 3/62 [00:10<03:25, 3.48s/it]
4
  6%|▋ | 4/62 [00:17<04:22, 4.53s/it]
5
  8%|▊ | 5/62 [00:21<04:09, 4.38s/it]
6
  10%|▉ | 6/62 [00:27<04:39, 4.99s/it]
7
  11%|█▏ | 7/62 [00:28<03:23, 3.69s/it]
8
  13%|█▎ | 8/62 [00:29<02:34, 2.86s/it]
9
  15%|█▍ | 9/62 [00:33<02:52, 3.25s/it]
10
  16%|█▌ | 10/62 [00:37<03:02, 3.51s/it]
11
  18%|█▊ | 11/62 [00:41<03:08, 3.70s/it]
12
  19%|█▉ | 12/62 [00:42<02:25, 2.91s/it]
13
  21%|██ | 13/62 [00:51<03:42, 4.53s/it]
14
  23%|██▎ | 14/62 [00:53<03:01, 3.78s/it]
15
  24%|██▍ | 15/62 [01:01<04:00, 5.12s/it]
16
  26%|██▌ | 16/62 [01:02<02:58, 3.89s/it]
17
  27%|██▋ | 17/62 [01:03<02:12, 2.94s/it]
18
  29%|██▉ | 18/62 [01:09<02:52, 3.92s/it]
19
  31%|███ | 19/62 [01:09<02:04, 2.89s/it]
20
  32%|███▏ | 20/62 [01:10<01:33, 2.24s/it]
21
  34%|███▍ | 21/62 [01:11<01:20, 1.97s/it]
22
  35%|███▌ | 22/62 [01:12<01:03, 1.58s/it]
23
  37%|███▋ | 23/62 [01:14<01:06, 1.70s/it]
24
  39%|███▊ | 24/62 [01:15<00:53, 1.40s/it]
25
  40%|████ | 25/62 [01:19<01:21, 2.20s/it]
26
  42%|████▏ | 26/62 [01:21<01:15, 2.11s/it]
27
  44%|████▎ | 27/62 [01:25<01:34, 2.71s/it]
28
  45%|████▌ | 28/62 [01:27<01:21, 2.39s/it]
29
  47%|████▋ | 29/62 [01:27<01:02, 1.89s/it]
30
  48%|████▊ | 30/62 [01:28<00:49, 1.56s/it]
31
  50%|█████ | 31/62 [01:34<01:31, 2.94s/it]
32
  52%|█████▏ | 32/62 [01:35<01:05, 2.20s/it]
33
  53%|█████▎ | 33/62 [01:35<00:47, 1.65s/it]
34
  55%|█████▍ | 34/62 [01:37<00:46, 1.65s/it]
35
  56%|█████▋ | 35/62 [01:45<01:37, 3.62s/it]
36
  58%|█████▊ | 36/62 [01:46<01:13, 2.82s/it]
37
  60%|█████▉ | 37/62 [01:47<00:54, 2.17s/it]
38
  61%|██████▏ | 38/62 [01:53<01:20, 3.37s/it]
39
  63%|██████▎ | 39/62 [01:54<01:04, 2.78s/it]
40
  65%|██████▍ | 40/62 [01:58<01:09, 3.17s/it]
41
  66%|██████▌ | 41/62 [02:02<01:12, 3.46s/it]
42
  68%|██████▊ | 42/62 [02:03<00:52, 2.61s/it]
43
  69%|██████▉ | 43/62 [02:04<00:43, 2.28s/it]
44
  71%|███████ | 44/62 [02:05<00:33, 1.86s/it]
45
  73%|███████▎ | 45/62 [02:09<00:43, 2.53s/it]
46
  74%|███████▍ | 46/62 [02:11<00:36, 2.26s/it]
47
  76%|███████▌ | 47/62 [02:12<00:27, 1.81s/it]
48
  77%|███████▋ | 48/62 [02:22<01:00, 4.35s/it]
49
  79%|███████▉ | 49/62 [02:24<00:46, 3.54s/it]
50
  81%|████████ | 50/62 [02:30<00:52, 4.34s/it]
51
  82%|████████▏ | 51/62 [02:31<00:35, 3.25s/it]
52
  84%|████████▍ | 52/62 [02:35<00:35, 3.50s/it]
53
  85%|████████▌ | 53/62 [02:43<00:44, 4.91s/it]
54
  87%|████████▋ | 54/62 [02:47<00:37, 4.66s/it]
55
  89%|████████▊ | 55/62 [02:55<00:40, 5.74s/it]
56
  90%|█████████ | 56/62 [02:56<00:24, 4.14s/it]
57
  92%|█████████▏| 57/62 [02:57<00:16, 3.24s/it]
58
  94%|█████████▎| 58/62 [02:58<00:09, 2.49s/it]
59
  95%|█████████▌| 59/62 [03:04<00:10, 3.58s/it]
60
  97%|█████████▋| 60/62 [03:08<00:07, 3.74s/it]
61
  98%|█████████▊| 61/62 [03:09<00:03, 3.11s/it]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino
2
+ WARNING:nncf:NNCF provides best results with torch==2.2.*, while current torch version is 2.1.0. If you encounter issues, consider switching to torch==2.2.*
3
+ Args(model_id='meta-llama/Llama-2-7b-hf', torch_dtype='float16', device='cuda', compress_weights_mode='int8_asym', up=0.3, gate=0.3, down=0.5, batch_size=4, num_calibration_samples=64, eval_task='wikitext', eval_limit=None, save_folder='./models/Llama-2-7b-hf/int8_asym_up30+down50/')
4
+
5
+ INFO:nncf:Statistics of the bitwidth distribution:
6
+ ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
7
+ │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │
8
+ ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
9
+ │ 8 │ 100% (226 / 226) │ 100% (226 / 226) │
10
+ ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙
11
+ Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 226/226 • 0:00:00 • 0:00:00
12
+ target_sparsity_by_scope: {'{re}.*up_proj.*': 0.3, '{re}.*gate_proj.*': 0.3, '{re}.*down_proj.*': 0.5}
13
+ Activations Sparsifier Calibration ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 16/16 • 0:04:47 • 0:00:00
14
+ LlamaForCausalLM(
15
+ (model): LlamaModel(
16
+ (embed_tokens): Embedding(32000, 4096)
17
+ (layers): ModuleList(
18
+ (0-31): 32 x LlamaDecoderLayer(
19
+ (self_attn): LlamaAttention(
20
+ (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
21
+ (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
22
+ (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
23
+ (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
24
+ (rotary_emb): LlamaRotaryEmbedding()
25
+ )
26
+ (mlp): LlamaMLP(
27
+ (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
28
+ (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
29
+ (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
30
+ (act_fn): SiLU()
31
+ )
32
+ (input_layernorm): LlamaRMSNorm()
33
+ (post_attention_layernorm): LlamaRMSNorm()
34
+ )
35
+ )
36
+ (norm): LlamaRMSNorm()
37
+ )
38
+ (lm_head): Linear(in_features=4096, out_features=32000, bias=False)
39
+ (_nncf): NNCFNetworkInterface(
40
+ (external_op): ModuleDict(
41
+ (weights_decompressor_model_embed_tokens_weight): WeightsDecompressor()
42
+ (weights_decompressor_model_layers_0_self_attn_q_proj_weight): WeightsDecompressor()
43
+ (weights_decompressor_model_layers_0_self_attn_k_proj_weight): WeightsDecompressor()
44
+ (weights_decompressor_model_layers_0_self_attn_v_proj_weight): WeightsDecompressor()
45
+ (weights_decompressor_model_layers_0_self_attn_o_proj_weight): WeightsDecompressor()
46
+ (weights_decompressor_model_layers_0_mlp_gate_proj_weight): WeightsDecompressor()
47
+ (weights_decompressor_model_layers_0_mlp_up_proj_weight): WeightsDecompressor()
48
+ (weights_decompressor_model_layers_0_mlp_down_proj_weight): WeightsDecompressor()
49
+ (weights_decompressor_model_layers_1_self_attn_q_proj_weight): WeightsDecompressor()
50
+ (weights_decompressor_model_layers_1_self_attn_k_proj_weight): WeightsDecompressor()
51
+ (weights_decompressor_model_layers_1_self_attn_v_proj_weight): WeightsDecompressor()
52
+ (weights_decompressor_model_layers_1_self_attn_o_proj_weight): WeightsDecompressor()
53
+ (weights_decompressor_model_layers_1_mlp_gate_proj_weight): WeightsDecompressor()
54
+ (weights_decompressor_model_layers_1_mlp_up_proj_weight): WeightsDecompressor()
55
+ (weights_decompressor_model_layers_1_mlp_down_proj_weight): WeightsDecompressor()
56
+ (weights_decompressor_model_layers_2_self_attn_q_proj_weight): WeightsDecompressor()
57
+ (weights_decompressor_model_layers_2_self_attn_k_proj_weight): WeightsDecompressor()
58
+ (weights_decompressor_model_layers_2_self_attn_v_proj_weight): WeightsDecompressor()
59
+ (weights_decompressor_model_layers_2_self_attn_o_proj_weight): WeightsDecompressor()
60
+ (weights_decompressor_model_layers_2_mlp_gate_proj_weight): WeightsDecompressor()
61
+ (weights_decompressor_model_layers_2_mlp_up_proj_weight): WeightsDecompressor()
62
+ (weights_decompressor_model_layers_2_mlp_down_proj_weight): WeightsDecompressor()
63
+ (weights_decompressor_model_layers_3_self_attn_q_proj_weight): WeightsDecompressor()
64
+ (weights_decompressor_model_layers_3_self_attn_k_proj_weight): WeightsDecompressor()
65
+ (weights_decompressor_model_layers_3_self_attn_v_proj_weight): WeightsDecompressor()
66
+ (weights_decompressor_model_layers_3_self_attn_o_proj_weight): WeightsDecompressor()
67
+ (weights_decompressor_model_layers_3_mlp_gate_proj_weight): WeightsDecompressor()
68
+ (weights_decompressor_model_layers_3_mlp_up_proj_weight): WeightsDecompressor()
69
+ (weights_decompressor_model_layers_3_mlp_down_proj_weight): WeightsDecompressor()
70
+ (weights_decompressor_model_layers_4_self_attn_q_proj_weight): WeightsDecompressor()
71
+ (weights_decompressor_model_layers_4_self_attn_k_proj_weight): WeightsDecompressor()
72
+ (weights_decompressor_model_layers_4_self_attn_v_proj_weight): WeightsDecompressor()
73
+ (weights_decompressor_model_layers_4_self_attn_o_proj_weight): WeightsDecompressor()
74
+ (weights_decompressor_model_layers_4_mlp_gate_proj_weight): WeightsDecompressor()
75
+ (weights_decompressor_model_layers_4_mlp_up_proj_weight): WeightsDecompressor()
76
+ (weights_decompressor_model_layers_4_mlp_down_proj_weight): WeightsDecompressor()
77
+ (weights_decompressor_model_layers_5_self_attn_q_proj_weight): WeightsDecompressor()
78
+ (weights_decompressor_model_layers_5_self_attn_k_proj_weight): WeightsDecompressor()
79
+ (weights_decompressor_model_layers_5_self_attn_v_proj_weight): WeightsDecompressor()
80
+ (weights_decompressor_model_layers_5_self_attn_o_proj_weight): WeightsDecompressor()
81
+ (weights_decompressor_model_layers_5_mlp_gate_proj_weight): WeightsDecompressor()
82
+ (weights_decompressor_model_layers_5_mlp_up_proj_weight): WeightsDecompressor()
83
+ (weights_decompressor_model_layers_5_mlp_down_proj_weight): WeightsDecompressor()
84
+ (weights_decompressor_model_layers_6_self_attn_q_proj_weight): WeightsDecompressor()
85
+ (weights_decompressor_model_layers_6_self_attn_k_proj_weight): WeightsDecompressor()
86
+ (weights_decompressor_model_layers_6_self_attn_v_proj_weight): WeightsDecompressor()
87
+ (weights_decompressor_model_layers_6_self_attn_o_proj_weight): WeightsDecompressor()
88
+ (weights_decompressor_model_layers_6_mlp_gate_proj_weight): WeightsDecompressor()
89
+ (weights_decompressor_model_layers_6_mlp_up_proj_weight): WeightsDecompressor()
90
+ (weights_decompressor_model_layers_6_mlp_down_proj_weight): WeightsDecompressor()
91
+ (weights_decompressor_model_layers_7_self_attn_q_proj_weight): WeightsDecompressor()
92
+ (weights_decompressor_model_layers_7_self_attn_k_proj_weight): WeightsDecompressor()
93
+ (weights_decompressor_model_layers_7_self_attn_v_proj_weight): WeightsDecompressor()
94
+ (weights_decompressor_model_layers_7_self_attn_o_proj_weight): WeightsDecompressor()
95
+ (weights_decompressor_model_layers_7_mlp_gate_proj_weight): WeightsDecompressor()
96
+ (weights_decompressor_model_layers_7_mlp_up_proj_weight): WeightsDecompressor()
97
+ (weights_decompressor_model_layers_7_mlp_down_proj_weight): WeightsDecompressor()
98
+ (weights_decompressor_model_layers_8_self_attn_q_proj_weight): WeightsDecompressor()
99
+ (weights_decompressor_model_layers_8_self_attn_k_proj_weight): WeightsDecompressor()
100
+ (weights_decompressor_model_layers_8_self_attn_v_proj_weight): WeightsDecompressor()
101
+ (weights_decompressor_model_layers_8_self_attn_o_proj_weight): WeightsDecompressor()
102
+ (weights_decompressor_model_layers_8_mlp_gate_proj_weight): WeightsDecompressor()
103
+ (weights_decompressor_model_layers_8_mlp_up_proj_weight): WeightsDecompressor()
104
+ (weights_decompressor_model_layers_8_mlp_down_proj_weight): WeightsDecompressor()
105
+ (weights_decompressor_model_layers_9_self_attn_q_proj_weight): WeightsDecompressor()
106
+ (weights_decompressor_model_layers_9_self_attn_k_proj_weight): WeightsDecompressor()
107
+ (weights_decompressor_model_layers_9_self_attn_v_proj_weight): WeightsDecompressor()
108
+ (weights_decompressor_model_layers_9_self_attn_o_proj_weight): WeightsDecompressor()
109
+ (weights_decompressor_model_layers_9_mlp_gate_proj_weight): WeightsDecompressor()
110
+ (weights_decompressor_model_layers_9_mlp_up_proj_weight): WeightsDecompressor()
111
+ (weights_decompressor_model_layers_9_mlp_down_proj_weight): WeightsDecompressor()
112
+ (weights_decompressor_model_layers_10_self_attn_q_proj_weight): WeightsDecompressor()
113
+ (weights_decompressor_model_layers_10_self_attn_k_proj_weight): WeightsDecompressor()
114
+ (weights_decompressor_model_layers_10_self_attn_v_proj_weight): WeightsDecompressor()
115
+ (weights_decompressor_model_layers_10_self_attn_o_proj_weight): WeightsDecompressor()
116
+ (weights_decompressor_model_layers_10_mlp_gate_proj_weight): WeightsDecompressor()
117
+ (weights_decompressor_model_layers_10_mlp_up_proj_weight): WeightsDecompressor()
118
+ (weights_decompressor_model_layers_10_mlp_down_proj_weight): WeightsDecompressor()
119
+ (weights_decompressor_model_layers_11_self_attn_q_proj_weight): WeightsDecompressor()
120
+ (weights_decompressor_model_layers_11_self_attn_k_proj_weight): WeightsDecompressor()
121
+ (weights_decompressor_model_layers_11_self_attn_v_proj_weight): WeightsDecompressor()
122
+ (weights_decompressor_model_layers_11_self_attn_o_proj_weight): WeightsDecompressor()
123
+ (weights_decompressor_model_layers_11_mlp_gate_proj_weight): WeightsDecompressor()
124
+ (weights_decompressor_model_layers_11_mlp_up_proj_weight): WeightsDecompressor()
125
+ (weights_decompressor_model_layers_11_mlp_down_proj_weight): WeightsDecompressor()
126
+ (weights_decompressor_model_layers_12_self_attn_q_proj_weight): WeightsDecompressor()
127
+ (weights_decompressor_model_layers_12_self_attn_k_proj_weight): WeightsDecompressor()
128
+ (weights_decompressor_model_layers_12_self_attn_v_proj_weight): WeightsDecompressor()
129
+ (weights_decompressor_model_layers_12_self_attn_o_proj_weight): WeightsDecompressor()
130
+ (weights_decompressor_model_layers_12_mlp_gate_proj_weight): WeightsDecompressor()
131
+ (weights_decompressor_model_layers_12_mlp_up_proj_weight): WeightsDecompressor()
132
+ (weights_decompressor_model_layers_12_mlp_down_proj_weight): WeightsDecompressor()
133
+ (weights_decompressor_model_layers_13_self_attn_q_proj_weight): WeightsDecompressor()
134
+ (weights_decompressor_model_layers_13_self_attn_k_proj_weight): WeightsDecompressor()
135
+ (weights_decompressor_model_layers_13_self_attn_v_proj_weight): WeightsDecompressor()
136
+ (weights_decompressor_model_layers_13_self_attn_o_proj_weight): WeightsDecompressor()
137
+ (weights_decompressor_model_layers_13_mlp_gate_proj_weight): WeightsDecompressor()
138
+ (weights_decompressor_model_layers_13_mlp_up_proj_weight): WeightsDecompressor()
139
+ (weights_decompressor_model_layers_13_mlp_down_proj_weight): WeightsDecompressor()
140
+ (weights_decompressor_model_layers_14_self_attn_q_proj_weight): WeightsDecompressor()
141
+ (weights_decompressor_model_layers_14_self_attn_k_proj_weight): WeightsDecompressor()
142
+ (weights_decompressor_model_layers_14_self_attn_v_proj_weight): WeightsDecompressor()
143
+ (weights_decompressor_model_layers_14_self_attn_o_proj_weight): WeightsDecompressor()
144
+ (weights_decompressor_model_layers_14_mlp_gate_proj_weight): WeightsDecompressor()
145
+ (weights_decompressor_model_layers_14_mlp_up_proj_weight): WeightsDecompressor()
146
+ (weights_decompressor_model_layers_14_mlp_down_proj_weight): WeightsDecompressor()
147
+ (weights_decompressor_model_layers_15_self_attn_q_proj_weight): WeightsDecompressor()
148
+ (weights_decompressor_model_layers_15_self_attn_k_proj_weight): WeightsDecompressor()
149
+ (weights_decompressor_model_layers_15_self_attn_v_proj_weight): WeightsDecompressor()
150
+ (weights_decompressor_model_layers_15_self_attn_o_proj_weight): WeightsDecompressor()
151
+ (weights_decompressor_model_layers_15_mlp_gate_proj_weight): WeightsDecompressor()
152
+ (weights_decompressor_model_layers_15_mlp_up_proj_weight): WeightsDecompressor()
153
+ (weights_decompressor_model_layers_15_mlp_down_proj_weight): WeightsDecompressor()
154
+ (weights_decompressor_model_layers_16_self_attn_q_proj_weight): WeightsDecompressor()
155
+ (weights_decompressor_model_layers_16_self_attn_k_proj_weight): WeightsDecompressor()
156
+ (weights_decompressor_model_layers_16_self_attn_v_proj_weight): WeightsDecompressor()
157
+ (weights_decompressor_model_layers_16_self_attn_o_proj_weight): WeightsDecompressor()
158
+ (weights_decompressor_model_layers_16_mlp_gate_proj_weight): WeightsDecompressor()
159
+ (weights_decompressor_model_layers_16_mlp_up_proj_weight): WeightsDecompressor()
160
+ (weights_decompressor_model_layers_16_mlp_down_proj_weight): WeightsDecompressor()
161
+ (weights_decompressor_model_layers_17_self_attn_q_proj_weight): WeightsDecompressor()
162
+ (weights_decompressor_model_layers_17_self_attn_k_proj_weight): WeightsDecompressor()
163
+ (weights_decompressor_model_layers_17_self_attn_v_proj_weight): WeightsDecompressor()
164
+ (weights_decompressor_model_layers_17_self_attn_o_proj_weight): WeightsDecompressor()
165
+ (weights_decompressor_model_layers_17_mlp_gate_proj_weight): WeightsDecompressor()
166
+ (weights_decompressor_model_layers_17_mlp_up_proj_weight): WeightsDecompressor()
167
+ (weights_decompressor_model_layers_17_mlp_down_proj_weight): WeightsDecompressor()
168
+ (weights_decompressor_model_layers_18_self_attn_q_proj_weight): WeightsDecompressor()
169
+ (weights_decompressor_model_layers_18_self_attn_k_proj_weight): WeightsDecompressor()
170
+ (weights_decompressor_model_layers_18_self_attn_v_proj_weight): WeightsDecompressor()
171
+ (weights_decompressor_model_layers_18_self_attn_o_proj_weight): WeightsDecompressor()
172
+ (weights_decompressor_model_layers_18_mlp_gate_proj_weight): WeightsDecompressor()
173
+ (weights_decompressor_model_layers_18_mlp_up_proj_weight): WeightsDecompressor()
174
+ (weights_decompressor_model_layers_18_mlp_down_proj_weight): WeightsDecompressor()
175
+ (weights_decompressor_model_layers_19_self_attn_q_proj_weight): WeightsDecompressor()
176
+ (weights_decompressor_model_layers_19_self_attn_k_proj_weight): WeightsDecompressor()
177
+ (weights_decompressor_model_layers_19_self_attn_v_proj_weight): WeightsDecompressor()
178
+ (weights_decompressor_model_layers_19_self_attn_o_proj_weight): WeightsDecompressor()
179
+ (weights_decompressor_model_layers_19_mlp_gate_proj_weight): WeightsDecompressor()
180
+ (weights_decompressor_model_layers_19_mlp_up_proj_weight): WeightsDecompressor()
181
+ (weights_decompressor_model_layers_19_mlp_down_proj_weight): WeightsDecompressor()
182
+ (weights_decompressor_model_layers_20_self_attn_q_proj_weight): WeightsDecompressor()
183
+ (weights_decompressor_model_layers_20_self_attn_k_proj_weight): WeightsDecompressor()
184
+ (weights_decompressor_model_layers_20_self_attn_v_proj_weight): WeightsDecompressor()
185
+ (weights_decompressor_model_layers_20_self_attn_o_proj_weight): WeightsDecompressor()
186
+ (weights_decompressor_model_layers_20_mlp_gate_proj_weight): WeightsDecompressor()
187
+ (weights_decompressor_model_layers_20_mlp_up_proj_weight): WeightsDecompressor()
188
+ (weights_decompressor_model_layers_20_mlp_down_proj_weight): WeightsDecompressor()
189
+ (weights_decompressor_model_layers_21_self_attn_q_proj_weight): WeightsDecompressor()
190
+ (weights_decompressor_model_layers_21_self_attn_k_proj_weight): WeightsDecompressor()
191
+ (weights_decompressor_model_layers_21_self_attn_v_proj_weight): WeightsDecompressor()
192
+ (weights_decompressor_model_layers_21_self_attn_o_proj_weight): WeightsDecompressor()
193
+ (weights_decompressor_model_layers_21_mlp_gate_proj_weight): WeightsDecompressor()
194
+ (weights_decompressor_model_layers_21_mlp_up_proj_weight): WeightsDecompressor()
195
+ (weights_decompressor_model_layers_21_mlp_down_proj_weight): WeightsDecompressor()
196
+ (weights_decompressor_model_layers_22_self_attn_q_proj_weight): WeightsDecompressor()
197
+ (weights_decompressor_model_layers_22_self_attn_k_proj_weight): WeightsDecompressor()
198
+ (weights_decompressor_model_layers_22_self_attn_v_proj_weight): WeightsDecompressor()
199
+ (weights_decompressor_model_layers_22_self_attn_o_proj_weight): WeightsDecompressor()
200
+ (weights_decompressor_model_layers_22_mlp_gate_proj_weight): WeightsDecompressor()
201
+ (weights_decompressor_model_layers_22_mlp_up_proj_weight): WeightsDecompressor()
202
+ (weights_decompressor_model_layers_22_mlp_down_proj_weight): WeightsDecompressor()
203
+ (weights_decompressor_model_layers_23_self_attn_q_proj_weight): WeightsDecompressor()
204
+ (weights_decompressor_model_layers_23_self_attn_k_proj_weight): WeightsDecompressor()
205
+ (weights_decompressor_model_layers_23_self_attn_v_proj_weight): WeightsDecompressor()
206
+ (weights_decompressor_model_layers_23_self_attn_o_proj_weight): WeightsDecompressor()
207
+ (weights_decompressor_model_layers_23_mlp_gate_proj_weight): WeightsDecompressor()
208
+ (weights_decompressor_model_layers_23_mlp_up_proj_weight): WeightsDecompressor()
209
+ (weights_decompressor_model_layers_23_mlp_down_proj_weight): WeightsDecompressor()
210
+ (weights_decompressor_model_layers_24_self_attn_q_proj_weight): WeightsDecompressor()
211
+ (weights_decompressor_model_layers_24_self_attn_k_proj_weight): WeightsDecompressor()
212
+ (weights_decompressor_model_layers_24_self_attn_v_proj_weight): WeightsDecompressor()
213
+ (weights_decompressor_model_layers_24_self_attn_o_proj_weight): WeightsDecompressor()
214
+ (weights_decompressor_model_layers_24_mlp_gate_proj_weight): WeightsDecompressor()
215
+ (weights_decompressor_model_layers_24_mlp_up_proj_weight): WeightsDecompressor()
216
+ (weights_decompressor_model_layers_24_mlp_down_proj_weight): WeightsDecompressor()
217
+ (weights_decompressor_model_layers_25_self_attn_q_proj_weight): WeightsDecompressor()
218
+ (weights_decompressor_model_layers_25_self_attn_k_proj_weight): WeightsDecompressor()
219
+ (weights_decompressor_model_layers_25_self_attn_v_proj_weight): WeightsDecompressor()
220
+ (weights_decompressor_model_layers_25_self_attn_o_proj_weight): WeightsDecompressor()
221
+ (weights_decompressor_model_layers_25_mlp_gate_proj_weight): WeightsDecompressor()
222
+ (weights_decompressor_model_layers_25_mlp_up_proj_weight): WeightsDecompressor()
223
+ (weights_decompressor_model_layers_25_mlp_down_proj_weight): WeightsDecompressor()
224
+ (weights_decompressor_model_layers_26_self_attn_q_proj_weight): WeightsDecompressor()
225
+ (weights_decompressor_model_layers_26_self_attn_k_proj_weight): WeightsDecompressor()
226
+ (weights_decompressor_model_layers_26_self_attn_v_proj_weight): WeightsDecompressor()
227
+ (weights_decompressor_model_layers_26_self_attn_o_proj_weight): WeightsDecompressor()
228
+ (weights_decompressor_model_layers_26_mlp_gate_proj_weight): WeightsDecompressor()
229
+ (weights_decompressor_model_layers_26_mlp_up_proj_weight): WeightsDecompressor()
230
+ (weights_decompressor_model_layers_26_mlp_down_proj_weight): WeightsDecompressor()
231
+ (weights_decompressor_model_layers_27_self_attn_q_proj_weight): WeightsDecompressor()
232
+ (weights_decompressor_model_layers_27_self_attn_k_proj_weight): WeightsDecompressor()
233
+ (weights_decompressor_model_layers_27_self_attn_v_proj_weight): WeightsDecompressor()
234
+ (weights_decompressor_model_layers_27_self_attn_o_proj_weight): WeightsDecompressor()
235
+ (weights_decompressor_model_layers_27_mlp_gate_proj_weight): WeightsDecompressor()
236
+ (weights_decompressor_model_layers_27_mlp_up_proj_weight): WeightsDecompressor()
237
+ (weights_decompressor_model_layers_27_mlp_down_proj_weight): WeightsDecompressor()
238
+ (weights_decompressor_model_layers_28_self_attn_q_proj_weight): WeightsDecompressor()
239
+ (weights_decompressor_model_layers_28_self_attn_k_proj_weight): WeightsDecompressor()
240
+ (weights_decompressor_model_layers_28_self_attn_v_proj_weight): WeightsDecompressor()
241
+ (weights_decompressor_model_layers_28_self_attn_o_proj_weight): WeightsDecompressor()
242
+ (weights_decompressor_model_layers_28_mlp_gate_proj_weight): WeightsDecompressor()
243
+ (weights_decompressor_model_layers_28_mlp_up_proj_weight): WeightsDecompressor()
244
+ (weights_decompressor_model_layers_28_mlp_down_proj_weight): WeightsDecompressor()
245
+ (weights_decompressor_model_layers_29_self_attn_q_proj_weight): WeightsDecompressor()
246
+ (weights_decompressor_model_layers_29_self_attn_k_proj_weight): WeightsDecompressor()
247
+ (weights_decompressor_model_layers_29_self_attn_v_proj_weight): WeightsDecompressor()
248
+ (weights_decompressor_model_layers_29_self_attn_o_proj_weight): WeightsDecompressor()
249
+ (weights_decompressor_model_layers_29_mlp_gate_proj_weight): WeightsDecompressor()
250
+ (weights_decompressor_model_layers_29_mlp_up_proj_weight): WeightsDecompressor()
251
+ (weights_decompressor_model_layers_29_mlp_down_proj_weight): WeightsDecompressor()
252
+ (weights_decompressor_model_layers_30_self_attn_q_proj_weight): WeightsDecompressor()
253
+ (weights_decompressor_model_layers_30_self_attn_k_proj_weight): WeightsDecompressor()
254
+ (weights_decompressor_model_layers_30_self_attn_v_proj_weight): WeightsDecompressor()
255
+ (weights_decompressor_model_layers_30_self_attn_o_proj_weight): WeightsDecompressor()
256
+ (weights_decompressor_model_layers_30_mlp_gate_proj_weight): WeightsDecompressor()
257
+ (weights_decompressor_model_layers_30_mlp_up_proj_weight): WeightsDecompressor()
258
+ (weights_decompressor_model_layers_30_mlp_down_proj_weight): WeightsDecompressor()
259
+ (weights_decompressor_model_layers_31_self_attn_q_proj_weight): WeightsDecompressor()
260
+ (weights_decompressor_model_layers_31_self_attn_k_proj_weight): WeightsDecompressor()
261
+ (weights_decompressor_model_layers_31_self_attn_v_proj_weight): WeightsDecompressor()
262
+ (weights_decompressor_model_layers_31_self_attn_o_proj_weight): WeightsDecompressor()
263
+ (weights_decompressor_model_layers_31_mlp_gate_proj_weight): WeightsDecompressor()
264
+ (weights_decompressor_model_layers_31_mlp_up_proj_weight): WeightsDecompressor()
265
+ (weights_decompressor_model_layers_31_mlp_down_proj_weight): WeightsDecompressor()
266
+ (weights_decompressor_lm_head_weight): WeightsDecompressor()
267
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
268
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
269
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
270
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
271
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
272
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
273
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
274
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
275
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
276
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
277
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
278
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
279
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
280
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
281
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
282
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
283
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
284
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
285
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
286
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
287
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
288
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
289
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
290
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
291
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
292
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
293
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
294
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
295
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
296
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
297
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
298
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
299
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
300
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
301
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
302
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
303
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
304
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
305
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
306
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
307
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
308
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
309
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
310
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
311
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
312
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
313
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
314
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
315
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
316
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
317
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
318
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
319
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
320
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
321
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
322
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
323
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
324
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
325
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
326
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
327
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
328
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
329
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
330
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
331
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
332
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
333
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[22]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
334
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[22]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
335
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[22]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
336
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[23]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
337
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[23]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
338
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[23]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
339
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[24]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
340
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[24]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
341
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[24]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
342
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[25]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
343
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[25]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
344
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[25]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
345
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[26]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
346
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[26]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
347
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[26]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
348
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[27]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
349
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[27]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
350
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[27]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
351
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[28]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
352
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[28]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
353
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[28]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
354
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[29]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
355
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[29]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
356
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[29]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
357
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[30]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
358
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[30]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
359
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[30]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
360
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[31]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
361
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[31]/LlamaMLP[mlp]/Linear[up_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.3)
362
+ (activations_sparsifier_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[31]/LlamaMLP[mlp]/Linear[down_proj]/linear_0): ActivationsSparsifier(target_sparsity=0.5)
363
+ )
364
+ )
365
+ )
366
+ Manually setting max_length=4096 for meta-llama/Llama-2-7b-hf to avoid potential OOM.
367
+ Repo card metadata block was not found. Setting CardData to empty.
368
+ Task: wikitext; number of docs: 62
369
+ Task: wikitext; document 0; context prompt (starting on next line):
370
+
371
+ (end of prompt on previous line)
372
+ Requests: Req_loglikelihood_rolling(' = Commonwealth War Graves Commission =\n\nThe Commonwealth War Graves Commission (CWGC) is an intergovernmental organisation of six independent member states whose principal function is to mark, record and maintain the graves and places of commemoration of Commonwealth of Nations military service members who died in the two World Wars. The Commission is also responsible for commemorating Commonwealth civilians who died as a result of enemy action during World War II. The Commission was founded by Fabian Ware and constituted through Royal Charter in 1917 named the Imperial War Graves Commission. The change to the present name took place in 1960.\nThe Commission, as part of its mandate, is responsible for commemorating all Commonwealth war dead individually and equally. To this end, the war dead are commemorated by name on a headstone, at an identified site of a burial, or on a memorial. War dead are commemorated uniformly and equally, irrespective of military or civil rank, race or creed.\nThe Commission is currently responsible for the continued commemoration of 1.7 million deceased Commonwealth military service members in 153 countries. Since its inception, the Commission has constructed approximately 2,500 war cemeteries and numerous memorials. The Commission is currently responsible for the care of war dead at over 23,000 separate burial sites and the maintenance of more than 200 memorials worldwide. In addition to commemorating Commonwealth military service members, the Commission maintains, under arrangement with applicable governments, over 40,000 non-Commonwealth war graves and over 25,000 non-war military and civilian graves. The Commission operates through the continued financial support of the member states: United Kingdom, Canada, Australia, New Zealand, India and South Africa. The current President of the Commonwealth War Graves Commission is Prince Edward, Duke of Kent.\n\n== History ==\n\n\n=== World War I ===\n\nOn the outbreak of World War I in 1914, Fabian Ware, a director of the Rio Tinto Company, found that at 45 years old he was too old to join the British Army. He used the influence of Rio Tinto chairman, Viscount Milner, to become the commander of a mobile unit of the British Red Cross. He arrived in France in September 1914 and whilst there was struck by the lack of any official mechanism for documenting or marking the location of graves of those who had been killed and felt compelled to create an organisation within the Red Cross for this purpose. In March 1915, with the support of Nevil Macready, Adjutant-General of the British Expeditionary Force, Ware\'s work was given official recognition and support by the Imperial War Office and the unit was transferred to the British Army as the Graves Registration Commission. The new Graves Registration Commission had over 31,000 graves of British and Imperial soldiers registered by October 1915 and 50,000 registered by May 1916.\nWhen municipal graveyards began to overfill Ware began negotiations with various local authorities to acquire land for further cemeteries. Ware began with an agreement with France to build joint British and French cemeteries under the understanding that these would be maintained by the French government. Ware eventually concluded that it was not prudent to leave the maintenance responsibilities solely to the French government and subsequently arranged for France to purchase the land, grant it in perpetuity, and leave the management and maintenance responsibilities to the British. The French government agreed under the condition that cemeteries respected certain dimensions, were accessible by public road, were in the vicinity of medical aid stations and were not too close to towns or villages. Similar negotiations were started with the Belgian government.\nAs reports of the grave registration work became public, the Commission began to receive letters of enquiry and requests for photographs of graves from relatives of deceased soldiers. By 1917, 17,000 photographs had been dispatched to relatives. In March 1915, the Commission, with the support of the Red Cross, began to dispatch photographic prints and cemetery location information in answer to the requests. The Graves Registration Commission became the Directorate of Graves Registration and Enquiries in the spring of 1916 in recognition of the fact that the scope of work began to extend beyond simple grave registration and began to include responding to enquiries from relatives of those killed. The directorate\'s work was also extended beyond the Western Front and into other theatres of war, with units deployed in Greece, Egypt and Mesopotamia.\n\n=== Formal establishment ===\n\nAs the war continued, Ware and others became concerned about the fate of the graves in the post-war period. Following a suggestion by the British Army, the National Committee for the Care of Soldiers\'Graves was appointed by the British government in January 1916, with Edward, Prince of Wales agreeing to serve as president. The National Committee for the Care of Soldiers\' Graves was created with the intention of taking over the work of the Directorate of Graves Registration and Enquiries after the war. The government felt that it was more appropriate to entrust the work to a specially appointed body rather than to any existing government department. By early 1917 a number of members of the committee believed a formal imperial organisation would be needed to care for the graves. With the help of Edward, Prince of Wales, Ware submitted a memorandum to the Imperial War Conference in 1917 suggesting that an imperial organisation be constituted. The suggestion was accepted and on 21 May 1917 the Imperial War Graves Commission was established by Royal Charter, with the Prince of Wales serving as president, Secretary of State for War Lord Derby as chairman and Ware as vice-chairman. The Commission\'s undertakings began in earnest at the end of the First World War. Once land for cemeteries and memorials had been guaranteed, the enormous task of recording the details of the dead could begin. By 1918, some 587,000 graves had been identified and a further 559,000 casualties were registered as having no known grave.\nThe scale, and associated high number of casualties, of the war produced an entirely new attitude towards the commemoration of war dead. Previous to World War I, individual commemoration of war dead was often on an ad hoc basis and was almost exclusively limited to commissioned officers. However, the war required mobilisation of a significant percentage of the population, either as volunteers or through conscription. An expectation had consequently arisen that individual soldiers would expect to be commemorated, even if they were low-ranking members of the military. A committee under Frederic Kenyon, Director of the British Museum, presented a report to the Commission in November 1918 detailing how it envisioned the development of the cemeteries. Two key elements of this report were that bodies should not be repatriated and that uniform memorials should be used to avoid class distinctions. Beyond the logistical nightmare of returning home so many corpses, it was felt that repatriation would conflict with the feeling of brotherhood that had developed between serving ranks.\nAn article in The Times on 17 February 1919 by Rudyard Kipling carried the Commission\'s proposal to a wider audience and described what the graves would look like. The article entitled War Graves: Work of Imperial Commission: Mr. Kipling\'s Survey was quickly republished as an illustrated booklet, Graves of the Fallen. The illustrated booklet was intended to soften the impact of Kenyon\'s report as it included illustrations of cemeteries with mature trees and shrubs; contrasting the bleak landscapes depicted in published battlefield photos. There was an immediate public outcry following the publication of the reports, particularly with regards to the decision to not repatriate the bodies of the dead. The reports generated considerable discussion in the press which ultimately led to a heated debate in Parliament on 4 May 1920. Sir James Remnant started the debate, followed by speeches by William Burdett-Coutts in favour of the Commission\'s principles and Robert Cecil speaking for those desiring repatriation and opposing uniformity of grave markers. Winston Churchill closed the debate and asked that the issue not proceed to a vote. Remnant withdrew his motion, allowing the Commission to carry out its work assured of support for its principles.\n\n=== First cemeteries and memorials to the missing ===\n\nThree of the most eminent architects of their day, Sir Herbert Baker, Sir Reginald Blomfield, and Sir Edwin Lutyens were commissioned to design the cemeteries and memorials. Rudyard Kipling was appointed literary advisor for the language used for memorial inscriptions.\nIn 1920, the Commission built three experimental cemeteries at Le Treport, Forceville and Louvencourt, following the principles outlined in the Kenyon report. Of these, the Forceville Communal Cemetery and Extension was agreed to be the most successful. Having consulted with garden designer Gertrude Jekyll, the architects created a walled cemetery with uniform headstones in a garden setting, augmented by Blomfield\'s Cross of Sacrifice and Lutyens\'Stone of Remembrance. After some adjustments, Forceville became the template for the Commission\'s building programme. Adjustments were required because all three experimental cemeteries went over budget. To ensure future cemeteries remained within their budget the Commission decided to not build shelters in cemeteries that contained less than 200 graves, to not place a Stone of Remembrance in any cemetery with less than 400 graves, and to limit the height of cemetery walls to 1 metre (3.3 ft).\nAt the end of 1919, the Commission had spent £ 7,500, and this figure rose to £ 250,000 in 1920 as construction of cemeteries and memorials increased. By 1921, the Commission had established 1,000 cemeteries which were ready for headstone erections, and burials. Between 1920 and 1923, the Commission was shipping 4,000 headstones a week to France. In many cases small cemeteries were closed and the graves concentrated in larger ones. By 1927, when the majority of construction had been completed, over 500 cemeteries had been built, with 400,000 headstones, a thousand Crosses of Sacrifice, and 400 Stones of Remembrance.\nThe Commission had also been mandated to individually commemorate each soldier who had no known grave, which amounted to 315,000 in France and Belgium alone. The Commission initially decided to build 12 monuments on which to commemorate the missing; each memorial being located at the site of an important battle along the Western Front. After resistance from the French committee responsible for the approvals of memorials on French territory, the Commission revised their plan and reduced the number of memorials, and in some cases built memorials to the missing in existing cemeteries rather than as separate structures.\nReginald Blomfield\'s Menin Gate was the first memorial to the missing located in Europe to be completed, and was unveiled on 24 July 1927. The Menin Gate (Menenpoort) was found to have insufficient space to contain all the names as originally planned and 34,984 names of the missing were instead inscribed on Herbert Baker\'s Tyne Cot Memorial to the Missing. Other memorials followed: the Helles Memorial in Gallipoli designed by John James Burnet; the Thiepval Memorial on the Somme and the Arras Memorial designed by Edwin Lutyens; and the Basra Memorial in Iraq designed by Edward Prioleau Warren. The Dominions and India also erected memorials on which they commemorated their missing: the Neuve-Chapelle Memorial for the forces of India, the Vimy Memorial by Canada, the Villers-Bretonneux Memorial by Australia, the Delville Wood Memorial by South Africa and the Beaumont-Hamel Memorial by Newfoundland. The programme of commemorating the dead of the Great War was considered essentially complete with the inauguration of the Thiepval Memorial in 1932, though the Vimy Memorial would not be finished until 1936, the Villers-Bretonneux Memorial until 1938 and stonemasons were still conducting work on the Menin Gate when Germany invaded Belgium in 1940.\nThe only memorial created by the Commission that was not in the form of a monument or cemetery was the Opththalmic Institute at Giza, Egypt — complete with library, and bacteriology and pathology departments — as its memorial to men of the Egyptian Labour Corps and Camel Transport Corps. Its erection was agreed with local political pressure.\n\n=== World War II ===\n\nFrom the start of the Second World War in 1939, the Commission organised grave registration units and, planning ahead based on the experience gained from the First World War, earmarked land for use as cemeteries. When the war began turning in favour of the Allies, the Commission was able to begin restoring its First World War cemeteries and memorials. It also began the task of commemorating the 600,000 Commonwealth casualties from the Second World War. In 1949, the Commission completed Dieppe Canadian War Cemetery, the first of 559 new cemeteries and 36 new memorials. Eventually, over 350,000 new headstones were erected. Many were made from Hopton Wood stone. The wider scale of World War II, coupled with manpower shortages and unrest in some countries, meant that the construction and restoration programmes took much longer. Following the war, the Commission implemented a five-year horticultural renovation programme. The horticultural neglect was largely addressed by 1950 but there were necessary structural repairs to be made. These, together with the backlog of maintenance tasks from before the war, took a further 10 years to complete and the programme was not completed until the 1960s.\nWith the increased number of civilian casualties compared with the World War I, Winston Churchill agreed to Ware\'s proposal that the Commission also maintain a record of Commonwealth civilian war deaths. A supplemental chapter was added to the Imperial War Graves Commission\'s charter on 7 February 1941, empowering the organisation to collect and record the names of civilians who died from enemy action during the Second World War, which resulted in the creation of the Civilian War Dead Roll of Honour. The roll eventually contained the names of nearly 67,000 civilians. The Commission and the Dean of Westminster reached an agreement that the roll would eventually be placed in Westminster Abbey but not until the roll was complete and hostilities had ended. The Commission handed over the first six volumes to the Dean of Westminster on 21 February 1956; the final volume was added to the showcase in 1958.\n\n=== Post – World War II ===\n\nFollowing World War II the Commission recognised that the word \'Imperial\' within its name was no longer appropriate. In the spirit of strengthening national and regional feelings the organisation\'s name was changed to Commonwealth War Graves Commission in 1960.\nMore recent conflicts have sometimes made it impossible for the Commission to care for cemeteries in a given region or resulted in the destruction of sites altogether. Zehrensdorf Indian Cemetery in Germany was unkempt after the end of World War II and until the German reunification because it was located in an area occupied by Russian forces and was not entirely rebuilt until 2005. The Six-Day War and War of Attrition resulted in the destruction of Port Tewfik Memorial and Aden Memorial, and the death of a Commission gardener at Suez War Memorial Cemetery. During the Lebanese Civil War two cemeteries in Beirut were destroyed and had to be rebuilt. The maintenance of war graves and memorials in Iraq has remained difficult since Iran – Iraq War in the 1980s, with regular maintenance being impractical since after the Gulf War.\nThe Commission has, and continues to, also provide support for war graves outside its traditional mandate. In 1982, the British Ministry of Defence requested the Commission\'s assistance to design and construct cemeteries in the Falkland Islands for those killed during the Falklands War. Although these cemeteries are not Commonwealth War Graves Commission cemeteries, the Commission manages the administrative responsibilities of these cemeteries. Since 2005, the Commission has carried out similar management duties on behalf of the British Ministry of Defence for cemeteries and graves of British and Imperial soldiers who died during the Second Boer War. In 2003, Veterans Affairs Canada employed the Commission to develop an approach to locate grave markers for which the Canadian Minister of Veterans Affairs has responsibility. As of 2011, the Commission conducts a twelve-year cyclical inspection programme of Canadian veterans\'markers installed at the expense of the Government of Canada.\nIn 2008, an exploratory excavation discovered mass graves on the edge of Pheasant Wood outside of Fromelles. Two-hundred and fifty British and Australian bodies were excavated from five mass graves which were interred in the newly constructed Fromelles (Pheasant Wood) Military Cemetery. This was the first new Commonwealth War Graves Commission cemetery in more than 50 years, the last such cemeteries having been built after the Second World War.\n\n== Burial sites and memorials ==\n\nThe Commission is currently responsible for the continued commemoration of 1.7 million deceased Commonwealth military service members in 153 countries and approximately 67,000 civilians who died as a result of enemy action during World War II. Commonwealth military service members are commemorated by name on either a headstone, at an identified site of a burial, or on a memorial. As a result, the Commission is currently responsible for the care of war dead at over 23,000 separate burial sites and maintenance of more than 200 memorials worldwide. The vast majority of burial sites are pre-existing communal or municipal cemeteries and parish churchyards located in the United Kingdom, however the Commission has itself constructed approximately 2,500 war cemeteries worldwide. The Commission has also constructed or commissioned memorials to commemorate the dead who have no known grave; the largest of these is the Thiepval Memorial.\n\n=== Qualifications for inclusion ===\n\nThe Commission only commemorates those who have died during the designated war years, while in Commonwealth military service or of causes attributable to service. The applicable periods of consideration are 4 August 1914 to 31 August 1921 for the First World War and 3 September 1939 to 31 December 1947 for the Second World War. The end date for the First World War period is the official end of the war, while for the Second World War the Commission selected a date approximately the same period after VE Day as the official end of the First World War was after the 1918 Armistice.\nCivilians who died as a result of enemy action during the Second World War are commemorated differently from those that died as a result of military service. They are commemorated by name through the Civilian War Dead Roll of Honour located in St George\'s Chapel in Westminster Abbey. In addition to its mandated duties, the Commission maintains, under arrangement with applicable governments, over 40,000 non-Commonwealth war graves and over 25,000 non-war military and civilian graves.\n\n=== Architects and sculptors ===\n\nAs well as the main Principal Architects for France and Belgium (Baker, Blomfield and Lutyens), there were Principal Architects appointed for other regions as well. Sir Robert Lorimer was Principal Architect for Italy, Macedonia and Egypt, while Sir John James Burnet was Principal Architect for Palestine and Gallipoli, assisted by Thomas Smith Tait. The Principal Architect for Mesopotamia was Edward Prioleau Warren.\nAs well as these senior architects, there was a team of Assistant Architects who were actually responsible for many of the cemetery and memorial designs. These architects were younger, and many of them had served in the war. The Assistant Architects were: George Esselmont Gordon Leith, Wilfred Clement von Berg, Charles Henry Holden (who in 1920 became a Principal Architect), William Harrison Cowlishaw, William Bryce Binnie, George Hartley Goldsmith, Frank Higginson, Arthur James Scott Hutton, Noel Ackroyd Rew, and John Reginald Truelove. Other architects that worked for the Commission, or won competitions for the Commission memorials, included George Salway Nicol, Harold Chalton Bradshaw, Verner Owen Rees, Gordon H. Holt, and Henry Philip Cart de Lafontaine.\nIn January 1944, Edward Maufe was appointed Principal Architect for the UK. Maufe worked extensively for the Commission for 25 years until 1969, becoming Chief Architect and also succeeding Kenyon as Artistic Advisor. Together with Maufe, the other Principal Architects appointed during and after the Second World War were Hubert Worthington, Louis de Soissons, Philip Hepworth and Colin St Clair Oakes.\nLeading sculptors that worked on the memorials and cemeteries after the First World War included Eric Henri Kennington, Charles Thomas Wheeler, Gilbert Ledward, and Charles Sargeant Jagger. Other sculptors, both in the inter-war period and after the Second World War, included William Reid Dick, Ernest Gillick, Basil Gotto, Alfred Turner, Laurence A. Turner, Walter Gilbert, Henry Poole, Vernon Hill, Robert Anning Bell, Ferdinand Victor Blundstone, Joseph Armitage, and Gilbert Bayes.\n\n=== Cemetery design ===\n\n\n==== Common architectural design features ====\n\nStructural design has always played an important part in the Commission\'s cemeteries. Apart from a few exceptions, due to local geological conditions, the cemeteries follow the same design and uniform aesthetic all over the world. This makes the cemeteries easily recognisable and distinguishes them from war graves administered by other groups or countries.\nA typical cemetery is surrounded by a low wall or hedge and with a wrought-iron gate entrance. For cemeteries in France and Belgium, a land tablet near the entrance or along a wall identifies the cemetery grounds as having been provided by the French or Belgian governments. All but the smallest cemeteries contain a register with an inventory of the burials, a plan of the plots and rows, and a basic history of the cemetery. The register is located within a metal cupboard that is marked with a cross located in either the wall near the cemetery entrance or in a shelter within the cemetery. More recently, in larger sites, a stainless steel notice gives details of the respective military campaign. The headstones within the cemetery are of a uniform size and design and mark plots of equal size.\nThe cemetery grounds are, except in drier climates, grass covered with a floral border around the headstones. There is also an absence of any paving between the headstone rows which is intended to make the cemetery feel like a traditional walled garden where visitors could experience a sense of peace. However, Carter and Jackson argue that the uniform aesthetics are designed to evoke a positive experience which deliberately masks and sanitises the nature of the war deaths.\n\n==== Cross of Sacrifice and Stone of Remembrance ====\n\nTypically, cemeteries of more than 40 graves contain a Cross of Sacrifice designed by architect Reginald Blomfield. This cross was designed to imitate medieval crosses found in churchyards in England with proportions more commonly seen in the Celtic cross. The cross is normally a freestanding four-point limestone Latin cross, mounted on an octagonal base, and ranging in height from 14 to 32 feet. A bronze longsword, blade down, is embedded on the face of the cross. This cross represents the faith of the majority of the dead and the sword represents the military character of the cemetery, intended to link British soldiers and the Christian concept of self-sacrifice.\nCemeteries with more than 1000 burials typically have a Stone of Remembrance, designed by Edwin Lutyens with the inscription "Their Name Liveth for Evermore". The concept of the Stone of Remembrance stone was developed by Rudyard Kipling to commemorate those of all faiths and none respectively. In contrast to the Cross of Sacrifice, the design for the stone deliberately avoided "shapes associated with particular religions". The geometry of the structure was based on studies of the Parthenon. Each stone is 3.5 metres (11 ft) long and 1.5 metres (4.9 ft) high. The shape of the stone has been compared both to that of a sarcophagus and an altar. The feature was designed using the principle of entasis. The subtle curves in the design, if extended, would form a sphere 1,801 feet 8 inches (549.15 m) in diameter.\n\n==== Headstones ====\n\nEvery grave is marked with a headstone. Each headstone contains the national emblem or regimental badge, rank, name, unit, date of death and age of each casualty inscribed above an appropriate religious symbol and a more personal dedication chosen by relatives. The headstones use a standard upper case lettering designed by MacDonald Gill. Individual graves are arranged, where possible, in straight rows and marked by uniform headstones, the vast majority of which are made of Portland stone. The original headstone dimensions were 76 centimetres (30 in) tall, 38 cm (15 in) wide, and 7.6 cm (3.0 in) thick.\nMost headstones are inscribed with a cross, except for those deceased known to be atheist or non-Christian. In the case of burials of Victoria Cross or George Cross recipients, the regimental badge is supplemented by the Victoria Cross or George Cross emblem. Sometimes a soldier employed a pseudonym because they were too young to serve or were sought by law enforcement; in such cases their primary name is shown along with the notation "served as". Many headstones are for unidentified casualties; they consequently bear only what could be discovered from the body. The epitaph, developed by Rudyard Kipling, that appears on the graves of unidentified soldiers for which no details are known is "A Soldier of the Great War known unto God". Some headstones bear the text "believed to be buried in this cemetery" when they are believed to be buried in the cemetery but the exact location of the grave is not known. In some cases soldiers were buried in collective graves and distinguishing one body from another was not possible and thus one headstone covers more than one grave. The headstone does not denote any specific details of the death except for its date, and even then only if it is known, and are deliberately ambiguous about the cause of death.\nDue to local conditions it was sometimes necessary for the Commission to deviate from its standard design. In places prone to extreme weather or earthquakes, such as Thailand and Turkey, stone-faced pedestal markers are used instead of the normal headstones. These measures are intended to prevent masonry being damaged during earthquakes or sinking into sodden ground. In Italy headstones were carved from Chiampo Perla limestone because it was in more plentiful supply. In Struma Military Cemetery, in Greece, to avoid risk of earthquake damage, small headstones are laid flat on the ground. The smaller size of the markers mean that they often lack unit insignia.\n\n==== Horticulture ====\n\nCommission cemeteries are distinctive in treating floriculture as an integral part of the cemetery design. Originally, the horticultural concept was to create an environment where visitors could experience a sense of peace in a setting, in contrast to traditionally bleak graveyards. Recommendations given by Arthur William Hill, the Assistant Director of the Royal Botanical Gardens at Kew enabled the Commission to develop cemetery layouts and architectural structures that took into account the placement of suitable plant life. Combining structural and horticultural elements was not unfamiliar to the Commission\'s architects. Sir Edwin Lutyens furthered his long-standing working relationship with horticulturist Gertrude Jekyll, whose devotion to traditional cottage garden plants and roses greatly influenced the appearance of the cemeteries. Where possible, indigenous plants were utilised to enhance sentimental associations with the gardens of home.\nVariety in texture, height and timing of floral display were equally important horticultural considerations. The beds around each headstone are planted with a mixture of floribunda roses and herbaceous perennials. Low-growing plants are chosen for areas immediately in front of headstones, ensuring that inscriptions are not obscured and preventing soil from splashing back during rain. In cemeteries where there are pedestal grave markers, dwarf varieties of plants are used instead.\nThe absence of any form of paving between the headstone rows contributes to the simplicity of the cemetery designs. Lawn paths add to the garden ambiance, and are irrigated during the dry season in countries where there is insufficient rain. Where irrigation is inappropriate or impractical, dry landscaping is an ecological alternative favoured by the Commission\'s horticulturists, as is the case in Iraq. Drier areas require a different approach not only for lawns, but also to plants and styles of planting. Similarly, there are separate horticultural considerations in tropical climates. When many cemeteries are concentrated within a limited area, like along the Western Front or Gallipoli peninsula, mobile teams of gardeners operate from a local base. Elsewhere, larger cemeteries have their own dedicated staff while small cemeteries are usually tended by a single gardener working part-time.\n\n== Organisation ==\n\n\n=== Commissioners ===\n\nThe affairs of the CWGC are overseen by a Board of Commissioners. The president of the board is Prince Edward, Duke of Kent, the chairman is United Kingdom Secretary of State for Defence Michael Fallon and the vice-chairman Vice-Admiral Tim Laurence. The members are: the High Commissioner for New Zealand to the United Kingdom Lockwood Smith, the High Commissioners of Australia to the United Kingdom Alexander Downer, the Acting High Commissioner of the Republic of South Africa to the United Kingdom Obed Mlaba, the High Commissioner for India to the United Kingdom Ranjan Mathai, the High Commissioner for Canada to the United Kingdom Gordon Campbell, Hew Strachan, Keith Simpson, Kevan Jones, Edward Chaplin, Robert Fox, Ros Kelly and Lieutenant General Bill Rollo. Victoria Wallace is the Director-General of the CWGC and serves as secretary. The board also has an Honorary Artistic Adviser, Peter Inskip.\n\n=== Functional structure ===\n\nThe CWGC is headquartered in Maidenhead, England. Offices or agencies that are each responsible for a specific geographical area manage the worldwide affairs of the organisation. They are:\nFrance Area is headed by a director and is responsible for France (including the island of Corsica), Monaco and Switzerland.\nNorthern Europe Area, headed by a director and responsible for Austria, Belgium, Czech Republic, Denmark, Estonia, Germany, Hungary, Latvia, Lithuania, Luxembourg, Netherlands, Norway, Poland and Sweden.\nUnited Kingdom Area, headed by a director and responsible for Channel Islands, Faroe Islands, Iceland, Ireland, Isle of Man and the United Kingdom\nMediterranean Area headed by a director and responsible for Albania, Algeria, Azerbaijan, Azores, Bahrain, Canary Islands, Croatia, Cyprus, Egypt, Gibraltar, Greece, Israel and Palestine, Italy, Jordan, Lebanon, Libya, Macedonia, Madeira, Malta, Mauritania, Morocco, Oman, Portugal, San Marino, Saudi Arabia, Serbia, Spain, Syria, Tunisia, Turkey, United Arab Emirates and Yemen\nCanadian Agency is headed by a secretary-general and responsible for Canada, the entire Americas (including the Caribbean)\nAustralia, managed by the Office of Australian War Graves in the Australian Department of Veterans Affairs on behalf of the CWGC, is responsible for Australia, Norfolk Island, Papua New Guinea and the Solomon Islands\nNew Zealand, managed by the New Zealand Ministry of Culture and Heritage on behalf of the CWGC, is responsible for New Zealand, New Caledonia, Samoa, Society Islands, Tonga and Vanuatu\nSouth Africa Agency is headed by a secretary and is responsible for Republic of South Africa, Namibia, Saint Helena and Ascension Island\nAfrica, Asia and Pacific Area is headed by a director and is responsible for areas not covered by any of the other bodies.\n\n=== Financing ===\n\nThe CWGC\'s work is funded predominantly by grants from the governments of the six member states. In the fiscal year 2012 / 13, these grants amounted to £ 58.6 million of the organisation\'s £ 66.5 million of income. This equates to an approximate cost of C $ 85 per commemorated war dead. The contribution from each country is proportionate to the number of graves the CWGC maintains on behalf of that country. The percentage of total annual contributions for which each country is responsible is United Kingdom 78.4 %, Canada 10.1 %, Australia 6.1 %, New Zealand 2.1 %, South Africa 2.1 % and India 1.2 %.\n\n== Ongoing projects and issues ==\n\n\n=== War Graves Photographic Project ===\n\nA project is underway to photograph the graves of and memorials to all service personnel from 1914 to the present day and make the images available to the public. The work is being carried out by The War Graves Photographic Project in conjunction with the CWGC. As of August 2013, the project has recorded 1.7 million photographs for posterity.\n\n=== Reburials and identifications ===\n\nImmediately following the First World War, the British Army remained responsible for the exhumation of remains. The Western Front was divided into sectors and combed for bodies by 12-man exhumation units. Between the Armistice and September 1921, the exhumation units reburied 204,695 bodies. After 1921, no further widespread search for bodies was undertaken and in February 1921 responsibility of the cemeteries was transferred to the Commission. Despite the rigorous searches, bodies continued to be discovered in numbers. In the three years following the conclusion of the general search 38,000 bodies were discovered. In the mid 1920s, 20 to 30 bodies were being discovered weekly.\nThe discovery of remains of First and Second World War casualties remains a common occurrence with approximately 30 bodies discovered annually. For example, in 2006 eight bodies of Canadian soldiers from the 78th Battalion (Winnipeg Grenadiers), CEF were discovered in a backyard in Hallu, France. In April 2013, the remains of four British soldiers discovered by a French farmer clearing land with metal detector in 2009 were re-interred at H.A.C. Cemetery near Arras, France. In March 2014, the remains of 20 Commonwealth and 30 German soldiers were discovered in Vendin-le-Vieil, France with the Commonwealth soldiers being subsequently reburied at Loos British Cemetery.\nWhen the remains of a Commonwealth soldier from the First or Second World War is discovered the Commission is notified and a Commission burial officer tries to collect any associated artifacts that may help in identify the remains. The details are then registered and archived at the Commission\'s headquarters. the collection of evidence can include artifacts with the remains, anthropological data and DNA. The archival records of the commission are open to the public to permit individuals to conduct their own research. Investigation of archival records by members of the public periodically result in the identification of previously buried casualties. In December 2013, it was discovered that Second Lieutenant Philip Frederick Cormack, who was previously commemorated on the Arras Flying Services Memorial, had in fact been buried in a French military cemetery in Machelen, East-Flanders in Belgium. Sergeant Leonard Maidment was identified in 2013 after a visitor to Marfaux British Cemetery discovered a headstone of an unknown sergeant with the Hampshire Regiment killed on 20 July 1918 and was subsequently able to show that only one sergeant from that regiment had been killed in France on that date.\n\n=== Vandalism ===\n\nCemeteries, including those of war dead, are targets for vandalism. The gravestones, cemeteries and buildings of the Commission are no exception. The Commission believes that graffiti and damage to stonework are usually the pursuits partaken by young people, noting the number of incidents increases when schoolchildren are on school holidays. Determined thieves will also steal the bronze swords off the Cross of Sacrifice, which are now replaced with identical ones made in fibreglass.\nThe vandalism of Commission cemeteries has also been connected to the participation of Commonwealth countries in contemporary conflicts. In the 1970s, in The Troubles, Commission cemeteries in Ireland experienced vandalism. Vandals defaced the central memorial of the Étaples Military Cemetery in northern France with anti-British and anti-American graffiti on 20 March 2003 immediately after the beginning of the Iraq War. On 9 May 2004, thirty-three headstones were demolished in the Gaza cemetery, which contains 3,691 graves, allegedly in retaliation for the Abu Ghraib prisoner abuse scandal. On 24 February 2012, during the Libyan Civil War, an Islamist militia damaged over 200 headstones in the Benghazi war cemetery as well as the central memorial.\n\n',)[None]
373
+
374
+ Running loglikelihood_rolling requests
375
+
376
  0%| | 0/62 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (8987 > 4096). Running this sequence through the model will result in indexing errors
377
+
378
  2%|▏ | 1/62 [00:06<06:11, 6.09s/it]
379
  3%|▎ | 2/62 [00:06<02:58, 2.98s/it]
380
  5%|▍ | 3/62 [00:10<03:25, 3.48s/it]
381
  6%|▋ | 4/62 [00:17<04:22, 4.53s/it]
382
  8%|▊ | 5/62 [00:21<04:09, 4.38s/it]
383
  10%|▉ | 6/62 [00:27<04:39, 4.99s/it]
384
  11%|█▏ | 7/62 [00:28<03:23, 3.69s/it]
385
  13%|█▎ | 8/62 [00:29<02:34, 2.86s/it]
386
  15%|█▍ | 9/62 [00:33<02:52, 3.25s/it]
387
  16%|█▌ | 10/62 [00:37<03:02, 3.51s/it]
388
  18%|█▊ | 11/62 [00:41<03:08, 3.70s/it]
389
  19%|█▉ | 12/62 [00:42<02:25, 2.91s/it]
390
  21%|██ | 13/62 [00:51<03:42, 4.53s/it]
391
  23%|██▎ | 14/62 [00:53<03:01, 3.78s/it]
392
  24%|██▍ | 15/62 [01:01<04:00, 5.12s/it]
393
  26%|██▌ | 16/62 [01:02<02:58, 3.89s/it]
394
  27%|██▋ | 17/62 [01:03<02:12, 2.94s/it]
395
  29%|██▉ | 18/62 [01:09<02:52, 3.92s/it]
396
  31%|███ | 19/62 [01:09<02:04, 2.89s/it]
397
  32%|███▏ | 20/62 [01:10<01:33, 2.24s/it]
398
  34%|███▍ | 21/62 [01:11<01:20, 1.97s/it]
399
  35%|███▌ | 22/62 [01:12<01:03, 1.58s/it]
400
  37%|███▋ | 23/62 [01:14<01:06, 1.70s/it]
401
  39%|███▊ | 24/62 [01:15<00:53, 1.40s/it]
402
  40%|████ | 25/62 [01:19<01:21, 2.20s/it]
403
  42%|████▏ | 26/62 [01:21<01:15, 2.11s/it]
404
  44%|████▎ | 27/62 [01:25<01:34, 2.71s/it]
405
  45%|████▌ | 28/62 [01:27<01:21, 2.39s/it]
406
  47%|████▋ | 29/62 [01:27<01:02, 1.89s/it]
407
  48%|████▊ | 30/62 [01:28<00:49, 1.56s/it]
408
  50%|█████ | 31/62 [01:34<01:31, 2.94s/it]
409
  52%|█████▏ | 32/62 [01:35<01:05, 2.20s/it]
410
  53%|█████▎ | 33/62 [01:35<00:47, 1.65s/it]
411
  55%|█████▍ | 34/62 [01:37<00:46, 1.65s/it]
412
  56%|█████▋ | 35/62 [01:45<01:37, 3.62s/it]
413
  58%|█████▊ | 36/62 [01:46<01:13, 2.82s/it]
414
  60%|█████▉ | 37/62 [01:47<00:54, 2.17s/it]
415
  61%|██████▏ | 38/62 [01:53<01:20, 3.37s/it]
416
  63%|██████▎ | 39/62 [01:54<01:04, 2.78s/it]
417
  65%|██████▍ | 40/62 [01:58<01:09, 3.17s/it]
418
  66%|██████▌ | 41/62 [02:02<01:12, 3.46s/it]
419
  68%|██████▊ | 42/62 [02:03<00:52, 2.61s/it]
420
  69%|██████▉ | 43/62 [02:04<00:43, 2.28s/it]
421
  71%|███████ | 44/62 [02:05<00:33, 1.86s/it]
422
  73%|███████▎ | 45/62 [02:09<00:43, 2.53s/it]
423
  74%|███████▍ | 46/62 [02:11<00:36, 2.26s/it]
424
  76%|███████▌ | 47/62 [02:12<00:27, 1.81s/it]
425
  77%|███████▋ | 48/62 [02:22<01:00, 4.35s/it]
426
  79%|███████▉ | 49/62 [02:24<00:46, 3.54s/it]
427
  81%|████████ | 50/62 [02:30<00:52, 4.34s/it]
428
  82%|████████▏ | 51/62 [02:31<00:35, 3.25s/it]
429
  84%|████████▍ | 52/62 [02:35<00:35, 3.50s/it]
430
  85%|████████▌ | 53/62 [02:43<00:44, 4.91s/it]
431
  87%|████████▋ | 54/62 [02:47<00:37, 4.66s/it]
432
  89%|████████▊ | 55/62 [02:55<00:40, 5.74s/it]
433
  90%|█████████ | 56/62 [02:56<00:24, 4.14s/it]
434
  92%|█████████▏| 57/62 [02:57<00:16, 3.24s/it]
435
  94%|█████████▎| 58/62 [02:58<00:09, 2.49s/it]
436
  95%|█████████▌| 59/62 [03:04<00:10, 3.58s/it]
437
  97%|█████████▋| 60/62 [03:08<00:07, 3.74s/it]
438
  98%|█████████▊| 61/62 [03:09<00:03, 3.11s/it]
439
+ Torch evaluation result: {'results': {'wikitext': {'word_perplexity': 9.010890639006227, 'byte_perplexity': 1.5085035483088687, 'bits_per_byte': 0.5931180900024063}}, 'versions': {'wikitext': 1}, 'config': {'model': None, 'model_args': None, 'num_fewshot': 0, 'batch_size': 1, 'batch_sizes': [], 'device': 'cuda', 'no_cache': True, 'limit': None, 'bootstrap_iters': 100000, 'description_dict': None}}
440
+ Automatic task detection to: text-generation-with-past.
441
+ Using framework PyTorch: 2.1.0
442
+ Overriding 1 configuration item(s)
443
+ - use_cache -> True
444
+ WARNING:nncf:You are setting `forward` on an NNCF-processed model object.
445
+ NNCF relies on custom-wrapping the `forward` call in order to function properly.
446
+ Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behavior.
447
+ If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling:
448
+ model.nncf.set_original_unbound_forward(fn)
449
+ if `fn` has an unbound 0-th `self` argument, or
450
+ with model.nncf.temporary_bound_original_forward(fn): ...
451
+ if `fn` already had 0-th `self` argument bound or never had it in the first place.
452
+ The cos_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use the forward method of RoPE from now on instead. It is not used in the `LlamaAttention` class
453
+ The sin_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use the forward method of RoPE from now on instead. It is not used in the `LlamaAttention` class
454
+ /nvme2/yujiepan/tools/miniconda3/envs/llm-sparse-training-autoawq/lib/python3.10/site-packages/optimum/exporters/openvino/model_patcher.py:340: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
455
+ if sequence_length != 1:
456
+ /nvme2/yujiepan/tools/miniconda3/envs/llm-sparse-training-autoawq/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:382: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
457
+ if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
458
+ WARNING:nncf:You are setting `forward` on an NNCF-processed model object.
459
+ NNCF relies on custom-wrapping the `forward` call in order to function properly.
460
+ Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behavior.
461
+ If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling:
462
+ model.nncf.set_original_unbound_forward(fn)
463
+ if `fn` has an unbound 0-th `self` argument, or
464
+ with model.nncf.temporary_bound_original_forward(fn): ...
465
+ if `fn` already had 0-th `self` argument bound or never had it in the first place.
466
+ Provided model does not contain state. It may lead to sub-optimal performance.Please reexport model with updated OpenVINO version >= 2023.3.0 calling the `from_pretrained` method with original model and `export=True` parameter
467
+ Compiling the model to CPU ...
468
+ [{'generated_text': 'Hello, I am an AI chatbot 🤖, how can I help you today?\n февруа'}]
openvino_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c666f6a35aa3117c715b0339982c6203fd20da5bd32edbe199e4d69a58139342
3
+ size 6742954132
openvino_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "bos_token": "<s>",
31
+ "clean_up_tokenization_spaces": false,
32
+ "eos_token": "</s>",
33
+ "legacy": false,
34
+ "model_max_length": 4096,
35
+ "pad_token": "</s>",
36
+ "padding_side": "right",
37
+ "sp_model_kwargs": {},
38
+ "tokenizer_class": "LlamaTokenizer",
39
+ "unk_token": "<unk>",
40
+ "use_default_system_prompt": false
41
+ }
torch_eval_results.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "wikitext": {
4
+ "word_perplexity": 9.010890639006227,
5
+ "byte_perplexity": 1.5085035483088687,
6
+ "bits_per_byte": 0.5931180900024063
7
+ }
8
+ },
9
+ "versions": {
10
+ "wikitext": 1
11
+ },
12
+ "config": {
13
+ "model": null,
14
+ "model_args": null,
15
+ "num_fewshot": 0,
16
+ "batch_size": 1,
17
+ "batch_sizes": [],
18
+ "device": "cuda",
19
+ "no_cache": true,
20
+ "limit": null,
21
+ "bootstrap_iters": 100000,
22
+ "description_dict": null
23
+ }
24
+ }