```yaml
test_stage:
  quant_modifiers:
    vLLMQuantizationModifier:
      ignore: [lm_head, model.layers.0.mlp.down_proj]
      config_groups:
        group_0:
          weights: {num_bits: 8, type: int, symmetric: true, strategy: tensor}
          input_activations: {num_bits: 8, type: int, symmetric: false, strategy: tensor}
          output_activations: null
          targets: [Linear]
        group_1:
          weights: {num_bits: 8, type: int, symmetric: true, strategy: tensor}
          input_activations: null
          output_activations: null
          targets: [Embedding]
    SparseGPTModifier:
      sparsity: 0.0
      block_size: 128
      sequential_update: false
      quantize: true
      targets: ['re:model.layers.\d+$']
```
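
A recipe like this is typically handed to SparseML's one-shot entrypoint, which runs calibration and applies the modifiers in a single pass. The sketch below is a minimal example assuming the `oneshot` API from `sparseml.transformers`; the model stub, calibration dataset, sequence length, sample count, and output path are illustrative placeholders, not values taken from the recipe.

```python
# Minimal sketch: applying the recipe above with SparseML's one-shot flow.
# Assumptions (not from the recipe): model stub, calibration dataset,
# sequence length, sample count, and output directory.
from sparseml.transformers import SparseAutoModelForCausalLM, oneshot

MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # assumed example model

model = SparseAutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")

oneshot(
    model=model,
    dataset="open_platypus",        # assumed calibration dataset
    recipe="recipe.yaml",           # path to the recipe shown above
    output_dir="./model-w8a8",      # assumed output location
    max_seq_length=512,             # assumed calibration sequence length
    num_calibration_samples=64,     # assumed number of calibration samples
)
```

With `sparsity: 0.0` and `quantize: true`, the SparseGPTModifier performs GPTQ-style weight quantization over the decoder layers rather than pruning, while the vLLMQuantizationModifier attaches the W8A8 scheme to Linear layers and weight-only int8 to embeddings.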