Add model

- acip_model.py +44 -24
- config.json +5 -5
- parametrized_model.py +3 -3
acip_model.py CHANGED

@@ -1,3 +1,4 @@
+import logging
 from typing import Any
 
 import torch
@@ -5,6 +6,8 @@ from transformers import PreTrainedModel
 
 from .parametrized_model import ParametrizedModel, ParametrizedModelConfig
 
+logger = logging.getLogger(__name__)
+
 
 class ACIPModelConfig(ParametrizedModelConfig):
     """
@@ -24,7 +27,7 @@ class ACIPModel(ParametrizedModel):
     It manages a `score_map` that stores the scores of the parametrized modules' target parameters,
     which are updated during tuning by the ACIP method.
     Moreover, it provides `prune_model_by_score` that prunes the target parameters of the model according to
-    their scores to achieve any given
+    their scores to achieve any given size ratio.
 
     Notes: The `score_map` is managed in float32 internally because a lower precision may lead to unexpected numerical
         inaccuracies in the resulting parameter ranking. Fortunately, the memory consumption is negligible compared to
@@ -92,10 +95,10 @@ class ACIPModel(ParametrizedModel):
             buffer.copy_(score.detach().float())
             self._score_map[p_name] = buffer
 
-    def
+    def _predict_size_ratio_by_score(self, k: int, full: bool = False) -> tuple[float, dict[str, torch.Tensor]]:
         """
         Helper function that checks what would happen if the k smallest target parameters are pruned
-        according to the global score map ranking. It returns the resulting
+        according to the global score map ranking. It returns the resulting size ratio
         and the corresponding parameter masks.
 
         Args:
@@ -103,7 +106,7 @@ class ACIPModel(ParametrizedModel):
            full: Whether to count the number of parameters of the entire model or only the parametrized modules.
                See also `ParametrizedModel.get_num_params`.
 
-        Returns: Tuple of
+        Returns: Tuple of size ratio and parameter masks. The masks indicate which parameters to keep.
         """
         # Find the threshold value for the k smallest entries according to the global score map ranking.
         score_map_cat = torch.cat([param.flatten() for param in self.score_map.values()])
@@ -114,55 +117,72 @@ class ACIPModel(ParametrizedModel):
         for p_name, score in self.score_map.items():
             param_masks[p_name] = (score > threshold).to(dtype=score.dtype)
 
-        # Compute hypothetical
-
-        return
+        # Compute hypothetical size ratio if param_masks would be used as masks for the target parameters.
+        size_ratio = self.get_size_ratio(full=full, target_params=param_masks)
+        return size_ratio, param_masks
 
-    def _get_param_masks(self,
+    def _get_param_masks(self, size_ratio: float, full: bool = False) -> dict[str, torch.Tensor]:
         """
-        Helper function that determines which parameters to keep to reach a target
-        Instead of looping over `k ->
-        the
+        Helper function that determines which parameters to keep to reach a target size ratio.
+        Instead of looping over `k -> _predict_size_ratio_by_score(k)`, a binary search can be used because
+        the size ratio is monotonically increasing in k.
 
         Args:
-
+            size_ratio: Target size ratio.
            full: Whether to count the number of parameters of the entire model or only the parametrized modules.
                See also `ParametrizedModel.get_num_params`.
 
-        Returns: Parameter masks indicating which parameters to keep to reach the target
+        Returns: Parameter masks indicating which parameters to keep to reach the target size ratio.
         """
-        if
+        if size_ratio == 1.0:
             return {p_name: torch.ones_like(score) for p_name, score in self.score_map.items()}
 
-        # Perform a binary search to find the smallest k such that the
+        # Perform a binary search to find the smallest k such that the size ratio is at least size_ratio.
         # Here, k_lo and k_hi are the lower and upper bound of the search interval.
         k_lo, k_hi = 1, sum(score.numel() for score in self.score_map.values())
         while k_lo < k_hi:
             k_mid = (k_lo + k_hi + 1) // 2  # round up to ensure low <= mid
-            ratio, _ = self.
-            if ratio >
+            ratio, _ = self._predict_size_ratio_by_score(k=k_mid, full=full)
+            if ratio > size_ratio:
                 k_lo = k_mid
             else:
                 k_hi = k_mid - 1
         k = k_lo
         # TODO: handle tie-breaks
-        return self.
-
-    def prune_model_by_score(
+        return self._predict_size_ratio_by_score(k=k, full=full)[1]
+
+    def prune_model_by_score(
+        self,
+        size_ratio: float | None = None,
+        compression_rate: float | None = None,
+        full: bool = False,
+    ) -> None:
         """
         This method prunes the target parameters of the model according to their scores to achieve
-        a given
+        a given size ratio.
 
         This can be efficiently implemented by a simple binary search strategy:
         We find the smallest number of parameters to be pruned according to the score map ranking
-        such that the resulting
+        such that the resulting size ratio is at least the target `size_ratio`.
 
         Args:
-
+            size_ratio: The target size ratio, which is the ratio between the size of the compressed model and
+                the original model (where size is measured in number of parameters).
+                If not provided, `compression_rate` must be provided.
+            compression_rate: This is a convenience parameter that allows you to set the target compression rate
+                instead of `size_ratio`. It is equivalent to `size_ratio = 1.0 - compression_rate`.
+                If both `size_ratio` and `compression_rate` are provided, `size_ratio` is used.
            full: Whether to count the number of parameters of the entire model or only the parametrized modules.
                See also `ParametrizedModel.get_num_params`.
         """
-
+        if size_ratio is None and compression_rate is None:
+            raise ValueError("Either `size_ratio` or `compression_rate` must be provided.")
+        elif size_ratio is None and compression_rate is not None:
+            size_ratio = 1.0 - compression_rate
+        else:
+            logger.warning("Both `size_ratio` and `compression_rate` are provided. Using `size_ratio`.")
+
+        param_masks = self._get_param_masks(size_ratio=size_ratio, full=full)
 
         # Reset the target parameters according to the parameter masks
         for p_name, param in self.get_target_params().items():
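Taken together, the commit turns score-based pruning into a one-call API. A minimal usage sketch, assuming `model` is an `ACIPModel` whose `score_map` has already been tuned by ACIP (the variable name and the 0.6 target are illustrative, not part of the commit):

    # Prune so that the compressed model keeps ~60% of the original parameters.
    model.prune_model_by_score(size_ratio=0.6)

    # Equivalent call via the convenience parameter (size_ratio = 1.0 - compression_rate):
    # model.prune_model_by_score(compression_rate=0.4)

    # Inspect the achieved ratio of the parametrized modules (see parametrized_model.py below).
    print(model.get_size_ratio(full=False))

Per the new argument handling, passing both arguments logs a warning and uses `size_ratio`; passing neither raises a `ValueError`.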
config.json CHANGED

@@ -32,14 +32,14 @@
     "revision": null,
     "target_modules": [
         "up_proj",
+        "down_proj",
+        "gate_proj",
+        "base",
         "k_proj",
         "o_proj",
+        "ortho",
         "v_proj",
-        "
-        "down_proj",
-        "q_proj",
-        "gate_proj",
-        "ortho"
+        "q_proj"
     ],
     "task_type": "CAUSAL_LM",
     "use_dora": false,
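In peft-style configs, `target_modules` acts as an unordered name filter: only membership matters, not position in the list. A sketch of the usual suffix-matching convention (illustrative only; `is_targeted` is a hypothetical helper, not code from this repo):

    target_modules = [
        "up_proj", "down_proj", "gate_proj", "base",
        "k_proj", "o_proj", "ortho", "v_proj", "q_proj",
    ]

    def is_targeted(module_name: str) -> bool:
        # An entry conventionally matches the final path component of a module
        # name, e.g. "model.layers.0.self_attn.q_proj" matches "q_proj".
        return module_name.split(".")[-1] in target_modules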
parametrized_model.py CHANGED

@@ -353,7 +353,7 @@ class ParametrizedModel(PreTrainedModel):
     The corresponding modules are accessed via `parametrized_modules`, `adapter_modules`,
     and `quantized_modules`, respectively.
     The class also provides several convenience methods to manage the parametrization: `get_target_params`,
-    `get_num_params`, `
+    `get_num_params`, `get_size_ratio`, `reset_target_params`, `compress`.
 
     Standard functionality (`forward`, `generate`, `save_pretrained`, `from_pretrained`) is essentially forwarded
     to the wrapped model.
@@ -698,9 +698,9 @@
             num_params = 1e-6
         return num_params
 
-    def
+    def get_size_ratio(self, full: bool = False, target_params: dict[str, torch.Tensor] | None = None) -> float:
         """
-        Convenience function to compute the
+        Convenience function to compute the size ratio of the present model.
 
         See Also:
             `get_num_params`