Merge remote-tracking branch 'refs/remotes/origin/main'
Browse files — modeling_chatglm.py (+2 −2)
modeling_chatglm.py
CHANGED
|
@@ -29,7 +29,7 @@ from transformers.utils import logging
|
|
| 29 |
from transformers.generation.logits_process import LogitsProcessor
|
| 30 |
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig
|
| 31 |
|
| 32 |
-
from configuration_chatglm import ChatGLMConfig
|
| 33 |
|
| 34 |
|
| 35 |
# flags required to enable jit fusion kernels
|
|
@@ -1273,7 +1273,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
|
|
| 1273 |
if bits == 0:
|
| 1274 |
return
|
| 1275 |
|
| 1276 |
-
from quantization import quantize, QuantizedEmbedding, QuantizedLinear, load_cpu_kernel
|
| 1277 |
|
| 1278 |
if self.quantized:
|
| 1279 |
if self.device == torch.device("cpu"):
|
|
|
|
| 29 |
from transformers.generation.logits_process import LogitsProcessor
|
| 30 |
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig
|
| 31 |
|
| 32 |
+
from .configuration_chatglm import ChatGLMConfig
|
| 33 |
|
| 34 |
|
| 35 |
# flags required to enable jit fusion kernels
|
|
|
|
| 1273 |
if bits == 0:
|
| 1274 |
return
|
| 1275 |
|
| 1276 |
+
from .quantization import quantize, QuantizedEmbedding, QuantizedLinear, load_cpu_kernel
|
| 1277 |
|
| 1278 |
if self.quantized:
|
| 1279 |
if self.device == torch.device("cpu"):
|