yongdong committed on
Commit
9d3765e
·
1 Parent(s): 51982fa

perf: use safetensors for faster model loading.

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -73,14 +73,16 @@ def load_model_on_gpu():
73
  device_map="auto",
74
  torch_dtype=torch.float16,
75
  trust_remote_code=True,
76
- low_cpu_mem_usage=True
 
77
  )
78
 
79
  # Load LoRA adapter
80
  model = PeftModel.from_pretrained(
81
  base_model,
82
  LORA_MODEL,
83
- torch_dtype=torch.float16
 
84
  )
85
  model.eval()
86
 
 
73
  device_map="auto",
74
  torch_dtype=torch.float16,
75
  trust_remote_code=True,
76
+ low_cpu_mem_usage=True,
77
+ use_safetensors=True
78
  )
79
 
80
  # Load LoRA adapter
81
  model = PeftModel.from_pretrained(
82
  base_model,
83
  LORA_MODEL,
84
+ torch_dtype=torch.float16,
85
+ use_safetensors=True
86
  )
87
  model.eval()
88