royyy74 committed on
Commit
e1ae8b1
·
verified ·
1 Parent(s): 0674520

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -9
app.py CHANGED
@@ -196,14 +196,31 @@ if device.type == 'cuda':
196
  print(f"Failed to load LLM on CUDA: {e}")
197
  raise
198
  else:
199
- # CPU-only loading: Explicitly use CPU and float32
200
- print("Attempting to load LLM on CPU with float32...")
201
- text_model = AutoModelForCausalLM.from_pretrained(
202
- CHECKPOINT_PATH / "text_model",
203
- device_map={'': 'cpu'}, # Explicitly map all to CPU
204
- torch_dtype=torch.float32 # Use float32 for CPU
205
- )
206
- print("LLM loaded on CPU with float32.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
  text_model.eval()
209
 
@@ -408,4 +425,4 @@ async def caption_image_endpoint(
408
 
409
  if __name__ == "__main__":
410
  import uvicorn
411
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
196
  print(f"Failed to load LLM on CUDA: {e}")
197
  raise
198
  else:
199
+ # CPU-only loading: Attempt float32 first, then fallback to offloading if it fails.
200
+ print("Attempting to load LLM on CPU with float32 (direct to RAM)...")
201
+ try:
202
+ text_model = AutoModelForCausalLM.from_pretrained(
203
+ CHECKPOINT_PATH / "text_model",
204
+ device_map={'': 'cpu'}, # Explicitly map all to CPU
205
+ torch_dtype=torch.float32 # Use float32 for CPU
206
+ )
207
+ print("LLM loaded on CPU with float32 (direct to RAM).")
208
+ except Exception as e_cpu_direct: # Catch a broad exception, could be OOM (RuntimeError) or others
209
+ print(f"Direct CPU float32 loading failed: {e_cpu_direct}")
210
+ print("Attempting to load LLM on CPU with disk offloading (float32)...")
211
+ try:
212
+ model_offload_dir_cpu = TemporaryDirectory().name
213
+ text_model = AutoModelForCausalLM.from_pretrained(
214
+ CHECKPOINT_PATH / "text_model",
215
+ device_map="auto", # Allow accelerate to use CPU and disk
216
+ torch_dtype=torch.float32,
217
+ offload_folder=model_offload_dir_cpu,
218
+ offload_state_dict=True
219
+ )
220
+ print(f"LLM loaded on CPU with offloading to {model_offload_dir_cpu}. WARNING: This will be very slow.")
221
+ except Exception as e_cpu_offload:
222
+ print(f"CPU loading with disk offloading also failed: {e_cpu_offload}")
223
+ raise # Re-raise the exception if all CPU loading strategies fail
224
 
225
  text_model.eval()
226
 
 
425
 
426
  if __name__ == "__main__":
427
  import uvicorn
428
+ uvicorn.run(app, host="0.0.0.0", port=8000)