captions

Runtime error

App Files Community

royyy74 committed on Jul 7

Commit

e1ae8b1

verified ·

1 Parent(s): 0674520

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -9

app.py CHANGED Viewed

@@ -196,14 +196,31 @@ if device.type == 'cuda':
         print(f"Failed to load LLM on CUDA: {e}")
         raise
 else:
-    # CPU-only loading: Explicitly use CPU and float32
-    print("Attempting to load LLM on CPU with float32...")
-    text_model = AutoModelForCausalLM.from_pretrained(
-        CHECKPOINT_PATH / "text_model",
-        device_map={'': 'cpu'}, # Explicitly map all to CPU
-        torch_dtype=torch.float32 # Use float32 for CPU
-    )
-    print("LLM loaded on CPU with float32.")
 text_model.eval()
@@ -408,4 +425,4 @@ async def caption_image_endpoint(
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

         print(f"Failed to load LLM on CUDA: {e}")
         raise
 else:
+    # CPU-only loading: Attempt float32 first, then fallback to offloading if it fails.
+    print("Attempting to load LLM on CPU with float32 (direct to RAM)...")
+    try:
+        text_model = AutoModelForCausalLM.from_pretrained(
+            CHECKPOINT_PATH / "text_model",
+            device_map={'': 'cpu'}, # Explicitly map all to CPU
+            torch_dtype=torch.float32 # Use float32 for CPU
+        )
+        print("LLM loaded on CPU with float32 (direct to RAM).")
+    except Exception as e_cpu_direct: # Catch a broad exception, could be OOM (RuntimeError) or others
+        print(f"Direct CPU float32 loading failed: {e_cpu_direct}")
+        print("Attempting to load LLM on CPU with disk offloading (float32)...")
+        try:
+            model_offload_dir_cpu = TemporaryDirectory().name
+            text_model = AutoModelForCausalLM.from_pretrained(
+                CHECKPOINT_PATH / "text_model",
+                device_map="auto", # Allow accelerate to use CPU and disk
+                torch_dtype=torch.float32,
+                offload_folder=model_offload_dir_cpu,
+                offload_state_dict=True
+            )
+            print(f"LLM loaded on CPU with offloading to {model_offload_dir_cpu}. WARNING: This will be very slow.")
+        except Exception as e_cpu_offload:
+            print(f"CPU loading with disk offloading also failed: {e_cpu_offload}")
+            raise # Re-raise the exception if all CPU loading strategies fail
 text_model.eval()
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)