royyy74 committed
Commit 011b368 · verified · 1 Parent(s): 613218d

Update app.py

Files changed (1): app.py +42 -3
app.py CHANGED
@@ -8,6 +8,7 @@ import os
 import torchvision.transforms.functional as TVF
 import io
 import json  # For parsing extra_options_json
+from tempfile import TemporaryDirectory  # For offload_folder
 
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
 from pydantic import BaseModel
@@ -164,8 +165,46 @@ assert isinstance(tokenizer, PreTrainedTokenizer) or isinstance(tokenizer, PreTr
 # LLM
 print("Loading LLM")
 print("Loading VLM's custom text model")
-# Use device_map="auto" to allow accelerate to handle model placement, including CPU
-text_model = AutoModelForCausalLM.from_pretrained(CHECKPOINT_PATH / "text_model", device_map="auto", torch_dtype=torch.bfloat16)
+# Use device_map="auto" and load_in_8bit for quantization to reduce memory footprint
+try:
+    text_model = AutoModelForCausalLM.from_pretrained(
+        CHECKPOINT_PATH / "text_model",
+        device_map="auto",
+        load_in_8bit=True  # Enable 8-bit quantization
+        # torch_dtype is generally not specified with load_in_8bit,
+        # as bitsandbytes handles the underlying types.
+    )
+except Exception as e:
+    print(f"Failed to load model with 8-bit quantization: {e}")
+    print("Attempting to load without 8-bit quantization (this may fail due to memory or require offloading)...")
+    # Fallback or alternative loading strategy can be placed here if needed.
+    # For now, let it re-raise or try a different approach if the primary fails.
+    # As a simple fallback, try the original loading, which might hit the offload error.
+    try:
+        text_model = AutoModelForCausalLM.from_pretrained(
+            CHECKPOINT_PATH / "text_model",
+            device_map="auto",
+            torch_dtype=torch.bfloat16  # Try with bfloat16 first
+        )
+    except ValueError as ve:
+        if "offload_dir" in str(ve):  # Check if the error is about needing offload_dir
+            print(f"Original loading failed with ValueError (likely needing offload_dir): {ve}")
+            print("Attempting to load model with disk offloading...")
+            model_offload_dir = TemporaryDirectory().name
+            text_model = AutoModelForCausalLM.from_pretrained(
+                CHECKPOINT_PATH / "text_model",
+                device_map="auto",
+                torch_dtype=torch.bfloat16,  # Keep bfloat16 if possible
+                offload_folder=model_offload_dir,
+                offload_state_dict=True  # Recommended when offloading
+            )
+            print(f"Model loaded with offloading to {model_offload_dir}. WARNING: This will be very slow.")
+        else:
+            raise  # Re-raise other ValueErrors
+    except Exception as final_e:  # Catch any other exceptions during the last fallback attempt
+        print(f"All model loading attempts failed. Last error: {final_e}")
+        raise
+
 text_model.eval()
 
 # Image Adapter
@@ -365,4 +404,4 @@ async def caption_image_endpoint(
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
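A side note on the 8-bit path in this commit: recent transformers releases deprecate passing load_in_8bit=True directly to from_pretrained in favor of a BitsAndBytesConfig object. Below is a minimal sketch of the equivalent call, assuming a transformers version that ships BitsAndBytesConfig, a CUDA device, and bitsandbytes installed; CHECKPOINT_PATH is the path app.py already defines.

    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # 8-bit weight quantization; bitsandbytes handles the underlying
    # types, so torch_dtype is left unset, as in the commit.
    quant_config = BitsAndBytesConfig(load_in_8bit=True)
    text_model = AutoModelForCausalLM.from_pretrained(
        CHECKPOINT_PATH / "text_model",  # same checkpoint path as in app.py
        device_map="auto",
        quantization_config=quant_config,
    )

The loading behavior is unchanged; the config object simply survives the kwarg deprecation.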
 
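One caveat with the disk-offload fallback above: TemporaryDirectory().name drops the only reference to the TemporaryDirectory object, so CPython's refcounting runs its cleanup finalizer immediately and deletes the directory it just created; accelerate may recreate the path when it writes offloaded weights, but relying on that is fragile. A safer sketch, using only the standard library, is to keep the object referenced for the process lifetime, or to use tempfile.mkdtemp(), which performs no automatic cleanup (the prefix below is just an illustrative choice):

    import tempfile

    # Option 1: hold a long-lived reference so the cleanup finalizer
    # cannot run while the offload folder is still in use.
    _offload_tmp = tempfile.TemporaryDirectory()
    model_offload_dir = _offload_tmp.name

    # Option 2: mkdtemp() creates the directory but never deletes it.
    model_offload_dir = tempfile.mkdtemp(prefix="model_offload_")

Either way, offload_folder=model_offload_dir stays valid for as long as the server runs.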