Spaces:

6Genix
/

MultiAgent-XAI-Demo

Sleeping

6Genix committed on Jan 14

Commit

eb857d5

1 Parent(s): e79e7ca

Updated model loading to include fp16 precision fallback for DeepSeek-V3. Enhanced error handling for quantization issues and improved robustness.

Files changed (1) hide show

app.py CHANGED Viewed

@@ -28,13 +28,13 @@ def load_model_engineer():
             raise
     else:
         try:
-            # Fallback: Load model directly
             tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
             model = AutoModelForCausalLM.from_pretrained(
                 "unsloth/DeepSeek-V3",
                 trust_remote_code=True
             )
-            model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
             return tokenizer, model
         except Exception as e:
             st.error(f"Direct model loading failed for Engineer: {str(e)}")
@@ -56,13 +56,13 @@ def load_model_analyst():
             raise
     else:
         try:
-            # Fallback: Load model directly
             tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
             model = AutoModelForCausalLM.from_pretrained(
                 "unsloth/DeepSeek-V3",
                 trust_remote_code=True
             )
-            model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
             return tokenizer, model
         except Exception as e:
             st.error(f"Direct model loading failed for Analyst: {str(e)}")

             raise
     else:
         try:
+            # Fallback: Load model directly with fp16 precision
             tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
             model = AutoModelForCausalLM.from_pretrained(
                 "unsloth/DeepSeek-V3",
                 trust_remote_code=True
             )
+            model = model.half().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
             return tokenizer, model
         except Exception as e:
             st.error(f"Direct model loading failed for Engineer: {str(e)}")
             raise
     else:
         try:
+            # Fallback: Load model directly with fp16 precision
             tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
             model = AutoModelForCausalLM.from_pretrained(
                 "unsloth/DeepSeek-V3",
                 trust_remote_code=True
             )
+            model = model.half().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
             return tokenizer, model
         except Exception as e:
             st.error(f"Direct model loading failed for Analyst: {str(e)}")