Spaces:
Sleeping
Sleeping
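Updated the direct-loading fallback for DeepSeek-V3 in both the Engineer and Analyst loaders to cast the model to fp16 precision (`model.half()`) before moving it to the target device. Enhanced error handling for quantization issues and improved robustness.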
Browse files
app.py
CHANGED
@@ -28,13 +28,13 @@ def load_model_engineer():
|
|
28 |
raise
|
29 |
else:
|
30 |
try:
|
31 |
-
# Fallback: Load model directly
|
32 |
tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
|
33 |
model = AutoModelForCausalLM.from_pretrained(
|
34 |
"unsloth/DeepSeek-V3",
|
35 |
trust_remote_code=True
|
36 |
)
|
37 |
-
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
|
38 |
return tokenizer, model
|
39 |
except Exception as e:
|
40 |
st.error(f"Direct model loading failed for Engineer: {str(e)}")
|
@@ -56,13 +56,13 @@ def load_model_analyst():
|
|
56 |
raise
|
57 |
else:
|
58 |
try:
|
59 |
-
# Fallback: Load model directly
|
60 |
tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
|
61 |
model = AutoModelForCausalLM.from_pretrained(
|
62 |
"unsloth/DeepSeek-V3",
|
63 |
trust_remote_code=True
|
64 |
)
|
65 |
-
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
|
66 |
return tokenizer, model
|
67 |
except Exception as e:
|
68 |
st.error(f"Direct model loading failed for Analyst: {str(e)}")
|
|
|
28 |
raise
|
29 |
else:
|
30 |
try:
|
31 |
+
# Fallback: Load model directly with fp16 precision
|
32 |
tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
|
33 |
model = AutoModelForCausalLM.from_pretrained(
|
34 |
"unsloth/DeepSeek-V3",
|
35 |
trust_remote_code=True
|
36 |
)
|
37 |
+
model = model.half().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
|
38 |
return tokenizer, model
|
39 |
except Exception as e:
|
40 |
st.error(f"Direct model loading failed for Engineer: {str(e)}")
|
|
|
56 |
raise
|
57 |
else:
|
58 |
try:
|
59 |
+
# Fallback: Load model directly with fp16 precision
|
60 |
tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
|
61 |
model = AutoModelForCausalLM.from_pretrained(
|
62 |
"unsloth/DeepSeek-V3",
|
63 |
trust_remote_code=True
|
64 |
)
|
65 |
+
model = model.half().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
|
66 |
return tokenizer, model
|
67 |
except Exception as e:
|
68 |
st.error(f"Direct model loading failed for Analyst: {str(e)}")
|