6Genix committed on
Commit
eb857d5
·
1 Parent(s): e79e7ca

Updated model loading to include fp16 precision fallback for DeepSeek-V3. Enhanced error handling for quantization issues and improved robustness.

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -28,13 +28,13 @@ def load_model_engineer():
28
  raise
29
  else:
30
  try:
31
- # Fallback: Load model directly
32
  tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
33
  model = AutoModelForCausalLM.from_pretrained(
34
  "unsloth/DeepSeek-V3",
35
  trust_remote_code=True
36
  )
37
- model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
38
  return tokenizer, model
39
  except Exception as e:
40
  st.error(f"Direct model loading failed for Engineer: {str(e)}")
@@ -56,13 +56,13 @@ def load_model_analyst():
56
  raise
57
  else:
58
  try:
59
- # Fallback: Load model directly
60
  tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
61
  model = AutoModelForCausalLM.from_pretrained(
62
  "unsloth/DeepSeek-V3",
63
  trust_remote_code=True
64
  )
65
- model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
66
  return tokenizer, model
67
  except Exception as e:
68
  st.error(f"Direct model loading failed for Analyst: {str(e)}")
 
28
  raise
29
  else:
30
  try:
31
+ # Fallback: Load model directly with fp16 precision
32
  tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
33
  model = AutoModelForCausalLM.from_pretrained(
34
  "unsloth/DeepSeek-V3",
35
  trust_remote_code=True
36
  )
37
+ model = model.half().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
38
  return tokenizer, model
39
  except Exception as e:
40
  st.error(f"Direct model loading failed for Engineer: {str(e)}")
 
56
  raise
57
  else:
58
  try:
59
+ # Fallback: Load model directly with fp16 precision
60
  tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
61
  model = AutoModelForCausalLM.from_pretrained(
62
  "unsloth/DeepSeek-V3",
63
  trust_remote_code=True
64
  )
65
+ model = model.half().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
66
  return tokenizer, model
67
  except Exception as e:
68
  st.error(f"Direct model loading failed for Analyst: {str(e)}")