6Genix committed
Commit e79e7ca · 1 Parent(s): 9d15a33

Enhanced error handling and fallback mechanism for DeepSeek-V3. Added detailed error messages, graceful termination on load failure, and handling of unsupported quantization configurations.

Files changed (1)
  1. app.py +71 -42
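
Note: the fallback mechanism in this commit hinges on a USE_PIPELINE flag that app.py evaluates at import time; the diff below only shows the tail of that guard (the "except ImportError:" hunk context), so the exact block is not part of this commit. The following is a minimal sketch of what such a guard presumably looks like; only USE_PIPELINE, pipeline, AutoTokenizer, AutoModelForCausalLM, torch, and st are taken from the diff, everything else is an assumption.

```python
# Hypothetical reconstruction of the import-time guard implied by the
# "except ImportError:" hunk context below; not part of this commit.
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

try:
    from transformers import pipeline
    USE_PIPELINE = True   # preferred path: high-level text-generation pipeline
except ImportError:
    USE_PIPELINE = False  # fallback path: load tokenizer and model directly
```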
app.py CHANGED
@@ -15,40 +15,65 @@ except ImportError:
 @st.cache_resource
 def load_model_engineer():
     if USE_PIPELINE:
-        # Engineer: DeepSeek-V3 via pipeline
-        engineer_pipeline = pipeline(
-            "text-generation",
-            model="unsloth/DeepSeek-V3",
-            trust_remote_code=True
-        )
-        return engineer_pipeline
+        try:
+            # Engineer: DeepSeek-V3 via pipeline
+            engineer_pipeline = pipeline(
+                "text-generation",
+                model="unsloth/DeepSeek-V3",
+                trust_remote_code=True
+            )
+            return engineer_pipeline
+        except Exception as e:
+            st.error(f"Pipeline failed to load for Engineer: {str(e)}")
+            raise
     else:
-        # Fallback: Load model directly
-        tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
-        model = AutoModelForCausalLM.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
-        model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
-        return tokenizer, model
+        try:
+            # Fallback: Load model directly
+            tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+            model = AutoModelForCausalLM.from_pretrained(
+                "unsloth/DeepSeek-V3",
+                trust_remote_code=True
+            )
+            model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+            return tokenizer, model
+        except Exception as e:
+            st.error(f"Direct model loading failed for Engineer: {str(e)}")
+            raise
 
 @st.cache_resource
 def load_model_analyst():
     if USE_PIPELINE:
-        # Analyst: DeepSeek-V3 via pipeline
-        analyst_pipeline = pipeline(
-            "text-generation",
-            model="unsloth/DeepSeek-V3",
-            trust_remote_code=True
-        )
-        return analyst_pipeline
+        try:
+            # Analyst: DeepSeek-V3 via pipeline
+            analyst_pipeline = pipeline(
+                "text-generation",
+                model="unsloth/DeepSeek-V3",
+                trust_remote_code=True
+            )
+            return analyst_pipeline
+        except Exception as e:
+            st.error(f"Pipeline failed to load for Analyst: {str(e)}")
+            raise
     else:
-        # Fallback: Load model directly
-        tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
-        model = AutoModelForCausalLM.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
-        model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
-        return tokenizer, model
+        try:
+            # Fallback: Load model directly
+            tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+            model = AutoModelForCausalLM.from_pretrained(
+                "unsloth/DeepSeek-V3",
+                trust_remote_code=True
+            )
+            model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+            return tokenizer, model
+        except Exception as e:
+            st.error(f"Direct model loading failed for Analyst: {str(e)}")
+            raise
 
 # Load models
-engineer_model = load_model_engineer()
-analyst_model = load_model_analyst()
+try:
+    engineer_model = load_model_engineer()
+    analyst_model = load_model_analyst()
+except Exception as load_error:
+    st.stop()
 
 ##############################################################################
 # ENGINEER / ANALYST GENERATION
@@ -58,22 +83,26 @@ def generate_response(prompt, model, max_sentences=2):
     """
     Generate a concise response based on the provided prompt.
     """
-    if USE_PIPELINE:
-        outputs = model(prompt, max_new_tokens=50, temperature=0.6, top_p=0.8)
-        response = outputs[0]["generated_text"].strip()
-    else:
-        tokenizer, model = model
-        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-        outputs = model.generate(
-            inputs["input_ids"],
-            max_new_tokens=50,
-            temperature=0.6,
-            top_p=0.8,
-            pad_token_id=tokenizer.pad_token_id
-        )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
-    # Limit to max_sentences by splitting and rejoining
-    return " ".join(response.split(".")[:max_sentences]) + "."
+    try:
+        if USE_PIPELINE:
+            outputs = model(prompt, max_new_tokens=50, temperature=0.6, top_p=0.8)
+            response = outputs[0]["generated_text"].strip()
+        else:
+            tokenizer, model = model
+            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+            outputs = model.generate(
+                inputs["input_ids"],
+                max_new_tokens=50,
+                temperature=0.6,
+                top_p=0.8,
+                pad_token_id=tokenizer.pad_token_id
+            )
+            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+        # Limit to max_sentences by splitting and rejoining
+        return " ".join(response.split(".")[:max_sentences]) + "."
+    except Exception as gen_error:
+        st.error(f"Error during response generation: {str(gen_error)}")
+        return "[Error generating response]"
 
 def summarize_conversation(conversation):
     """