6Genix committed
Commit 9d15a33 · Parent: 57a3eae

Added a fallback mechanism that switches from the pipeline API to direct model loading for compatibility, ensuring robust handling in environments where the Transformers library lacks pipeline support.
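The mechanism is an import-time feature check: if pipeline can be imported, the app uses it; otherwise it falls back to loading the tokenizer and model directly. Below is a minimal standalone sketch of the same pattern; the model name sshleifer/tiny-gpt2 is a small stand-in chosen so the sketch runs quickly, not the checkpoint the app uses (the app targets unsloth/DeepSeek-V3).

# Sketch of the commit's fallback pattern; model name is a small
# stand-in for quick testing, not the checkpoint the app uses.
MODEL_NAME = "sshleifer/tiny-gpt2"

try:
    from transformers import pipeline
    USE_PIPELINE = True
except ImportError:
    from transformers import AutoTokenizer, AutoModelForCausalLM
    import torch
    USE_PIPELINE = False

def load_generator():
    if USE_PIPELINE:
        # Preferred path: one pipeline object covers tokenization and generation
        return pipeline("text-generation", model=MODEL_NAME)
    # Fallback path: load the parts directly and pair them in a tuple
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    return tokenizer, model

def generate(generator, prompt, max_new_tokens=20):
    if USE_PIPELINE:
        return generator(prompt, max_new_tokens=max_new_tokens)[0]["generated_text"]
    tokenizer, model = generator
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(
        inputs["input_ids"],
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(generate(load_generator(), "Hello"))

Returning either a pipeline or a (tokenizer, model) tuple lets every call site sit behind one generate-style signature, which is the shape the diff below adopts in app.py.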

Files changed (1):
app.py +57 -23
app.py CHANGED
@@ -1,6 +1,12 @@
 import streamlit as st
 
-from transformers import pipeline
+try:
+    from transformers import pipeline
+    USE_PIPELINE = True
+except ImportError:
+    from transformers import AutoTokenizer, AutoModelForCausalLM
+    import torch
+    USE_PIPELINE = False
 
 ##############################################################################
 # LOAD MODELS
@@ -8,38 +14,64 @@ from transformers import pipeline
 
 @st.cache_resource
 def load_model_engineer():
-    # Engineer: DeepSeek-V5 via pipeline
-    engineer_pipeline = pipeline(
-        "text-generation",
-        model="unsloth/DeepSeek-V3",
-        trust_remote_code=True
-    )
-    return engineer_pipeline
+    if USE_PIPELINE:
+        # Engineer: DeepSeek-V3 via pipeline
+        engineer_pipeline = pipeline(
+            "text-generation",
+            model="unsloth/DeepSeek-V3",
+            trust_remote_code=True
+        )
+        return engineer_pipeline
+    else:
+        # Fallback: load tokenizer and model directly
+        tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+        model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+        return tokenizer, model
 
 @st.cache_resource
 def load_model_analyst():
-    # Analyst: DeepSeek-V5 via pipeline
-    analyst_pipeline = pipeline(
-        "text-generation",
-        model="unsloth/DeepSeek-V3",
-        trust_remote_code=True
-    )
-    return analyst_pipeline
+    if USE_PIPELINE:
+        # Analyst: DeepSeek-V3 via pipeline
+        analyst_pipeline = pipeline(
+            "text-generation",
+            model="unsloth/DeepSeek-V3",
+            trust_remote_code=True
+        )
+        return analyst_pipeline
+    else:
+        # Fallback: load tokenizer and model directly
+        tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+        model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+        return tokenizer, model
 
 # Load models
-engineer_pipeline = load_model_engineer()
-analyst_pipeline = load_model_analyst()
+engineer_model = load_model_engineer()
+analyst_model = load_model_analyst()
 
 ##############################################################################
 # ENGINEER / ANALYST GENERATION
 ##############################################################################
 
-def generate_response(prompt, pipeline_model, max_sentences=2):
+def generate_response(prompt, model, max_sentences=2):
     """
     Generate a concise response based on the provided prompt.
     """
-    outputs = pipeline_model(prompt, max_new_tokens=50, temperature=0.6, top_p=0.8)
-    response = outputs[0]["generated_text"].strip()
+    if USE_PIPELINE:
+        outputs = model(prompt, max_new_tokens=50, temperature=0.6, top_p=0.8, do_sample=True)
+        response = outputs[0]["generated_text"].strip()
+    else:
+        tokenizer, model = model
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        outputs = model.generate(
+            inputs["input_ids"],
+            max_new_tokens=50,
+            temperature=0.6,
+            top_p=0.8, do_sample=True,
+            pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id
+        )
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
     # Limit to max_sentences by splitting and rejoining
     return " ".join(response.split(".")[:max_sentences]) + "."
 
@@ -83,7 +115,7 @@ if st.button("Generate Responses"):
     with st.spinner(f"Engineer is formulating response {turn + 1}..."):
         engineer_resp = generate_response(
             prompt=engineer_prompt_base,
-            pipeline_model=engineer_pipeline
+            model=engineer_model
         )
         st.session_state.conversation.append(("Engineer", engineer_resp))
 
@@ -94,7 +126,7 @@ if st.button("Generate Responses"):
     with st.spinner(f"Analyst is formulating response {turn + 1}..."):
         analyst_resp = generate_response(
             prompt=f"Engineer suggested: {engineer_resp}. {analyst_prompt_base}",
-            pipeline_model=analyst_pipeline
+            model=analyst_model
         )
         st.session_state.conversation.append(("Analyst", analyst_resp))
 
@@ -103,4 +135,6 @@ if st.button("Generate Responses"):
 
     # Summarize the final plan
     with st.spinner("Generating the final plan..."):
-        final_plan = summarize_conversation
+        final_plan = summarize_conversation(st.session_state.conversation)
+        st.session_state.conversation.append(("Summary", final_plan))
+        st.markdown(final_plan)
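The last hunk also fixes a genuine bug: the old line bound the function object summarize_conversation to final_plan instead of calling it. The function itself is defined elsewhere in app.py and is not part of this diff; purely as an assumption about its shape, a minimal version compatible with the new call site might look like the following.

# Hypothetical sketch only; the real summarize_conversation is outside this diff.
def summarize_conversation(conversation):
    # conversation is the list of (speaker, text) tuples built in st.session_state
    lines = [f"{speaker}: {text}" for speaker, text in conversation]
    return "Final plan:\n" + "\n".join(lines)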