Added a fallback mechanism to switch from the pipeline API to direct model loading for compatibility. This ensures robust handling of environments where pipeline support is not available in the Transformers library.
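For reference, the pattern this commit introduces is sketched below in isolation: try the high-level transformers pipeline first and, if that import fails, fall back to loading the tokenizer and model directly. This is a minimal illustration only; the checkpoint name ("sshleifer/tiny-gpt2") is a small placeholder rather than the unsloth/DeepSeek-V3 model the Space uses, and the helper names are hypothetical.

# Minimal sketch of the pipeline -> direct-loading fallback (illustrative only).
try:
    from transformers import pipeline
    USE_PIPELINE = True
except ImportError:
    from transformers import AutoTokenizer, AutoModelForCausalLM
    import torch
    USE_PIPELINE = False

MODEL_NAME = "sshleifer/tiny-gpt2"  # placeholder checkpoint, not the Space's model

def load_generator():
    if USE_PIPELINE:
        # Preferred path: a ready-made text-generation pipeline.
        return pipeline("text-generation", model=MODEL_NAME)
    # Fallback path: load tokenizer and model directly and pick a device.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    model.to("cuda" if torch.cuda.is_available() else "cpu")
    return tokenizer, model

def generate(generator, prompt, max_new_tokens=30):
    if USE_PIPELINE:
        return generator(prompt, max_new_tokens=max_new_tokens)[0]["generated_text"]
    tokenizer, model = generator
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(inputs["input_ids"], max_new_tokens=max_new_tokens)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)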
app.py
CHANGED
@@ -1,6 +1,12 @@
 import streamlit as st
 
-from transformers import pipeline
+try:
+    from transformers import pipeline
+    USE_PIPELINE = True
+except ImportError:
+    from transformers import AutoTokenizer, AutoModelForCausalLM
+    import torch
+    USE_PIPELINE = False
 
 ##############################################################################
 # LOAD MODELS
@@ -8,38 +14,64 @@ from transformers import pipeline
 
 @st.cache_resource
 def load_model_engineer():
-
-
-
-
-
-
-
+    if USE_PIPELINE:
+        # Engineer: DeepSeek-V3 via pipeline
+        engineer_pipeline = pipeline(
+            "text-generation",
+            model="unsloth/DeepSeek-V3",
+            trust_remote_code=True
+        )
+        return engineer_pipeline
+    else:
+        # Fallback: Load model directly
+        tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+        model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+        return tokenizer, model
 
 @st.cache_resource
 def load_model_analyst():
-
-
-
-
-
-
-
+    if USE_PIPELINE:
+        # Analyst: DeepSeek-V3 via pipeline
+        analyst_pipeline = pipeline(
+            "text-generation",
+            model="unsloth/DeepSeek-V3",
+            trust_remote_code=True
+        )
+        return analyst_pipeline
+    else:
+        # Fallback: Load model directly
+        tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+        model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+        return tokenizer, model
 
 # Load models
-
-
+engineer_model = load_model_engineer()
+analyst_model = load_model_analyst()
 
 ##############################################################################
 # ENGINEER / ANALYST GENERATION
 ##############################################################################
 
-def generate_response(prompt,
+def generate_response(prompt, model, max_sentences=2):
     """
     Generate a concise response based on the provided prompt.
     """
-
-
+    if USE_PIPELINE:
+        outputs = model(prompt, max_new_tokens=50, temperature=0.6, top_p=0.8)
+        response = outputs[0]["generated_text"].strip()
+    else:
+        tokenizer, model = model
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        outputs = model.generate(
+            inputs["input_ids"],
+            max_new_tokens=50,
+            temperature=0.6,
+            top_p=0.8,
+            pad_token_id=tokenizer.pad_token_id
+        )
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
     # Limit to max_sentences by splitting and rejoining
     return " ".join(response.split(".")[:max_sentences]) + "."
 
@@ -83,7 +115,7 @@ if st.button("Generate Responses"):
         with st.spinner(f"Engineer is formulating response {turn + 1}..."):
             engineer_resp = generate_response(
                 prompt=engineer_prompt_base,
-
+                model=engineer_model
             )
             st.session_state.conversation.append(("Engineer", engineer_resp))
 
@@ -94,7 +126,7 @@ if st.button("Generate Responses"):
         with st.spinner(f"Analyst is formulating response {turn + 1}..."):
            analyst_resp = generate_response(
                 prompt=f"Engineer suggested: {engineer_resp}. {analyst_prompt_base}",
-
+                model=analyst_model
            )
            st.session_state.conversation.append(("Analyst", analyst_resp))
 
@@ -103,4 +135,6 @@ if st.button("Generate Responses"):
 
     # Summarize the final plan
     with st.spinner("Generating the final plan..."):
-        final_plan = summarize_conversation
+        final_plan = summarize_conversation(st.session_state.conversation)
+        st.session_state.conversation.append(("Summary", final_plan))
+        st.markdown(final_plan)