6Genix committed
Commit 10965a9 · 1 Parent(s): eb857d5

Removed Hugging Face pipeline dependency and implemented direct model loading for DeepSeek-V3 using AutoTokenizer and AutoModelForCausalLM. Improved fallback robustness and error handling for model operations.
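One practical caveat with the direct-loading path (an aside, not part of this commit's diff): decoder-only tokenizers frequently ship without a pad token, so a generate() call that passes pad_token_id=tokenizer.pad_token_id can end up passing None. A minimal guard, assuming the same model id used in the diff below, might look like this:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
# Illustrative guard (not in the diff): reuse EOS as the pad token when none is
# defined, so generate(..., pad_token_id=tokenizer.pad_token_id) gets a real id.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token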

Files changed (1)
app.py +36 -71
app.py CHANGED
@@ -1,12 +1,7 @@
 import streamlit as st
 
-try:
-    from transformers import pipeline
-    USE_PIPELINE = True
-except ImportError:
-    from transformers import AutoTokenizer, AutoModelForCausalLM
-    import torch
-    USE_PIPELINE = False
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
 
 ##############################################################################
 # LOAD MODELS
@@ -14,59 +9,33 @@ except ImportError:
 
 @st.cache_resource
 def load_model_engineer():
-    if USE_PIPELINE:
-        try:
-            # Engineer: DeepSeek-V3 via pipeline
-            engineer_pipeline = pipeline(
-                "text-generation",
-                model="unsloth/DeepSeek-V3",
-                trust_remote_code=True
-            )
-            return engineer_pipeline
-        except Exception as e:
-            st.error(f"Pipeline failed to load for Engineer: {str(e)}")
-            raise
-    else:
-        try:
-            # Fallback: Load model directly with fp16 precision
-            tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
-            model = AutoModelForCausalLM.from_pretrained(
-                "unsloth/DeepSeek-V3",
-                trust_remote_code=True
-            )
-            model = model.half().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
-            return tokenizer, model
-        except Exception as e:
-            st.error(f"Direct model loading failed for Engineer: {str(e)}")
-            raise
+    try:
+        # Engineer: DeepSeek-V3 loaded directly
+        tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            "unsloth/DeepSeek-V3",
+            trust_remote_code=True
+        )
+        model = model.half().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+        return tokenizer, model
+    except Exception as e:
+        st.error(f"Direct model loading failed for Engineer: {str(e)}")
+        raise
 
 @st.cache_resource
 def load_model_analyst():
-    if USE_PIPELINE:
-        try:
-            # Analyst: DeepSeek-V3 via pipeline
-            analyst_pipeline = pipeline(
-                "text-generation",
-                model="unsloth/DeepSeek-V3",
-                trust_remote_code=True
-            )
-            return analyst_pipeline
-        except Exception as e:
-            st.error(f"Pipeline failed to load for Analyst: {str(e)}")
-            raise
-    else:
-        try:
-            # Fallback: Load model directly with fp16 precision
-            tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
-            model = AutoModelForCausalLM.from_pretrained(
-                "unsloth/DeepSeek-V3",
-                trust_remote_code=True
-            )
-            model = model.half().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
-            return tokenizer, model
-        except Exception as e:
-            st.error(f"Direct model loading failed for Analyst: {str(e)}")
-            raise
+    try:
+        # Analyst: DeepSeek-V3 loaded directly
+        tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-V3", trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            "unsloth/DeepSeek-V3",
+            trust_remote_code=True
+        )
+        model = model.half().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+        return tokenizer, model
+    except Exception as e:
+        st.error(f"Direct model loading failed for Analyst: {str(e)}")
+        raise
 
 # Load models
 try:
@@ -84,20 +53,16 @@ def generate_response(prompt, model, max_sentences=2):
     Generate a concise response based on the provided prompt.
     """
     try:
-        if USE_PIPELINE:
-            outputs = model(prompt, max_new_tokens=50, temperature=0.6, top_p=0.8)
-            response = outputs[0]["generated_text"].strip()
-        else:
-            tokenizer, model = model
-            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-            outputs = model.generate(
-                inputs["input_ids"],
-                max_new_tokens=50,
-                temperature=0.6,
-                top_p=0.8,
-                pad_token_id=tokenizer.pad_token_id
-            )
-            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+        tokenizer, model = model
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        outputs = model.generate(
+            inputs["input_ids"],
+            max_new_tokens=50,
+            temperature=0.6,
+            top_p=0.8,
+            pad_token_id=tokenizer.pad_token_id
+        )
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
         # Limit to max_sentences by splitting and rejoining
        return " ".join(response.split(".")[:max_sentences]) + "."
     except Exception as gen_error:
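
For orientation, a minimal sketch of how the refactored pieces fit together after this commit; the variable names and prompt strings are illustrative, and only load_model_engineer, load_model_analyst, and generate_response come from app.py above:

import streamlit as st

engineer = load_model_engineer()   # now a (tokenizer, model) tuple, cached by Streamlit
analyst = load_model_analyst()     # likewise a (tokenizer, model) tuple

# generate_response unpacks the tuple, runs model.generate, and trims the
# output to max_sentences; the prompts here are placeholders.
idea = generate_response("Propose a design approach.", engineer)
feedback = generate_response(f"Critique this proposal: {idea}", analyst)

st.write(idea)
st.write(feedback)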