import os
import threading
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import gradio as gr
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn

# =======================
# Load Secrets
# =======================
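# The real system prompt is expected to be injected by the hosting environment
# (e.g. as a Space secret exposed through the "prompt" env var); the fallback
# below is only used when that variable is absent.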
SYSTEM_PROMPT = os.environ.get(
    "prompt",
    "You are a placeholder Sovereign. No secrets found in environment."
)

# =======================
# Model Initialization
# =======================
MODEL_ID = "tiiuae/Falcon3-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Attempt 4-bit quantization; fall back to full precision if bitsandbytes is unavailable
try:
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
        ),
        device_map="auto",
        torch_dtype=torch.float16,
        trust_remote_code=True
    )
except ImportError:
    print("bitsandbytes not found; loading full model without quantization.")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        torch_dtype=torch.float16,
        trust_remote_code=True
    )

# Create optimized text-generation pipeline
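# return_full_text=False makes the pipeline return only the newly generated tokens,
# so the prompt itself is never echoed back in the response.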
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    return_full_text=False,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.8,
    top_p=0.9,
    eos_token_id=tokenizer.eos_token_id
)

# =======================
# Core Chat Function
# =======================
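# Note: the prompt below uses a generic "### System / ### User / ### Assistant" layout
# rather than the model's built-in chat template; tokenizer.apply_chat_template(...)
# could be used instead if closer alignment with Falcon3's instruction format is desired.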
def chat_fn(user_input: str) -> str:
    prompt = f"### System:\n{SYSTEM_PROMPT}\n\n### User:\n{user_input}\n\n### Assistant:"
    output = pipe(prompt)[0]["generated_text"].strip()
    return output

# =======================
# Gradio UI
# =======================
def gradio_chat(user_input: str) -> str:
    return chat_fn(user_input)

iface = gr.Interface(
    fn=gradio_chat,
    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt…"),
    outputs="text",
    title="Prompt Cracking Challenge",
    description="Does he really think he is the king?"
)

# =======================
# FastAPI for API access
# =======================
app = FastAPI(title="Prompt Cracking Challenge API")

class Request(BaseModel):
    prompt: str

@app.post("/generate")
def generate(req: Request):
    return {"response": chat_fn(req.prompt)}
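
# Example request against the API (assuming the default API_PORT of 8000):
#   curl -X POST http://localhost:8000/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello"}'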

# =======================
# Launch Both Servers
# =======================
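# The FastAPI app runs in a background daemon thread on API_PORT (default 8000),
# while Gradio serves the UI on port 7860; both share the same in-process model.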
def run_api():
    port = int(os.environ.get("API_PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)

if __name__ == "__main__":
    # Start FastAPI in background thread
    threading.Thread(target=run_api, daemon=True).start()
    # Launch Gradio interface
    iface.launch(server_name="0.0.0.0", server_port=7860)