import json
import subprocess

import gradio as gr
from openai import OpenAI

# Start the local llama.cpp server in the background before the UI comes up.
# List form runs without a shell; the command contains no user input.
subprocess.Popen(["bash", "/home/user/app/start.sh"])

# OpenAI-compatible client pointed at the local llama.cpp server.
client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="sk-local", timeout=600)

# Tool schemas advertised to the model (OpenAI function-calling format).
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "browser_search",
            "description": (
                "Search the web for a given query and return the most relevant results."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The search query string.",
                    },
                    "max_results": {
                        "type": "integer",
                        "description": (
                            "Maximum number of search results to return. "
                            "If omitted the service will use its default."
                        ),
                        "default": 5,
                    },
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "code_interpreter",
            "description": (
                "Execute Python code and return the results. "
                "Can generate plots, perform calculations, and data analysis."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "The Python code to execute.",
                    },
                },
                "required": ["code"],
            },
        },
    },
]


def handle_function_call(function_name, arguments):
    """Dispatch a tool call requested by the model and return its text result.

    Args:
        function_name: Name of the tool the model asked to invoke.
        arguments: Decoded JSON arguments for the call.

    Returns:
        A human-readable result string (placeholder until the tools are
        actually implemented).
    """
    if function_name == "browser_search":
        # Implement your browser search logic here
        query = arguments.get("query", "")
        max_results = arguments.get("max_results", 5)
        return f"Search results for '{query}' (max {max_results} results): [Implementation needed]"
    if function_name == "code_interpreter":
        # Implement your code interpreter logic here
        code = arguments.get("code", "")
        if not code:
            return "No code provided to execute."
        return f"Code interpreter results for '{code}': [Implementation needed]"
    return f"Unknown function: {function_name}"


def respond(
    message,
    history: list[tuple[str, str]] | None = None,
    system_message=None,
    max_tokens=None,
    temperature=0.7,
):
    """Stream a chat completion for *message* given the Gradio chat history.

    Args:
        message: The latest user message.
        history: Prior (user, assistant) turn pairs from gr.ChatInterface.
            Defaults to None (treated as empty) — avoids a mutable default.
        system_message: Optional system prompt prepended to the conversation.
        max_tokens: Completion token cap forwarded to the server (None = server default).
        temperature: Sampling temperature.

    Yields:
        Progressively longer markdown strings: italicized reasoning, a
        newline, then the assistant output, followed by any tool results.
    """
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    for user, assistant in history or []:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    try:
        stream = client.chat.completions.create(
            model="Deepseek-R1-0528-Qwen3-8B",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True,
            tools=TOOLS,
        )
        print("messages", messages)

        output = ""
        reasoning = ""
        # Streamed tool calls arrive as fragments: the first delta for a call
        # carries the name, later deltas append pieces of the JSON arguments
        # string. Parsing each fragment with json.loads (as the naive approach
        # does) raises JSONDecodeError on partial JSON — so accumulate per
        # tool-call index and decode only once the stream has finished.
        pending_calls: dict[int, dict[str, str]] = {}

        for chunk in stream:
            delta = chunk.choices[0].delta
            if getattr(delta, "tool_calls", None):
                for tool_call in delta.tool_calls:
                    if tool_call.function is None:
                        continue
                    entry = pending_calls.setdefault(
                        getattr(tool_call, "index", 0) or 0,
                        {"name": "", "arguments": ""},
                    )
                    if tool_call.function.name:
                        entry["name"] = tool_call.function.name
                    if tool_call.function.arguments:
                        entry["arguments"] += tool_call.function.arguments
            if getattr(delta, "reasoning_content", None):
                reasoning += delta.reasoning_content
            elif getattr(delta, "content", None):
                output += delta.content
            yield f"*{reasoning}*\n{output}"

        # Execute any completed tool calls and append their results,
        # keeping the same reasoning-prefixed format as the streamed updates.
        for entry in pending_calls.values():
            try:
                arguments = json.loads(entry["arguments"]) if entry["arguments"] else {}
            except json.JSONDecodeError:
                arguments = {}
            func_result = handle_function_call(entry["name"], arguments)
            output += f"\n\n**Function Result ({entry['name']}):**\n{func_result}"
            yield f"*{reasoning}*\n{output}"
    except Exception as e:
        # Top-level boundary: surface a friendly message instead of a traceback.
        print(f"[Error] {e}")
        yield "⚠️ Llama.cpp server error"


demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch(show_api=False)