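"""Gradio front end for DART-LLM.

Provides a chat interface backed by GradioLlmInterface, optional voice input
transcribed with OpenAI Whisper, and a task-dependency (DAG) visualization and
editing panel for generated task plans.
"""
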
import gradio as gr
from loguru import logger
from gradio_llm_interface import GradioLlmInterface
from config import GRADIO_MESSAGE_MODES, MODE_CONFIG
import openai
import os
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
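# The API key is expected to come from the environment, e.g. a local .env file
# (illustrative contents, not checked into the repo):
#   OPENAI_API_KEY=sk-...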


# Speech-to-text function using OpenAI Whisper
def audio_to_text(audio):
    if audio is None:
        return "No audio file provided."

    try:
        # Get OpenAI API key from environment variable
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            return "Error: OpenAI API key not found. Please set OPENAI_API_KEY environment variable."

        # Initialize OpenAI client
        client = openai.OpenAI(api_key=openai_api_key)

        # Open and transcribe the audio file
        with open(audio, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )

        return transcript.text
    except FileNotFoundError:
        return "Error: Audio file not found."
    except openai.AuthenticationError:
        return "Error: Invalid OpenAI API key."
    except openai.RateLimitError:
        return "Error: OpenAI API rate limit exceeded."
    except Exception as e:
        logger.error(f"Speech-to-text error: {str(e)}")
        return f"Error during speech recognition: {str(e)}"


def main():
    gradio_ros_interface = GradioLlmInterface()

    title_markdown = ("""
# πŸŒ‹ DART-LLM: Dependency-Aware Multi-Robot Task Decomposition and Execution using Large Language Models
[[Project Page](https://wyd0817.github.io/project-dart-llm/)] [[Code](https://github.com/wyd0817/gradio_gpt_interface)] [[Model](https://artificialanalysis.ai/)] | πŸ“š [[RoboQA](https://www.overleaf.com/project/6614a987ae2994cae02efcb2)]
""")
    with gr.Blocks(css="""
        #text-input, #audio-input {
            height: 100px; /* Unified height */
            max-height: 100px;
            width: 100%; /* Full container width */
            margin: 0;
        }
        .input-container {
            display: flex; /* Flex layout */
            gap: 10px; /* Spacing */
            align-items: center; /* Vertical alignment */
        }
        #voice-input-container {
            display: flex;
            align-items: center;
            gap: 15px;
            margin: 15px 0;
            padding: 15px;
            background: linear-gradient(135deg, #ffeef8 0%, #fff5f5 100%);
            border-radius: 20px;
            border: 1px solid #ffe4e6;
        }
        #voice-btn {
            width: 50px !important;
            height: 50px !important;
            border-radius: 50% !important;
            font-size: 20px !important;
            background: linear-gradient(135deg, #ff6b9d 0%, #c44569 100%) !important;
            color: white !important;
            border: none !important;
            box-shadow: 0 4px 15px rgba(255, 107, 157, 0.3) !important;
            transition: all 0.3s ease !important;
        }
        #voice-btn:hover {
            transform: scale(1.05) !important;
            box-shadow: 0 6px 20px rgba(255, 107, 157, 0.4) !important;
        }
        #voice-btn:active {
            transform: scale(0.95) !important;
        }
        .voice-recording {
            background: linear-gradient(135deg, #ff4757 0%, #ff3742 100%) !important;
            animation: pulse 1.5s infinite !important;
        }
        @keyframes pulse {
            0% { box-shadow: 0 4px 15px rgba(255, 71, 87, 0.3); }
            50% { box-shadow: 0 4px 25px rgba(255, 71, 87, 0.6); }
            100% { box-shadow: 0 4px 15px rgba(255, 71, 87, 0.3); }
        }
        #voice-status {
            color: #ff6b9d;
            font-size: 14px;
            font-weight: 500;
            text-align: center;
            margin-top: 10px;
        }
        /* Enhanced layout for left-right split */
        .gradio-container .gradio-row {
            gap: 20px; /* Add spacing between columns */
        }
        .gradio-column {
            padding: 10px;
            border-radius: 8px;
            background-color: var(--panel-background-fill);
        }
        /* Chat interface styling */
        .chat-column {
            border: 1px solid var(--border-color-primary);
        }
        /* DAG visualization column styling */
        .dag-column {
            border: 1px solid var(--border-color-primary);
        }
    """) as demo:
        gr.Markdown(title_markdown)

        mode_choices = [MODE_CONFIG[mode]["display_name"] for mode in GRADIO_MESSAGE_MODES]
        mode_selector = gr.Radio(choices=mode_choices, label="Backend model", value=mode_choices[0])
        clear_button = gr.Button("Clear Chat")

        logger.info("Starting Gradio GPT Interface...")
        initial_mode = GRADIO_MESSAGE_MODES[0]

        def update_mode(selected_mode, state):
            mode_key = [key for key, value in MODE_CONFIG.items() if value["display_name"] == selected_mode][0]
            return gradio_ros_interface.update_chatbot(mode_key, state)

        # Main content area with left-right layout
        with gr.Row():
            # Left column: Chat interface
            with gr.Column(scale=1, elem_classes=["chat-column"]):
                gr.Markdown("### πŸ€– DART-LLM Chat Interface")

                # Create chatbot component in the left column
                chatbot_container = gr.Chatbot(label="DART-LLM", type="messages")

                # Initialize the interface and get state data
                state_data = gradio_ros_interface.initialize_interface(initial_mode)
                state = gr.State(state_data)

                # Add input area in the left column
                with gr.Row(elem_id="input-container"):
                    txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", elem_id="text-input", container=False)

                with gr.Row(elem_id="voice-input-container"):
                    with gr.Column(scale=4):
                        # Hidden audio component
                        audio_input = gr.Audio(
                            sources=["microphone"],
                            type="filepath",
                            elem_id="audio-input",
                            show_label=False,
                            interactive=True,
                            streaming=False,
                            visible=False
                        )
                        # Voice input status display
                        voice_status = gr.Markdown("", elem_id="voice-status", visible=False)

                    with gr.Column(scale=1, min_width=80):
                        # Main voice button
                        voice_btn = gr.Button(
                            "πŸŽ™οΈ",
                            elem_id="voice-btn",
                            variant="secondary",
                            size="sm",
                            scale=1
                        )

                # Example prompts in the left column
                gr.Examples(
                    examples=[
                        "Dump truck 1 goes to the puddle for inspection, after which all robots avoid the puddle",
                        "Send Excavator 1 and Dump Truck 1 to the soil area; Excavator 1 will excavate and unload, followed by Dump Truck 1 proceeding to the puddle for unloading."
                    ],
                    inputs=txt
                )

            # Right column: DAG visualization and controls
            with gr.Column(scale=1, elem_classes=["dag-column"]):
                gr.Markdown("### πŸ“Š Task Dependency Visualization")

                # DAG visualization display
                dag_image = gr.Image(label="Task Dependency Graph", visible=True, height=600)

                # Task plan editing section
                task_editor = gr.Code(
                    label="Task Plan JSON Editor",
                    language="json",
                    visible=False,
                    lines=15,
                    interactive=True
                )

                # Control buttons section
                with gr.Row():
                    with gr.Column(scale=2):
                        deployment_status = gr.Markdown("", visible=True)
                    with gr.Column(scale=1):
                        with gr.Row():
                            edit_task_btn = gr.Button(
                                "πŸ“ Edit Task Plan",
                                variant="secondary",
                                visible=False,
                                size="sm"
                            )
                            update_dag_btn = gr.Button(
                                "πŸ”„ Update DAG Visualization",
                                variant="secondary",
                                visible=False,
                                size="sm"
                            )
                            validate_deploy_btn = gr.Button(
                                "πŸ”’ Validate & Deploy Task Plan",
                                variant="primary",
                                visible=False,
                                size="sm"
                            )
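
        # The task-plan controls above start hidden (visible=False) and are toggled
        # via gr.update(...) by the event handlers registered below.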

        mode_selector.change(update_mode, inputs=[mode_selector, state], outputs=[chatbot_container, state])
        clear_button.click(gradio_ros_interface.clear_chat, inputs=[state], outputs=[chatbot_container])

        # Handle text input submission
        async def handle_text_submit(text, state):
            messages, state, dag_image_path, validate_btn_update = await gradio_ros_interface.predict(text, state)
            # Show edit button when task plan is generated
            edit_btn_visible = validate_btn_update.get('visible', False)
            return (
                "",  # Clear the text input after submission
                messages,
                state,
                dag_image_path,
                validate_btn_update,
                gr.update(visible=edit_btn_visible)  # Show edit button
            )

        txt.submit(handle_text_submit, [txt, state], [txt, chatbot_container, state, dag_image, validate_deploy_btn, edit_task_btn])
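        # The 6-tuple returned by handle_text_submit maps positionally onto the
        # outputs list above: txt, chatbot_container, state, dag_image,
        # validate_deploy_btn, edit_task_btn.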

        # Voice input state management
        voice_recording = gr.State(False)

        # Voice button click handler
        def handle_voice_input(audio, is_recording):
            logger.info(f"Voice button clicked, current recording state: {is_recording}")

            if not is_recording:
                # Start recording state
                logger.info("Starting recording...")
                return (
                    gr.update(value="πŸ”΄", elem_classes=["voice-recording"]),  # Change button style
                    "πŸ’¬ Recording in progress...",  # Status message
                    gr.update(visible=True),  # Show status
                    gr.update(visible=True),  # Show audio component
                    True,  # Update recording state
                    ""  # Clear text box
                )
            else:
                # Stop recording and transcribe
                logger.info("Stopping recording, starting transcription...")
                if audio is not None and audio != "":
                    try:
                        text = audio_to_text(audio)
                        logger.info(f"Transcription completed: {text}")
                        return (
                            gr.update(value="πŸŽ™οΈ", elem_classes=[]),  # Restore button style
                            "✨ Transcription completed!",  # Success message
                            gr.update(visible=True),  # Show status
                            gr.update(visible=False),  # Hide audio component
                            False,  # Reset recording state
                            text  # Fill in transcribed text
                        )
                    except Exception as e:
                        logger.error(f"Transcription error: {e}")
                        return (
                            gr.update(value="πŸŽ™οΈ", elem_classes=[]),  # Restore button style
                            f"❌ Transcription failed: {str(e)}",
                            gr.update(visible=True),
                            gr.update(visible=False),
                            False,
                            ""
                        )
                else:
                    logger.warning("No audio detected")
                    return (
                        gr.update(value="πŸŽ™οΈ", elem_classes=[]),  # Restore button style
                        "⚠️ No audio detected, please record again",
                        gr.update(visible=True),
                        gr.update(visible=False),
                        False,
                        ""
                    )
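
        # Each branch above returns a 6-tuple that maps positionally onto the
        # outputs of voice_btn.click below; voice_status appears twice there
        # because it receives both the message text and a visibility update.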

        # Voice button event handling
        voice_btn.click(
            handle_voice_input,
            inputs=[audio_input, voice_recording],
            outputs=[voice_btn, voice_status, voice_status, audio_input, voice_recording, txt]
        )

        # Audio state change listener - automatic prompt
        def on_audio_change(audio):
            if audio is not None:
                logger.info("Audio file detected")
                return "🎡 Audio detected, you can click the button to complete transcription"
            return ""

        audio_input.change(
            on_audio_change,
            inputs=[audio_input],
            outputs=[voice_status]
        )

        # Handle task plan editing
        edit_task_btn.click(
            gradio_ros_interface.show_task_plan_editor,
            inputs=[state],
            outputs=[task_editor, update_dag_btn, validate_deploy_btn, deployment_status]
        )

        # Handle DAG update from editor
        update_dag_btn.click(
            gradio_ros_interface.update_dag_from_editor,
            inputs=[task_editor, state],
            outputs=[dag_image, validate_deploy_btn, task_editor, update_dag_btn, deployment_status, state]
        )

        # Handle validation and deployment
        validate_deploy_btn.click(
            gradio_ros_interface.validate_and_deploy_task_plan,
            inputs=[state],
            outputs=[deployment_status, dag_image, validate_deploy_btn, state]
        )

    demo.launch(server_port=8080, share=True)

if __name__ == "__main__":
    main()