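"""Gradio front end for DART-LLM.

Provides a chat interface backed by GradioLlmInterface, optional voice input
transcribed with OpenAI Whisper, and a task-dependency (DAG) visualization and
editing panel for generated task plans.
"""
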
import gradio as gr
from loguru import logger
from gradio_llm_interface import GradioLlmInterface
from config import GRADIO_MESSAGE_MODES, MODE_CONFIG
import openai
import os
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
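# The API key is expected to come from the environment, e.g. a local .env file
# (illustrative contents, not checked into the repo):
#   OPENAI_API_KEY=sk-...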


# Speech-to-text function using OpenAI Whisper
def audio_to_text(audio):
    if audio is None:
        return "No audio file provided."

    try:
        # Get OpenAI API key from environment variable
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            return "Error: OpenAI API key not found. Please set OPENAI_API_KEY environment variable."

        # Initialize OpenAI client
        client = openai.OpenAI(api_key=openai_api_key)

        # Open and transcribe the audio file
        with open(audio, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )

        return transcript.text
    except FileNotFoundError:
        return "Error: Audio file not found."
    except openai.AuthenticationError:
        return "Error: Invalid OpenAI API key."
    except openai.RateLimitError:
        return "Error: OpenAI API rate limit exceeded."
    except Exception as e:
        logger.error(f"Speech-to-text error: {str(e)}")
        return f"Error during speech recognition: {str(e)}"


def main():
    gradio_ros_interface = GradioLlmInterface()

    title_markdown = ("""
# πŸŒ‹ DART-LLM: Dependency-Aware Multi-Robot Task Decomposition and Execution using Large Language Models
[[Project Page](https://wyd0817.github.io/project-dart-llm/)] [[Code](https://github.com/wyd0817/gradio_gpt_interface)] [[Model](https://artificialanalysis.ai/)] | πŸ“š [[RoboQA](https://www.overleaf.com/project/6614a987ae2994cae02efcb2)]
""")
    with gr.Blocks(css="""
        #text-input, #audio-input {
            height: 100px; /* Unified height */
            max-height: 100px;
            width: 100%; /* Full container width */
            margin: 0;
        }
        .input-container {
            display: flex; /* Flex layout */
            gap: 10px; /* Spacing */
            align-items: center; /* Vertical alignment */
        }
        #voice-input-container {
            display: flex;
            align-items: center;
            gap: 15px;
            margin: 15px 0;
            padding: 15px;
            background: linear-gradient(135deg, #ffeef8 0%, #fff5f5 100%);
            border-radius: 20px;
            border: 1px solid #ffe4e6;
        }
        #voice-btn {
            width: 50px !important;
            height: 50px !important;
            border-radius: 50% !important;
            font-size: 20px !important;
            background: linear-gradient(135deg, #ff6b9d 0%, #c44569 100%) !important;
            color: white !important;
            border: none !important;
            box-shadow: 0 4px 15px rgba(255, 107, 157, 0.3) !important;
            transition: all 0.3s ease !important;
        }
        #voice-btn:hover {
            transform: scale(1.05) !important;
            box-shadow: 0 6px 20px rgba(255, 107, 157, 0.4) !important;
        }
        #voice-btn:active {
            transform: scale(0.95) !important;
        }
        .voice-recording {
            background: linear-gradient(135deg, #ff4757 0%, #ff3742 100%) !important;
            animation: pulse 1.5s infinite !important;
        }
        @keyframes pulse {
            0% { box-shadow: 0 4px 15px rgba(255, 71, 87, 0.3); }
            50% { box-shadow: 0 4px 25px rgba(255, 71, 87, 0.6); }
            100% { box-shadow: 0 4px 15px rgba(255, 71, 87, 0.3); }
        }
        #voice-status {
            color: #ff6b9d;
            font-size: 14px;
            font-weight: 500;
            text-align: center;
            margin-top: 10px;
        }
        /* Enhanced layout for left-right split */
        .gradio-container .gradio-row {
            gap: 20px; /* Add spacing between columns */
        }
        .gradio-column {
            padding: 10px;
            border-radius: 8px;
            background-color: var(--panel-background-fill);
        }
        /* Chat interface styling */
        .chat-column {
            border: 1px solid var(--border-color-primary);
        }
        /* DAG visualization column styling */
        .dag-column {
            border: 1px solid var(--border-color-primary);
        }
    """) as demo:
        gr.Markdown(title_markdown)

        mode_choices = [MODE_CONFIG[mode]["display_name"] for mode in GRADIO_MESSAGE_MODES]
        mode_selector = gr.Radio(choices=mode_choices, label="Backend model", value=mode_choices[0])
        clear_button = gr.Button("Clear Chat")

        logger.info("Starting Gradio GPT Interface...")
        initial_mode = GRADIO_MESSAGE_MODES[0]

        def update_mode(selected_mode, state):
            mode_key = [key for key, value in MODE_CONFIG.items() if value["display_name"] == selected_mode][0]
            return gradio_ros_interface.update_chatbot(mode_key, state)

        # Main content area with left-right layout
        with gr.Row():
            # Left column: Chat interface
            with gr.Column(scale=1, elem_classes=["chat-column"]):
                gr.Markdown("### πŸ€– DART-LLM Chat Interface")

                # Create chatbot component in the left column
                chatbot_container = gr.Chatbot(label="DART-LLM", type="messages")

                # Initialize the interface and get state data
                state_data = gradio_ros_interface.initialize_interface(initial_mode)
                state = gr.State(state_data)

                # Add input area in the left column
                with gr.Row(elem_id="input-container"):
                    txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", elem_id="text-input", container=False)

                with gr.Row(elem_id="voice-input-container"):
                    with gr.Column(scale=4):
                        # Hidden audio component
                        audio_input = gr.Audio(
                            sources=["microphone"],
                            type="filepath",
                            elem_id="audio-input",
                            show_label=False,
                            interactive=True,
                            streaming=False,
                            visible=False
                        )
                        # Voice input status display
                        voice_status = gr.Markdown("", elem_id="voice-status", visible=False)

                    with gr.Column(scale=1, min_width=80):
                        # Main voice button
                        voice_btn = gr.Button(
                            "πŸŽ™οΈ",
                            elem_id="voice-btn",
                            variant="secondary",
                            size="sm",
                            scale=1
                        )

                # Example prompts in the left column
                gr.Examples(
                    examples=[
                        "Dump truck 1 goes to the puddle for inspection, after which all robots avoid the puddle",
                        "Send Excavator 1 and Dump Truck 1 to the soil area; Excavator 1 will excavate and unload, followed by Dump Truck 1 proceeding to the puddle for unloading."
                    ],
                    inputs=txt
                )

            # Right column: DAG visualization and controls
            with gr.Column(scale=1, elem_classes=["dag-column"]):
                gr.Markdown("### πŸ“Š Task Dependency Visualization")

                # DAG visualization display
                dag_image = gr.Image(label="Task Dependency Graph", visible=True, height=600)

                # Task plan editing section
                task_editor = gr.Code(
                    label="Task Plan JSON Editor",
                    language="json",
                    visible=False,
                    lines=15,
                    interactive=True
                )

                # Control buttons section
                with gr.Row():
                    with gr.Column(scale=2):
                        deployment_status = gr.Markdown("", visible=True)
                    with gr.Column(scale=1):
                        with gr.Row():
                            edit_task_btn = gr.Button(
                                "πŸ“ Edit Task Plan",
                                variant="secondary",
                                visible=False,
                                size="sm"
                            )
                            update_dag_btn = gr.Button(
                                "πŸ”„ Update DAG Visualization",
                                variant="secondary",
                                visible=False,
                                size="sm"
                            )
                            validate_deploy_btn = gr.Button(
                                "πŸ”’ Validate & Deploy Task Plan",
                                variant="primary",
                                visible=False,
                                size="sm"
                            )
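
        # The task-plan controls above start hidden (visible=False) and are toggled
        # via gr.update(...) by the event handlers registered below.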

        mode_selector.change(update_mode, inputs=[mode_selector, state], outputs=[chatbot_container, state])
        clear_button.click(gradio_ros_interface.clear_chat, inputs=[state], outputs=[chatbot_container])

        # Handle text input submission
        async def handle_text_submit(text, state):
            messages, state, dag_image_path, validate_btn_update = await gradio_ros_interface.predict(text, state)
            # Show edit button when task plan is generated
            edit_btn_visible = validate_btn_update.get('visible', False)
            return (
                "",  # Clear the text input after submission
                messages,
                state,
                dag_image_path,
                validate_btn_update,
                gr.update(visible=edit_btn_visible)  # Show edit button
            )

        txt.submit(handle_text_submit, [txt, state], [txt, chatbot_container, state, dag_image, validate_deploy_btn, edit_task_btn])
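        # The 6-tuple returned by handle_text_submit maps positionally onto the
        # outputs list above: txt, chatbot_container, state, dag_image,
        # validate_deploy_btn, edit_task_btn.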

        # Voice input state management
        voice_recording = gr.State(False)

        # Voice button click handler
        def handle_voice_input(audio, is_recording):
            logger.info(f"Voice button clicked, current recording state: {is_recording}")

            if not is_recording:
                # Start recording state
                logger.info("Starting recording...")
                return (
                    gr.update(value="πŸ”΄", elem_classes=["voice-recording"]),  # Change button style
                    "πŸ’¬ Recording in progress...",  # Status message
                    gr.update(visible=True),  # Show status
                    gr.update(visible=True),  # Show audio component
                    True,  # Update recording state
                    ""  # Clear text box
                )
            else:
                # Stop recording and transcribe
                logger.info("Stopping recording, starting transcription...")
                if audio is not None and audio != "":
                    try:
                        text = audio_to_text(audio)
                        logger.info(f"Transcription completed: {text}")
                        return (
                            gr.update(value="πŸŽ™οΈ", elem_classes=[]),  # Restore button style
                            "✨ Transcription completed!",  # Success message
                            gr.update(visible=True),  # Show status
                            gr.update(visible=False),  # Hide audio component
                            False,  # Reset recording state
                            text  # Fill in transcribed text
                        )
                    except Exception as e:
                        logger.error(f"Transcription error: {e}")
                        return (
                            gr.update(value="πŸŽ™οΈ", elem_classes=[]),  # Restore button style
                            f"❌ Transcription failed: {str(e)}",
                            gr.update(visible=True),
                            gr.update(visible=False),
                            False,
                            ""
                        )
                else:
                    logger.warning("No audio detected")
                    return (
                        gr.update(value="πŸŽ™οΈ", elem_classes=[]),  # Restore button style
                        "⚠️ No audio detected, please record again",
                        gr.update(visible=True),
                        gr.update(visible=False),
                        False,
                        ""
                    )
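
        # Each branch above returns a 6-tuple that maps positionally onto the
        # outputs of voice_btn.click below; voice_status appears twice there
        # because it receives both the message text and a visibility update.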

        # Voice button event handling
        voice_btn.click(
            handle_voice_input,
            inputs=[audio_input, voice_recording],
            outputs=[voice_btn, voice_status, voice_status, audio_input, voice_recording, txt]
        )

        # Audio state change listener - automatic prompt
        def on_audio_change(audio):
            if audio is not None:
                logger.info("Audio file detected")
                return "🎡 Audio detected, you can click the button to complete transcription"
            return ""

        audio_input.change(
            on_audio_change,
            inputs=[audio_input],
            outputs=[voice_status]
        )

        # Handle task plan editing
        edit_task_btn.click(
            gradio_ros_interface.show_task_plan_editor,
            inputs=[state],
            outputs=[task_editor, update_dag_btn, validate_deploy_btn, deployment_status]
        )

        # Handle DAG update from editor
        update_dag_btn.click(
            gradio_ros_interface.update_dag_from_editor,
            inputs=[task_editor, state],
            outputs=[dag_image, validate_deploy_btn, task_editor, update_dag_btn, deployment_status, state]
        )

        # Handle validation and deployment
        validate_deploy_btn.click(
            gradio_ros_interface.validate_and_deploy_task_plan,
            inputs=[state],
            outputs=[deployment_status, dag_image, validate_deploy_btn, state]
        )

    demo.launch(server_port=8080, share=True)

if __name__ == "__main__":
    main()