# Repository path: DART-LLM_Task_Decomposer / test_persistent_editing.py
# Uploader: YongdongWang
# Commit message: Upload folder using huggingface_hub
# Commit: 92ef79b (verified)
#!/usr/bin/env python3
"""
Test script for persistent editing functionality
Tests multiple edit cycles to ensure task plans persist correctly
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from gradio_llm_interface import GradioLlmInterface
import json
def test_edit_cycle():
    """Test a complete edit cycle: generate → edit → update → deploy → edit again.

    Drives one ``GradioLlmInterface`` instance and one shared ``state`` dict
    through five steps, printing a ✓/✗ line for each step and returning at
    the first failed check.

    Returns:
        bool: True when the editor, reopened after deployment, still contains
        the edited task name ``move_soil_1_edited``; False as soon as any
        step fails its check.
    """
    print("Testing Complete Edit Cycle...")
    print("=" * 50)
    interface = GradioLlmInterface()

    # Step 1: Initial task plan (simulating LLM generation)
    initial_plan = {
        "tasks": [
            {
                "task": "move_soil_1",
                "instruction_function": {
                    "name": "move_soil",
                    "robot_ids": ["robot_excavator_01"],
                    "dependencies": [],
                    "object_keywords": ["soil_pile"]
                }
            }
        ]
    }
    state = {'pending_task_plan': initial_plan}
    print("✓ Step 1: Initial task plan created")

    # Step 2: Open editor with initial plan
    editor_result = interface.show_task_plan_editor(state)
    if not editor_result or len(editor_result) != 4:
        print("✗ Step 2: Failed to open editor")
        return False
    # Only the first of the four returned items (the editor update) is checked.
    editor_update = editor_result[0]
    # `or ''` keeps the membership test safe when 'value' is present but None.
    if "move_soil" not in (editor_update.get('value') or ''):
        print("✗ Step 2: Editor does not contain initial plan")
        return False
    print("✓ Step 2: Editor opened with correct initial plan")

    # Step 3: Edit the plan
    edited_json = """{
      "tasks": [
        {
          "task": "move_soil_1_edited",
          "instruction_function": {
            "name": "move_soil_edited",
            "robot_ids": ["robot_excavator_01", "robot_dump_truck_01"],
            "dependencies": [],
            "object_keywords": ["soil_pile", "edited_keyword"]
          }
        },
        {
          "task": "transport_soil_1",
          "instruction_function": {
            "name": "transport_soil",
            "robot_ids": ["robot_dump_truck_01"],
            "dependencies": ["move_soil_1_edited"],
            "object_keywords": ["destination"]
          }
        }
      ]
    }"""
    update_result = interface.update_dag_from_editor(edited_json, state)
    if not update_result or len(update_result) != 6:
        print("✗ Step 3: Failed to update DAG from editor")
        return False
    print("✓ Step 3: DAG updated with edited plan")

    # Step 4: Deploy the plan
    deploy_result = interface.validate_and_deploy_task_plan(state)
    if not deploy_result:
        print("✗ Step 4: Failed to deploy plan")
        return False
    print("✓ Step 4: Plan deployed successfully")

    # Step 5: Try to edit again (this should show the deployed plan)
    second_editor_result = interface.show_task_plan_editor(state)
    if not second_editor_result or len(second_editor_result) != 4:
        print("✗ Step 5: Failed to open editor second time")
        return False
    second_value = second_editor_result[0].get('value') or ''
    if "move_soil_1_edited" in second_value:
        print("✓ Step 5: Editor opened with deployed plan (persistent editing working)")
        return True
    print("✗ Step 5: Editor lost the deployed plan content")
    # Show a short preview of what the editor actually held to aid debugging.
    print(f"   Editor content: {(second_value or 'No content')[:100]}...")
    return False
def test_empty_state_handling():
    """Test editor behavior with a completely empty state.

    Opens the task plan editor with ``{}`` and checks that the ``value`` of
    the returned editor update mentions ``example_task_1``, the marker this
    test uses to recognise the example template.

    Returns:
        bool: True when the editor call returns four items and the marker is
        present; False otherwise.
    """
    print("\nTesting Empty State Handling...")
    print("=" * 40)
    interface = GradioLlmInterface()
    empty_state = {}
    result = interface.show_task_plan_editor(empty_state)
    if not result or len(result) != 4:
        print("✗ Failed to handle empty state")
        return False
    # Only the first of the four returned items (the editor update) is checked.
    editor_update = result[0]
    # `or ''` keeps the membership test safe when 'value' is present but None.
    if "example_task_1" in (editor_update.get('value') or ''):
        print("✓ Empty state shows example template")
        return True
    print("✗ Empty state does not show proper template")
    return False
def test_malformed_state_handling():
    """Test editor behavior with malformed state data.

    The malformed case exercised here is a pending task plan whose ``tasks``
    list is empty. The test passes when the ``value`` of the returned editor
    update mentions ``example_task_1``.

    Returns:
        bool: True when the editor call returns four items and the marker is
        present; False otherwise.
    """
    print("\nTesting Malformed State Handling...")
    print("=" * 40)
    interface = GradioLlmInterface()
    # Test with empty tasks array
    malformed_state = {
        'pending_task_plan': {
            'tasks': []
        }
    }
    result = interface.show_task_plan_editor(malformed_state)
    if not result or len(result) != 4:
        print("✗ Failed to handle malformed state")
        return False
    # Only the first of the four returned items (the editor update) is checked.
    editor_update = result[0]
    # `or ''` keeps the membership test safe when 'value' is present but None.
    if "example_task_1" in (editor_update.get('value') or ''):
        print("✓ Malformed state (empty tasks) handled correctly")
        return True
    print("✗ Malformed state not handled properly")
    return False
def main():
    """Run all persistent editing tests and print a summary.

    Each test is called in turn. A truthy return value counts as a pass; an
    exception raised by a test is reported with the test's name and counted
    as a failure, so the remaining tests still run.

    Returns:
        bool: True when every test passed, False otherwise.
    """
    print("🔄 Persistent Editing Tests")
    print("=" * 50)
    tests = [
        test_edit_cycle,
        test_empty_state_handling,
        test_malformed_state_handling,
    ]
    passed = 0
    total = len(tests)
    for test in tests:
        try:
            if test():
                passed += 1
        except Exception as e:
            # Runner boundary: one crashing test must not abort the whole run.
            print(f"✗ {test.__name__} failed with exception: {e}")

    print("\n" + "=" * 50)
    print(f"Persistent Editing Tests passed: {passed}/{total}")
    if passed != total:
        print("❌ Some persistent editing tests failed!")
        return False

    print("🎉 All persistent editing tests passed!")
    print("\n🔄 Persistent Editing Features:")
    print("   ✓ Task plans persist through edit cycles")
    print("   ✓ Deployed plans can be re-edited")
    print("   ✓ State management handles edge cases")
    print("   ✓ Proper fallback to templates when needed")
    return True
if __name__ == "__main__":
    # Exit status mirrors the overall result: 0 when main() is truthy, else 1.
    sys.exit(0 if main() else 1)