Spaces:

YongdongWang
/

DART-LLM_Task_Decomposer

No application file

App Files Files Community

DART-LLM_Task_Decomposer / test_persistent_editing.py

YongdongWang

Upload folder using huggingface_hub

92ef79b verified about 1 month ago

raw

history blame contribute delete

6.14 kB

	#!/usr/bin/env python3
	"""
	Test script for persistent editing functionality
	Tests multiple edit cycles to ensure task plans persist correctly
	"""

	import sys
	import os
	sys.path.append(os.path.dirname(os.path.abspath(__file__)))

	from gradio_llm_interface import GradioLlmInterface
	import json

	def test_edit_cycle():
	    """Run one full edit cycle and report whether the edited plan persists.

	    The sequence is: seed the state with a pending plan, open the editor,
	    submit an edited plan through ``update_dag_from_editor``, call
	    ``validate_and_deploy_task_plan``, then open the editor a second time
	    and look for the edited task name in its value.

	    Returns:
	        True when every step succeeds and the reopened editor contains
	        ``move_soil_1_edited``; False as soon as any step fails.
	    """
	    print("Testing Complete Edit Cycle...")
	    print("=" * 50)

	    llm_interface = GradioLlmInterface()

	    # Step 1: a hand-written plan stands in for what the LLM would generate.
	    seed_task = {
	        "task": "move_soil_1",
	        "instruction_function": {
	            "name": "move_soil",
	            "robot_ids": ["robot_excavator_01"],
	            "dependencies": [],
	            "object_keywords": ["soil_pile"],
	        },
	    }
	    session_state = {'pending_task_plan': {"tasks": [seed_task]}}
	    print("✓ Step 1: Initial task plan created")

	    # Step 2: the editor call is expected to hand back exactly four outputs.
	    first_open = llm_interface.show_task_plan_editor(session_state)
	    if not (first_open and len(first_open) == 4):
	        print("✗ Step 2: Failed to open editor")
	        return False

	    # Only the first output (the editor update) is inspected.
	    first_editor, *_ = first_open
	    if "move_soil" not in first_editor.get('value', ''):
	        print("✗ Step 2: Editor does not contain initial plan")
	        return False
	    print("✓ Step 2: Editor opened with correct initial plan")

	    # Step 3: submit an edited plan (renamed task plus one new dependent task).
	    edited_plan_json = """{
	"tasks": [
	{
	"task": "move_soil_1_edited",
	"instruction_function": {
	"name": "move_soil_edited",
	"robot_ids": ["robot_excavator_01", "robot_dump_truck_01"],
	"dependencies": [],
	"object_keywords": ["soil_pile", "edited_keyword"]
	}
	},
	{
	"task": "transport_soil_1",
	"instruction_function": {
	"name": "transport_soil",
	"robot_ids": ["robot_dump_truck_01"],
	"dependencies": ["move_soil_1_edited"],
	"object_keywords": ["destination"]
	}
	}
	]
	}"""

	    # NOTE(review): only the number of returned items is checked; the later
	    # steps reuse the same state dict, so this test appears to rely on the
	    # interface updating it in place - confirm against the implementation.
	    dag_outputs = llm_interface.update_dag_from_editor(edited_plan_json, session_state)
	    if not (dag_outputs and len(dag_outputs) == 6):
	        print("✗ Step 3: Failed to update DAG from editor")
	        return False
	    print("✓ Step 3: DAG updated with edited plan")

	    # Step 4: any falsy return from the deploy call counts as a failure.
	    deployment = llm_interface.validate_and_deploy_task_plan(session_state)
	    if not deployment:
	        print("✗ Step 4: Failed to deploy plan")
	        return False
	    print("✓ Step 4: Plan deployed successfully")

	    # Step 5: reopening the editor should still show the edited task name.
	    second_open = llm_interface.show_task_plan_editor(session_state)
	    if not (second_open and len(second_open) == 4):
	        print("✗ Step 5: Failed to open editor second time")
	        return False

	    second_editor, *_ = second_open
	    if "move_soil_1_edited" not in second_editor.get('value', ''):
	        print("✗ Step 5: Editor lost the deployed plan content")
	        print(f" Editor content: {second_editor.get('value', 'No content')[:100]}...")
	        return False

	    print("✓ Step 5: Editor opened with deployed plan (persistent editing working)")
	    return True

	def test_empty_state_handling():
	    """Check that opening the editor with an empty state shows the template.

	    Returns:
	        True when ``show_task_plan_editor`` returns four outputs and the
	        first one's value contains ``example_task_1``; False otherwise.
	    """
	    print("\nTesting Empty State Handling...")
	    print("=" * 40)

	    llm_interface = GradioLlmInterface()

	    # An empty dict: no pending plan of any kind is present in the state.
	    outputs = llm_interface.show_task_plan_editor({})
	    if not outputs or len(outputs) != 4:
	        print("✗ Failed to handle empty state")
	        return False

	    # Only the editor update (first output) matters for this check.
	    editor_payload, *_ = outputs
	    if "example_task_1" not in editor_payload.get('value', ''):
	        print("✗ Empty state does not show proper template")
	        return False

	    print("✓ Empty state shows example template")
	    return True

	def test_malformed_state_handling():
	    """Check the editor's response to a pending plan with no tasks.

	    Returns:
	        True when ``show_task_plan_editor`` returns four outputs and the
	        first one's value contains ``example_task_1``; False otherwise.
	    """
	    print("\nTesting Malformed State Handling...")
	    print("=" * 40)

	    llm_interface = GradioLlmInterface()

	    # The plan key exists, but its task list is empty.
	    state_without_tasks = {'pending_task_plan': {'tasks': []}}

	    outputs = llm_interface.show_task_plan_editor(state_without_tasks)
	    if not outputs or len(outputs) != 4:
	        print("✗ Failed to handle malformed state")
	        return False

	    # Only the editor update (first output) matters for this check.
	    editor_payload, *_ = outputs
	    if "example_task_1" not in editor_payload.get('value', ''):
	        print("✗ Malformed state not handled properly")
	        return False

	    print("✓ Malformed state (empty tasks) handled correctly")
	    return True

	def main():
	    """Execute every persistent-editing test and print a summary.

	    A test counts as passed only when it returns a truthy value; a test
	    that raises is reported and counted as not passed.

	    Returns:
	        True when all tests passed, False otherwise.
	    """
	    print("🔄 Persistent Editing Tests")
	    print("=" * 50)

	    suite = (
	        test_edit_cycle,
	        test_empty_state_handling,
	        test_malformed_state_handling,
	    )
	    case_count = len(suite)
	    success_count = 0

	    for case in suite:
	        try:
	            succeeded = bool(case())
	        except Exception as exc:
	            # Keep going so one crashing test does not hide the others.
	            print(f"✗ Test failed with exception: {exc}")
	        else:
	            if succeeded:
	                success_count += 1

	    print("\n" + "=" * 50)
	    print(f"Persistent Editing Tests passed: {success_count}/{case_count}")

	    if success_count != case_count:
	        print("❌ Some persistent editing tests failed!")
	        return False

	    print("🎉 All persistent editing tests passed!")
	    print("\n🔄 Persistent Editing Features:")
	    feature_lines = (
	        " ✓ Task plans persist through edit cycles",
	        " ✓ Deployed plans can be re-edited",
	        " ✓ State management handles edge cases",
	        " ✓ Proper fallback to templates when needed",
	    )
	    for feature in feature_lines:
	        print(feature)
	    return True

	if __name__ == "__main__":
	    # Exit status 0 when every test passed, 1 otherwise.
	    exit_status = 0 if main() else 1
	    sys.exit(exit_status)