Spaces:
Sleeping
Sleeping
| """ | |
| Calculator Exponentiation Fix - TDD Implementation | |
| Specific fix for exponentiation operations to achieve 100% accuracy. | |
| """ | |
| import pytest | |
| import sys | |
| import os | |
| import logging | |
| from pathlib import Path | |
| # Add the deployment-ready directory to the path | |
| sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) | |
| from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent | |
| logger = logging.getLogger(__name__) | |
class TestCalculatorExponentiationFix:
    """Diagnostic test suite for the agent's exponentiation answers.

    Each test sends questions to a ``FixedGAIAAgent`` instance and logs what
    comes back. The tests are intentionally report-only: mismatches and
    errors are logged rather than asserted, so the suite documents the
    current state of exponentiation handling without failing the run.
    """

    @staticmethod
    def _extract_numeric_answer(response):
        """Return the last number found in *response* as a string.

        A decimal part is kept together with its integer part, and a
        fraction made only of zeros is dropped, so ``"256.0"`` and ``"256"``
        both yield ``"256"``. When *response* contains no digits at all, the
        stripped response text is returned instead.
        """
        # Function-scope import: ``re`` is not imported at file level.
        import re

        numbers = re.findall(r'\d+(?:\.\d+)?', response)
        if not numbers:
            return response.strip()

        token = numbers[-1]
        whole, _, fraction = token.partition('.')
        # "256.0" is the same answer as "256"; genuine fractions stay intact.
        if fraction and not fraction.strip('0'):
            return whole
        return token

    def setup_method(self):
        """Create a fresh agent before every test method."""
        self.agent = FixedGAIAAgent()

    def test_exponentiation_operations_failing(self):
        """Ask several exponentiation questions and log the accuracy.

        Skips when the agent reports itself unavailable. Every mismatch or
        raised exception is collected and logged; nothing is asserted, so
        the test only documents which phrasings currently fail.
        """
        # Availability does not change between cases, so decide once up front.
        if not self.agent.available:
            pytest.skip("Agent not available for testing")

        test_cases = [
            {
                'question': 'What is 2 to the power of 8?',
                'expected': '256',
                'operation': 'exponentiation',
            },
            {
                'question': 'Calculate 2^8',
                'expected': '256',
                'operation': 'exponentiation',
            },
            {
                'question': 'What is 2**8?',
                'expected': '256',
                'operation': 'exponentiation',
            },
            {
                'question': 'Compute 3 to the power of 4',
                'expected': '81',
                'operation': 'exponentiation',
            },
        ]

        failed_operations = []
        for case in test_cases:
            question = case['question']
            expected = case['expected']
            operation = case['operation']
            try:
                result = self.agent(question)
                extracted_answer = self._extract_numeric_answer(result)
            except Exception as e:
                # Deliberately broad: any agent failure is recorded as a
                # failed case so the remaining questions still run.
                failed_operations.append({
                    'question': question,
                    'expected': expected,
                    'actual': f"ERROR: {e}",
                    'full_response': str(e),
                    'operation': operation,
                })
                logger.error("❌ %s error: %s → %s", operation, question, e)
                continue

            if extracted_answer != expected:
                failed_operations.append({
                    'question': question,
                    'expected': expected,
                    'actual': extracted_answer,
                    'full_response': result,
                    'operation': operation,
                })
                logger.error("❌ %s failed: %s", operation, question)
                logger.error(" Expected: %s", expected)
                logger.error(" Got: %s", extracted_answer)
                logger.error(" Full response: %s", result)
            else:
                logger.info(
                    "✅ %s passed: %s → %s", operation, question, extracted_answer
                )

        # Report current state
        total = len(test_cases)
        passed = total - len(failed_operations)
        accuracy = passed / total * 100
        logger.info(
            "📊 Exponentiation accuracy: %.1f%% (%d/%d)", accuracy, passed, total
        )

        # This test documents the problem rather than enforcing a result:
        # failures are reported below but intentionally not asserted.
        if failed_operations:
            logger.error("❌ Exponentiation operations that need fixing:")
            for failure in failed_operations:
                logger.error(" %s: %s", failure['operation'], failure['question'])
                logger.error(" Expected: %s", failure['expected'])
                logger.error(" Got: %s", failure['actual'])
            logger.info(
                "🔧 Identified %d exponentiation issues to fix",
                len(failed_operations),
            )

    def test_python_tool_exponentiation_direct(self):
        """Ask the agent to compute ``2**8`` via Python and log the outcome.

        Skips when the agent reports itself unavailable. A response counts
        as working when the text "256" appears anywhere in it; outcomes are
        only logged, never asserted.
        """
        if not self.agent.available:
            pytest.skip("Agent not available for testing")

        # Test direct Python calculation
        python_questions = [
            "Use Python to calculate 2**8",
            "Execute Python code: print(2**8)",
            "Run this Python: result = 2**8; print(result)",
        ]
        for question in python_questions:
            try:
                result = self.agent(question)
                logger.info("🐍 Python test: %s", question)
                logger.info(" Result: %s", result)
                # Check if 256 appears in the result
                if "256" in result:
                    logger.info("✅ Python exponentiation working: %s", question)
                else:
                    logger.warning(
                        "⚠️ Python exponentiation unclear: %s → %s", question, result
                    )
            except Exception as e:
                # Deliberately broad: one failing question must not stop the
                # remaining ones from being tried.
                logger.error("❌ Python test error: %s → %s", question, e)
if __name__ == "__main__":
    # Run this file's tests with verbose output (-v) and output capturing
    # disabled (-s). pytest.main() returns the run's exit code; hand it to
    # sys.exit so a failing run is visible to the calling shell or CI job
    # instead of always exiting with status 0.
    sys.exit(pytest.main([__file__, "-v", "-s"]))