Spaces:

DroolingPanda
/

teachingAssistant

Build error

App Files Files Community

Michael Hu committed on Jul 27

Commit

111538d

1 Parent(s): 48ba7e8

Implement application DTOs

Browse files

Files changed (6) hide show

src/application/dtos/__init__.py +19 -0
src/application/dtos/audio_upload_dto.py +76 -0
src/application/dtos/dto_validation.py +213 -0
src/application/dtos/processing_request_dto.py +114 -0
src/application/dtos/processing_result_dto.py +150 -0
test_dtos.py +182 -0

src/application/dtos/__init__.py ADDED Viewed

	@@ -0,0 +1,19 @@

+"""Application Data Transfer Objects (DTOs)
+This module contains DTOs used for data transfer between layers.
+DTOs handle serialization/deserialization and validation of data
+crossing layer boundaries.
+"""
+from .audio_upload_dto import AudioUploadDto
+from .processing_request_dto import ProcessingRequestDto
+from .processing_result_dto import ProcessingResultDto
+from .dto_validation import validate_dto, ValidationError
+# Names re-exported as the public API of this package; each one is
+# imported from its submodule above.
+__all__ = [
+    'AudioUploadDto',
+    'ProcessingRequestDto',
+    'ProcessingResultDto',
+    'validate_dto',
+    'ValidationError'
+]

src/application/dtos/audio_upload_dto.py ADDED Viewed

	@@ -0,0 +1,76 @@

+"""Audio Upload Data Transfer Object"""
+from dataclasses import dataclass
+from typing import Optional
+import mimetypes
+import os
+@dataclass
+class AudioUploadDto:
+    """DTO for file upload data
+    Carries the filename, raw bytes and declared content type of an
+    uploaded audio file and validates them on construction.
+    Attributes:
+        filename: Name of the uploaded file; its extension must be supported.
+        content: Raw file bytes, between 1KB and 100MB.
+        content_type: Declared MIME type; must be an audio/* type.
+        size: Size in bytes; set to len(content) when not supplied.
+    Raises:
+        ValueError: If any field fails validation.
+    """
+    filename: str
+    content: bytes
+    content_type: str
+    size: Optional[int] = None
+    def __post_init__(self):
+        """Validate the DTO after initialization and fill in ``size``"""
+        self._validate()
+        if self.size is None:
+            self.size = len(self.content)
+    def _validate(self):
+        """Validate audio upload data
+        Raises:
+            ValueError: On an empty field, an unsupported extension, a
+                non-audio content type, or a size outside 1KB..100MB.
+        """
+        if not self.filename:
+            raise ValueError("Filename cannot be empty")
+        if not self.content:
+            raise ValueError("Audio content cannot be empty")
+        if not self.content_type:
+            raise ValueError("Content type cannot be empty")
+        # Validate file extension
+        ext = self.file_extension
+        supported_extensions = ['.wav', '.mp3', '.m4a', '.flac', '.ogg']
+        if ext not in supported_extensions:
+            raise ValueError(f"Unsupported file extension: {ext}. Supported: {supported_extensions}")
+        # Validate content type. Every supported extension is an audio
+        # format, so the declared type must always be audio/*. The check
+        # must not depend on whether the platform's MIME table happens to
+        # recognise the extension, otherwise non-audio types slip through
+        # for unrecognised ones. MIME types are case-insensitive.
+        if not self.content_type.lower().startswith('audio/'):
+            raise ValueError(f"Invalid content type: {self.content_type}. Expected audio/* type")
+        content_size = len(self.content)
+        # Validate file size (max 100MB)
+        max_size = 100 * 1024 * 1024  # 100MB
+        if content_size > max_size:
+            raise ValueError(f"File too large: {content_size} bytes. Maximum: {max_size} bytes")
+        # Validate minimum file size (at least 1KB)
+        min_size = 1024  # 1KB
+        if content_size < min_size:
+            raise ValueError(f"File too small: {content_size} bytes. Minimum: {min_size} bytes")
+    @property
+    def file_extension(self) -> str:
+        """Get the lower-cased file extension, including the leading dot"""
+        return os.path.splitext(self.filename.lower())[1]
+    @property
+    def base_filename(self) -> str:
+        """Get filename without extension (original case preserved)"""
+        return os.path.splitext(self.filename)[0]
+    def to_dict(self) -> dict:
+        """Convert to dictionary representation
+        The raw ``content`` bytes are deliberately not included.
+        """
+        return {
+            'filename': self.filename,
+            'content_type': self.content_type,
+            'size': self.size,
+            'file_extension': self.file_extension
+        }

src/application/dtos/dto_validation.py ADDED Viewed

	@@ -0,0 +1,213 @@

+"""DTO Validation Utilities
+Provides validation functions and utilities for DTOs,
+including custom validation decorators and error handling.
+"""
+from typing import Any, Callable, TypeVar, Union
+from functools import wraps
+import logging
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+# Generic return type used in the signature of the validation_required decorator.
+T = TypeVar('T')
+class ValidationError(Exception):
+    """Raised when a DTO, or one of its fields, fails validation
+    Attributes:
+        message: Description of the failure.
+        field: Name of the offending field, when known.
+        value: The rejected value, when known.
+    """
+    def __init__(self, message: str, field: str = None, value: Any = None):
+        super().__init__(message)
+        self.message, self.field, self.value = message, field, value
+    def __str__(self):
+        # Mention the field only when one was supplied (and is non-empty).
+        prefix = f"Validation error for field '{self.field}'" if self.field else "Validation error"
+        return f"{prefix}: {self.message}"
+def validate_dto(dto_instance: Any) -> bool:
+    """Validate a DTO instance
+    Calls the instance's ``_validate`` method when it has one and
+    reports every failure as a ``ValidationError``.
+    Args:
+        dto_instance: The DTO instance to validate
+    Returns:
+        bool: True if validation passes
+    Raises:
+        ValidationError: If validation fails
+    """
+    dto_name = type(dto_instance).__name__
+    try:
+        # Call the DTO's validation method if it exists
+        if hasattr(dto_instance, '_validate'):
+            dto_instance._validate()
+    except ValidationError as e:
+        # Already the right type: re-raise untouched so its field/value
+        # context survives instead of being wrapped a second time.
+        logger.error("Validation failed for %s: %s", dto_name, e)
+        raise
+    except ValueError as e:
+        logger.error("Validation failed for %s: %s", dto_name, e)
+        raise ValidationError(str(e)) from e
+    except Exception as e:
+        logger.error("Unexpected error during validation of %s: %s", dto_name, e)
+        raise ValidationError(f"Validation failed: {e}") from e
+    # Additional validation can be added here
+    logger.debug("Successfully validated %s", dto_name)
+    return True
+def validation_required(func: Callable[..., T]) -> Callable[..., T]:
+    """Decorator that validates ``self`` before running a method
+    Args:
+        func: The method to decorate
+    Returns:
+        A wrapper that calls validate_dto(self) and then the method.
+        ValidationError propagates unchanged; any other Exception is
+        re-raised as a ValidationError naming the method.
+    """
+    @wraps(func)
+    def _validated_call(self, *args, **kwargs):
+        try:
+            validate_dto(self)
+            outcome = func(self, *args, **kwargs)
+        except ValidationError:
+            raise
+        except Exception as exc:
+            raise ValidationError(f"Error in {func.__name__}: {exc}") from exc
+        return outcome
+    return _validated_call
+def validate_field(value: Any, field_name: str, validator: Callable[[Any], bool],
+                  error_message: str = None) -> Any:
+    """Check one field value against a predicate
+    Args:
+        value: The value to validate
+        field_name: Name of the field being validated
+        validator: Callable returning a truthy result when value is valid
+        error_message: Message to use instead of the default one
+    Returns:
+        The value, unchanged, when the validator accepts it
+    Raises:
+        ValidationError: If the validator rejects the value or raises
+    """
+    try:
+        if validator(value):
+            return value
+        raise ValidationError(
+            error_message or f"Invalid value for field '{field_name}'",
+            field_name,
+            value,
+        )
+    except ValidationError:
+        # Includes the rejection raised just above.
+        raise
+    except Exception as exc:
+        raise ValidationError(f"Validation error for field '{field_name}': {exc}", field_name, value) from exc
+def validate_required(value: Any, field_name: str) -> Any:
+    """Validate that a field is not None or empty
+    Emptiness is checked for the built-in text, bytes and container
+    types; other falsy values such as 0 or False are accepted.
+    Args:
+        value: The value to validate
+        field_name: Name of the field being validated
+    Returns:
+        The validated value
+    Raises:
+        ValidationError: If field is None or empty
+    """
+    if value is None:
+        raise ValidationError(f"Field '{field_name}' is required", field_name, value)
+    # Cover every built-in sized type, not only str/list/dict, so that
+    # empty bytes, tuples and sets are not reported as present.
+    sized_types = (str, bytes, bytearray, list, tuple, dict, set, frozenset)
+    if isinstance(value, sized_types) and len(value) == 0:
+        raise ValidationError(f"Field '{field_name}' cannot be empty", field_name, value)
+    return value
+def validate_type(value: Any, field_name: str, expected_type: Union[type, tuple]) -> Any:
+    """Check that a field value is an instance of the expected type(s)
+    Args:
+        value: The value to validate
+        field_name: Name of the field being validated
+        expected_type: A type, or a tuple of acceptable types
+    Returns:
+        The value, unchanged, when its type matches
+    Raises:
+        ValidationError: If the value is not an instance of expected_type
+    """
+    if isinstance(value, expected_type):
+        return value
+    # Normalise to a tuple so one join handles both call forms.
+    candidates = expected_type if isinstance(expected_type, tuple) else (expected_type,)
+    expected_str = " or ".join([candidate.__name__ for candidate in candidates])
+    actual_type = type(value).__name__
+    raise ValidationError(
+        f"Field '{field_name}' must be of type {expected_str}, got {actual_type}",
+        field_name, value
+    )
+def validate_range(value: Union[int, float], field_name: str,
+                  min_value: Union[int, float] = None,
+                  max_value: Union[int, float] = None) -> Union[int, float]:
+    """Validate that a numeric value is within a specified range
+    A bound left as None is not checked. NaN is rejected whenever at
+    least one bound is given.
+    Args:
+        value: The numeric value to validate
+        field_name: Name of the field being validated
+        min_value: Minimum allowed value (inclusive)
+        max_value: Maximum allowed value (inclusive)
+    Returns:
+        The validated value
+    Raises:
+        ValidationError: If value is outside the range
+    """
+    # The checks are written as "not value >= bound" rather than
+    # "value < bound": NaN compares False against everything, so the
+    # latter form would let it pass both tests.
+    if min_value is not None and not value >= min_value:
+        raise ValidationError(
+            f"Field '{field_name}' must be >= {min_value}, got {value}",
+            field_name, value
+        )
+    if max_value is not None and not value <= max_value:
+        raise ValidationError(
+            f"Field '{field_name}' must be <= {max_value}, got {value}",
+            field_name, value
+        )
+    return value
+def validate_choices(value: Any, field_name: str, choices: list) -> Any:
+    """Check that a value is a member of the allowed choices
+    Args:
+        value: The value to validate
+        field_name: Name of the field being validated
+        choices: List of allowed values
+    Returns:
+        The value, unchanged, when it is one of the choices
+    Raises:
+        ValidationError: If value is not in choices
+    """
+    if value in choices:
+        return value
+    raise ValidationError(
+        f"Field '{field_name}' must be one of {choices}, got '{value}'",
+        field_name, value
+    )

src/application/dtos/processing_request_dto.py ADDED Viewed

	@@ -0,0 +1,114 @@

+"""Processing Request Data Transfer Object"""
+from dataclasses import dataclass
+from typing import Optional, Dict, Any
+from .audio_upload_dto import AudioUploadDto
+@dataclass
+class ProcessingRequestDto:
+    """DTO for pipeline input parameters
+    Contains all parameters needed to process audio through
+    the STT -> Translation -> TTS pipeline. Every field is validated
+    on construction.
+    Attributes:
+        audio: The uploaded audio, as an AudioUploadDto.
+        asr_model: One of the supported ASR model names.
+        target_language: One of the supported language codes.
+        voice: One of the supported voice names.
+        speed: Number between 0.5 and 2.0 inclusive.
+        source_language: Supported language code, or None/empty if unknown.
+        additional_params: Extra parameters; None is replaced by {}.
+    Raises:
+        ValueError: If any field fails validation.
+    """
+    audio: AudioUploadDto
+    asr_model: str
+    target_language: str
+    voice: str
+    speed: float = 1.0
+    source_language: Optional[str] = None
+    additional_params: Optional[Dict[str, Any]] = None
+    def __post_init__(self):
+        """Validate the DTO after initialization"""
+        self._validate()
+        if self.additional_params is None:
+            self.additional_params = {}
+    def _validate(self):
+        """Validate processing request parameters
+        Raises:
+            ValueError: On the first field that is missing, unsupported,
+                of the wrong type or out of range.
+        """
+        if not isinstance(self.audio, AudioUploadDto):
+            raise ValueError("Audio must be an AudioUploadDto instance")
+        if not self.asr_model:
+            raise ValueError("ASR model cannot be empty")
+        # Validate ASR model options
+        supported_asr_models = ['whisper-small', 'whisper-medium', 'whisper-large', 'parakeet']
+        if self.asr_model not in supported_asr_models:
+            raise ValueError(f"Unsupported ASR model: {self.asr_model}. Supported: {supported_asr_models}")
+        if not self.target_language:
+            raise ValueError("Target language cannot be empty")
+        # Validate language codes (ISO 639-1)
+        supported_languages = [
+            'en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh',
+            'ar', 'hi', 'tr', 'pl', 'nl', 'sv', 'da', 'no', 'fi'
+        ]
+        if self.target_language not in supported_languages:
+            raise ValueError(f"Unsupported target language: {self.target_language}. Supported: {supported_languages}")
+        if self.source_language and self.source_language not in supported_languages:
+            raise ValueError(f"Unsupported source language: {self.source_language}. Supported: {supported_languages}")
+        if not self.voice:
+            raise ValueError("Voice cannot be empty")
+        # Validate voice options
+        supported_voices = ['kokoro', 'dia', 'cosyvoice2', 'dummy']
+        if self.voice not in supported_voices:
+            raise ValueError(f"Unsupported voice: {self.voice}. Supported: {supported_voices}")
+        # Validate speed range. A value that cannot be compared with a
+        # number (e.g. a string or None) is reported as a ValueError like
+        # every other validation failure, not as a TypeError.
+        try:
+            speed_in_range = 0.5 <= self.speed <= 2.0
+        except TypeError:
+            raise ValueError(f"Speed must be a number, got: {self.speed!r}") from None
+        if not speed_in_range:
+            raise ValueError(f"Speed must be between 0.5 and 2.0, got: {self.speed}")
+        # Validate additional params if provided. Compare against None
+        # rather than truthiness so falsy non-dicts ([], "", 0) are rejected.
+        if self.additional_params is not None and not isinstance(self.additional_params, dict):
+            raise ValueError("Additional params must be a dictionary")
+    @property
+    def requires_translation(self) -> bool:
+        """Check if translation is required"""
+        if not self.source_language:
+            return True  # Assume translation needed if source not specified
+        return self.source_language != self.target_language
+    def to_dict(self) -> dict:
+        """Convert to dictionary representation
+        The 'audio' entry is whatever ``audio.to_dict()`` returns.
+        """
+        return {
+            'audio': self.audio.to_dict(),
+            'asr_model': self.asr_model,
+            'target_language': self.target_language,
+            'source_language': self.source_language,
+            'voice': self.voice,
+            'speed': self.speed,
+            'requires_translation': self.requires_translation,
+            'additional_params': self.additional_params or {}
+        }
+    @classmethod
+    def from_dict(cls, data: dict) -> 'ProcessingRequestDto':
+        """Create instance from dictionary
+        Args:
+            data: Mapping with 'audio', 'asr_model', 'target_language' and
+                'voice'; 'speed', 'source_language' and 'additional_params'
+                are optional. 'audio' is either an AudioUploadDto or a dict
+                with 'filename', 'content_type' and 'content'.
+        Returns:
+            A validated ProcessingRequestDto.
+        Raises:
+            KeyError: If a required key is missing.
+            ValueError: If any value fails validation.
+        """
+        audio_data = data.get('audio', {})
+        if isinstance(audio_data, dict):
+            # Reconstruct AudioUploadDto if needed. A missing 'content'
+            # defaults to b'' and is left for AudioUploadDto to validate.
+            audio = AudioUploadDto(
+                filename=audio_data['filename'],
+                content=audio_data.get('content', b''),
+                content_type=audio_data['content_type']
+            )
+        else:
+            audio = audio_data
+        return cls(
+            audio=audio,
+            asr_model=data['asr_model'],
+            target_language=data['target_language'],
+            voice=data['voice'],
+            speed=data.get('speed', 1.0),
+            source_language=data.get('source_language'),
+            additional_params=data.get('additional_params')
+        )

src/application/dtos/processing_result_dto.py ADDED Viewed

	@@ -0,0 +1,150 @@

+"""Processing Result Data Transfer Object"""
+from dataclasses import dataclass
+from typing import Optional, Dict, Any
+from datetime import datetime
+@dataclass
+class ProcessingResultDto:
+    """DTO for pipeline output data
+    Contains the results of processing audio through the
+    STT -> Translation -> TTS pipeline.
+    Attributes:
+        success: Whether processing succeeded.
+        original_text: Text output before translation, if any.
+        translated_text: Translated text output, if any.
+        audio_path: Path of the produced audio, if any.
+        processing_time: Non-negative processing duration.
+        error_message: Required when success is False.
+        error_code: Optional; must be one of the known error codes.
+        metadata: Extra key/value data; None is replaced by {}.
+        timestamp: Defaults to the current time in UTC (timezone-aware).
+    Raises:
+        ValueError: If any field fails validation.
+    """
+    success: bool
+    original_text: Optional[str] = None
+    translated_text: Optional[str] = None
+    audio_path: Optional[str] = None
+    processing_time: float = 0.0
+    error_message: Optional[str] = None
+    error_code: Optional[str] = None
+    metadata: Optional[Dict[str, Any]] = None
+    timestamp: Optional[datetime] = None
+    def __post_init__(self):
+        """Validate and set defaults after initialization"""
+        self._validate()
+        if self.metadata is None:
+            self.metadata = {}
+        if self.timestamp is None:
+            # datetime.utcnow() is deprecated and returns a naive value;
+            # use an aware UTC timestamp, matching what from_dict builds
+            # from a 'Z'-suffixed string. Imported here because the
+            # module-level import only brings in ``datetime``.
+            from datetime import timezone
+            self.timestamp = datetime.now(timezone.utc)
+    def _validate(self):
+        """Validate processing result data
+        Raises:
+            ValueError: On the first field that violates the rules.
+        """
+        if not isinstance(self.success, bool):
+            raise ValueError("Success must be a boolean value")
+        if self.processing_time < 0:
+            raise ValueError("Processing time cannot be negative")
+        if self.success:
+            # For successful processing, we should have some output
+            if not self.original_text and not self.translated_text and not self.audio_path:
+                raise ValueError("Successful processing must have at least one output (text or audio)")
+        else:
+            # For failed processing, we should have an error message
+            if not self.error_message:
+                raise ValueError("Failed processing must include an error message")
+        # Validate error code format if provided
+        if self.error_code:
+            valid_error_codes = [
+                'STT_ERROR', 'TRANSLATION_ERROR', 'TTS_ERROR',
+                'AUDIO_FORMAT_ERROR', 'VALIDATION_ERROR', 'SYSTEM_ERROR'
+            ]
+            if self.error_code not in valid_error_codes:
+                raise ValueError(f"Invalid error code: {self.error_code}. Valid codes: {valid_error_codes}")
+        # Validate metadata if provided. Compare against None rather than
+        # truthiness so falsy non-dicts ([], "", 0) are rejected instead
+        # of breaking add_metadata/get_metadata later.
+        if self.metadata is not None and not isinstance(self.metadata, dict):
+            raise ValueError("Metadata must be a dictionary")
+    @property
+    def has_text_output(self) -> bool:
+        """Check if result has text output"""
+        return bool(self.original_text or self.translated_text)
+    @property
+    def has_audio_output(self) -> bool:
+        """Check if result has audio output"""
+        return bool(self.audio_path)
+    @property
+    def is_complete(self) -> bool:
+        """Check if processing is complete (success or failure with error)"""
+        return self.success or bool(self.error_message)
+    def add_metadata(self, key: str, value: Any) -> None:
+        """Add metadata entry"""
+        if self.metadata is None:
+            self.metadata = {}
+        self.metadata[key] = value
+    def get_metadata(self, key: str, default: Any = None) -> Any:
+        """Get metadata value, or ``default`` when the key is absent"""
+        if self.metadata is None:
+            return default
+        return self.metadata.get(key, default)
+    def to_dict(self) -> dict:
+        """Convert to dictionary representation
+        The timestamp is emitted as an ISO 8601 string; the three derived
+        flags are included alongside the stored fields.
+        """
+        return {
+            'success': self.success,
+            'original_text': self.original_text,
+            'translated_text': self.translated_text,
+            'audio_path': self.audio_path,
+            'processing_time': self.processing_time,
+            'error_message': self.error_message,
+            'error_code': self.error_code,
+            'metadata': self.metadata or {},
+            'timestamp': self.timestamp.isoformat() if self.timestamp else None,
+            'has_text_output': self.has_text_output,
+            'has_audio_output': self.has_audio_output,
+            'is_complete': self.is_complete
+        }
+    @classmethod
+    def success_result(cls, original_text: str = None, translated_text: str = None,
+                      audio_path: str = None, processing_time: float = 0.0,
+                      metadata: Dict[str, Any] = None) -> 'ProcessingResultDto':
+        """Create a successful processing result"""
+        return cls(
+            success=True,
+            original_text=original_text,
+            translated_text=translated_text,
+            audio_path=audio_path,
+            processing_time=processing_time,
+            metadata=metadata
+        )
+    @classmethod
+    def error_result(cls, error_message: str, error_code: str = None,
+                    processing_time: float = 0.0, metadata: Dict[str, Any] = None) -> 'ProcessingResultDto':
+        """Create a failed processing result"""
+        return cls(
+            success=False,
+            error_message=error_message,
+            error_code=error_code,
+            processing_time=processing_time,
+            metadata=metadata
+        )
+    @classmethod
+    def from_dict(cls, data: dict) -> 'ProcessingResultDto':
+        """Create instance from dictionary
+        Args:
+            data: Mapping with at least 'success'. 'timestamp' may be an
+                ISO 8601 string (a trailing 'Z' is read as UTC), a
+                datetime, or absent. Derived keys written by to_dict are
+                ignored.
+        Returns:
+            A validated ProcessingResultDto.
+        Raises:
+            KeyError: If 'success' is missing.
+            ValueError: If any value fails validation.
+        """
+        raw_timestamp = data.get('timestamp')
+        if isinstance(raw_timestamp, datetime):
+            # Already parsed; calling str.replace on it would fail.
+            timestamp = raw_timestamp
+        elif raw_timestamp:
+            timestamp = datetime.fromisoformat(raw_timestamp.replace('Z', '+00:00'))
+        else:
+            timestamp = None
+        return cls(
+            success=data['success'],
+            original_text=data.get('original_text'),
+            translated_text=data.get('translated_text'),
+            audio_path=data.get('audio_path'),
+            processing_time=data.get('processing_time', 0.0),
+            error_message=data.get('error_message'),
+            error_code=data.get('error_code'),
+            metadata=data.get('metadata'),
+            timestamp=timestamp
+        )

test_dtos.py ADDED Viewed

	@@ -0,0 +1,182 @@

+#!/usr/bin/env python3
+"""Test script for DTOs"""
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
+from application.dtos import AudioUploadDto, ProcessingRequestDto, ProcessingResultDto, ValidationError
+def test_audio_upload_dto():
+    """Exercise AudioUploadDto with one valid and two invalid uploads
+    Each scenario reports its outcome with a printed ✓ or ✗ line;
+    nothing is asserted.
+    """
+    print("Testing AudioUploadDto...")
+    # Test valid DTO
+    # NOTE(review): any exception here is only printed, so a failure in
+    # this scenario does not make the function raise.
+    try:
+        audio_dto = AudioUploadDto(
+            filename="test.wav",
+            content=b"fake audio content" * 100,  # Make it larger than 1KB
+            content_type="audio/wav"
+        )
+        print(f"✓ Valid AudioUploadDto created: {audio_dto.filename}")
+        print(f"  Size: {audio_dto.size} bytes")
+        print(f"  Extension: {audio_dto.file_extension}")
+        print(f"  Base filename: {audio_dto.base_filename}")
+    except Exception as e:
+        print(f"✗ Failed to create valid AudioUploadDto: {e}")
+    # Test invalid extension
+    # The ✗ line is reached only if construction unexpectedly succeeds.
+    try:
+        AudioUploadDto(
+            filename="test.txt",
+            content=b"fake content" * 100,
+            content_type="text/plain"
+        )
+        print("✗ Should have failed with invalid extension")
+    except ValueError as e:
+        print(f"✓ Correctly rejected invalid extension: {e}")
+    # Test empty content
+    try:
+        AudioUploadDto(
+            filename="test.wav",
+            content=b"",
+            content_type="audio/wav"
+        )
+        print("✗ Should have failed with empty content")
+    except ValueError as e:
+        print(f"✓ Correctly rejected empty content: {e}")
+def test_processing_request_dto():
+    """Exercise ProcessingRequestDto with one valid and two invalid requests
+    Each scenario reports its outcome with a printed ✓ or ✗ line;
+    nothing is asserted.
+    """
+    print("\nTesting ProcessingRequestDto...")
+    # Create valid audio DTO first
+    # This construction is outside any try block, so a failure here
+    # propagates to the caller.
+    audio_dto = AudioUploadDto(
+        filename="test.wav",
+        content=b"fake audio content" * 100,
+        content_type="audio/wav"
+    )
+    # Test valid DTO
+    try:
+        request_dto = ProcessingRequestDto(
+            audio=audio_dto,
+            asr_model="whisper-small",
+            target_language="es",
+            voice="kokoro",
+            speed=1.2,
+            source_language="en"
+        )
+        print(f"✓ Valid ProcessingRequestDto created")
+        print(f"  ASR Model: {request_dto.asr_model}")
+        print(f"  Target Language: {request_dto.target_language}")
+        print(f"  Requires Translation: {request_dto.requires_translation}")
+        print(f"  Dict representation keys: {list(request_dto.to_dict().keys())}")
+    except Exception as e:
+        print(f"✗ Failed to create valid ProcessingRequestDto: {e}")
+    # Test invalid speed
+    # The ✗ line is reached only if construction unexpectedly succeeds.
+    try:
+        ProcessingRequestDto(
+            audio=audio_dto,
+            asr_model="whisper-small",
+            target_language="es",
+            voice="kokoro",
+            speed=3.0  # Invalid speed
+        )
+        print("✗ Should have failed with invalid speed")
+    except ValueError as e:
+        print(f"✓ Correctly rejected invalid speed: {e}")
+    # Test invalid ASR model
+    try:
+        ProcessingRequestDto(
+            audio=audio_dto,
+            asr_model="invalid-model",
+            target_language="es",
+            voice="kokoro"
+        )
+        print("✗ Should have failed with invalid ASR model")
+    except ValueError as e:
+        print(f"✓ Correctly rejected invalid ASR model: {e}")
+def test_processing_result_dto():
+    """Exercise ProcessingResultDto factories and its validation rules
+    Covers a success result, an error result, and two constructions
+    that are expected to be rejected. Outcomes are printed as ✓ or ✗
+    lines; nothing is asserted.
+    """
+    print("\nTesting ProcessingResultDto...")
+    # Test successful result
+    try:
+        success_result = ProcessingResultDto.success_result(
+            original_text="Hello world",
+            translated_text="Hola mundo",
+            audio_path="/tmp/output.wav",
+            processing_time=2.5
+        )
+        print(f"✓ Valid success result created")
+        print(f"  Success: {success_result.success}")
+        print(f"  Has text output: {success_result.has_text_output}")
+        print(f"  Has audio output: {success_result.has_audio_output}")
+        print(f"  Is complete: {success_result.is_complete}")
+    except Exception as e:
+        print(f"✗ Failed to create success result: {e}")
+    # Test error result
+    try:
+        error_result = ProcessingResultDto.error_result(
+            error_message="TTS generation failed",
+            error_code="TTS_ERROR",
+            processing_time=1.0
+        )
+        print(f"✓ Valid error result created")
+        print(f"  Success: {error_result.success}")
+        print(f"  Error message: {error_result.error_message}")
+        print(f"  Error code: {error_result.error_code}")
+    except Exception as e:
+        print(f"✗ Failed to create error result: {e}")
+    # Test invalid success result (no outputs)
+    # The ✗ line is reached only if construction unexpectedly succeeds.
+    try:
+        ProcessingResultDto(success=True)  # No outputs provided
+        print("✗ Should have failed with no outputs for success")
+    except ValueError as e:
+        print(f"✓ Correctly rejected success result with no outputs: {e}")
+    # Test invalid error result (no error message)
+    try:
+        ProcessingResultDto(success=False)  # No error message
+        print("✗ Should have failed with no error message for failure")
+    except ValueError as e:
+        print(f"✓ Correctly rejected error result with no message: {e}")
+def test_dto_serialization():
+    """Round-trip a ProcessingResultDto through to_dict and from_dict
+    Prints whether the round trip completed and whether ``success`` and
+    ``original_text`` survived it; nothing is asserted.
+    """
+    print("\nTesting DTO serialization...")
+    # Test ProcessingResultDto serialization
+    # NOTE(review): only ProcessingResultDto is covered here; the other
+    # DTOs are not round-tripped by this function.
+    try:
+        original_result = ProcessingResultDto.success_result(
+            original_text="Test text",
+            translated_text="Texto de prueba",
+            audio_path="/tmp/test.wav",
+            processing_time=1.5
+        )
+        # Convert to dict and back
+        result_dict = original_result.to_dict()
+        restored_result = ProcessingResultDto.from_dict(result_dict)
+        print(f"✓ ProcessingResultDto serialization successful")
+        print(f"  Original success: {original_result.success}")
+        print(f"  Restored success: {restored_result.success}")
+        print(f"  Original text matches: {original_result.original_text == restored_result.original_text}")
+    except Exception as e:
+        print(f"✗ ProcessingResultDto serialization failed: {e}")
+if __name__ == "__main__":
+    # Run every scenario, in order, when executed as a script.
+    for scenario in (
+        test_audio_upload_dto,
+        test_processing_request_dto,
+        test_processing_result_dto,
+        test_dto_serialization,
+    ):
+        scenario()
+    print("\nDTO testing completed!")