#!/bin/bash
#
# Timbre-similarity evaluation driver.
#
# Flow:
#   1. Start tritonserver in the background, logging to ./logs/server_<ts>.log.
#   2. Run ./test_online.py in the foreground, teeing its output to
#      ./logs/evaluation_<ts>.log.
#   3. Print a timing summary and shut the server down (via an EXIT trap).
#
# Environment:
#   SERVER_STARTUP_WAIT  Seconds to wait for the server before starting the
#                        client (default: 3).
#
# Exit status: 0 when the client succeeds, 1 when the client fails or the
# server dies during startup.

# Fail fast on typos in variable names.
set -u

# CUDA environment.
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 serialises kernel launches; it is kept
# because the original script sets it — confirm it is still wanted.
export CUDA_LAUNCH_BLOCKING=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Colour palette. ANSI-C quoting ($'...') stores the real ESC byte, so a plain
# `echo` (without -e) renders the colour instead of printing "\033[..." text.
RESET=$'\033[0m'
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[0;33m'
# The remaining colours are not used below; kept so the palette stays complete.
BLUE=$'\033[0;34m'
MAGENTA=$'\033[0;35m'
CYAN=$'\033[0;36m'
WHITE=$'\033[0;37m'

# Log directory and per-run file names.
LOG_DIR="./logs"
mkdir -p "$LOG_DIR"

# Record the start time (epoch for arithmetic, readable form for messages).
START_TIME=$(date +%s)
START_TIME_READABLE=$(date '+%Y-%m-%d %H:%M:%S')
LOG_TIME=$(date +%Y%m%d_%H%M%S)
LOG_FILE="$LOG_DIR/evaluation_$LOG_TIME.log"
SERVER_FILE="$LOG_DIR/server_$LOG_TIME.log"

# How long to give the server before the client starts.
SERVER_STARTUP_WAIT="${SERVER_STARTUP_WAIT:-3}"

# Empty until the server has actually been launched; cleanup checks this.
SERVER_PID=""

#######################################
# Stop the background server if it is still running.
# Registered as the EXIT trap, so it runs on every exit path of this script.
# Globals:   SERVER_PID (read)
# Arguments: none
# Outputs:   progress messages on stdout
#######################################
cleanup() {
  echo "--- Cleanup ---"

  # The script may exit before the server was ever started.
  if [[ -z "${SERVER_PID:-}" ]]; then
    echo "Server was never started; nothing to clean up."
    return 0
  fi

  # kill -0 sends no signal; it only probes whether the PID is still alive.
  if kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "Client has finished. Sending SIGTERM to shut down the server (PID: $SERVER_PID)..."
    # SIGTERM lets the server shut down gracefully if it handles the signal.
    kill "$SERVER_PID"
    # Reap the child so the script does not exit while it is still stopping.
    wait "$SERVER_PID" 2>/dev/null
    echo "Server has been shut down."
  else
    echo "Server (PID: $SERVER_PID) was already stopped."
  fi
}

# Run cleanup whenever the script exits.
trap cleanup EXIT

# 1. Start the server in the background.
echo "Starting alignment models' remote_server.py in the background..."
# Redirect stdout to the file FIRST, then point stderr at stdout; the reverse
# order ("2>&1 > file") would leave stderr attached to the terminal.
/opt/tritonserver/bin/tritonserver --model-repository=./model_repo \
  > "$SERVER_FILE" 2>&1 &

# 2. Capture the server's PID.
SERVER_PID=$!
echo "Server started with PID: $SERVER_PID"

# Give the server time to initialise before the client tries to connect.
sleep "$SERVER_STARTUP_WAIT"
echo "Waited ${SERVER_STARTUP_WAIT} seconds for server to initialize."
echo "------------------------------------------"

# Abort early if the server already died (bad model repo, missing binary, ...).
if ! kill -0 "$SERVER_PID" 2>/dev/null; then
  echo "${RED}Server exited during startup; see $SERVER_FILE for details.${RESET}" >&2
  exit 1
fi

echo "${GREEN}========================================="
echo "音色相似度评估开始"
echo "开始时间: $START_TIME_READABLE"
echo "日志文件: $LOG_FILE"
echo "========================================="
echo "可以使用以下命令实时查看日志:"
echo "tail -f $LOG_FILE${RESET}"
echo ""

# Write the start banner to the log file as well (this creates/truncates it).
{
  echo "${GREEN}========================================="
  echo "音色相似度评估开始"
  echo "开始时间: $START_TIME_READABLE"
  echo "进程配置: 每GPU 8个进程"
  echo "语言设置: zh (中文)"
  echo "=========================================${RESET}"
  echo ""
} | tee "$LOG_FILE"

# 3. Run the client in the foreground; the script blocks here until it ends.
echo "Starting similarity test client test.py in the foreground..."
python -u ./test_online.py \
  --jsonl_path /data-mnt/data/yqzhang/testset_ttsd/test_set_zh_304/output_new.jsonl \
  --output_dir ./eval_res/new_test_online \
  --processes_per_gpu 8 \
  --language zh \
  2>&1 | tee -a "$LOG_FILE"
# $? would be tee's status; PIPESTATUS[0] is the python process's own status.
# This must be the very next command after the pipeline.
CLIENT_EXIT_CODE=${PIPESTATUS[0]}

echo "------------------------------------------"
echo "${YELLOW}Client.py has finished with exit code: $CLIENT_EXIT_CODE${RESET}"

# Record the end time.
END_TIME=$(date +%s)
END_TIME_READABLE=$(date '+%Y-%m-%d %H:%M:%S')

# Break the elapsed time down. SECS rather than SECONDS: SECONDS is a special
# Bash variable (a running timer), so assigning to it yields a moving value.
DURATION=$((END_TIME - START_TIME))
HOURS=$((DURATION / 3600))
MINUTES=$(((DURATION % 3600) / 60))
SECS=$((DURATION % 60))

# Completion banner, shown on the terminal and appended to the log file.
{
  echo "${GREEN}"
  echo "========================================="
  echo "音色相似度评估完成!"
  echo "结束时间: $END_TIME_READABLE"
  echo "总耗时: ${HOURS}小时${MINUTES}分钟${SECS}秒 (共${DURATION}秒)"
  echo "日志文件: $LOG_FILE"
  echo "========================================="
  echo "${RESET}"
} | tee -a "$LOG_FILE"

# Terminal-only summary.
echo "${GREEN}"
echo "评估完成!"
echo "开始时间: $START_TIME_READABLE"
echo "结束时间: $END_TIME_READABLE"
echo "总耗时: ${HOURS}小时${MINUTES}分钟${SECS}秒"
echo "日志已保存到: $LOG_FILE"
echo "${RESET}"

# Extra reminder when the run took longer than one hour.
if (( DURATION > 3600 )); then
  echo "${RED}"
  echo "⏰ 注意:本次评估耗时较长,超过1小时"
  echo "   建议检查性能优化效果"
  echo "${RESET}"
fi

# The EXIT trap calls cleanup on either exit path below.
if (( CLIENT_EXIT_CODE != 0 )); then
  echo "Warning: Client exited with an error."
  exit 1 # Propagate the failure to the caller.
fi

echo "Script finished successfully."
exit 0