Spaces:

fexeak
/

auido-generation-3B-v2

Running

File size: 10,532 Bytes

57cfcfd
 
 
dccded3
 
 
 
57cfcfd
4c90601
0fd89a7
 
dccded3
e992960
 
57cfcfd
4c90601
dccded3
 
 
 
 
 
 
 
 
 
 
57cfcfd
 
 
 
0fd89a7
 
 
 
dccded3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fd89a7
57cfcfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dccded3
 
 
 
 
 
 
 
 
 
57cfcfd
 
 
 
 
 
 
 
dccded3
 
57cfcfd
 
dccded3
 
 
 
 
 
 
 
57cfcfd
 
 
 
 
 
 
 
dccded3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57cfcfd
dccded3
 
 
 
57cfcfd
dccded3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57cfcfd
dccded3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f203179
 
 
 
 
 
dccded3

# audio_api.py
import base64
import io
import logging
import platform
import time
from datetime import datetime
from typing import Optional

import torch
import torchaudio
from fastapi import FastAPI, HTTPException, Request
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel, Field

# Configure logging: every record goes both to a log file and to the console.
# The file handler is opened explicitly as UTF-8 because the log messages in
# this module contain non-ASCII (Chinese) text; relying on the platform's
# default encoding can raise encoding errors or produce mojibake on systems
# whose locale is not UTF-8.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('audio_generation.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
)
# Module-level logger used by all handlers in this file.
logger = logging.getLogger(__name__)

from boson_multimodal.data_types import ChatMLSample, Message, AudioContent
from boson_multimodal.serve.serve_engine import HiggsAudioServeEngine, HiggsAudioResponse

# -------------------- Model loading --------------------
MODEL_PATH = "bosonai/higgs-audio-v2-generation-3B-base"
AUDIO_TOKENIZER_PATH = "bosonai/higgs-audio-v2-tokenizer"

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

logger.info(f"开始加载模型，设备: {device}")
logger.info(f"模型路径: {MODEL_PATH}")
logger.info(f"音频分词器路径: {AUDIO_TOKENIZER_PATH}")

# The engine is built once at import time; any failure is logged and then
# re-raised so the module import itself fails.
try:
    model_load_start = time.time()
    serve_engine = HiggsAudioServeEngine(
        MODEL_PATH,
        AUDIO_TOKENIZER_PATH,
        device=device,
    )
    model_load_time = time.time() - model_load_start
    logger.info(f"模型加载成功，耗时: {model_load_time:.2f}秒")

    # Report total and currently allocated memory of GPU 0, in GiB.
    if torch.cuda.is_available():
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
        gpu_allocated = torch.cuda.memory_allocated(0) / 1024**3
        logger.info(f"GPU总内存: {gpu_memory:.2f}GB, 已分配: {gpu_allocated:.2f}GB")
except Exception as e:
    logger.error(f"模型加载失败: {e!s}")
    raise

# -------------------- FastAPI --------------------
# Application object; the route and lifecycle handlers defined below in this
# file register themselves on it through its decorators.
app = FastAPI(title="Higgs Audio Generation API", version="0.1.0")

class AudioRequest(BaseModel):
    # Request body for POST /generate-audio.
    # (Documented with comments rather than a docstring so the generated
    # schema description is left untouched.)

    # Text for which audio should be generated; the only required field.
    user_prompt: str = Field(default=..., description="需要生成音频的文本")

    # Sampling parameters, each with a default and an inclusive valid range.
    # They are forwarded unchanged to serve_engine.generate().
    max_new_tokens: Optional[int] = Field(default=1024, ge=1, le=2048)
    temperature: Optional[float] = Field(default=0.3, ge=0.0, le=2.0)
    top_p: Optional[float] = Field(default=0.95, ge=0.0, le=1.0)
    top_k: Optional[int] = Field(default=50, ge=1, le=100)

class AudioResponse(BaseModel):
    # Response body of POST /generate-audio.
    # Base64 text (UTF-8 decoded) of the in-memory WAV bytes built by the handler.
    audio_base64: str
    # Sampling rate reported by the engine for the generated audio.
    sample_rate: int

@app.post("/generate-audio", response_model=AudioResponse)
def generate_audio(req: AudioRequest, request: Request):
    """Generate audio for ``req.user_prompt`` and return it as base64 WAV.

    The prompt is combined with a fixed system prompt, handed to the
    module-level ``serve_engine`` together with the sampling parameters from
    ``req``, and the returned waveform is written to an in-memory WAV file
    that is then base64-encoded.

    Args:
        req: Validated request body (prompt plus sampling parameters).
        request: Incoming HTTP request; used for the client address in the
            logs and as part of the per-request log id.

    Returns:
        AudioResponse with the base64-encoded WAV and its sample rate.

    Raises:
        HTTPException: status 500 if generation fails (including the case
            where the engine response carries no audio) or if WAV encoding
            fails. The original exception is chained as the cause.
    """
    request_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{id(request)}"
    start_time = time.time()

    # Only the first 100 characters of the prompt are logged.
    prompt_preview = req.user_prompt[:100] + ("..." if len(req.user_prompt) > 100 else "")

    logger.info(f"[{request_id}] 收到音频生成请求")
    logger.info(f"[{request_id}] 客户端IP: {request.client.host if request.client else 'unknown'}")
    logger.info(
        f"[{request_id}] 请求参数: user_prompt='{prompt_preview}', "
        f"max_new_tokens={req.max_new_tokens}, temperature={req.temperature}, "
        f"top_p={req.top_p}, top_k={req.top_k}"
    )

    system_prompt = (
        "Generate audio following instruction.\n\n<|scene_desc_start|>\n"
        "Audio is recorded from a quiet room.\n<|scene_desc_end|>"
    )
    messages = [
        Message(role="system", content=system_prompt),
        Message(role="user", content=req.user_prompt),
    ]

    # Build the preview payload only when DEBUG output is actually enabled.
    if logger.isEnabledFor(logging.DEBUG):
        message_previews = [
            {"role": m.role, "content": m.content[:50] + "..." if len(m.content) > 50 else m.content}
            for m in messages
        ]
        logger.debug(f"[{request_id}] 构建的消息: {message_previews}")

    try:
        # GPU memory in use before generation (device 0, GiB).
        if torch.cuda.is_available():
            gpu_memory_before = torch.cuda.memory_allocated(0) / 1024**3
            logger.debug(f"[{request_id}] 生成前GPU内存使用: {gpu_memory_before:.2f}GB")

        generation_start = time.time()
        logger.info(f"[{request_id}] 开始音频生成...")

        output: HiggsAudioResponse = serve_engine.generate(
            chat_ml_sample=ChatMLSample(messages=messages),
            max_new_tokens=req.max_new_tokens,
            temperature=req.temperature,
            top_p=req.top_p,
            top_k=req.top_k,
            stop_strings=["<|end_of_text|>", "<|eot_id|>"],
        )

        generation_time = time.time() - generation_start
        logger.info(f"[{request_id}] 音频生成完成，耗时: {generation_time:.2f}秒")

        # NOTE(review): assumes the engine may return a response whose
        # ``audio`` is None when no audio was produced - confirm against
        # HiggsAudioResponse. Fail with an explicit message instead of the
        # opaque TypeError that len(None) would raise below.
        if output.audio is None:
            raise RuntimeError("模型未返回音频数据")

        # Summary of the generated audio.
        audio_duration = len(output.audio) / output.sampling_rate
        logger.info(
            f"[{request_id}] 生成音频信息: 采样率={output.sampling_rate}Hz, "
            f"时长={audio_duration:.2f}秒, 样本数={len(output.audio)}"
        )

        # GPU memory in use after generation (device 0, GiB).
        if torch.cuda.is_available():
            gpu_memory_after = torch.cuda.memory_allocated(0) / 1024**3
            logger.debug(f"[{request_id}] 生成后GPU内存使用: {gpu_memory_after:.2f}GB")

    except Exception as e:
        error_time = time.time() - start_time
        logger.error(f"[{request_id}] 音频生成失败，耗时: {error_time:.2f}秒，错误: {str(e)}")
        logger.exception(f"[{request_id}] 详细错误信息:")
        raise HTTPException(status_code=500, detail=f"音频生成失败: {str(e)}") from e

    try:
        encoding_start = time.time()
        logger.debug(f"[{request_id}] 开始音频编码...")

        # Convert the numpy array to a tensor with a leading channel axis
        # (shape (1, T)) and serialize it as WAV into an in-memory buffer.
        waveform = torch.from_numpy(output.audio)[None, :]
        buf = io.BytesIO()
        torchaudio.save(buf, waveform, output.sampling_rate, format="wav")
        audio_bytes = buf.getvalue()
        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")

        encoding_time = time.time() - encoding_start
        total_time = time.time() - start_time

        logger.info(f"[{request_id}] 音频编码完成，耗时: {encoding_time:.2f}秒")
        logger.info(
            f"[{request_id}] 请求处理完成，总耗时: {total_time:.2f}秒，"
            f"编码后大小: {len(audio_b64)} 字符"
        )

        return AudioResponse(audio_base64=audio_b64, sample_rate=output.sampling_rate)

    except Exception as e:
        error_time = time.time() - start_time
        logger.error(f"[{request_id}] 音频编码失败，耗时: {error_time:.2f}秒，错误: {str(e)}")
        logger.exception(f"[{request_id}] 详细错误信息:")
        raise HTTPException(status_code=500, detail=f"音频编码失败: {str(e)}") from e

# Health-check endpoint
@app.get("/health")
def health_check():
    """Return service status along with model paths and GPU details.

    Returns:
        A dict with the keys ``status``, ``timestamp``, ``device``,
        ``model_path``, ``tokenizer_path`` and ``gpu_info``. When CUDA is
        available ``gpu_info`` holds the device count, current device index,
        and the name and memory figures (GiB, two decimals) of device 0;
        otherwise it only contains ``gpu_available: False``.

    Raises:
        HTTPException: status 500 if collecting the information fails.
    """
    try:
        if not torch.cuda.is_available():
            gpu_info = {"gpu_available": False}
        else:
            gib = 1024**3
            gpu_info = {
                "gpu_available": True,
                "gpu_count": torch.cuda.device_count(),
                "current_device": torch.cuda.current_device(),
                "device_name": torch.cuda.get_device_name(0),
                "memory_allocated_gb": round(torch.cuda.memory_allocated(0) / gib, 2),
                "memory_reserved_gb": round(torch.cuda.memory_reserved(0) / gib, 2),
                "memory_total_gb": round(torch.cuda.get_device_properties(0).total_memory / gib, 2),
            }

        payload = {
            "status": "healthy",
            "timestamp": datetime.now().isoformat(),
            "device": device,
            "model_path": MODEL_PATH,
            "tokenizer_path": AUDIO_TOKENIZER_PATH,
            "gpu_info": gpu_info,
        }
        return payload
    except Exception as e:
        logger.error(f"健康检查失败: {str(e)}")
        raise HTTPException(status_code=500, detail=f"健康检查失败: {str(e)}")

# System information endpoint
@app.get("/system-info")
def system_info():
    """Return CPU, memory and runtime details of the host.

    Returns:
        A dict with the keys ``timestamp``, ``cpu`` (core count, utilisation
        sampled over one second, frequency or ``None``), ``memory`` (total /
        available / used in GiB plus percent) and ``system`` (platform
        string, Python, torch and CUDA versions).

    Raises:
        HTTPException: status 500 if any of the information cannot be
            collected, including when ``psutil`` cannot be imported. The
            original exception is chained as the cause.
    """
    try:
        # Imported lazily, and inside the try block so that a missing psutil
        # is logged and reported through the same error path as other failures.
        import psutil

        # Query the frequency once: it can be None, and a second call could
        # return a different reading than the one that was just checked.
        cpu_freq = psutil.cpu_freq()
        cpu_info = {
            "cpu_count": psutil.cpu_count(),
            # interval=1 makes this call block for one second while sampling.
            "cpu_percent": psutil.cpu_percent(interval=1),
            "cpu_freq": cpu_freq._asdict() if cpu_freq is not None else None,
        }

        memory = psutil.virtual_memory()
        memory_info = {
            "total_gb": round(memory.total / 1024**3, 2),
            "available_gb": round(memory.available / 1024**3, 2),
            "used_gb": round(memory.used / 1024**3, 2),
            "percent": memory.percent,
        }

        # Named so it does not shadow this function's own name.
        runtime_info = {
            "platform": platform.platform(),
            "python_version": platform.python_version(),
            "torch_version": torch.__version__,
            "cuda_version": torch.version.cuda if torch.cuda.is_available() else None,
        }

        return {
            "timestamp": datetime.now().isoformat(),
            "cpu": cpu_info,
            "memory": memory_info,
            "system": runtime_info,
        }
    except Exception as e:
        logger.error(f"获取系统信息失败: {str(e)}")
        raise HTTPException(status_code=500, detail=f"获取系统信息失败: {str(e)}") from e

# Added: expose the "static" directory under /static and serve its
# index.html as the landing page at "/".
app.mount(path="/static", app=StaticFiles(directory="static"), name="static")

@app.get("/", include_in_schema=False)
async def index():
    """Return the static landing page (excluded from the API schema)."""
    landing_page = "static/index.html"
    return FileResponse(landing_page)

# Log system information at startup
@app.on_event("startup")
async def startup_event():
    """Log a startup banner with time, Python/PyTorch versions and devices."""
    banner = "=" * 50
    logger.info(banner)
    logger.info("音频生成API服务启动")
    logger.info(f"启动时间: {datetime.now().isoformat()}")
    logger.info(f"Python版本: {platform.python_version()}")
    logger.info(f"PyTorch版本: {torch.__version__}")
    logger.info(f"设备: {device}")

    # With CUDA available, also list the CUDA version and every visible GPU.
    if torch.cuda.is_available():
        logger.info(f"CUDA版本: {torch.version.cuda}")
        gpu_total = torch.cuda.device_count()
        logger.info(f"GPU设备数量: {gpu_total}")
        for gpu_index in range(gpu_total):
            gpu_name = torch.cuda.get_device_name(gpu_index)
            logger.info(f"GPU {gpu_index}: {gpu_name}")

    logger.info(banner)

@app.on_event("shutdown")
async def shutdown_event():
    """Log the shutdown and, when CUDA is available, empty the CUDA cache."""
    shutdown_time = datetime.now().isoformat()
    logger.info("音频生成API服务正在关闭...")
    logger.info(f"关闭时间: {shutdown_time}")

    # Nothing to clean up on a CPU-only host.
    if not torch.cuda.is_available():
        return

    torch.cuda.empty_cache()
    logger.info("GPU内存已清理")