Spaces:

fexeak
/

auido-generation-3B-v2

Running

App Files Files Community

fexeak committed 4 days ago

Commit

dccded3

1 Parent(s): e992960

add static

Browse files

Files changed (2) hide show

app.py +202 -12
requirements.txt +4 -0

app.py CHANGED Viewed

@@ -1,15 +1,30 @@
 # audio_api.py
 import base64
 import io
 from typing import Optional
 import torch
 import torchaudio
-from fastapi import FastAPI, HTTPException
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
 from pydantic import BaseModel, Field
 from boson_multimodal.data_types import ChatMLSample, Message, AudioContent
 from boson_multimodal.serve.serve_engine import HiggsAudioServeEngine, HiggsAudioResponse
@@ -18,7 +33,24 @@ MODEL_PATH = "bosonai/higgs-audio-v2-generation-3B-base"
 AUDIO_TOKENIZER_PATH = "bosonai/higgs-audio-v2-tokenizer"
 device = "cuda" if torch.cuda.is_available() else "cpu"
-serve_engine = HiggsAudioServeEngine(MODEL_PATH, AUDIO_TOKENIZER_PATH, device=device)
 # -------------------- FastAPI --------------------
 app = FastAPI(title="Higgs Audio Generation API", version="0.1.0")
@@ -35,7 +67,16 @@ class AudioResponse(BaseModel):
     sample_rate: int
 @app.post("/generate-audio", response_model=AudioResponse)
-def generate_audio(req: AudioRequest):
     system_prompt = (
         "Generate audio following instruction.\n\n<|scene_desc_start|>\n"
         "Audio is recorded from a quiet room.\n<|scene_desc_end|>"
@@ -44,8 +85,18 @@ def generate_audio(req: AudioRequest):
         Message(role="system", content=system_prompt),
         Message(role="user", content=req.user_prompt),
     ]
     try:
         output: HiggsAudioResponse = serve_engine.generate(
             chat_ml_sample=ChatMLSample(messages=messages),
             max_new_tokens=req.max_new_tokens,
@@ -54,21 +105,160 @@ def generate_audio(req: AudioRequest):
             top_k=req.top_k,
             stop_strings=["<|end_of_text|>", "<|eot_id|>"],
         )
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-    # Convert the numpy array to a torch.Tensor and encode it as a WAV byte stream
-    waveform = torch.from_numpy(output.audio)[None, :]  # shape=(1, T)
-    buf = io.BytesIO()
-    torchaudio.save(buf, waveform, output.sampling_rate, format="wav")
-    audio_bytes = buf.getvalue()
-    audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
-    return AudioResponse(audio_base64=audio_b64, sample_rate=output.sampling_rate)
 # Added: serve the static home page at /
 app.mount("/static", StaticFiles(directory="static"), name="static")
 @app.get("/", include_in_schema=False)
 async def index():
-    return FileResponse("static/index.html")

 # audio_api.py
 import base64
 import io
+import logging
+import platform
+import time
+from datetime import datetime
 from typing import Optional
 import torch
 import torchaudio
+from fastapi import FastAPI, HTTPException, Request
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
 from pydantic import BaseModel, Field
+# Configure logging: every record goes both to a local log file and to the
+# console stream, using one shared format.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        # The log messages in this module are non-ASCII, so pin the file
+        # encoding instead of relying on the platform's locale default.
+        logging.FileHandler('audio_generation.log', encoding='utf-8'),
+        logging.StreamHandler()
+    ]
+)
+# Module-level logger used by the handlers and startup code in this file.
+logger = logging.getLogger(__name__)
 from boson_multimodal.data_types import ChatMLSample, Message, AudioContent
 from boson_multimodal.serve.serve_engine import HiggsAudioServeEngine, HiggsAudioResponse
 AUDIO_TOKENIZER_PATH = "bosonai/higgs-audio-v2-tokenizer"
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Build the serving engine once at import time; the /generate-audio handler
+# reuses this module-level instance for every request.
+logger.info(f"开始加载模型，设备: {device}")
+logger.info(f"模型路径: {MODEL_PATH}")
+logger.info(f"音频分词器路径: {AUDIO_TOKENIZER_PATH}")
+try:
+    model_load_start = time.time()
+    serve_engine = HiggsAudioServeEngine(MODEL_PATH, AUDIO_TOKENIZER_PATH, device=device)
+    model_load_time = time.time() - model_load_start
+    logger.info(f"模型加载成功，耗时: {model_load_time:.2f}秒")
+    # Report GPU memory usage (device index 0) once the model is loaded
+    if torch.cuda.is_available():
+        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
+        gpu_allocated = torch.cuda.memory_allocated(0) / 1024**3
+        logger.info(f"GPU总内存: {gpu_memory:.2f}GB, 已分配: {gpu_allocated:.2f}GB")
+except Exception as e:
+    # logger.exception logs at ERROR level and appends the traceback, so the
+    # cause of a failed load is preserved in the log file before re-raising.
+    logger.exception(f"模型加载失败: {str(e)}")
+    raise
 # -------------------- FastAPI --------------------
 app = FastAPI(title="Higgs Audio Generation API", version="0.1.0")
     sample_rate: int
 @app.post("/generate-audio", response_model=AudioResponse)
+def generate_audio(req: AudioRequest, request: Request):
+    # Handle one generation request: log it, run the serve engine, then return
+    # the audio as a base64-encoded WAV together with its sample rate.
+    # request_id is a log-correlation tag built from a second-resolution
+    # timestamp and id() of the Request object.
+    request_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{id(request)}"
+    start_time = time.time()
+    logger.info(f"[{request_id}] 收到音频生成请求")
+    logger.info(f"[{request_id}] 客户端IP: {request.client.host if request.client else 'unknown'}")
+    logger.info(f"[{request_id}] 请求参数: user_prompt='{req.user_prompt[:100]}{'...' if len(req.user_prompt) > 100 else ''}', "
+                f"max_new_tokens={req.max_new_tokens}, temperature={req.temperature}, "
+                f"top_p={req.top_p}, top_k={req.top_k}")
     system_prompt = (
         "Generate audio following instruction.\n\n<|scene_desc_start|>\n"
         "Audio is recorded from a quiet room.\n<|scene_desc_end|>"
         Message(role="system", content=system_prompt),
         Message(role="user", content=req.user_prompt),
     ]
+    logger.debug(f"[{request_id}] 构建的消息: {[{'role': m.role, 'content': m.content[:50] + '...' if len(m.content) > 50 else m.content} for m in messages]}")
     try:
+        # Record GPU memory usage (before generation)
+        if torch.cuda.is_available():
+            gpu_memory_before = torch.cuda.memory_allocated(0) / 1024**3
+            logger.debug(f"[{request_id}] 生成前GPU内存使用: {gpu_memory_before:.2f}GB")
+        generation_start = time.time()
+        logger.info(f"[{request_id}] 开始音频生成...")
         output: HiggsAudioResponse = serve_engine.generate(
             chat_ml_sample=ChatMLSample(messages=messages),
             max_new_tokens=req.max_new_tokens,
             top_k=req.top_k,
             stop_strings=["<|end_of_text|>", "<|eot_id|>"],
         )
+        generation_time = time.time() - generation_start
+        logger.info(f"[{request_id}] 音频生成完成，耗时: {generation_time:.2f}秒")
+        # Record details of the generated audio
+        audio_duration = len(output.audio) / output.sampling_rate
+        logger.info(f"[{request_id}] 生成音频信息: 采样率={output.sampling_rate}Hz, "
+                    f"时长={audio_duration:.2f}秒, 样本数={len(output.audio)}")
+        # Record GPU memory usage (after generation)
+        if torch.cuda.is_available():
+            gpu_memory_after = torch.cuda.memory_allocated(0) / 1024**3
+            logger.debug(f"[{request_id}] 生成后GPU内存使用: {gpu_memory_after:.2f}GB")
+    except Exception as e:
+        # Any generation failure is logged with its traceback and surfaced as HTTP 500.
+        error_time = time.time() - start_time
+        logger.error(f"[{request_id}] 音频生成失败，耗时: {error_time:.2f}秒，错误: {str(e)}")
+        logger.exception(f"[{request_id}] 详细错误信息:")
+        raise HTTPException(status_code=500, detail=f"音频生成失败: {str(e)}")
+    try:
+        # Audio encoding step
+        encoding_start = time.time()
+        logger.debug(f"[{request_id}] 开始音频编码...")
+        # Convert the numpy array to a torch.Tensor and encode it as a WAV byte stream
+        waveform = torch.from_numpy(output.audio)[None, :]  # shape=(1, T)
+        buf = io.BytesIO()
+        torchaudio.save(buf, waveform, output.sampling_rate, format="wav")
+        audio_bytes = buf.getvalue()
+        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+        encoding_time = time.time() - encoding_start
+        total_time = time.time() - start_time
+        logger.info(f"[{request_id}] 音频编码完成，耗时: {encoding_time:.2f}秒")
+        logger.info(f"[{request_id}] 请求处理完成，总耗时: {total_time:.2f}秒，"
+                    f"编码后大小: {len(audio_b64)} 字符")
+        return AudioResponse(audio_base64=audio_b64, sample_rate=output.sampling_rate)
     except Exception as e:
+        # Encoding failures are reported separately from generation failures, also as HTTP 500.
+        error_time = time.time() - start_time
+        logger.error(f"[{request_id}] 音频编码失败，耗时: {error_time:.2f}秒，错误: {str(e)}")
+        logger.exception(f"[{request_id}] 详细错误信息:")
+        raise HTTPException(status_code=500, detail=f"音频编码失败: {str(e)}")
+# Health-check endpoint
+@app.get("/health")
+def health_check():
+    """Report service status along with device, model, and GPU details.
+
+    Returns:
+        A dict with the status flag, an ISO timestamp, the configured device,
+        the model and tokenizer paths, and a ``gpu_info`` sub-dict.
+
+    Raises:
+        HTTPException: status 500 if collecting any of the details fails.
+    """
+    bytes_per_gib = 1024**3
+    try:
+        # Inspect GPU state; memory figures and the name are read from device index 0.
+        if not torch.cuda.is_available():
+            gpu_info = {"gpu_available": False}
+        else:
+            gpu_info = {
+                "gpu_available": True,
+                "gpu_count": torch.cuda.device_count(),
+                "current_device": torch.cuda.current_device(),
+                "device_name": torch.cuda.get_device_name(0),
+                "memory_allocated_gb": round(torch.cuda.memory_allocated(0) / bytes_per_gib, 2),
+                "memory_reserved_gb": round(torch.cuda.memory_reserved(0) / bytes_per_gib, 2),
+                "memory_total_gb": round(torch.cuda.get_device_properties(0).total_memory / bytes_per_gib, 2)
+            }
+        report = {
+            "status": "healthy",
+            "timestamp": datetime.now().isoformat(),
+            "device": device,
+            "model_path": MODEL_PATH,
+            "tokenizer_path": AUDIO_TOKENIZER_PATH,
+            "gpu_info": gpu_info
+        }
+        return report
+    except Exception as e:
+        logger.error(f"健康检查失败: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"健康检查失败: {str(e)}")
+# System information endpoint
+@app.get("/system-info")
+def system_info():
+    """Return a snapshot of host CPU, memory, and runtime version details.
+
+    Returns:
+        A dict with an ISO timestamp plus ``cpu``, ``memory`` and ``system``
+        sub-dicts.
+
+    Raises:
+        HTTPException: status 500 if any metric cannot be collected.
+    """
+    # psutil is imported lazily, so it is only needed when this endpoint is hit.
+    import psutil
+    try:
+        # CPU details. cpu_percent(interval=1) blocks for one second while sampling.
+        cpu_info = {
+            "cpu_count": psutil.cpu_count(),
+            "cpu_percent": psutil.cpu_percent(interval=1),
+        }
+        # Sample the frequency once and reuse it, rather than querying twice.
+        cpu_freq = psutil.cpu_freq()
+        cpu_info["cpu_freq"] = cpu_freq._asdict() if cpu_freq else None
+        # Memory details, converted from bytes to GiB
+        memory = psutil.virtual_memory()
+        memory_info = {
+            "total_gb": round(memory.total / 1024**3, 2),
+            "available_gb": round(memory.available / 1024**3, 2),
+            "used_gb": round(memory.used / 1024**3, 2),
+            "percent": memory.percent
+        }
+        # Platform and runtime versions. Named runtime_info so the local does
+        # not shadow this function's own name.
+        runtime_info = {
+            "platform": platform.platform(),
+            "python_version": platform.python_version(),
+            "torch_version": torch.__version__,
+            "cuda_version": torch.version.cuda if torch.cuda.is_available() else None
+        }
+        return {
+            "timestamp": datetime.now().isoformat(),
+            "cpu": cpu_info,
+            "memory": memory_info,
+            "system": runtime_info
+        }
+    except Exception as e:
+        logger.error(f"获取系统信息失败: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"获取系统信息失败: {str(e)}") from e
 # Added: serve the static home page at /
 app.mount("/static", StaticFiles(directory="static"), name="static")
 @app.get("/", include_in_schema=False)
 async def index():
+    return FileResponse("static/index.html")
+# Log system information at startup
+# NOTE(review): FastAPI documents on_event as deprecated in favour of lifespan
+# handlers; consider migrating when this file is next reworked.
+@app.on_event("startup")
+async def startup_event():
+    """Log a startup banner with runtime versions, device, and visible GPUs."""
+    banner = "=" * 50
+    logger.info(banner)
+    logger.info("音频生成API服务启动")
+    logger.info(f"启动时间: {datetime.now().isoformat()}")
+    logger.info(f"Python版本: {platform.python_version()}")
+    logger.info(f"PyTorch版本: {torch.__version__}")
+    logger.info(f"设备: {device}")
+    cuda_ready = torch.cuda.is_available()
+    if cuda_ready:
+        logger.info(f"CUDA版本: {torch.version.cuda}")
+        logger.info(f"GPU设备数量: {torch.cuda.device_count()}")
+        # One line per visible GPU, keyed by its device index.
+        for gpu_index in range(torch.cuda.device_count()):
+            logger.info(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")
+    logger.info(banner)
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Log the shutdown and release cached GPU memory when CUDA is available."""
+    logger.info("音频生成API服务正在关闭...")
+    logger.info(f"关闭时间: {datetime.now().isoformat()}")
+    # Nothing to release on a CPU-only host.
+    if not torch.cuda.is_available():
+        return
+    # Release cached GPU memory
+    torch.cuda.empty_cache()
+    logger.info("GPU内存已清理")

requirements.txt CHANGED Viewed

@@ -1,2 +1,6 @@
 fastapi
 uvicorn[standard]

 fastapi
 uvicorn[standard]
+torch
+torchaudio
+psutil
+pydantic