File size: 10,532 Bytes
57cfcfd
 
 
dccded3
 
 
 
57cfcfd
4c90601
0fd89a7
 
dccded3
e992960
 
57cfcfd
4c90601
dccded3
 
 
 
 
 
 
 
 
 
 
57cfcfd
 
 
 
0fd89a7
 
 
 
dccded3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fd89a7
57cfcfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dccded3
 
 
 
 
 
 
 
 
 
57cfcfd
 
 
 
 
 
 
 
dccded3
 
57cfcfd
 
dccded3
 
 
 
 
 
 
 
57cfcfd
 
 
 
 
 
 
 
dccded3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57cfcfd
dccded3
 
 
 
57cfcfd
dccded3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57cfcfd
dccded3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f203179
 
 
 
 
 
dccded3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# audio_api.py
import base64
import io
import logging
import platform
import time
from datetime import datetime
from typing import Optional

import torch
import torchaudio
from fastapi import FastAPI, HTTPException, Request
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel, Field

# Configure logging: every record is written both to a log file and to the
# console stream.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        # The messages logged by this module contain non-ASCII (Chinese) text,
        # so the file encoding is pinned instead of relying on the platform
        # default, which cannot represent that text on some systems.
        logging.FileHandler('audio_generation.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

from boson_multimodal.data_types import ChatMLSample, Message, AudioContent
from boson_multimodal.serve.serve_engine import HiggsAudioServeEngine, HiggsAudioResponse

# -------------------- Model loading --------------------
# Identifiers passed to HiggsAudioServeEngine for the generation model and
# the audio tokenizer.
MODEL_PATH = "bosonai/higgs-audio-v2-generation-3B-base"
AUDIO_TOKENIZER_PATH = "bosonai/higgs-audio-v2-tokenizer"
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

logger.info(f"开始加载模型,设备: {device}")
logger.info(f"模型路径: {MODEL_PATH}")
logger.info(f"音频分词器路径: {AUDIO_TOKENIZER_PATH}")

# The engine is created once at import time and shared by every request.
try:
    model_load_start = time.time()
    serve_engine = HiggsAudioServeEngine(MODEL_PATH, AUDIO_TOKENIZER_PATH, device=device)
    model_load_time = time.time() - model_load_start
    logger.info(f"模型加载成功,耗时: {model_load_time:.2f}秒")

    # Report GPU memory for device 0 after loading; byte counts are divided
    # by 1024**3 before being logged.
    if torch.cuda.is_available():
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
        gpu_allocated = torch.cuda.memory_allocated(0) / 1024**3
        logger.info(f"GPU总内存: {gpu_memory:.2f}GB, 已分配: {gpu_allocated:.2f}GB")
except Exception as e:
    # logger.exception writes the traceback to the configured log handlers in
    # addition to the message; the error is then re-raised so that importing
    # this module fails instead of continuing without an engine.
    logger.exception(f"模型加载失败: {e}")
    raise

# -------------------- FastAPI --------------------
# Application object; the route and event-handler decorators in this module
# register their functions on it.
app = FastAPI(title="Higgs Audio Generation API", version="0.1.0")

class AudioRequest(BaseModel):
    # Request body accepted by POST /generate-audio. Only `user_prompt` is
    # required; the remaining fields have defaults and carry ge/le bounds.
    # NOTE(review): the Optional[...] annotations presumably let a client send
    # an explicit null, which would be forwarded to the engine as None —
    # confirm that this is intended.

    # Text to turn into audio (required).
    user_prompt: str = Field(..., description="需要生成音频的文本")
    # Passed to serve_engine.generate as max_new_tokens; default 1024, bounds 1..2048.
    max_new_tokens: Optional[int] = Field(1024, ge=1, le=2048)
    # Sampling temperature passed to the engine; default 0.3, bounds 0.0..2.0.
    temperature: Optional[float] = Field(0.3, ge=0.0, le=2.0)
    # top_p value passed to the engine; default 0.95, bounds 0.0..1.0.
    top_p: Optional[float] = Field(0.95, ge=0.0, le=1.0)
    # top_k value passed to the engine; default 50, bounds 1..100.
    top_k: Optional[int] = Field(50, ge=1, le=100)

class AudioResponse(BaseModel):
    # Response body returned by POST /generate-audio.

    # Base64 text of the WAV-encoded audio bytes.
    audio_base64: str
    # Sampling rate reported by the engine for the generated audio.
    sample_rate: int

@app.post("/generate-audio", response_model=AudioResponse)
def generate_audio(req: AudioRequest, request: Request):
    """Generate audio for a text prompt and return it as base64-encoded WAV.

    The prompt is wrapped in a fixed system/user message pair, passed to the
    shared serve engine, and the resulting samples are encoded as a WAV file
    in memory.

    Args:
        req: Request body with the prompt and the sampling settings.
        request: Incoming HTTP request; used only for logging (client address
            and a per-request id).

    Returns:
        AudioResponse holding the base64 WAV text and the sampling rate.

    Raises:
        HTTPException: Status 500 when generation or WAV encoding fails; the
            original exception is attached as the cause.
    """
    # Tag used to correlate the log lines of one request: the current time to
    # the second plus the id() of the Request object.
    request_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{id(request)}"
    # perf_counter() is monotonic, so the durations logged below are not
    # affected by system clock adjustments.
    start_time = time.perf_counter()

    logger.info(f"[{request_id}] 收到音频生成请求")
    logger.info(f"[{request_id}] 客户端IP: {request.client.host if request.client else 'unknown'}")
    # Only the first 100 characters of the prompt are written to the log.
    logger.info(f"[{request_id}] 请求参数: user_prompt='{req.user_prompt[:100]}{'...' if len(req.user_prompt) > 100 else ''}', "
                f"max_new_tokens={req.max_new_tokens}, temperature={req.temperature}, "
                f"top_p={req.top_p}, top_k={req.top_k}")

    system_prompt = (
        "Generate audio following instruction.\n\n<|scene_desc_start|>\n"
        "Audio is recorded from a quiet room.\n<|scene_desc_end|>"
    )
    messages = [
        Message(role="system", content=system_prompt),
        Message(role="user", content=req.user_prompt),
    ]

    # Message contents are truncated to 50 characters in the debug output.
    logger.debug(f"[{request_id}] 构建的消息: {[{'role': m.role, 'content': m.content[:50] + '...' if len(m.content) > 50 else m.content} for m in messages]}")

    try:
        # GPU memory in use on device 0 before generation.
        if torch.cuda.is_available():
            gpu_memory_before = torch.cuda.memory_allocated(0) / 1024**3
            logger.debug(f"[{request_id}] 生成前GPU内存使用: {gpu_memory_before:.2f}GB")

        generation_start = time.perf_counter()
        logger.info(f"[{request_id}] 开始音频生成...")

        output: HiggsAudioResponse = serve_engine.generate(
            chat_ml_sample=ChatMLSample(messages=messages),
            max_new_tokens=req.max_new_tokens,
            temperature=req.temperature,
            top_p=req.top_p,
            top_k=req.top_k,
            stop_strings=["<|end_of_text|>", "<|eot_id|>"],
        )

        generation_time = time.perf_counter() - generation_start
        logger.info(f"[{request_id}] 音频生成完成,耗时: {generation_time:.2f}秒")

        # NOTE(review): presumably the engine can finish without producing any
        # audio (output.audio is None) — confirm against HiggsAudioResponse.
        # Failing here gives a clear message instead of a TypeError from len().
        if output.audio is None:
            raise ValueError("模型未返回音频数据")

        # Duration in seconds = number of samples / sampling rate.
        audio_duration = len(output.audio) / output.sampling_rate
        logger.info(f"[{request_id}] 生成音频信息: 采样率={output.sampling_rate}Hz, "
                    f"时长={audio_duration:.2f}秒, 样本数={len(output.audio)}")

        # GPU memory in use on device 0 after generation.
        if torch.cuda.is_available():
            gpu_memory_after = torch.cuda.memory_allocated(0) / 1024**3
            logger.debug(f"[{request_id}] 生成后GPU内存使用: {gpu_memory_after:.2f}GB")

    except Exception as e:
        error_time = time.perf_counter() - start_time
        logger.error(f"[{request_id}] 音频生成失败,耗时: {error_time:.2f}秒,错误: {str(e)}")
        logger.exception(f"[{request_id}] 详细错误信息:")
        raise HTTPException(status_code=500, detail=f"音频生成失败: {str(e)}") from e

    try:
        encoding_start = time.perf_counter()
        logger.debug(f"[{request_id}] 开始音频编码...")

        # Wrap the sample array in a tensor with a leading axis of size 1,
        # then write it as WAV into an in-memory buffer.
        waveform = torch.from_numpy(output.audio)[None, :]
        buf = io.BytesIO()
        torchaudio.save(buf, waveform, output.sampling_rate, format="wav")
        audio_bytes = buf.getvalue()
        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")

        encoding_time = time.perf_counter() - encoding_start
        total_time = time.perf_counter() - start_time

        logger.info(f"[{request_id}] 音频编码完成,耗时: {encoding_time:.2f}秒")
        logger.info(f"[{request_id}] 请求处理完成,总耗时: {total_time:.2f}秒,"
                    f"编码后大小: {len(audio_b64)} 字符")

        return AudioResponse(audio_base64=audio_b64, sample_rate=output.sampling_rate)

    except Exception as e:
        error_time = time.perf_counter() - start_time
        logger.error(f"[{request_id}] 音频编码失败,耗时: {error_time:.2f}秒,错误: {str(e)}")
        logger.exception(f"[{request_id}] 详细错误信息:")
        raise HTTPException(status_code=500, detail=f"音频编码失败: {str(e)}") from e

# Health-check endpoint
@app.get("/health")
def health_check():
    """Report service status together with basic GPU information.

    Device name and memory figures are read for device index 0. Any failure
    while collecting the data is logged and turned into an HTTP 500.
    """
    try:
        if not torch.cuda.is_available():
            gpu_info = {"gpu_available": False}
        else:
            bytes_per_unit = 1024**3
            gpu_info = {
                "gpu_available": True,
                "gpu_count": torch.cuda.device_count(),
                "current_device": torch.cuda.current_device(),
                "device_name": torch.cuda.get_device_name(0),
                "memory_allocated_gb": round(torch.cuda.memory_allocated(0) / bytes_per_unit, 2),
                "memory_reserved_gb": round(torch.cuda.memory_reserved(0) / bytes_per_unit, 2),
                "memory_total_gb": round(torch.cuda.get_device_properties(0).total_memory / bytes_per_unit, 2),
            }

        status_payload = {
            "status": "healthy",
            "timestamp": datetime.now().isoformat(),
            "device": device,
            "model_path": MODEL_PATH,
            "tokenizer_path": AUDIO_TOKENIZER_PATH,
            "gpu_info": gpu_info,
        }
        return status_payload
    except Exception as e:
        reason = str(e)
        logger.error(f"健康检查失败: {reason}")
        raise HTTPException(status_code=500, detail=f"健康检查失败: {reason}")

# System-information endpoint
@app.get("/system-info")
def system_info():
    """Return a snapshot of CPU, memory and platform/runtime information.

    Returns:
        dict with the keys "timestamp", "cpu", "memory" and "system".

    Raises:
        HTTPException: Status 500 if psutil cannot be imported or any of the
            probes fails; the original exception is attached as the cause.
    """
    try:
        # Imported here, inside the try, so that a missing psutil is logged
        # and reported through the same error path as any other failure.
        import psutil

        # Queried once: the result is either None/empty or a named tuple.
        cpu_freq = psutil.cpu_freq()
        cpu_info = {
            "cpu_count": psutil.cpu_count(),
            # interval=1 makes this call sample CPU usage over one second,
            # so the request takes at least that long.
            "cpu_percent": psutil.cpu_percent(interval=1),
            "cpu_freq": cpu_freq._asdict() if cpu_freq else None,
        }

        # Memory figures, converted from bytes by dividing by 1024**3.
        memory = psutil.virtual_memory()
        memory_info = {
            "total_gb": round(memory.total / 1024**3, 2),
            "available_gb": round(memory.available / 1024**3, 2),
            "used_gb": round(memory.used / 1024**3, 2),
            "percent": memory.percent,
        }

        # Platform and library versions. The local is deliberately not named
        # `system_info`, which would shadow this function inside its own body.
        runtime_info = {
            "platform": platform.platform(),
            "python_version": platform.python_version(),
            "torch_version": torch.__version__,
            "cuda_version": torch.version.cuda if torch.cuda.is_available() else None,
        }

        return {
            "timestamp": datetime.now().isoformat(),
            "cpu": cpu_info,
            "memory": memory_info,
            "system": runtime_info,
        }
    except Exception as e:
        logger.error(f"获取系统信息失败: {str(e)}")
        raise HTTPException(status_code=500, detail=f"获取系统信息失败: {str(e)}") from e

# Added: static front end. Files in the local "static" directory are served
# under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")

@app.get("/", include_in_schema=False)
async def index():
    """Serve static/index.html for the site root; the route is hidden from the API schema."""
    landing_page = "static/index.html"
    return FileResponse(landing_page)

# Log environment details when the application starts
@app.on_event("startup")
async def startup_event():
    """Startup hook: log a banner with the runtime and GPU details."""
    separator = "=" * 50

    logger.info(separator)
    logger.info("音频生成API服务启动")
    started_at = datetime.now().isoformat()
    logger.info(f"启动时间: {started_at}")
    logger.info(f"Python版本: {platform.python_version()}")
    logger.info(f"PyTorch版本: {torch.__version__}")
    logger.info(f"设备: {device}")

    # GPU details are only logged when CUDA is usable.
    if torch.cuda.is_available():
        logger.info(f"CUDA版本: {torch.version.cuda}")
        gpu_total = torch.cuda.device_count()
        logger.info(f"GPU设备数量: {gpu_total}")
        for gpu_index in range(gpu_total):
            gpu_name = torch.cuda.get_device_name(gpu_index)
            logger.info(f"GPU {gpu_index}: {gpu_name}")

    logger.info(separator)

@app.on_event("shutdown")
async def shutdown_event():
    """Shutdown hook: log the shutdown time and empty the CUDA cache if CUDA is available."""
    logger.info("音频生成API服务正在关闭...")
    stopped_at = datetime.now().isoformat()
    logger.info(f"关闭时间: {stopped_at}")

    # Nothing to clean up without CUDA.
    if not torch.cuda.is_available():
        return

    torch.cuda.empty_cache()
    logger.info("GPU内存已清理")