fexeak committed
Commit dccded3 · 1 Parent(s): e992960

add static

Files changed (2):
  1. app.py +202 -12
  2. requirements.txt +4 -0
app.py CHANGED
@@ -1,15 +1,30 @@
 # audio_api.py
 import base64
 import io
+import logging
+import platform
+import time
+from datetime import datetime
 from typing import Optional
 
 import torch
 import torchaudio
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Request
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
 from pydantic import BaseModel, Field
 
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('audio_generation.log'),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+
 from boson_multimodal.data_types import ChatMLSample, Message, AudioContent
 from boson_multimodal.serve.serve_engine import HiggsAudioServeEngine, HiggsAudioResponse
 
@@ -18,7 +33,24 @@ MODEL_PATH = "bosonai/higgs-audio-v2-generation-3B-base"
 AUDIO_TOKENIZER_PATH = "bosonai/higgs-audio-v2-tokenizer"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-serve_engine = HiggsAudioServeEngine(MODEL_PATH, AUDIO_TOKENIZER_PATH, device=device)
+logger.info(f"Loading model, device: {device}")
+logger.info(f"Model path: {MODEL_PATH}")
+logger.info(f"Audio tokenizer path: {AUDIO_TOKENIZER_PATH}")
+
+try:
+    model_load_start = time.time()
+    serve_engine = HiggsAudioServeEngine(MODEL_PATH, AUDIO_TOKENIZER_PATH, device=device)
+    model_load_time = time.time() - model_load_start
+    logger.info(f"Model loaded successfully in {model_load_time:.2f}s")
+
+    # Check GPU memory usage
+    if torch.cuda.is_available():
+        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
+        gpu_allocated = torch.cuda.memory_allocated(0) / 1024**3
+        logger.info(f"Total GPU memory: {gpu_memory:.2f}GB, allocated: {gpu_allocated:.2f}GB")
+except Exception as e:
+    logger.error(f"Model loading failed: {str(e)}")
+    raise
 
 # -------------------- FastAPI --------------------
 app = FastAPI(title="Higgs Audio Generation API", version="0.1.0")
@@ -35,7 +67,16 @@ class AudioResponse(BaseModel):
     sample_rate: int
 
 @app.post("/generate-audio", response_model=AudioResponse)
-def generate_audio(req: AudioRequest):
+def generate_audio(req: AudioRequest, request: Request):
+    request_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{id(request)}"
+    start_time = time.time()
+
+    logger.info(f"[{request_id}] Received audio generation request")
+    logger.info(f"[{request_id}] Client IP: {request.client.host if request.client else 'unknown'}")
+    logger.info(f"[{request_id}] Request params: user_prompt='{req.user_prompt[:100]}{'...' if len(req.user_prompt) > 100 else ''}', "
+                f"max_new_tokens={req.max_new_tokens}, temperature={req.temperature}, "
+                f"top_p={req.top_p}, top_k={req.top_k}")
+
     system_prompt = (
         "Generate audio following instruction.\n\n<|scene_desc_start|>\n"
         "Audio is recorded from a quiet room.\n<|scene_desc_end|>"
@@ -44,8 +85,18 @@ def generate_audio(req: AudioRequest):
         Message(role="system", content=system_prompt),
         Message(role="user", content=req.user_prompt),
     ]
+
+    logger.debug(f"[{request_id}] Built messages: {[{'role': m.role, 'content': m.content[:50] + '...' if len(m.content) > 50 else m.content} for m in messages]}")
 
     try:
+        # Log GPU memory usage (before generation)
+        if torch.cuda.is_available():
+            gpu_memory_before = torch.cuda.memory_allocated(0) / 1024**3
+            logger.debug(f"[{request_id}] GPU memory before generation: {gpu_memory_before:.2f}GB")
+
+        generation_start = time.time()
+        logger.info(f"[{request_id}] Starting audio generation...")
+
         output: HiggsAudioResponse = serve_engine.generate(
             chat_ml_sample=ChatMLSample(messages=messages),
             max_new_tokens=req.max_new_tokens,
@@ -54,21 +105,160 @@
             top_k=req.top_k,
             stop_strings=["<|end_of_text|>", "<|eot_id|>"],
         )
+
+        generation_time = time.time() - generation_start
+        logger.info(f"[{request_id}] Audio generation finished in {generation_time:.2f}s")
+
+        # Log info about the generated audio
+        audio_duration = len(output.audio) / output.sampling_rate
+        logger.info(f"[{request_id}] Generated audio: sample_rate={output.sampling_rate}Hz, "
+                    f"duration={audio_duration:.2f}s, samples={len(output.audio)}")
+
+        # Log GPU memory usage (after generation)
+        if torch.cuda.is_available():
+            gpu_memory_after = torch.cuda.memory_allocated(0) / 1024**3
+            logger.debug(f"[{request_id}] GPU memory after generation: {gpu_memory_after:.2f}GB")
+
+    except Exception as e:
+        error_time = time.time() - start_time
+        logger.error(f"[{request_id}] Audio generation failed after {error_time:.2f}s, error: {str(e)}")
+        logger.exception(f"[{request_id}] Detailed error:")
+        raise HTTPException(status_code=500, detail=f"Audio generation failed: {str(e)}")
+
+    try:
+        # Audio encoding
+        encoding_start = time.time()
+        logger.debug(f"[{request_id}] Starting audio encoding...")
+
+        # Convert the numpy array to a torch.Tensor and encode it as a WAV byte stream
+        waveform = torch.from_numpy(output.audio)[None, :]  # shape=(1, T)
+        buf = io.BytesIO()
+        torchaudio.save(buf, waveform, output.sampling_rate, format="wav")
+        audio_bytes = buf.getvalue()
+        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+
+        encoding_time = time.time() - encoding_start
+        total_time = time.time() - start_time
+
+        logger.info(f"[{request_id}] Audio encoding finished in {encoding_time:.2f}s")
+        logger.info(f"[{request_id}] Request completed, total time: {total_time:.2f}s, "
+                    f"encoded size: {len(audio_b64)} chars")
+
+        return AudioResponse(audio_base64=audio_b64, sample_rate=output.sampling_rate)
+
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        error_time = time.time() - start_time
+        logger.error(f"[{request_id}] Audio encoding failed after {error_time:.2f}s, error: {str(e)}")
+        logger.exception(f"[{request_id}] Detailed error:")
+        raise HTTPException(status_code=500, detail=f"Audio encoding failed: {str(e)}")
 
-    # Convert the numpy array to a torch.Tensor and encode it as a WAV byte stream
-    waveform = torch.from_numpy(output.audio)[None, :]  # shape=(1, T)
-    buf = io.BytesIO()
-    torchaudio.save(buf, waveform, output.sampling_rate, format="wav")
-    audio_bytes = buf.getvalue()
-    audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+# Health check endpoint
+@app.get("/health")
+def health_check():
+    """Health check endpoint; returns service status information."""
+    try:
+        # Check GPU status
+        gpu_info = {}
+        if torch.cuda.is_available():
+            gpu_info = {
+                "gpu_available": True,
+                "gpu_count": torch.cuda.device_count(),
+                "current_device": torch.cuda.current_device(),
+                "device_name": torch.cuda.get_device_name(0),
+                "memory_allocated_gb": round(torch.cuda.memory_allocated(0) / 1024**3, 2),
+                "memory_reserved_gb": round(torch.cuda.memory_reserved(0) / 1024**3, 2),
+                "memory_total_gb": round(torch.cuda.get_device_properties(0).total_memory / 1024**3, 2)
+            }
+        else:
+            gpu_info = {"gpu_available": False}
+
+        return {
+            "status": "healthy",
+            "timestamp": datetime.now().isoformat(),
+            "device": device,
+            "model_path": MODEL_PATH,
+            "tokenizer_path": AUDIO_TOKENIZER_PATH,
+            "gpu_info": gpu_info
+        }
+    except Exception as e:
+        logger.error(f"Health check failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Health check failed: {str(e)}")
 
-    return AudioResponse(audio_base64=audio_b64, sample_rate=output.sampling_rate)
+# System info endpoint
+@app.get("/system-info")
+def system_info():
+    """Return detailed system information."""
+    import psutil
+    import platform
+
+    try:
+        # CPU info
+        cpu_info = {
+            "cpu_count": psutil.cpu_count(),
+            "cpu_percent": psutil.cpu_percent(interval=1),
+            "cpu_freq": psutil.cpu_freq()._asdict() if psutil.cpu_freq() else None
+        }
+
+        # Memory info
+        memory = psutil.virtual_memory()
+        memory_info = {
+            "total_gb": round(memory.total / 1024**3, 2),
+            "available_gb": round(memory.available / 1024**3, 2),
+            "used_gb": round(memory.used / 1024**3, 2),
+            "percent": memory.percent
+        }
+
+        # System info
+        system_info = {
+            "platform": platform.platform(),
+            "python_version": platform.python_version(),
+            "torch_version": torch.__version__,
+            "cuda_version": torch.version.cuda if torch.cuda.is_available() else None
+        }
+
+        return {
+            "timestamp": datetime.now().isoformat(),
+            "cpu": cpu_info,
+            "memory": memory_info,
+            "system": system_info
+        }
+    except Exception as e:
+        logger.error(f"Failed to get system info: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Failed to get system info: {str(e)}")
 
 # New: point / at the static home page
 app.mount("/static", StaticFiles(directory="static"), name="static")
 
 @app.get("/", include_in_schema=False)
 async def index():
-    return FileResponse("static/index.html")
+    return FileResponse("static/index.html")
+
+# Log system info at startup
+@app.on_event("startup")
+async def startup_event():
+    """Application startup event handler."""
+    logger.info("=" * 50)
+    logger.info("Audio generation API service starting")
+    logger.info(f"Startup time: {datetime.now().isoformat()}")
+    logger.info(f"Python version: {platform.python_version()}")
+    logger.info(f"PyTorch version: {torch.__version__}")
+    logger.info(f"Device: {device}")
+
+    if torch.cuda.is_available():
+        logger.info(f"CUDA version: {torch.version.cuda}")
+        logger.info(f"GPU device count: {torch.cuda.device_count()}")
+        for i in range(torch.cuda.device_count()):
+            logger.info(f"GPU {i}: {torch.cuda.get_device_name(i)}")
+
+    logger.info("=" * 50)
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Application shutdown event handler."""
+    logger.info("Audio generation API service shutting down...")
+    logger.info(f"Shutdown time: {datetime.now().isoformat()}")
+
+    # Free GPU memory
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        logger.info("GPU memory cleared")
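For reference, a minimal client for the updated /generate-audio endpoint might look like the sketch below. It assumes the server is reachable at http://localhost:8000 (e.g. started with uvicorn), and the requests dependency, prompt text, and output filename are illustrative; they are not part of this commit. Only user_prompt is sent here, relying on the handler's defaults for max_new_tokens, temperature, top_p, and top_k.

# client_example.py: a sketch, not part of the commit; assumes the API at localhost:8000
import base64
import requests  # assumed installed; not in requirements.txt

resp = requests.post(
    "http://localhost:8000/generate-audio",
    json={"user_prompt": "Say a short greeting in a calm voice."},
    timeout=300,
)
resp.raise_for_status()
data = resp.json()

# AudioResponse carries the WAV file as base64 (audio_base64) plus its sample rate
with open("out.wav", "wb") as f:
    f.write(base64.b64decode(data["audio_base64"]))
print(f"Wrote out.wav ({data['sample_rate']} Hz)")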
requirements.txt CHANGED
@@ -1,2 +1,6 @@
 fastapi
 uvicorn[standard]
+torch
+torchaudio
+psutil
+pydantic
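The four new entries line up with the new code paths: torch and torchaudio for model inference and WAV encoding, psutil for the /system-info endpoint, and pydantic for the request/response models. As a quick smoke test of the new /health endpoint, a standard-library sketch like the following should work (host and port are assumptions, as above):

# health_check_example.py: a sketch; assumes the API at localhost:8000
import json
import urllib.request

with urllib.request.urlopen("http://localhost:8000/health") as r:
    info = json.load(r)

# /health reports status, device, model paths, and GPU details
print(info["status"], "on", info["device"])
print(json.dumps(info["gpu_info"], indent=2))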