🐛 Bug: Fix the bug of incorrect function parameters entering the cooldown state.
Browse files
README.md
CHANGED
@@ -91,6 +91,7 @@ providers:
|
|
91 |
tools: true
|
92 |
preferences:
|
93 |
API_KEY_RATE_LIMIT: 15/min # Each API Key can request up to 15 times per minute, optional. The default is 999999/min.
|
|
|
94 |
API_KEY_COOLDOWN_PERIOD: 60 # Each API Key will be cooled down for 60 seconds after encountering a 429 error. Optional, the default is 60 seconds.
|
95 |
|
96 |
- provider: vertex
|
@@ -142,6 +143,7 @@ api_keys:
|
|
142 |
# When SCHEDULING_ALGORITHM is round_robin, use polling load balancing, request the channel of the model used by the user in order.
|
143 |
AUTO_RETRY: true # Whether to automatically retry, automatically retry the next provider, true for automatic retry, false for no automatic retry, default is true
|
144 |
RATE_LIMIT: 2/min # Supports rate limiting, maximum number of requests per minute, can be set to an integer, such as 2/min, 2 times per minute, 5/hour, 5 times per hour, 10/day, 10 times per day, 10/month, 10 times per month, 10/year, 10 times per year. Default is 60/min, optional
|
|
|
145 |
ENABLE_MODERATION: true # Whether to enable message moderation, true for enable, false for disable, default is false, when enabled, it will moderate the user's message, if inappropriate messages are found, an error message will be returned.
|
146 |
|
147 |
# Channel-level weighted load balancing configuration example
|
|
|
91 |
tools: true
|
92 |
preferences:
|
93 |
API_KEY_RATE_LIMIT: 15/min # Each API Key can request up to 15 times per minute, optional. The default is 999999/min.
|
94 |
+
# API_KEY_RATE_LIMIT: 15/min,10/day # Supports multiple frequency constraints
|
95 |
API_KEY_COOLDOWN_PERIOD: 60 # Each API Key will be cooled down for 60 seconds after encountering a 429 error. Optional, the default is 60 seconds.
|
96 |
|
97 |
- provider: vertex
|
|
|
143 |
# When SCHEDULING_ALGORITHM is round_robin, use polling load balancing, request the channel of the model used by the user in order.
|
144 |
AUTO_RETRY: true # Whether to automatically retry, automatically retry the next provider, true for automatic retry, false for no automatic retry, default is true
|
145 |
RATE_LIMIT: 2/min # Supports rate limiting, maximum number of requests per minute, can be set to an integer, such as 2/min, 2 times per minute, 5/hour, 5 times per hour, 10/day, 10 times per day, 10/month, 10 times per month, 10/year, 10 times per year. Default is 60/min, optional
|
146 |
+
# RATE_LIMIT: 2/min,10/day # Supports multiple frequency constraints
|
147 |
ENABLE_MODERATION: true # Whether to enable message moderation, true for enable, false for disable, default is false, when enabled, it will moderate the user's message, if inappropriate messages are found, an error message will be returned.
|
148 |
|
149 |
# Channel-level weighted load balancing configuration example
|
README_CN.md
CHANGED
@@ -91,6 +91,7 @@ providers:
|
|
91 |
tools: true
|
92 |
preferences:
|
93 |
API_KEY_RATE_LIMIT: 15/min # 每个 API Key 每分钟最多请求次数,选填。默认为 999999/min
|
|
|
94 |
API_KEY_COOLDOWN_PERIOD: 60 # 每个 API Key 遭遇 429 错误后的冷却时间,单位为秒,选填。默认为 60 秒
|
95 |
|
96 |
- provider: vertex
|
@@ -142,6 +143,7 @@ api_keys:
|
|
142 |
# 当 SCHEDULING_ALGORITHM 为 round_robin 时,使用轮训负载均衡,按照顺序请求用户使用的模型的渠道。
|
143 |
AUTO_RETRY: true # 是否自动重试,自动重试下一个提供商,true 为自动重试,false 为不自动重试,默认为 true
|
144 |
RATE_LIMIT: 2/min # 支持限流,每分钟最多请求次数,可以设置为整数,如 2/min,2 次每分钟、5/hour,5 次每小时、10/day,10 次每天,10/month,10 次每月,10/year,10 次每年。默认60/min,选填
|
|
|
145 |
ENABLE_MODERATION: true # 是否开启消息道德审查,true 为开启,false 为不开启,默认为 false,当开启后,会对用户的消息进行道德审查,如果发现不当的消息,会返回错误信息。
|
146 |
|
147 |
# 渠道级加权负载均衡配置示例
|
|
|
91 |
tools: true
|
92 |
preferences:
|
93 |
API_KEY_RATE_LIMIT: 15/min # 每个 API Key 每分钟最多请求次数,选填。默认为 999999/min
|
94 |
+
# API_KEY_RATE_LIMIT: 15/min,10/day # 支持多个频率约束条件
|
95 |
API_KEY_COOLDOWN_PERIOD: 60 # 每个 API Key 遭遇 429 错误后的冷却时间,单位为秒,选填。默认为 60 秒
|
96 |
|
97 |
- provider: vertex
|
|
|
143 |
# 当 SCHEDULING_ALGORITHM 为 round_robin 时,使用轮训负载均衡,按照顺序请求用户使用的模型的渠道。
|
144 |
AUTO_RETRY: true # 是否自动重试,自动重试下一个提供商,true 为自动重试,false 为不自动重试,默认为 true
|
145 |
RATE_LIMIT: 2/min # 支持限流,每分钟最多请求次数,可以设置为整数,如 2/min,2 次每分钟、5/hour,5 次每小时、10/day,10 次每天,10/month,10 次每月,10/year,10 次每年。默认60/min,选填
|
146 |
+
# RATE_LIMIT: 2/min,10/day 支持多个频率约束条件
|
147 |
ENABLE_MODERATION: true # 是否开启消息道德审查,true 为开启,false 为不开启,默认为 false,当开启后,会对用户的消息进行道德审查,如果发现不当的消息,会返回错误信息。
|
148 |
|
149 |
# 渠道级加权负载均衡配置示例
|
main.py
CHANGED
@@ -1015,7 +1015,7 @@ class ModelRequestHandler:
|
|
1015 |
|
1016 |
if status_code == 429:
|
1017 |
current_api = await provider_api_circular_list[channel_id].after_next_current()
|
1018 |
-
await provider_api_circular_list[channel_id].set_cooling(current_api,
|
1019 |
|
1020 |
logger.error(f"Error {status_code} with provider {channel_id}: {error_message}")
|
1021 |
if is_debug:
|
@@ -1045,13 +1045,13 @@ async def rate_limit_dependency(request: Request, credentials: HTTPAuthorization
|
|
1045 |
print("error: Invalid or missing API Key:", token)
|
1046 |
api_index = None
|
1047 |
token = None
|
1048 |
-
limit, period = await get_user_rate_limit(app, api_index)
|
1049 |
|
1050 |
# 使用 IP 地址和 token(如果有)作为限制键
|
1051 |
client_ip = request.client.host
|
1052 |
rate_limit_key = f"{client_ip}:{token}" if token else client_ip
|
1053 |
|
1054 |
-
|
|
|
1055 |
raise HTTPException(status_code=429, detail="Too many requests")
|
1056 |
|
1057 |
def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
|
@@ -1315,13 +1315,13 @@ async def get_api_key(request: Request, x_api_key: Optional[str] = Depends(api_k
|
|
1315 |
|
1316 |
async def frontend_rate_limit_dependency(request: Request, x_api_key: str = Depends(get_api_key)):
|
1317 |
token = x_api_key if x_api_key else None
|
1318 |
-
limit, period = 100, 60
|
1319 |
|
1320 |
# 使用 IP 地址和 token(如果有)作为限制键
|
1321 |
client_ip = request.client.host
|
1322 |
rate_limit_key = f"{client_ip}:{token}" if token else client_ip
|
1323 |
|
1324 |
-
|
|
|
1325 |
raise HTTPException(status_code=429, detail="Too many requests")
|
1326 |
|
1327 |
# def get_backend_router_api_list():
|
|
|
1015 |
|
1016 |
if status_code == 429:
|
1017 |
current_api = await provider_api_circular_list[channel_id].after_next_current()
|
1018 |
+
await provider_api_circular_list[channel_id].set_cooling(current_api, cooling_time=safe_get(provider, "preferences", "API_KEY_COOLDOWN_PERIOD", default=60))
|
1019 |
|
1020 |
logger.error(f"Error {status_code} with provider {channel_id}: {error_message}")
|
1021 |
if is_debug:
|
|
|
1045 |
print("error: Invalid or missing API Key:", token)
|
1046 |
api_index = None
|
1047 |
token = None
|
|
|
1048 |
|
1049 |
# 使用 IP 地址和 token(如果有)作为限制键
|
1050 |
client_ip = request.client.host
|
1051 |
rate_limit_key = f"{client_ip}:{token}" if token else client_ip
|
1052 |
|
1053 |
+
limits = await get_user_rate_limit(app, api_index)
|
1054 |
+
if await rate_limiter.is_rate_limited(rate_limit_key, limits):
|
1055 |
raise HTTPException(status_code=429, detail="Too many requests")
|
1056 |
|
1057 |
def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
|
|
|
1315 |
|
1316 |
async def frontend_rate_limit_dependency(request: Request, x_api_key: str = Depends(get_api_key)):
|
1317 |
token = x_api_key if x_api_key else None
|
|
|
1318 |
|
1319 |
# 使用 IP 地址和 token(如果有)作为限制键
|
1320 |
client_ip = request.client.host
|
1321 |
rate_limit_key = f"{client_ip}:{token}" if token else client_ip
|
1322 |
|
1323 |
+
limits = [(100, 60)]
|
1324 |
+
if await rate_limiter.is_rate_limited(rate_limit_key, limits):
|
1325 |
raise HTTPException(status_code=429, detail="Too many requests")
|
1326 |
|
1327 |
# def get_backend_router_api_list():
|
utils.py
CHANGED
@@ -17,32 +17,47 @@ def parse_rate_limit(limit_string):
|
|
17 |
'y': 31536000, 'year': 31536000
|
18 |
}
|
19 |
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
24 |
|
25 |
-
|
26 |
-
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
|
32 |
-
|
|
|
33 |
|
34 |
-
return
|
35 |
|
36 |
from collections import defaultdict
|
37 |
class InMemoryRateLimiter:
|
38 |
def __init__(self):
|
39 |
self.requests = defaultdict(list)
|
40 |
|
41 |
-
async def is_rate_limited(self, key: str,
|
42 |
now = time()
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
self.requests[key].append(now)
|
47 |
return False
|
48 |
|
@@ -70,10 +85,8 @@ class ThreadSafeCircularList:
|
|
70 |
self.index = 0
|
71 |
self.lock = asyncio.Lock()
|
72 |
self.requests = defaultdict(list) # 用于追踪每个 API key 的请求时间
|
73 |
-
self.cooling_until = defaultdict(float)
|
74 |
-
|
75 |
-
self.rate_limit = count
|
76 |
-
self.period = period
|
77 |
|
78 |
async def set_cooling(self, item: str, cooling_time: int = 60):
|
79 |
"""设置某个 item 进入冷却状态
|
@@ -86,7 +99,7 @@ class ThreadSafeCircularList:
|
|
86 |
async with self.lock:
|
87 |
self.cooling_until[item] = now + cooling_time
|
88 |
# 清空该 item 的请求记录
|
89 |
-
self.requests[item] = []
|
90 |
logger.warning(f"API key {item} 已进入冷却状态,冷却时间 {cooling_time} 秒")
|
91 |
|
92 |
async def is_rate_limited(self, item) -> bool:
|
@@ -95,9 +108,19 @@ class ThreadSafeCircularList:
|
|
95 |
if now < self.cooling_until[item]:
|
96 |
return True
|
97 |
|
98 |
-
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
self.requests[item].append(now)
|
102 |
return False
|
103 |
|
@@ -111,12 +134,10 @@ class ThreadSafeCircularList:
|
|
111 |
if not await self.is_rate_limited(item):
|
112 |
return item
|
113 |
|
114 |
-
logger.warning(f"API key {item} 已达到速率限制 ({self.rate_limit}/{self.period}秒)")
|
115 |
-
|
116 |
# 如果已经检查了所有的 API key 都被限制
|
117 |
if self.index == start_index:
|
118 |
-
logger.warning(f"
|
119 |
-
|
120 |
|
121 |
async def after_next_current(self):
|
122 |
# 返回当前取出的 API,因为已经调用了 next,所以当前API应该是上一个
|
|
|
17 |
'y': 31536000, 'year': 31536000
|
18 |
}
|
19 |
|
20 |
+
# 处理多个限制条件
|
21 |
+
limits = []
|
22 |
+
for limit in limit_string.split(','):
|
23 |
+
limit = limit.strip()
|
24 |
+
# 使用正则表达式匹配数字和单位
|
25 |
+
match = re.match(r'^(\d+)/(\w+)$', limit)
|
26 |
+
if not match:
|
27 |
+
raise ValueError(f"Invalid rate limit format: {limit}")
|
28 |
|
29 |
+
count, unit = match.groups()
|
30 |
+
count = int(count)
|
31 |
|
32 |
+
# 转换单位到秒
|
33 |
+
if unit not in time_units:
|
34 |
+
raise ValueError(f"Unknown time unit: {unit}")
|
35 |
|
36 |
+
seconds = time_units[unit]
|
37 |
+
limits.append((count, seconds))
|
38 |
|
39 |
+
return limits
|
40 |
|
41 |
from collections import defaultdict
|
42 |
class InMemoryRateLimiter:
|
43 |
def __init__(self):
|
44 |
self.requests = defaultdict(list)
|
45 |
|
46 |
+
async def is_rate_limited(self, key: str, limits) -> bool:
|
47 |
now = time()
|
48 |
+
|
49 |
+
# 检查所有速率限制条件
|
50 |
+
for limit, period in limits:
|
51 |
+
# 计算在当前时间窗口内的请求数量
|
52 |
+
recent_requests = sum(1 for req in self.requests[key] if req > now - period)
|
53 |
+
if recent_requests >= limit:
|
54 |
+
return True
|
55 |
+
|
56 |
+
# 清理太旧的请求记录(比最长时间窗口还要老的记录)
|
57 |
+
max_period = max(period for _, period in limits)
|
58 |
+
self.requests[key] = [req for req in self.requests[key] if req > now - max_period]
|
59 |
+
|
60 |
+
# 记录新的请求
|
61 |
self.requests[key].append(now)
|
62 |
return False
|
63 |
|
|
|
85 |
self.index = 0
|
86 |
self.lock = asyncio.Lock()
|
87 |
self.requests = defaultdict(list) # 用于追踪每个 API key 的请求时间
|
88 |
+
self.cooling_until = defaultdict(float)
|
89 |
+
self.rate_limits = parse_rate_limit(rate_limit) # 现在返回一个限制条件列表
|
|
|
|
|
90 |
|
91 |
async def set_cooling(self, item: str, cooling_time: int = 60):
|
92 |
"""设置某个 item 进入冷却状态
|
|
|
99 |
async with self.lock:
|
100 |
self.cooling_until[item] = now + cooling_time
|
101 |
# 清空该 item 的请求记录
|
102 |
+
# self.requests[item] = []
|
103 |
logger.warning(f"API key {item} 已进入冷却状态,冷却时间 {cooling_time} 秒")
|
104 |
|
105 |
async def is_rate_limited(self, item) -> bool:
|
|
|
108 |
if now < self.cooling_until[item]:
|
109 |
return True
|
110 |
|
111 |
+
# 检查所有速率限制条件
|
112 |
+
for limit_count, limit_period in self.rate_limits:
|
113 |
+
# 计算在当前时间窗口内的请求数量,而不是直接修改请求列表
|
114 |
+
recent_requests = sum(1 for req in self.requests[item] if req > now - limit_period)
|
115 |
+
if recent_requests >= limit_count:
|
116 |
+
logger.warning(f"API key {item} 已达到速率限制 ({limit_count}/{limit_period}秒)")
|
117 |
+
return True
|
118 |
+
|
119 |
+
# 清理太旧的请求记录(比最长时间窗口还要老的记录)
|
120 |
+
max_period = max(period for _, period in self.rate_limits)
|
121 |
+
self.requests[item] = [req for req in self.requests[item] if req > now - max_period]
|
122 |
+
|
123 |
+
# 所有限制条件都通过,记录新的请求
|
124 |
self.requests[item].append(now)
|
125 |
return False
|
126 |
|
|
|
134 |
if not await self.is_rate_limited(item):
|
135 |
return item
|
136 |
|
|
|
|
|
137 |
# 如果已经检查了所有的 API key 都被限制
|
138 |
if self.index == start_index:
|
139 |
+
logger.warning(f"All API keys are rate limited!")
|
140 |
+
raise HTTPException(status_code=429, detail="Too many requests")
|
141 |
|
142 |
async def after_next_current(self):
|
143 |
# 返回当前取出的 API,因为已经调用了 next,所以当前API应该是上一个
|