yym68686 commited on
Commit
68795da
·
1 Parent(s): c64a64e

🐛 Bug: Fix the bug of incorrect function parameters entering the cooldown state.

Browse files
Files changed (4) hide show
  1. README.md +2 -0
  2. README_CN.md +2 -0
  3. main.py +5 -5
  4. utils.py +48 -27
README.md CHANGED
@@ -91,6 +91,7 @@ providers:
91
  tools: true
92
  preferences:
93
  API_KEY_RATE_LIMIT: 15/min # Each API Key can request up to 15 times per minute, optional. The default is 999999/min.
 
94
  API_KEY_COOLDOWN_PERIOD: 60 # Each API Key will be cooled down for 60 seconds after encountering a 429 error. Optional, the default is 60 seconds.
95
 
96
  - provider: vertex
@@ -142,6 +143,7 @@ api_keys:
142
  # When SCHEDULING_ALGORITHM is round_robin, use polling load balancing, request the channel of the model used by the user in order.
143
  AUTO_RETRY: true # Whether to automatically retry, automatically retry the next provider, true for automatic retry, false for no automatic retry, default is true
144
  RATE_LIMIT: 2/min # Supports rate limiting, maximum number of requests per minute, can be set to an integer, such as 2/min, 2 times per minute, 5/hour, 5 times per hour, 10/day, 10 times per day, 10/month, 10 times per month, 10/year, 10 times per year. Default is 60/min, optional
 
145
  ENABLE_MODERATION: true # Whether to enable message moderation, true for enable, false for disable, default is false, when enabled, it will moderate the user's message, if inappropriate messages are found, an error message will be returned.
146
 
147
  # Channel-level weighted load balancing configuration example
 
91
  tools: true
92
  preferences:
93
  API_KEY_RATE_LIMIT: 15/min # Each API Key can request up to 15 times per minute, optional. The default is 999999/min.
94
+ # API_KEY_RATE_LIMIT: 15/min,10/day # Supports multiple frequency constraints
95
  API_KEY_COOLDOWN_PERIOD: 60 # Each API Key will be cooled down for 60 seconds after encountering a 429 error. Optional, the default is 60 seconds.
96
 
97
  - provider: vertex
 
143
  # When SCHEDULING_ALGORITHM is round_robin, use polling load balancing, request the channel of the model used by the user in order.
144
  AUTO_RETRY: true # Whether to automatically retry, automatically retry the next provider, true for automatic retry, false for no automatic retry, default is true
145
  RATE_LIMIT: 2/min # Supports rate limiting, maximum number of requests per minute, can be set to an integer, such as 2/min, 2 times per minute, 5/hour, 5 times per hour, 10/day, 10 times per day, 10/month, 10 times per month, 10/year, 10 times per year. Default is 60/min, optional
146
+ # RATE_LIMIT: 2/min,10/day # Supports multiple frequency constraints
147
  ENABLE_MODERATION: true # Whether to enable message moderation, true for enable, false for disable, default is false, when enabled, it will moderate the user's message, if inappropriate messages are found, an error message will be returned.
148
 
149
  # Channel-level weighted load balancing configuration example
README_CN.md CHANGED
@@ -91,6 +91,7 @@ providers:
91
  tools: true
92
  preferences:
93
  API_KEY_RATE_LIMIT: 15/min # 每个 API Key 每分钟最多请求次数,选填。默认为 999999/min
 
94
  API_KEY_COOLDOWN_PERIOD: 60 # 每个 API Key 遭遇 429 错误后的冷却时间,单位为秒,选填。默认为 60 秒
95
 
96
  - provider: vertex
@@ -142,6 +143,7 @@ api_keys:
142
  # 当 SCHEDULING_ALGORITHM 为 round_robin 时,使用轮训负载均衡,按照顺序请求用户使用的模型的渠道。
143
  AUTO_RETRY: true # 是否自动重试,自动重试下一个提供商,true 为自动重试,false 为不自动重试,默认为 true
144
  RATE_LIMIT: 2/min # 支持限流,每分钟最多请求次数,可以设置为整数,如 2/min,2 次每分钟、5/hour,5 次每小时、10/day,10 次每天,10/month,10 次每月,10/year,10 次每年。默认60/min,选填
 
145
  ENABLE_MODERATION: true # 是否开启消息道德审查,true 为开启,false 为不开启,默认为 false,当开启后,会对用户的消息进行道德审查,如果发现不当的消息,会返回错误信息。
146
 
147
  # 渠道级加权负载均衡配置示例
 
91
  tools: true
92
  preferences:
93
  API_KEY_RATE_LIMIT: 15/min # 每个 API Key 每分钟最多请求次数,选填。默认为 999999/min
94
+ # API_KEY_RATE_LIMIT: 15/min,10/day # 支持多个频率约束条件
95
  API_KEY_COOLDOWN_PERIOD: 60 # 每个 API Key 遭遇 429 错误后的冷却时间,单位为秒,选填。默认为 60 秒
96
 
97
  - provider: vertex
 
143
  # 当 SCHEDULING_ALGORITHM 为 round_robin 时,使用轮训负载均衡,按照顺序请求用户使用的模型的渠道。
144
  AUTO_RETRY: true # 是否自动重试,自动重试下一个提供商,true 为自动重试,false 为不自动重试,默认为 true
145
  RATE_LIMIT: 2/min # 支持限流,每分钟最多请求次数,可以设置为整数,如 2/min,2 次每分钟、5/hour,5 次每小时、10/day,10 次每天,10/month,10 次每月,10/year,10 次每年。默认60/min,选填
146
+ # RATE_LIMIT: 2/min,10/day 支持多个频率约束条件
147
  ENABLE_MODERATION: true # 是否开启消息道德审查,true 为开启,false 为不开启,默认为 false,当开启后,会对用户的消息进行道德审查,如果发现不当的消息,会返回错误信息。
148
 
149
  # 渠道级加权负载均衡配置示例
main.py CHANGED
@@ -1015,7 +1015,7 @@ class ModelRequestHandler:
1015
 
1016
  if status_code == 429:
1017
  current_api = await provider_api_circular_list[channel_id].after_next_current()
1018
- await provider_api_circular_list[channel_id].set_cooling(current_api, cooldown_period=safe_get(provider, "preferences", "API_KEY_COOLDOWN_PERIOD", default=60))
1019
 
1020
  logger.error(f"Error {status_code} with provider {channel_id}: {error_message}")
1021
  if is_debug:
@@ -1045,13 +1045,13 @@ async def rate_limit_dependency(request: Request, credentials: HTTPAuthorization
1045
  print("error: Invalid or missing API Key:", token)
1046
  api_index = None
1047
  token = None
1048
- limit, period = await get_user_rate_limit(app, api_index)
1049
 
1050
  # 使用 IP 地址和 token(如果有)作为限制键
1051
  client_ip = request.client.host
1052
  rate_limit_key = f"{client_ip}:{token}" if token else client_ip
1053
 
1054
- if await rate_limiter.is_rate_limited(rate_limit_key, limit, period):
 
1055
  raise HTTPException(status_code=429, detail="Too many requests")
1056
 
1057
  def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
@@ -1315,13 +1315,13 @@ async def get_api_key(request: Request, x_api_key: Optional[str] = Depends(api_k
1315
 
1316
  async def frontend_rate_limit_dependency(request: Request, x_api_key: str = Depends(get_api_key)):
1317
  token = x_api_key if x_api_key else None
1318
- limit, period = 100, 60
1319
 
1320
  # 使用 IP 地址和 token(如果有)作为限制键
1321
  client_ip = request.client.host
1322
  rate_limit_key = f"{client_ip}:{token}" if token else client_ip
1323
 
1324
- if await rate_limiter.is_rate_limited(rate_limit_key, limit, period):
 
1325
  raise HTTPException(status_code=429, detail="Too many requests")
1326
 
1327
  # def get_backend_router_api_list():
 
1015
 
1016
  if status_code == 429:
1017
  current_api = await provider_api_circular_list[channel_id].after_next_current()
1018
+ await provider_api_circular_list[channel_id].set_cooling(current_api, cooling_time=safe_get(provider, "preferences", "API_KEY_COOLDOWN_PERIOD", default=60))
1019
 
1020
  logger.error(f"Error {status_code} with provider {channel_id}: {error_message}")
1021
  if is_debug:
 
1045
  print("error: Invalid or missing API Key:", token)
1046
  api_index = None
1047
  token = None
 
1048
 
1049
  # 使用 IP 地址和 token(如果有)作为限制键
1050
  client_ip = request.client.host
1051
  rate_limit_key = f"{client_ip}:{token}" if token else client_ip
1052
 
1053
+ limits = await get_user_rate_limit(app, api_index)
1054
+ if await rate_limiter.is_rate_limited(rate_limit_key, limits):
1055
  raise HTTPException(status_code=429, detail="Too many requests")
1056
 
1057
  def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
 
1315
 
1316
  async def frontend_rate_limit_dependency(request: Request, x_api_key: str = Depends(get_api_key)):
1317
  token = x_api_key if x_api_key else None
 
1318
 
1319
  # 使用 IP 地址和 token(如果有)作为限制键
1320
  client_ip = request.client.host
1321
  rate_limit_key = f"{client_ip}:{token}" if token else client_ip
1322
 
1323
+ limits = [(100, 60)]
1324
+ if await rate_limiter.is_rate_limited(rate_limit_key, limits):
1325
  raise HTTPException(status_code=429, detail="Too many requests")
1326
 
1327
  # def get_backend_router_api_list():
utils.py CHANGED
@@ -17,32 +17,47 @@ def parse_rate_limit(limit_string):
17
  'y': 31536000, 'year': 31536000
18
  }
19
 
20
- # 使用正则表达式匹配数字和单位
21
- match = re.match(r'^(\d+)/(\w+)$', limit_string)
22
- if not match:
23
- raise ValueError(f"Invalid rate limit format: {limit_string}")
 
 
 
 
24
 
25
- count, unit = match.groups()
26
- count = int(count)
27
 
28
- # 转换单位到秒
29
- if unit not in time_units:
30
- raise ValueError(f"Unknown time unit: {unit}")
31
 
32
- seconds = time_units[unit]
 
33
 
34
- return (count, seconds)
35
 
36
  from collections import defaultdict
37
  class InMemoryRateLimiter:
38
  def __init__(self):
39
  self.requests = defaultdict(list)
40
 
41
- async def is_rate_limited(self, key: str, limit: int, period: int) -> bool:
42
  now = time()
43
- self.requests[key] = [req for req in self.requests[key] if req > now - period]
44
- if len(self.requests[key]) >= limit:
45
- return True
 
 
 
 
 
 
 
 
 
 
46
  self.requests[key].append(now)
47
  return False
48
 
@@ -70,10 +85,8 @@ class ThreadSafeCircularList:
70
  self.index = 0
71
  self.lock = asyncio.Lock()
72
  self.requests = defaultdict(list) # 用于追踪每个 API key 的请求时间
73
- self.cooling_until = defaultdict(float) # 记录每个 item 的冷却结束时间
74
- count, period = parse_rate_limit(rate_limit)
75
- self.rate_limit = count
76
- self.period = period
77
 
78
  async def set_cooling(self, item: str, cooling_time: int = 60):
79
  """设置某个 item 进入冷却状态
@@ -86,7 +99,7 @@ class ThreadSafeCircularList:
86
  async with self.lock:
87
  self.cooling_until[item] = now + cooling_time
88
  # 清空该 item 的请求记录
89
- self.requests[item] = []
90
  logger.warning(f"API key {item} 已进入冷却状态,冷却时间 {cooling_time} 秒")
91
 
92
  async def is_rate_limited(self, item) -> bool:
@@ -95,9 +108,19 @@ class ThreadSafeCircularList:
95
  if now < self.cooling_until[item]:
96
  return True
97
 
98
- self.requests[item] = [req for req in self.requests[item] if req > now - self.period]
99
- if len(self.requests[item]) >= self.rate_limit:
100
- return True
 
 
 
 
 
 
 
 
 
 
101
  self.requests[item].append(now)
102
  return False
103
 
@@ -111,12 +134,10 @@ class ThreadSafeCircularList:
111
  if not await self.is_rate_limited(item):
112
  return item
113
 
114
- logger.warning(f"API key {item} 已达到速率限制 ({self.rate_limit}/{self.period}秒)")
115
-
116
  # 如果已经检查了所有的 API key 都被限制
117
  if self.index == start_index:
118
- logger.warning(f"所有 API key 都已达到速率限制 ({self.rate_limit}/{self.period}秒)")
119
- return None
120
 
121
  async def after_next_current(self):
122
  # 返回当前取出的 API,因为已经调用了 next,所以当前API应该是上一个
 
17
  'y': 31536000, 'year': 31536000
18
  }
19
 
20
+ # 处理多个限制条件
21
+ limits = []
22
+ for limit in limit_string.split(','):
23
+ limit = limit.strip()
24
+ # 使用正则表达式匹配数字和单位
25
+ match = re.match(r'^(\d+)/(\w+)$', limit)
26
+ if not match:
27
+ raise ValueError(f"Invalid rate limit format: {limit}")
28
 
29
+ count, unit = match.groups()
30
+ count = int(count)
31
 
32
+ # 转换单位到秒
33
+ if unit not in time_units:
34
+ raise ValueError(f"Unknown time unit: {unit}")
35
 
36
+ seconds = time_units[unit]
37
+ limits.append((count, seconds))
38
 
39
+ return limits
40
 
41
  from collections import defaultdict
42
  class InMemoryRateLimiter:
43
  def __init__(self):
44
  self.requests = defaultdict(list)
45
 
46
+ async def is_rate_limited(self, key: str, limits) -> bool:
47
  now = time()
48
+
49
+ # 检查所有速率限制条件
50
+ for limit, period in limits:
51
+ # 计算在当前时间窗口内的请求数量
52
+ recent_requests = sum(1 for req in self.requests[key] if req > now - period)
53
+ if recent_requests >= limit:
54
+ return True
55
+
56
+ # 清理太旧的请求记录(比最长时间窗口还要老的记录)
57
+ max_period = max(period for _, period in limits)
58
+ self.requests[key] = [req for req in self.requests[key] if req > now - max_period]
59
+
60
+ # 记录新的请求
61
  self.requests[key].append(now)
62
  return False
63
 
 
85
  self.index = 0
86
  self.lock = asyncio.Lock()
87
  self.requests = defaultdict(list) # 用于追踪每个 API key 的请求时间
88
+ self.cooling_until = defaultdict(float)
89
+ self.rate_limits = parse_rate_limit(rate_limit) # 现在返回一个限制条件列表
 
 
90
 
91
  async def set_cooling(self, item: str, cooling_time: int = 60):
92
  """设置某个 item 进入冷却状态
 
99
  async with self.lock:
100
  self.cooling_until[item] = now + cooling_time
101
  # 清空该 item 的请求记录
102
+ # self.requests[item] = []
103
  logger.warning(f"API key {item} 已进入冷却状态,冷却时间 {cooling_time} 秒")
104
 
105
  async def is_rate_limited(self, item) -> bool:
 
108
  if now < self.cooling_until[item]:
109
  return True
110
 
111
+ # 检查所有速率限制条件
112
+ for limit_count, limit_period in self.rate_limits:
113
+ # 计算在当前时间窗口内的请求数量,而不是直接修改请求列表
114
+ recent_requests = sum(1 for req in self.requests[item] if req > now - limit_period)
115
+ if recent_requests >= limit_count:
116
+ logger.warning(f"API key {item} 已达到速率限制 ({limit_count}/{limit_period}秒)")
117
+ return True
118
+
119
+ # 清理太旧的请求记录(比最长时间窗口还要老的记录)
120
+ max_period = max(period for _, period in self.rate_limits)
121
+ self.requests[item] = [req for req in self.requests[item] if req > now - max_period]
122
+
123
+ # 所有限制条件都通过,记录新的请求
124
  self.requests[item].append(now)
125
  return False
126
 
 
134
  if not await self.is_rate_limited(item):
135
  return item
136
 
 
 
137
  # 如果已经检查了所有的 API key 都被限制
138
  if self.index == start_index:
139
+ logger.warning(f"All API keys are rate limited!")
140
+ raise HTTPException(status_code=429, detail="Too many requests")
141
 
142
  async def after_next_current(self):
143
  # 返回当前取出的 API,因为已经调用了 next,所以当前API应该是上一个