Spaces:

yym68686
/

uni-api

Sleeping

App Files Files Community

yym68686 committed on Nov 4, 2024

Commit

68795da

1 Parent(s): c64a64e

🐛 Bug: Fix the incorrect keyword argument passed to set_cooling (cooldown_period instead of cooling_time) when an API key enters the cooldown state.

Browse files

Files changed (4) hide show

README.md +2 -0
README_CN.md +2 -0
main.py +5 -5
utils.py +48 -27

README.md CHANGED Viewed

@@ -91,6 +91,7 @@ providers:
     tools: true
     preferences:
       API_KEY_RATE_LIMIT: 15/min # Each API Key can request up to 15 times per minute, optional. The default is 999999/min.
       API_KEY_COOLDOWN_PERIOD: 60 # Each API Key will be cooled down for 60 seconds after encountering a 429 error. Optional, the default is 60 seconds.
   - provider: vertex
@@ -142,6 +143,7 @@ api_keys:
       # When SCHEDULING_ALGORITHM is round_robin, use polling load balancing, request the channel of the model used by the user in order.
       AUTO_RETRY: true # Whether to automatically retry, automatically retry the next provider, true for automatic retry, false for no automatic retry, default is true
       RATE_LIMIT: 2/min # Rate limit in the form <count>/<unit>, where <count> is an integer: 2/min is 2 requests per minute, 5/hour is 5 per hour, 10/day is 10 per day, 10/month is 10 per month, 10/year is 10 per year. Optional; the default is 60/min.
       ENABLE_MODERATION: true # Whether to enable message moderation, true for enable, false for disable, default is false, when enabled, it will moderate the user's message, if inappropriate messages are found, an error message will be returned.
   # Channel-level weighted load balancing configuration example

     tools: true
     preferences:
       API_KEY_RATE_LIMIT: 15/min # Each API Key can request up to 15 times per minute, optional. The default is 999999/min.
+      # API_KEY_RATE_LIMIT: 15/min,10/day # Supports multiple frequency constraints
       API_KEY_COOLDOWN_PERIOD: 60 # Each API Key will be cooled down for 60 seconds after encountering a 429 error. Optional, the default is 60 seconds.
   - provider: vertex
       # When SCHEDULING_ALGORITHM is round_robin, use polling load balancing, request the channel of the model used by the user in order.
       AUTO_RETRY: true # Whether to automatically retry, automatically retry the next provider, true for automatic retry, false for no automatic retry, default is true
       RATE_LIMIT: 2/min # Rate limit in the form <count>/<unit>, where <count> is an integer: 2/min is 2 requests per minute, 5/hour is 5 per hour, 10/day is 10 per day, 10/month is 10 per month, 10/year is 10 per year. Optional; the default is 60/min.
+      # RATE_LIMIT: 2/min,10/day # Supports multiple frequency constraints
       ENABLE_MODERATION: true # Whether to enable message moderation, true for enable, false for disable, default is false, when enabled, it will moderate the user's message, if inappropriate messages are found, an error message will be returned.
   # Channel-level weighted load balancing configuration example

README_CN.md CHANGED Viewed

@@ -91,6 +91,7 @@ providers:
     tools: true
     preferences:
       API_KEY_RATE_LIMIT: 15/min # 每个 API Key 每分钟最多请求次数，选填。默认为 999999/min
       API_KEY_COOLDOWN_PERIOD: 60 # 每个 API Key 遭遇 429 错误后的冷却时间，单位为秒，选填。默认为 60 秒
   - provider: vertex
@@ -142,6 +143,7 @@ api_keys:
       # 当 SCHEDULING_ALGORITHM 为 round_robin 时，使用轮询负载均衡，按照顺序请求用户使用的模型的渠道。
       AUTO_RETRY: true # 是否自动重试，自动重试下一个提供商，true 为自动重试，false 为不自动重试，默认为 true
       RATE_LIMIT: 2/min # 支持限流，每分钟最多请求次数，可以设置为整数，如 2/min，2 次每分钟、5/hour，5 次每小时、10/day，10 次每天，10/month，10 次每月，10/year，10 次每年。默认60/min，选填
       ENABLE_MODERATION: true # 是否开启消息道德审查，true 为开启，false 为不开启，默认为 false，当开启后，会对用户的消息进行道德审查，如果发现不当的消息，会返回错误信息。
   # 渠道级加权负载均衡配置示例

     tools: true
     preferences:
       API_KEY_RATE_LIMIT: 15/min # 每个 API Key 每分钟最多请求次数，选填。默认为 999999/min
+      # API_KEY_RATE_LIMIT: 15/min,10/day # 支持多个频率约束条件
       API_KEY_COOLDOWN_PERIOD: 60 # 每个 API Key 遭遇 429 错误后的冷却时间，单位为秒，选填。默认为 60 秒
   - provider: vertex
       # 当 SCHEDULING_ALGORITHM 为 round_robin 时，使用轮询负载均衡，按照顺序请求用户使用的模型的渠道。
       AUTO_RETRY: true # 是否自动重试，自动重试下一个提供商，true 为自动重试，false 为不自动重试，默认为 true
       RATE_LIMIT: 2/min # 支持限流，每分钟最多请求次数，可以设置为整数，如 2/min，2 次每分钟、5/hour，5 次每小时、10/day，10 次每天，10/month，10 次每月，10/year，10 次每年。默认60/min，选填
+      # RATE_LIMIT: 2/min,10/day # 支持多个频率约束条件
       ENABLE_MODERATION: true # 是否开启消息道德审查，true 为开启，false 为不开启，默认为 false，当开启后，会对用户的消息进行道德审查，如果发现不当的消息，会返回错误信息。
   # 渠道级加权负载均衡配置示例

main.py CHANGED Viewed

@@ -1015,7 +1015,7 @@ class ModelRequestHandler:
                 if status_code == 429:
                     current_api = await provider_api_circular_list[channel_id].after_next_current()
-                    await provider_api_circular_list[channel_id].set_cooling(current_api, cooldown_period=safe_get(provider, "preferences", "API_KEY_COOLDOWN_PERIOD", default=60))
                 logger.error(f"Error {status_code} with provider {channel_id}: {error_message}")
                 if is_debug:
@@ -1045,13 +1045,13 @@ async def rate_limit_dependency(request: Request, credentials: HTTPAuthorization
         print("error: Invalid or missing API Key:", token)
         api_index = None
         token = None
-    limit, period = await get_user_rate_limit(app, api_index)
     # 使用 IP 地址和 token（如果有）作为限制键
     client_ip = request.client.host
     rate_limit_key = f"{client_ip}:{token}" if token else client_ip
-    if await rate_limiter.is_rate_limited(rate_limit_key, limit, period):
         raise HTTPException(status_code=429, detail="Too many requests")
 def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
@@ -1315,13 +1315,13 @@ async def get_api_key(request: Request, x_api_key: Optional[str] = Depends(api_k
 async def frontend_rate_limit_dependency(request: Request, x_api_key: str = Depends(get_api_key)):
     token = x_api_key if x_api_key else None
-    limit, period = 100, 60
     # 使用 IP 地址和 token（如果有）作为限制键
     client_ip = request.client.host
     rate_limit_key = f"{client_ip}:{token}" if token else client_ip
-    if await rate_limiter.is_rate_limited(rate_limit_key, limit, period):
         raise HTTPException(status_code=429, detail="Too many requests")
 # def get_backend_router_api_list():

                 if status_code == 429:
                     current_api = await provider_api_circular_list[channel_id].after_next_current()
+                    await provider_api_circular_list[channel_id].set_cooling(current_api, cooling_time=safe_get(provider, "preferences", "API_KEY_COOLDOWN_PERIOD", default=60))
                 logger.error(f"Error {status_code} with provider {channel_id}: {error_message}")
                 if is_debug:
         print("error: Invalid or missing API Key:", token)
         api_index = None
         token = None
     # 使用 IP 地址和 token（如果有）作为限制键
     client_ip = request.client.host
     rate_limit_key = f"{client_ip}:{token}" if token else client_ip
+    limits = await get_user_rate_limit(app, api_index)
+    if await rate_limiter.is_rate_limited(rate_limit_key, limits):
         raise HTTPException(status_code=429, detail="Too many requests")
 def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
 async def frontend_rate_limit_dependency(request: Request, x_api_key: str = Depends(get_api_key)):
     token = x_api_key if x_api_key else None
     # 使用 IP 地址和 token（如果有）作为限制键
     client_ip = request.client.host
     rate_limit_key = f"{client_ip}:{token}" if token else client_ip
+    limits = [(100, 60)]
+    if await rate_limiter.is_rate_limited(rate_limit_key, limits):
         raise HTTPException(status_code=429, detail="Too many requests")
 # def get_backend_router_api_list():

utils.py CHANGED Viewed

@@ -17,32 +17,47 @@ def parse_rate_limit(limit_string):
         'y': 31536000, 'year': 31536000
     }
-    # 使用正则表达式匹配数字和单位
-    match = re.match(r'^(\d+)/(\w+)$', limit_string)
-    if not match:
-        raise ValueError(f"Invalid rate limit format: {limit_string}")
-    count, unit = match.groups()
-    count = int(count)
-    # 转换单位到秒
-    if unit not in time_units:
-        raise ValueError(f"Unknown time unit: {unit}")
-    seconds = time_units[unit]
-    return (count, seconds)
 from collections import defaultdict
 class InMemoryRateLimiter:
     def __init__(self):
         self.requests = defaultdict(list)
-    async def is_rate_limited(self, key: str, limit: int, period: int) -> bool:
         now = time()
-        self.requests[key] = [req for req in self.requests[key] if req > now - period]
-        if len(self.requests[key]) >= limit:
-            return True
         self.requests[key].append(now)
         return False
@@ -70,10 +85,8 @@ class ThreadSafeCircularList:
         self.index = 0
         self.lock = asyncio.Lock()
         self.requests = defaultdict(list)  # 用于追踪每个 API key 的请求时间
-        self.cooling_until = defaultdict(float)  # 记录每个 item 的冷却结束时间
-        count, period = parse_rate_limit(rate_limit)
-        self.rate_limit = count
-        self.period = period
     async def set_cooling(self, item: str, cooling_time: int = 60):
         """设置某个 item 进入冷却状态
@@ -86,7 +99,7 @@ class ThreadSafeCircularList:
         async with self.lock:
             self.cooling_until[item] = now + cooling_time
             # 清空该 item 的请求记录
-            self.requests[item] = []
             logger.warning(f"API key {item} 已进入冷却状态，冷却时间 {cooling_time} 秒")
     async def is_rate_limited(self, item) -> bool:
@@ -95,9 +108,19 @@ class ThreadSafeCircularList:
         if now < self.cooling_until[item]:
             return True
-        self.requests[item] = [req for req in self.requests[item] if req > now - self.period]
-        if len(self.requests[item]) >= self.rate_limit:
-            return True
         self.requests[item].append(now)
         return False
@@ -111,12 +134,10 @@ class ThreadSafeCircularList:
                 if not await self.is_rate_limited(item):
                     return item
-                logger.warning(f"API key {item} 已达到速率限制 ({self.rate_limit}/{self.period}秒)")
                 # 如果已经检查了所有的 API key 都被限制
                 if self.index == start_index:
-                    logger.warning(f"所有 API key 都已达到速率限制 ({self.rate_limit}/{self.period}秒)")
-                    return None
     async def after_next_current(self):
         # 返回当前取出的 API，因为已经调用了 next，所以当前API应该是上一个

         'y': 31536000, 'year': 31536000
     }
+    # 处理多个限制条件
+    limits = []
+    for limit in limit_string.split(','):
+        limit = limit.strip()
+        # 使用正则表达式匹配数字和单位
+        match = re.match(r'^(\d+)/(\w+)$', limit)
+        if not match:
+            raise ValueError(f"Invalid rate limit format: {limit}")
+        count, unit = match.groups()
+        count = int(count)
+        # 转换单位到秒
+        if unit not in time_units:
+            raise ValueError(f"Unknown time unit: {unit}")
+        seconds = time_units[unit]
+        limits.append((count, seconds))
+    return limits
 from collections import defaultdict
 class InMemoryRateLimiter:
     def __init__(self):
         self.requests = defaultdict(list)
+    async def is_rate_limited(self, key: str, limits) -> bool:
         now = time()
+        # 检查所有速率限制条件
+        for limit, period in limits:
+            # 计算在当前时间窗口内的请求数量
+            recent_requests = sum(1 for req in self.requests[key] if req > now - period)
+            if recent_requests >= limit:
+                return True
+        # 清理太旧的请求记录（比最长时间窗口还要老的记录）
+        max_period = max(period for _, period in limits)
+        self.requests[key] = [req for req in self.requests[key] if req > now - max_period]
+        # 记录新的请求
         self.requests[key].append(now)
         return False
         self.index = 0
         self.lock = asyncio.Lock()
         self.requests = defaultdict(list)  # 用于追踪每个 API key 的请求时间
+        self.cooling_until = defaultdict(float)
+        self.rate_limits = parse_rate_limit(rate_limit)  # 现在返回一个限制条件列表
     async def set_cooling(self, item: str, cooling_time: int = 60):
         """设置某个 item 进入冷却状态
         async with self.lock:
             self.cooling_until[item] = now + cooling_time
             # 清空该 item 的请求记录
+            # self.requests[item] = []
             logger.warning(f"API key {item} 已进入冷却状态，冷却时间 {cooling_time} 秒")
     async def is_rate_limited(self, item) -> bool:
         if now < self.cooling_until[item]:
             return True
+        # 检查所有速率限制条件
+        for limit_count, limit_period in self.rate_limits:
+            # 计算在当前时间窗口内的请求数量，而不是直接修改请求列表
+            recent_requests = sum(1 for req in self.requests[item] if req > now - limit_period)
+            if recent_requests >= limit_count:
+                logger.warning(f"API key {item} 已达到速率限制 ({limit_count}/{limit_period}秒)")
+                return True
+        # 清理太旧的请求记录（比最长时间窗口还要老的记录）
+        max_period = max(period for _, period in self.rate_limits)
+        self.requests[item] = [req for req in self.requests[item] if req > now - max_period]
+        # 所有限制条件都通过，记录新的请求
         self.requests[item].append(now)
         return False
                 if not await self.is_rate_limited(item):
                     return item
                 # 如果已经检查了所有的 API key 都被限制
                 if self.index == start_index:
+                    logger.warning(f"All API keys are rate limited!")
+                    raise HTTPException(status_code=429, detail="Too many requests")
     async def after_next_current(self):
         # 返回当前取出的 API，因为已经调用了 next，所以当前API应该是上一个