turkish_mmlu_leaderboard

Running

App Files Files

xet

Community

alibayram committed on Jul 6

Commit

60cc323

1 Parent(s): b75f27a

Refactor parameter size extraction logic in model_validator.py

Browse files

Files changed (1) hide show

model_validator.py +15 -13

model_validator.py CHANGED Viewed

@@ -27,7 +27,8 @@ class OllamaModelValidator:
         for attempt in range(max_retries):
             try:
                 if "/" in model_name:
-                    path = model_name
                 else:
                     path = f"/library/{model_name}"
                 if version:
@@ -71,23 +72,24 @@ class OllamaModelValidator:
             tags = []
             # Extract parameter size from body text
             body_text = soup.get_text()
-            param_match = re.search(r'(\d+)B(\s+|\b)', body_text, re.IGNORECASE)
-            if param_match:
-                parameter_size = int(param_match.group(1))
-            # If not found in body text, check headers
-            if not parameter_size:
-                for header in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
-                    text = header.get_text()
-                    match = re.search(r'(\d+)B(\s+|\b)', text, re.IGNORECASE)
-                    if match and not parameter_size:
-                        parameter_size = int(match.group(1))
-                        break
             # Extract tags from links
             for link in soup.find_all('a', href=True):
                 href = link['href']
                 if href.startswith('/library/'):
                     tag_match = re.search(r':([^/]+)', href)
                     if tag_match:

         for attempt in range(max_retries):
             try:
                 if "/" in model_name:
+                    # remove the first /
+                    path = f"/{model_name.lstrip('/')}"
                 else:
                     path = f"/library/{model_name}"
                 if version:
             tags = []
             # Extract parameter size from body text
+            """ <div class="flex sm:space-x-2 items-center"><span class="hidden sm:block">parameters</span><span class="text-neutral-400 sm:font-semibold sm:text-neutral-800 sm:text-xs">6.87B</span></div> """
             body_text = soup.get_text()
+            parameter_size_match = re.search(r'parameters\s*(\d+\.\d+B)', body_text)
+            if parameter_size_match:
+                parameter_size = float(parameter_size_match.group(1).replace('B', ''))
+            else:
+                logger.warning(f"Could not find parameter size in HTML")
             # Extract tags from links
             for link in soup.find_all('a', href=True):
                 href = link['href']
+                # check if href contains a slash
+                if "/" in href:
+                    tag_match = re.search(r':([^/]+)', href)
+                    if tag_match:
+                        tag = tag_match.group(1)
+                        if tag not in tags:
+                            tags.append(tag)
                 if href.startswith('/library/'):
                     tag_match = re.search(r':([^/]+)', href)
                     if tag_match: