Refactor parameter size extraction logic in model_validator.py
Browse files
- model_validator.py +15 -13
model_validator.py
CHANGED
@@ -27,7 +27,8 @@ class OllamaModelValidator:
|
|
27 |
for attempt in range(max_retries):
|
28 |
try:
|
29 |
if "/" in model_name:
|
30 |
-
|
|
|
31 |
else:
|
32 |
path = f"/library/{model_name}"
|
33 |
if version:
|
@@ -71,23 +72,24 @@ class OllamaModelValidator:
|
|
71 |
tags = []
|
72 |
|
73 |
# Extract parameter size from body text
|
|
|
74 |
body_text = soup.get_text()
|
75 |
-
|
76 |
-
if
|
77 |
-
parameter_size =
|
78 |
-
|
79 |
-
|
80 |
-
if not parameter_size:
|
81 |
-
for header in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
|
82 |
-
text = header.get_text()
|
83 |
-
match = re.search(r'(\d+)B(\s+|\b)', text, re.IGNORECASE)
|
84 |
-
if match and not parameter_size:
|
85 |
-
parameter_size = int(match.group(1))
|
86 |
-
break
|
87 |
|
88 |
# Extract tags from links
|
89 |
for link in soup.find_all('a', href=True):
|
90 |
href = link['href']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
if href.startswith('/library/'):
|
92 |
tag_match = re.search(r':([^/]+)', href)
|
93 |
if tag_match:
|
|
|
27 |
for attempt in range(max_retries):
|
28 |
try:
|
29 |
if "/" in model_name:
|
30 |
+
# strip any leading slashes so the path starts with exactly one "/"
|
31 |
+
path = f"/{model_name.lstrip('/')}"
|
32 |
else:
|
33 |
path = f"/library/{model_name}"
|
34 |
if version:
|
|
|
72 |
tags = []
|
73 |
|
74 |
# Extract parameter size from body text
|
75 |
+
""" <div class="flex sm:space-x-2 items-center"><span class="hidden sm:block">parameters</span><span class="text-neutral-400 sm:font-semibold sm:text-neutral-800 sm:text-xs">6.87B</span></div> """
|
76 |
body_text = soup.get_text()
|
77 |
+
parameter_size_match = re.search(r'parameters\s*(\d+\.\d+B)', body_text)
|
78 |
+
if parameter_size_match:
|
79 |
+
parameter_size = float(parameter_size_match.group(1).replace('B', ''))
|
80 |
+
else:
|
81 |
+
logger.warning(f"Could not find parameter size in HTML")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
# Extract tags from links
|
84 |
for link in soup.find_all('a', href=True):
|
85 |
href = link['href']
|
86 |
+
# check if href contains a slash
|
87 |
+
if "/" in href:
|
88 |
+
tag_match = re.search(r':([^/]+)', href)
|
89 |
+
if tag_match:
|
90 |
+
tag = tag_match.group(1)
|
91 |
+
if tag not in tags:
|
92 |
+
tags.append(tag)
|
93 |
if href.startswith('/library/'):
|
94 |
tag_match = re.search(r':([^/]+)', href)
|
95 |
if tag_match:
|