alibayram committed on
Commit
60cc323
·
1 Parent(s): b75f27a

Refactor parameter size extraction logic in model_validator.py

Browse files
Files changed (1) hide show
  1. model_validator.py +15 -13
model_validator.py CHANGED
@@ -27,7 +27,8 @@ class OllamaModelValidator:
27
  for attempt in range(max_retries):
28
  try:
29
  if "/" in model_name:
30
- path = model_name
 
31
  else:
32
  path = f"/library/{model_name}"
33
  if version:
@@ -71,23 +72,24 @@ class OllamaModelValidator:
71
  tags = []
72
 
73
  # Extract parameter size from body text
 
74
  body_text = soup.get_text()
75
- param_match = re.search(r'(\d+)B(\s+|\b)', body_text, re.IGNORECASE)
76
- if param_match:
77
- parameter_size = int(param_match.group(1))
78
-
79
- # If not found in body text, check headers
80
- if not parameter_size:
81
- for header in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
82
- text = header.get_text()
83
- match = re.search(r'(\d+)B(\s+|\b)', text, re.IGNORECASE)
84
- if match and not parameter_size:
85
- parameter_size = int(match.group(1))
86
- break
87
 
88
  # Extract tags from links
89
  for link in soup.find_all('a', href=True):
90
  href = link['href']
 
 
 
 
 
 
 
91
  if href.startswith('/library/'):
92
  tag_match = re.search(r':([^/]+)', href)
93
  if tag_match:
 
27
  for attempt in range(max_retries):
28
  try:
29
  if "/" in model_name:
30
+ # remove the first /
31
+ path = f"/{model_name.lstrip('/')}"
32
  else:
33
  path = f"/library/{model_name}"
34
  if version:
 
72
  tags = []
73
 
74
  # Extract parameter size from body text
75
+ """ <div class="flex sm:space-x-2 items-center"><span class="hidden sm:block">parameters</span><span class="text-neutral-400 sm:font-semibold sm:text-neutral-800 sm:text-xs">6.87B</span></div> """
76
  body_text = soup.get_text()
77
+ parameter_size_match = re.search(r'parameters\s*(\d+\.\d+B)', body_text)
78
+ if parameter_size_match:
79
+ parameter_size = float(parameter_size_match.group(1).replace('B', ''))
80
+ else:
81
+ logger.warning(f"Could not find parameter size in HTML")
 
 
 
 
 
 
 
82
 
83
  # Extract tags from links
84
  for link in soup.find_all('a', href=True):
85
  href = link['href']
86
+ # check if href contains a slash
87
+ if "/" in href:
88
+ tag_match = re.search(r':([^/]+)', href)
89
+ if tag_match:
90
+ tag = tag_match.group(1)
91
+ if tag not in tags:
92
+ tags.append(tag)
93
  if href.startswith('/library/'):
94
  tag_match = re.search(r':([^/]+)', href)
95
  if tag_match: