Spaces:

gjoliveira
/

data-ai-llm-eval-app

Running

data-ai-llm-eval-app / utils /colors_utils.py

Guilherme

Improved UI ; removed per section scores ; Changed color scale

d906888 19 days ago

8.04 kB

	# colors_utils.py
	"""
	Continuous, lightness-first color utilities for LLM metric tables.

	Changes vs. old version:
	- Replaces 5-bin red→yellow→green with continuous ramps.
	- Uses colorblind-friendlier palettes (teal sequential; purple↔gray↔teal diverging).
	- Renders a soft in-cell "progress pill" instead of painting the whole cell.
	- Keeps API compatible: `get_metric_color(score, metric)` and `df_to_colored_html(df)`.
	"""

	from typing import Optional
	import colorsys
	import math
	import pandas as pd

	# -------------------------------
	# Color mapping helpers
	# -------------------------------

	def _clamp(x: float, lo: float, hi: float) -> float:
	    """Limit ``x`` to the closed interval ``[lo, hi]``.

	    Values below ``lo`` yield ``lo`` and values above ``hi`` yield ``hi``.
	    Anything else is returned unchanged; that includes NaN, which fails
	    both comparisons.
	    """
	    if x < lo:
	        return lo
	    if x > hi:
	        return hi
	    return x

	def _hsl_hex(h_deg: float, s: float, l: float) -> str:
	    """
	    Convert an HSL colour to a lowercase ``#rrggbb`` string.

	    Args:
	        h_deg: Hue in degrees; wrapped modulo 360.
	        s: Saturation; values outside [0, 1] are clamped.
	        l: Lightness; values outside [0, 1] are clamped.

	    Returns:
	        A 7-character hex colour such as ``"#1f8fa3"``.

	    Note: ``colorsys`` works in HLS order, so the call below passes
	    (hue, lightness, saturation).
	    """
	    hue = (h_deg % 360.0) / 360.0
	    # Out-of-range inputs would otherwise yield channels outside 0..255 and
	    # therefore a malformed string (negative sign or three hex digits).
	    sat = min(1.0, max(0.0, s))
	    light = min(1.0, max(0.0, l))
	    rgb = colorsys.hls_to_rgb(hue, light, sat)
	    # Round instead of truncating so 0.9999999 maps to 255, not 254; the
	    # final clamp guards against float error just outside [0, 1].
	    channels = [min(255, max(0, round(c * 255))) for c in rgb]
	    return "#{:02x}{:02x}{:02x}".format(*channels)

	def _seq_color_01(t: float, *, hue: float = 200.0, s: float = 0.55,
	                  l_min: float = 0.18, l_max: float = 0.56,
	                  gamma: float = 0.85) -> str:
	    """
	    Single-hue sequential ramp for scores expected in [0, 1]
	    (e.g., BLEU, ROUGE, BERTScore).

	    The score is clamped to [0, 1], eased with ``gamma``, and mapped
	    linearly onto a lightness between ``l_min`` and ``l_max``. Hue
	    (teal-ish 200 degrees by default) and saturation stay fixed.

	    Returns:
	        The colour as a ``#rrggbb`` string.
	    """
	    position = _clamp(float(t), 0.0, 1.0)
	    eased = position ** gamma
	    lightness = l_min + (l_max - l_min) * eased
	    return _hsl_hex(hue, s, lightness)

	def _div_color_m11(x: float, *, hue_lo: float = 280.0, hue_hi: float = 190.0,
	                   s: float = 0.55, l_mid: float = 0.24, l_span: float = 0.22,
	                   gamma: float = 0.9) -> str:
	    """
	    Diverging ramp for metrics in [-1, 1] (e.g., BLEURT).

	    Negative scores use ``hue_lo`` (purple by default) and darken toward
	    ``l_mid - l_span`` at -1. Zero and positive scores use ``hue_hi``
	    (teal by default) and lighten toward ``l_mid + l_span`` at +1.
	    Saturation is constant; hue depends only on the sign and lightness
	    only on the magnitude, with lightness ``l_mid`` exactly at zero.

	    Args:
	        x: Score; clamped to [-1, 1].
	        hue_lo: Hue in degrees for negative scores.
	        hue_hi: Hue in degrees for zero and positive scores.
	        s: Saturation passed through to the colour conversion.
	        l_mid: Lightness at a score of zero.
	        l_span: Lightness distance from ``l_mid`` to either extreme.
	        gamma: Exponent applied to ``abs(x)`` to ease the ramp.

	    Returns:
	        The colour as a ``#rrggbb`` string.
	    """
	    value = _clamp(float(x), -1.0, 1.0)
	    # Ease the magnitude rather than the 0..1 position. Applying gamma
	    # before splitting at 0.5 moved the crossover away from zero, so
	    # slightly negative scores were drawn with the positive hue.
	    magnitude = abs(value) ** gamma
	    if value < 0.0:
	        # Toward negative: darker purple
	        return _hsl_hex(hue_lo, s, l_mid - l_span * magnitude)
	    # Toward positive: brighter teal
	    return _hsl_hex(hue_hi, s, l_mid + l_span * magnitude)

	def _norm_for_bar(metric: str, score: Optional[float]) -> float:
	    """
	    Normalize a score to [0, 1] for use as a bar width.

	    BLEURT (matched case-insensitively) is treated as living in [-1, 1]
	    and is shifted and scaled; every other metric is assumed to already
	    be roughly in [0, 1]. The result is always clamped to [0, 1].

	    Args:
	        metric: Metric name, e.g. ``"BLEU"`` or ``"BLEURT"``.
	        score: The raw score, or ``None`` when missing.

	    Returns:
	        A float in [0, 1]; ``0.0`` when the score is missing, cannot be
	        converted to a float, or is NaN or infinite.
	    """
	    # Coerce first so the finiteness check also covers numeric types that
	    # are not ``float`` subclasses (e.g. Decimal or numpy.float32 NaN).
	    try:
	        value = float(score)
	    except (TypeError, ValueError, OverflowError):
	        return 0.0
	    if not math.isfinite(value):
	        return 0.0
	    if metric.upper() == "BLEURT":
	        value = (value + 1.0) / 2.0
	    return min(1.0, max(0.0, value))

	def get_metric_color(score: Optional[float], metric: str = "BLEU") -> str:
	    """
	    Public API: map a (metric, score) pair to a continuous colour.

	    - BLEURT (matched case-insensitively) uses the diverging ramp.
	    - Every other metric (BLEU/ROUGE/BERTSCORE) uses the single-hue
	      sequential ramp.

	    Args:
	        score: The raw score, or ``None`` when missing.
	        metric: Metric name; defaults to ``"BLEU"``.

	    Returns:
	        A ``#rrggbb`` string. A neutral deep gray (``#2f3240``) is returned
	        when the score is ``None``, cannot be converted to a float, or is
	        NaN or infinite.
	    """
	    neutral = "#2f3240"
	    # Coerce before checking finiteness: an ``isinstance(score, float)``
	    # guard misses NaN carried by other numeric types, which would then
	    # crash in the hex conversion.
	    try:
	        value = float(score)
	    except (TypeError, ValueError, OverflowError):
	        return neutral
	    if not math.isfinite(value):
	        return neutral
	    if metric.upper() == "BLEURT":
	        return _div_color_m11(value)
	    return _seq_color_01(value)

	def _readable_text_on(bg_hex: str) -> str:
	    """
	    Choose dark (``#0b0f14``) or white text for contrast on a background.

	    Args:
	        bg_hex: Background colour as a ``#rrggbb`` string.

	    Returns:
	        ``"#0b0f14"`` for light backgrounds, ``"#ffffff"`` for dark ones.
	        Input that is not a well-formed ``#rrggbb`` string yields white.
	    """
	    white = "#ffffff"
	    if not isinstance(bg_hex, str) or len(bg_hex) != 7 or bg_hex[0] != "#":
	        return white
	    try:
	        r, g, b = (int(bg_hex[i:i + 2], 16) / 255.0 for i in (1, 3, 5))
	    except ValueError:
	        # default to white on bad input
	        return white
	    # Relative luminance (approximate: plain 2.2 gamma instead of the
	    # piecewise sRGB curve).
	    luminance = 0.2126 * (r ** 2.2) + 0.7152 * (g ** 2.2) + 0.0722 * (b ** 2.2)
	    # Black and white text reach equal WCAG contrast at a background
	    # luminance of about 0.18. A 0.5 cut-off picked white on light-mid
	    # backgrounds where dark text is far more legible.
	    return "#0b0f14" if luminance > 0.179 else white

	# -------------------------------
	# HTML rendering for DataFrames
	# -------------------------------

	def _metric_from_col(col: str) -> Optional[str]:
	    """
	    Infer the metric family from a column label by its prefix.

	    Matching is case-insensitive. Labels that are not strings (for
	    example integer column labels) are converted with ``str`` first
	    instead of raising.

	    Args:
	        col: Column label.

	    Returns:
	        ``"BLEURT"``, ``"BLEU"``, ``"ROUGE"`` or ``"BERTSCORE"``, or
	        ``None`` when no known prefix matches.
	    """
	    name = str(col).lower()
	    # "rouge" already covers "rougel_", so no separate entry is needed.
	    prefix_table = (
	        ("bleurt_", "BLEURT"),
	        ("bleu_", "BLEU"),
	        ("rouge", "ROUGE"),
	        (("bertscore_", "bert_score"), "BERTSCORE"),
	    )
	    for prefixes, metric in prefix_table:
	        if name.startswith(prefixes):
	            return metric
	    return None

	def _fmt_value(val) -> str:
	    """
	    Format a cell value for display.

	    ``None`` becomes an empty string, ``float`` instances are shown with
	    four decimal places, and everything else goes through ``str`` (so
	    identifiers such as the audio id are left as they are).
	    """
	    if val is None:
	        return ""
	    try:
	        text = f"{val:.4f}" if isinstance(val, float) else str(val)
	    except Exception:
	        # Fall back to the plain string form if formatting fails.
	        text = str(val)
	    return text

	def df_to_colored_html(df: pd.DataFrame) -> str:
	    """
	    Render a dark-themed HTML table with soft, continuous "progress pills"
	    behind metric values. The background stays neutral (dark); colour is
	    confined to the small bar inside each metric cell.

	    Columns whose label matches a known metric prefix get a pill whose
	    width and colour follow the score; all other columns are plain cells.
	    A metric cell whose value is missing or not numeric gets an empty,
	    transparent bar. Header labels and cell text are HTML-escaped.

	    Args:
	        df: The frame to render; it is not modified.

	    Returns:
	        The table as a single HTML string.
	    """
	    # Imported locally so the block does not rely on a module-level name.
	    from html import escape

	    # Column order: ensure 'code_audio_transcription' first if present
	    headers = list(df.columns)
	    if "code_audio_transcription" in headers:
	        headers = ["code_audio_transcription"] + [
	            h for h in headers if h != "code_audio_transcription"
	        ]
	    # Resolve each column's metric once instead of once per cell; str()
	    # keeps non-string column labels from breaking the prefix lookup.
	    columns = [(col, _metric_from_col(str(col))) for col in headers]

	    # Table shell
	    parts = [
	        '<div style="font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial;'
	        ' font-size:14px; color:#e6ebf2;">',
	        '<table style="border-collapse:separate; border-spacing:0; width:100%; background:#0b0f14;'
	        ' border:1px solid #202532; border-radius:10px; overflow:hidden;">'
	        "<thead><tr>",
	    ]

	    # Header row
	    for h in headers:
	        parts.append(
	            f'<th style="position:sticky; top:0; background:#121722; color:#c9d4e3;'
	            f' padding:10px 12px; border-bottom:1px solid #1b2030; text-align:center;'
	            f' font-weight:600; white-space:nowrap;">{escape(str(h))}</th>'
	        )
	    parts.append("</tr></thead><tbody>")

	    # Rows
	    row_bg_a = "#0b0f14"
	    row_bg_b = "#0e131b"
	    text_color = "#e6ebf2"  # keep neutral text on dark background
	    # A subtle inner background for the pill track
	    track_bg = "#141a24"
	    for i, (_, row) in enumerate(df.iterrows()):
	        row_bg = row_bg_a if (i % 2 == 0) else row_bg_b
	        parts.append(f"<tr style='background:{row_bg};'>")

	        for col, metric in columns:
	            val = row.get(col, None)
	            # Cell text comes from the data, so escape it before embedding.
	            disp = escape(_fmt_value(val))

	            if metric is None:
	                # Non-metric column: plain cell
	                parts.append(
	                    "<td style='padding:10px 14px; border-bottom:1px solid #121722;"
	                    " text-align:center; color:#e6ebf2; white-space:nowrap;'>"
	                    f"{disp}</td>"
	                )
	                continue

	            # Metric column: compute bar + color. A value that is missing
	            # or not numeric renders as an empty bar instead of aborting
	            # the whole table.
	            score = None
	            if pd.notnull(val):
	                try:
	                    score = float(val)
	                except (TypeError, ValueError):
	                    score = None
	            if score is None:
	                color = "transparent"
	                width01 = 0.0
	            else:
	                color = get_metric_color(score, metric)
	                width01 = _norm_for_bar(metric, score)
	            bar_width_pct = f"{width01 * 100:.1f}%"

	            cell_html = f"""
	<td style="padding:10px 14px; border-bottom:1px solid #121722; text-align:center; white-space:nowrap;">
	<div style="position:relative; height:22px; border-radius:7px; background:{track_bg}; overflow:hidden;">
	<div style="position:absolute; inset:0; width:{bar_width_pct}; background:{color}; opacity:.75;"></div>
	<div style="position:relative; z-index:1; line-height:22px; color:{text_color};">{disp}</div>
	</div>
	</td>
	"""
	            parts.append(cell_html)

	        parts.append("</tr>")

	    parts.append("</tbody></table></div>")
	    return "".join(parts)