Spaces:

gjoliveira
/

data-ai-llm-eval-app

Running

Guilherme

Deploy to HF Space

4b112ae 27 days ago

850 Bytes

	# metrics/bleurt.py
	"""
	BLEURT metric wrappers using HuggingFace evaluate.
	"""
	import os

	# Disable GPU (and XLA/PTX) so BLEURT runs on CPU and avoids the libdevice error.
	# This assignment deliberately precedes the `evaluate` import below, so the
	# setting is already in place when that import runs.
	os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

	from evaluate import load  # noqa: E402 - must follow the environment setup above

	# Module-level cache for the loaded BLEURT metric; None until the first load.
	_hf_bleurt = None


	def get_hf_bleurt():
	    """Return the shared BLEURT metric, loading it on first use.

	    The loaded metric is stored in the module-level ``_hf_bleurt`` variable,
	    so later calls return the same object without loading it again.
	    """
	    global _hf_bleurt
	    # Fast path: the metric has already been loaded by an earlier call.
	    if _hf_bleurt is not None:
	        return _hf_bleurt
	    # NOTE(review): confirm against the evaluate docs that the `checkpoint=`
	    # keyword (rather than the config name) is what selects BLEURT-20.
	    _hf_bleurt = load("bleurt", module_type="metric", checkpoint="BLEURT-20")
	    return _hf_bleurt


	def compute_bleurt_single(reference: str, prediction: str) -> str:
	    """Compute and format the BLEURT score for a single text pair.

	    Args:
	        reference: The reference text to compare against.
	        prediction: The candidate text to be scored.

	    Returns:
	        ``"Please provide both texts."`` when either argument is missing,
	        empty, or contains only whitespace; otherwise a string of the form
	        ``"BLEURT Score: <value>"`` with the value shown to four decimals.
	    """
	    # Whitespace-only input has nothing to score, so it is treated the same
	    # as an empty field instead of being sent to the metric.
	    for text in (reference, prediction):
	        if not text or not text.strip():
	            return "Please provide both texts."
	    bleurt = get_hf_bleurt()
	    # The metric works on batches, so wrap the single pair in one-item lists
	    # and read back the first (only) score. The texts are passed unmodified.
	    result = bleurt.compute(predictions=[prediction], references=[reference])
	    return f"BLEURT Score: {result['scores'][0]:.4f}"