"""Example script for running text-generation inference with a Hugging Face causal language model.

Loads the model and tokenizer from the current directory and samples
completions for a couple of demo prompts.
"""

import warnings

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Suppress the noisy TypedStorage deprecation warning raised when loading older checkpoints.
warnings.filterwarnings("ignore", category=UserWarning, message=".*TypedStorage is deprecated.*")


def main():
    # The model and tokenizer files are expected to sit alongside this script.
    model_path = "."

    print("Loading model and tokenizer...")
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
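
    # Optional safeguard: some tokenizers (e.g. GPT-2-style) ship without a pad token,
    # so fall back to the EOS token if it is missing.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token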

    # Move the model to GPU if one is available and switch to inference mode.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    model.eval()

    print(f"Model loaded on {device}")
    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

    prompts = [
        "Once upon a time",
        "The quick brown fox",
    ]

    for prompt in prompts:
        print(f"\n{'='*60}")
        print(f"Prompt: {prompt}")
        print(f"{'='*60}")

        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # Sample a completion; note that max_length counts the prompt tokens as well.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=100,
                temperature=1.0,
                top_k=50,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"\nGenerated:\n{generated_text}")


if __name__ == "__main__":
    main()