Usage

ONNXRuntime

First, install the necessary requirements:

pip install transformers onnxruntime chess

You can then run the model as follows:

from transformers import AutoConfig, AutoTokenizer
import onnxruntime
import numpy as np
from huggingface_hub import hf_hub_download
import chess

# 1. Load config, tokenizer, and model
model_id = "onnx-community/chess-llama-ONNX"
config = AutoConfig.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Fetch the graph file first, then its separate weights file. Only the graph
# path is handed to the inference session; the weights path is not used again.
model_path, _weights_path = (
    hf_hub_download(repo_id=model_id, filename=filename)
    for filename in ("onnx/model_q4.onnx", "onnx/model_q4.onnx_data")
)
decoder_session = onnxruntime.InferenceSession(model_path)

## Set config values
# Sizes needed later to build the empty key/value cache, plus the stop token.
num_hidden_layers = config.num_hidden_layers
num_key_value_heads = config.num_key_value_heads
head_dim = config.hidden_size // config.num_attention_heads
eos_token_id = config.eos_token_id

# Inverse of the tokenizer vocabulary: token id -> token string (a move).
id_to_move = {token_id: token for token, token_id in tokenizer.get_vocab().items()}

# 2. Prepare inputs
# Replay the opening on a board so the position matches the prompt below.
board = chess.Board()
initial_moves = "e2e4 e7e5 g1f3 g8f6 f3e5 b8c6 e5c6 d7c6 b1c3 f8c5 f1c4"
for move in initial_moves.split():
    board.push_uci(move)

# Prompt format: a leading "0-1" tag followed by the space-separated UCI moves
# played so far.
# NOTE(review): "0-1" is the usual notation for a Black win, so the tag
# presumably conditions the model on that result -- confirm against the base
# model's documentation.
text = f"0-1 {initial_moves}"
inputs = tokenizer(text, return_tensors="np")
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]
batch_size = input_ids.shape[0]

# Empty key/value cache: one tensor per layer and per key/value, with a
# zero-length third axis so nothing is cached before the first run.
past_key_values = {
    f"past_key_values.{layer}.{kv}": np.zeros([batch_size, num_key_value_heads, 0, head_dim], dtype=np.float32)
    for layer in range(num_hidden_layers)
    for kv in ("key", "value")
}

# One row of positions 0..seq_len-1 per batch entry. The dtype is explicit
# because NumPy 1.x on Windows defaults np.arange to int32, whereas the
# attention mask in the generation loop is built as int64.
position_ids = np.tile(np.arange(input_ids.shape[-1], dtype=np.int64), (batch_size, 1))

# 3. Generation loop
# Each step runs the model, plays the highest-scoring token that is a legal
# move in the current position, and feeds that token back in together with
# the updated key/value cache.
max_new_tokens = 1024
for _ in range(max_new_tokens):
    ## Run model
    logits, *present_key_values = decoder_session.run(None, dict(
        input_ids=input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        **past_key_values,
    ))

    ## Choose best legal move
    legal_moves = {legal.uci() for legal in board.legal_moves}
    # Token ids for the last position, ordered from highest to lowest logit.
    ranked_ids = np.argsort(-logits[0, -1]).tolist()
    # .get() tolerates ids with no vocabulary entry; the None default avoids a
    # bare StopIteration when no legal move is present in the vocabulary.
    next_id = next(
        (token_id for token_id in ranked_ids if id_to_move.get(token_id) in legal_moves),
        None,
    )
    if next_id is None:
        print("No legal move found in the model vocabulary; stopping.")
        break
    next_uci = id_to_move[next_id]
    board.push_uci(next_uci)

    ## Update values for next generation step
    # Explicit int64 keeps the dtype identical on every platform (NumPy 1.x on
    # Windows would otherwise create an int32 array here).
    input_ids = np.array([[next_id]], dtype=np.int64)
    attention_mask = np.concatenate([attention_mask, np.ones_like(input_ids, dtype=np.int64)], axis=-1)
    position_ids = position_ids[:, -1:] + 1
    # Assumes the outputs after the logits come back in the same order as the
    # cache inputs were inserted into past_key_values.
    for key, present in zip(past_key_values, present_key_values):
        past_key_values[key] = present

    ## (Optional) Streaming
    print(tokenizer.decode(input_ids[0]))
    if (input_ids == eos_token_id).all():
        break

    if board.is_game_over():
        break

print(board)

This generates the following game:

Downloads last month: 4

Model tree for onnx-community/chess-llama-ONNX

Base model

lazy-guy12/chess-llama

Quantized

(1)

this model