{
  "model": {
    "block_size": 1024,
    "vocab_size": 50304,
    "n_layer": 12,
    "n_head": 12,
    "n_embd": 768,
    "n_kv_head": 4,
    "max_knn_memories": 81920
  },
  "training": {
    "max_steps": 19073,
    "log_dir": "log",
    "total_batch_size": 524288,
    "B": 64,
    "T": 1024,
    "max_lr": 0.0006,
    "min_lr": 0.00006,
    "warmup_steps": 715,
    "weight_decay": 0.1,
    "learning_rate": 0.0006
  }
}