Update model architecture: d_ff=1024, new weights from merged7.pt
modeling_chessbot.py  +6 -4  CHANGED
@@ -418,14 +418,16 @@ class AbsolutePositionalEncoder(nn.Module):
     def __init__(self, d_model):
         super(AbsolutePositionalEncoder, self).__init__()
         self.d_model = d_model
-
+        # Don't register as buffers since these are computed values
+        position = torch.arange(64).unsqueeze(1).float()
 
         positional_encoding = torch.zeros(1, 64, d_model)
         _2i = torch.arange(0, d_model, step=2).float()
-        positional_encoding[:, :, 0::2] = torch.sin(
-        positional_encoding[:, :, 1::2] = torch.cos(
+        positional_encoding[:, :, 0::2] = torch.sin(position / (10000 ** (_2i / d_model)))
+        positional_encoding[:, :, 1::2] = torch.cos(position / (10000 ** (_2i / d_model)))
 
-
+        # Register as non-persistent buffer (won't be saved/loaded)
+        self.register_buffer('positional_encoding', positional_encoding, persistent=False)
 
     def forward(self, x):
         batch_size, _, _ = x.size()
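
For reference, a minimal self-contained sketch of the updated module as implied by this hunk. Only the first line of forward() appears in the diff, so the return statement (broadcasting the precomputed table across the batch) is an assumption, not the repository's confirmed implementation.

import torch
import torch.nn as nn


class AbsolutePositionalEncoder(nn.Module):
    def __init__(self, d_model):
        super(AbsolutePositionalEncoder, self).__init__()
        self.d_model = d_model

        # Sinusoidal table for the 64 board squares, computed at init time
        position = torch.arange(64).unsqueeze(1).float()       # (64, 1)
        positional_encoding = torch.zeros(1, 64, d_model)      # (1, 64, d_model)
        _2i = torch.arange(0, d_model, step=2).float()         # even feature indices
        positional_encoding[:, :, 0::2] = torch.sin(position / (10000 ** (_2i / d_model)))
        positional_encoding[:, :, 1::2] = torch.cos(position / (10000 ** (_2i / d_model)))

        # Non-persistent buffer: follows .to(device)/dtype casts but is neither
        # written to nor expected in the state_dict
        self.register_buffer('positional_encoding', positional_encoding, persistent=False)

    def forward(self, x):
        batch_size, _, _ = x.size()
        # Assumed behavior: expand the (1, 64, d_model) table to the batch dimension
        return self.positional_encoding.expand(batch_size, -1, -1)

Registering the table with persistent=False keeps it out of the checkpoint, so weights saved without a positional_encoding entry (such as the merged7.pt weights referenced in the commit message) load without missing- or unexpected-key errors, while the buffer still moves with the module across devices and dtypes.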