# Scraped page header (not part of the configuration): Hugging Face Spaces, status "Sleeping".
---
# Configuration with four top-level sections: model, data, training, inference.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file; confirm key placement against the code that reads it.

model:
  name: "answerdotai/ModernBERT-base"
  loss_function:
    # Options: "SentimentWeightedLoss", "SentimentFocalLoss"
    name: "SentimentWeightedLoss"
    # Parameters for the chosen loss function.
    # For SentimentFocalLoss, common params are:
    #   gamma_focal: 1.0  # (e.g., 2.0 for standard, -2.0 for reversed, 0 for none)
    #   label_smoothing_epsilon: 0.05  # (e.g., 0.0 to 0.1)
    # For SentimentWeightedLoss, params is expected to be empty.
    # NOTE(review): the values below are the SentimentFocalLoss parameters even
    # though SentimentWeightedLoss is selected; confirm they are ignored.
    params:
      gamma_focal: 1.0
      label_smoothing_epsilon: 0.05
  output_dir: "checkpoints"
  max_length: 880  # alt: 256
  dropout: 0.1

  # --- Pooling Strategy ---
  # Options: "cls", "mean", "cls_mean_concat", "weighted_layer",
  #          "cls_weighted_concat"
  #   "cls"                 uses just the [CLS] token for classification
  #   "mean"                uses mean pooling over final hidden states
  #   "cls_mean_concat"     uses both [CLS] and mean pooling over final hidden
  #                         states
  #   "weighted_layer"      uses a weighted combination of the final hidden
  #                         states from the top N layers
  #   "cls_weighted_concat" uses a weighted combination of the final hidden
  #                         states from the top N layers and the [CLS] token
  pooling_strategy: "mean"  # Current default, change as needed
  # Number of top BERT layers to use for 'weighted_layer' strategies
  # (e.g., 1 to 12 for BERT-base).
  num_weighted_layers: 6

# No specific data paths needed as we use HF datasets at the moment.
# Written as an explicit null; a bare `data:` loads to the same value.
data: null

training:
  epochs: 6
  batch_size: 16
  # Keep the decimal point: YAML 1.1 loaders (e.g. PyYAML) resolve a bare
  # `1e-5` as the string "1e-5", whereas `1.0e-5` is resolved as a float.
  lr: 1.0e-5  # alt: 2.0e-5
  weight_decay_rate: 0.02  # alt: 0.01
  # Path to checkpoint file, or empty to not resume.
  # Example: "checkpoints/mean_epoch2_0.9361acc_0.9355f1.pt"
  resume_from_checkpoint: ""

inference:
  # Default path, can be overridden
  model_path: "checkpoints/mean_epoch5_0.9575acc_0.9575f1.pt"
  # Using the same max_length as training for consistency
  max_length: 880  # alt: 256

# Model identifiers kept for reference:
#   "answerdotai/ModernBERT-base"
#   "answerdotai/ModernBERT-large"