Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,12 +38,13 @@ def estimate_transformer_stats(batch_size, seq_len, num_layers, hidden_dim, voca
|
|
| 38 |
]
|
| 39 |
|
| 40 |
if show_breakdown:
|
|
|
|
| 41 |
output_lines.append("\nComponent-wise totals across training batch:")
|
| 42 |
-
output_lines.append(f" - QKV Projections: {attn_proj_flops * B * L:.2e}")
|
| 43 |
-
output_lines.append(f" - Attention Scores: {attn_score_flops * B * L:.2e}")
|
| 44 |
-
output_lines.append(f" - Attention Output: {attn_out_proj_flops * B * L:.2e}")
|
| 45 |
-
output_lines.append(f" - FFN: {ffn_flops * B * L:.2e}")
|
| 46 |
-
output_lines.append(f" - Logits: {logit_flops * B * L:.2e}")
|
| 47 |
|
| 48 |
return "\n".join(output_lines)
|
| 49 |
|
|
|
|
| 38 |
]
|
| 39 |
|
| 40 |
if show_breakdown:
|
| 41 |
+
total_all = attn_proj_flops + attn_score_flops + attn_out_proj_flops + ffn_flops + logit_flops
|
| 42 |
output_lines.append("\nComponent-wise totals across training batch:")
|
| 43 |
+
output_lines.append(f" - QKV Projections: {attn_proj_flops * B * L:.2e} ({100 * attn_proj_flops / total_all:.1f}%)")
|
| 44 |
+
output_lines.append(f" - Attention Scores: {attn_score_flops * B * L:.2e} ({100 * attn_score_flops / total_all:.1f}%)")
|
| 45 |
+
output_lines.append(f" - Attention Output: {attn_out_proj_flops * B * L:.2e} ({100 * attn_out_proj_flops / total_all:.1f}%)")
|
| 46 |
+
output_lines.append(f" - FFN: {ffn_flops * B * L:.2e} ({100 * ffn_flops / total_all:.1f}%)")
|
| 47 |
+
output_lines.append(f" - Logits: {logit_flops * B * L:.2e} ({100 * logit_flops / total_all:.1f}%)")
|
| 48 |
|
| 49 |
return "\n".join(output_lines)
|
| 50 |
|