Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from pydantic import BaseModel, field_validator | |
| from typing import List, Optional, Dict, Any | |
| import numpy as np | |
| import random | |
| import json | |
| import spaces | |
class BaselineRequest(BaseModel):
    """Description of the ML task a random baseline is requested for.

    ``task``, ``dataset_size`` and ``output_format`` are required; every other
    field defaults to ``None``.
    """

    task: str  # "classification", "regression", "generation", "chess_moves"
    dataset_size: int
    output_format: str  # "categorical", "continuous", "sequence"
    classes: Optional[List[str]] = None
    num_classes: Optional[int] = None
    sequence_length: Optional[int] = None
    target_distribution: Optional[Dict[str, float]] = None

    # The decorator is what registers this method with pydantic; without it
    # the check is never executed and non-positive sizes pass validation.
    @field_validator("dataset_size")
    @classmethod
    def size_must_be_positive(cls, v):
        """Reject a ``dataset_size`` that is zero or negative.

        Raises:
            ValueError: if ``v <= 0``.
        """
        if v <= 0:
            raise ValueError('Dataset size must be positive')
        return v
class BaselineResponse(BaseModel):
    """Result of a baseline computation, one field per value shown in the UI."""

    # Name of the task the baseline was generated for.
    task: str
    # Identifier of the sampling strategy, e.g. "uniform_random".
    baseline_type: str
    # Summary numbers; the keys depend on which generator produced the result.
    metrics: Dict[str, Any]
    # A small sample of the generated predictions.
    sample_predictions: List[Any]
    # Human-readable interpretation of the baseline.
    reality_check: str
    # Suggested next step for the user.
    advice: str
def generate_random_classification(request: "BaselineRequest") -> Dict[str, Any]:
    """Generate a random-guess baseline for a classification task.

    Labels are ``request.classes`` when that list is non-empty; otherwise
    ``request.num_classes`` (default 2) synthetic labels named ``class_<i>``
    are used.  If ``request.target_distribution`` is given, predictions are
    drawn with those per-class weights, where a class missing from the mapping
    gets weight ``1 / num_classes``.  Weights that cannot be sampled from
    (negative, NaN, or not summing to a positive finite number) fall back to
    uniform sampling.

    Args:
        request: Object exposing ``classes``, ``num_classes``,
            ``dataset_size`` and ``target_distribution``.

    Returns:
        Dict with keys ``baseline_type``, ``metrics``, ``sample_predictions``
        (at most 10 labels), ``reality_check`` and ``advice``.
    """
    if request.classes:
        class_names = list(request.classes)
        num_classes = len(class_names)
    else:
        num_classes = request.num_classes or 2
        if num_classes <= 0:
            # A negative count would produce an empty label list and make
            # sampling fail, so degrade to a single placeholder class.
            num_classes = 1
            class_names = ["default_class"]
        else:
            class_names = [f"class_{i}" for i in range(num_classes)]

    # Only the first 10 predictions are ever returned, so draw just those
    # instead of materialising `dataset_size` labels.
    sample_count = max(0, min(10, request.dataset_size))

    weights = None
    if request.target_distribution:
        uniform_weight = 1 / num_classes
        candidate = [
            request.target_distribution.get(name, uniform_weight)
            for name in class_names
        ]
        # Validate explicitly: random.choices only rejects a non-positive
        # total on some Python versions and never rejects a negative weight.
        if all(w >= 0 for w in candidate) and 0 < sum(candidate) < float("inf"):
            weights = candidate

    # weights=None makes random.choices sample uniformly.
    predictions = random.choices(class_names, weights=weights, k=sample_count)

    # Expected accuracy of *uniform* guessing; also reported for the weighted
    # case as a simplification.
    expected_accuracy = 1 / num_classes

    return {
        "baseline_type": "weighted_random" if weights is not None else "uniform_random",
        "metrics": {
            "expected_accuracy": round(expected_accuracy, 4),
            "expected_f1": round(expected_accuracy, 4),  # Simplified for uniform case
            "num_classes": num_classes,
        },
        "sample_predictions": predictions,
        "reality_check": f"Random guessing should get ~{expected_accuracy:.1%} accuracy. If your model doesn't beat this by a significant margin, it's probably garbage.",
        "advice": "Train a simple baseline (logistic regression, random forest) before going neural. Save yourself the GPU bills.",
    }
def generate_random_regression(request: "BaselineRequest") -> Dict[str, Any]:
    """Generate a random baseline for a regression task.

    Draws ``request.dataset_size`` values from a standard normal distribution
    (mean 0, standard deviation 1) and summarises them.

    Args:
        request: Object exposing ``dataset_size``.

    Returns:
        Dict with keys ``baseline_type``, ``metrics`` (``mean``, ``std`` and
        ``range`` of the drawn values, rounded to 4 decimals),
        ``sample_predictions`` (the first 10 values at most),
        ``reality_check`` and ``advice``.

    Raises:
        ValueError: if ``request.dataset_size`` is not positive.
    """
    if request.dataset_size <= 0:
        # The min/max reductions below fail on an empty array and a negative
        # size is rejected by NumPy; fail early with a clear message instead.
        raise ValueError('Dataset size must be positive')

    # Generate random continuous values
    predictions = np.random.normal(0, 1, request.dataset_size)
    lowest = round(float(np.min(predictions)), 4)
    highest = round(float(np.max(predictions)), 4)

    return {
        "baseline_type": "gaussian_random",
        "metrics": {
            "mean": round(float(np.mean(predictions)), 4),
            "std": round(float(np.std(predictions)), 4),
            "range": [lowest, highest],
        },
        "sample_predictions": predictions[:10].tolist(),
        "reality_check": "Random regression predictions have infinite MSE against any reasonable target. If your model's MSE isn't dramatically better, you're wasting compute.",
        "advice": "Start with mean prediction baseline, then linear regression. Neural networks are overkill for most regression problems.",
    }
def generate_random_sequence(request: "BaselineRequest") -> Dict[str, Any]:
    """Generate a random sequence baseline (like text/chess moves).

    Tokens are drawn uniformly from ``request.classes`` when that list is
    non-empty; otherwise integer ids in ``[0, 1000)`` are used.  Each sequence
    has ``request.sequence_length`` tokens (default 50) and at most 10
    sequences are produced.

    Args:
        request: Object exposing ``classes``, ``sequence_length`` and
            ``dataset_size``.

    Returns:
        Dict with keys ``baseline_type``, ``metrics`` (``perplexity``,
        ``sequence_length``, ``vocab_size``), ``sample_predictions`` (list of
        token lists), ``reality_check`` and ``advice``.
    """
    vocabulary = request.classes
    # A non-empty vocabulary always has length >= 1 and the fallback is 1000,
    # so vocab_size can never be zero here.
    vocab_size = len(vocabulary) if vocabulary else 1000
    seq_len = request.sequence_length or 50
    sample_count = min(10, request.dataset_size)

    if vocabulary:
        sequences = [
            [random.choice(vocabulary) for _ in range(seq_len)]
            for _ in range(sample_count)
        ]
    else:
        sequences = [
            [random.randint(0, vocab_size - 1) for _ in range(seq_len)]
            for _ in range(sample_count)
        ]

    perplexity = vocab_size  # Worst case perplexity for uniform random

    return {
        "baseline_type": "uniform_random_sequence",
        "metrics": {
            "perplexity": perplexity,
            "sequence_length": seq_len,
            "vocab_size": vocab_size,
        },
        "sample_predictions": sequences,
        "reality_check": f"Random sequences have perplexity ~{perplexity}. If your language model doesn't crush this, it learned nothing.",
        "advice": "Even a bigram model should destroy random baselines. If it doesn't, check your data preprocessing.",
    }
# Special handlers (from original app)
# Each preset builds a fresh BaselineRequest with fixed settings, copying only
# the fields it needs from the incoming request, and runs the matching generator.
def _chess_moves_baseline(req):
    """Single-move sequence baseline over a fixed list of opening moves."""
    preset = BaselineRequest(
        task="chess_moves",
        dataset_size=req.dataset_size,
        output_format="sequence",
        classes=["e4", "d4", "Nf3", "c4", "g3", "Nc3", "f4", "e3"],  # Common opening moves
        sequence_length=1,
    )
    return generate_random_sequence(preset)


def _sentiment_baseline(req):
    """Three-class (positive/negative/neutral) classification baseline."""
    preset = BaselineRequest(
        task="sentiment",
        dataset_size=req.dataset_size,
        output_format="categorical",
        classes=["positive", "negative", "neutral"],
    )
    return generate_random_classification(preset)


def _image_classification_baseline(req):
    """Classification baseline using the request's class count, else 1000."""
    preset = BaselineRequest(
        task="image_classification",
        dataset_size=req.dataset_size,
        output_format="categorical",
        num_classes=req.num_classes or 1000,  # ImageNet default
    )
    return generate_random_classification(preset)


TASK_HANDLERS = {
    "chess_moves": _chess_moves_baseline,
    "sentiment": _sentiment_baseline,
    "image_classification": _image_classification_baseline,
}
# Roast logic (from original app)
# Pool of one-liners served by the "Roast My Model" tab.
ROASTS = [
    "Your neural network is just an expensive random number generator.",
    "I bet your model's accuracy is 50.1% and you're calling it 'promising results'.",
    "Random guessing doesn't need 8 GPUs and a PhD to run.",
    "Your transformer probably learned to predict the dataset bias, not the actual task.",
    "If random baseline beats your model, maybe try a different career?",
    "Your model: 47% accuracy. Random baseline: 50%. Congratulations, you made it worse.",
]


def get_roast():
    """Return one entry of ``ROASTS`` chosen at random."""
    picked = random.choice(ROASTS)
    return picked
def handle_classification(task_choice, dataset_size, num_classes, classes_str, dist_str):
    """Gradio handler for the classification tab.

    Args:
        task_choice: Selected task label from the radio group.
        dataset_size: Number of examples; converted with ``int()``.
        num_classes: Class count used when no class names are given.
        classes_str: Comma-separated class names, or empty.
        dist_str: JSON object mapping class name to sampling weight, or empty.

    Returns:
        Tuple ``(task, baseline_type, metrics, sample_predictions,
        reality_check, advice)``.

    Raises:
        gr.Error: on invalid input or any failure while building the baseline.
    """
    try:
        # 1. Parse Inputs
        task_name = task_choice
        if task_choice == "image_classification (1000 class)":
            task_name = "image_classification"
            num_classes = 1000  # Override

        # Drop blank entries so "cat,,dog" or a trailing comma does not
        # create a class with an empty name; nothing left means "no classes".
        classes_list = None
        if classes_str:
            stripped = (part.strip() for part in classes_str.split(','))
            classes_list = [name for name in stripped if name] or None

        target_dist = None
        if dist_str and dist_str.strip():
            try:
                target_dist = json.loads(dist_str)
            except json.JSONDecodeError as e:
                raise gr.Error(f"Invalid JSON in target distribution: {e}") from e
            if not isinstance(target_dist, dict):
                raise gr.Error("JSON must be an object/dictionary.")

        # 2. Build Request
        request = BaselineRequest(
            task=task_name,
            dataset_size=int(dataset_size),
            output_format="categorical",
            classes=classes_list,
            num_classes=int(num_classes) if num_classes else None,
            target_distribution=target_dist,
        )

        # 3. Get Result
        if request.task in TASK_HANDLERS:
            result = TASK_HANDLERS[request.task](request)
        else:  # "custom"
            result = generate_random_classification(request)

        # 4. Format Output
        response = BaselineResponse(task=request.task, **result)
        return (
            response.task,
            response.baseline_type,
            response.metrics,
            response.sample_predictions,
            response.reality_check,
            response.advice,
        )
    except gr.Error:
        # Already a user-facing error; re-wrapping it would stringify the
        # existing error a second time.
        raise
    except Exception as e:
        raise gr.Error(str(e)) from e
def handle_regression(dataset_size):
    """Gradio handler for the regression tab.

    Builds a regression request from ``dataset_size``, runs the regression
    generator and returns the response fields as a tuple in the order
    ``(task, baseline_type, metrics, sample_predictions, reality_check,
    advice)``.  Any failure is re-raised as ``gr.Error``.
    """
    try:
        size = int(dataset_size)
        request = BaselineRequest(
            task="regression",
            dataset_size=size,
            output_format="continuous",
        )
        payload = generate_random_regression(request)
        response = BaselineResponse(task=request.task, **payload)
        fields = (
            response.task,
            response.baseline_type,
            response.metrics,
            response.sample_predictions,
            response.reality_check,
            response.advice,
        )
        return fields
    except Exception as e:
        raise gr.Error(str(e))
def handle_sequence(task_choice, dataset_size, seq_len, vocab_str):
    """Gradio handler for the generation/sequence tab.

    Args:
        task_choice: Selected task name from the radio group.
        dataset_size: Number of examples; converted with ``int()``.
        seq_len: Tokens per sequence; 50 is used when empty or zero.
        vocab_str: Comma-separated vocabulary, or empty.

    Returns:
        Tuple ``(task, baseline_type, metrics, sample_predictions,
        reality_check, advice)``.

    Raises:
        gr.Error: on invalid input or any failure while building the baseline.
    """
    try:
        # Drop blank entries so "a,,b" or a trailing comma does not add an
        # empty-string token; nothing left means "no vocabulary".
        vocab_list = None
        if vocab_str:
            stripped = (part.strip() for part in vocab_str.split(','))
            vocab_list = [token for token in stripped if token] or None

        request = BaselineRequest(
            task=task_choice,
            dataset_size=int(dataset_size),
            output_format="sequence",
            classes=vocab_list,
            sequence_length=int(seq_len) if seq_len else 50,
        )

        if request.task in TASK_HANDLERS:
            result = TASK_HANDLERS[request.task](request)
        else:  # "custom"
            result = generate_random_sequence(request)

        response = BaselineResponse(task=request.task, **result)
        return (
            response.task,
            response.baseline_type,
            response.metrics,
            response.sample_predictions,
            response.reality_check,
            response.advice,
        )
    except gr.Error:
        # Already a user-facing error; pass it through unchanged.
        raise
    except Exception as e:
        raise gr.Error(str(e)) from e
with gr.Blocks(theme=gr.themes.Soft(), title="Random Baseline API") as demo:
    # Page header
    gr.Markdown(
        "\n"
        "# Random Baseline API\n"
        "**The most honest ML API in existence. Keeping researchers humble since 2025.**\n"
        "Get a random baseline for your ML task. Because sometimes you need to know how bad 'bad' really is.\n"
    )

    with gr.Tabs():
        # --- Classification Tab ---
        with gr.TabItem("Classification"):
            with gr.Row():
                # Left column: inputs
                with gr.Column(scale=1):
                    task_cls = gr.Radio(
                        ["sentiment", "image_classification (1000 class)", "custom"],
                        label="Task",
                        value="sentiment",
                    )
                    dataset_size_cls = gr.Number(label="Dataset Size", value=1000, minimum=1, step=1)
                    # Custom options (hidden until the "custom" task is selected)
                    num_classes_cls = gr.Number(
                        label="Number of Classes (if classes not specified)",
                        value=10,
                        visible=False,
                        minimum=1,
                        step=1,
                    )
                    classes_cls = gr.Textbox(
                        label="Comma-separated classes (e.g., cat,dog,fish)",
                        visible=False,
                        placeholder="cat, dog, fish",
                    )
                    dist_cls = gr.Textbox(
                        label='JSON target distribution (e.g., {"cat": 0.8})',
                        visible=False,
                        placeholder='{"cat": 0.8, "dog": 0.1, "fish": 0.1}',
                    )
                    btn_cls = gr.Button("Get Classification Baseline", variant="primary")
                # Right column: results
                with gr.Column(scale=2):
                    out_task_cls = gr.Textbox(label="Task", interactive=False)
                    out_btype_cls = gr.Textbox(label="Baseline Type", interactive=False)
                    out_metrics_cls = gr.JSON(label="Metrics")
                    out_preds_cls = gr.JSON(label="Sample Predictions")
                    out_reality_cls = gr.Textbox(label="Reality Check", lines=3, interactive=False)
                    out_advice_cls = gr.Textbox(label="Advice", lines=3, interactive=False)

        # --- Regression Tab ---
        with gr.TabItem("Regression"):
            with gr.Row():
                with gr.Column(scale=1):
                    dataset_size_reg = gr.Number(label="Dataset Size", value=1000, minimum=1, step=1)
                    btn_reg = gr.Button("Get Regression Baseline", variant="primary")
                with gr.Column(scale=2):
                    out_task_reg = gr.Textbox(label="Task", interactive=False)
                    out_btype_reg = gr.Textbox(label="Baseline Type", interactive=False)
                    out_metrics_reg = gr.JSON(label="Metrics")
                    out_preds_reg = gr.JSON(label="Sample Predictions")
                    out_reality_reg = gr.Textbox(label="Reality Check", lines=3, interactive=False)
                    out_advice_reg = gr.Textbox(label="Advice", lines=3, interactive=False)

        # --- Generation/Sequence Tab ---
        with gr.TabItem("Generation / Sequence"):
            with gr.Row():
                with gr.Column(scale=1):
                    task_seq = gr.Radio(
                        ["chess_moves", "custom"],
                        label="Task",
                        value="chess_moves",
                    )
                    dataset_size_seq = gr.Number(label="Dataset Size", value=1000, minimum=1, step=1)
                    # Custom options (hidden until the "custom" task is selected)
                    seq_len_seq = gr.Number(label="Sequence Length", value=50, visible=False, minimum=1, step=1)
                    vocab_seq = gr.Textbox(
                        label="Comma-separated vocabulary (e.g., a,b,c)",
                        visible=False,
                        placeholder="a, b, c, <pad>, <eos>",
                    )
                    btn_seq = gr.Button("Get Sequence Baseline", variant="primary")
                with gr.Column(scale=2):
                    out_task_seq = gr.Textbox(label="Task", interactive=False)
                    out_btype_seq = gr.Textbox(label="Baseline Type", interactive=False)
                    out_metrics_seq = gr.JSON(label="Metrics")
                    out_preds_seq = gr.JSON(label="Sample Predictions")
                    out_reality_seq = gr.Textbox(label="Reality Check", lines=3, interactive=False)
                    out_advice_seq = gr.Textbox(label="Advice", lines=3, interactive=False)

        # --- Roast Tab ---
        with gr.TabItem("Roast My Model"):
            gr.Markdown("Feeling too good about your model's 98% accuracy on a balanced dataset? Let us fix that.")
            btn_roast = gr.Button("Roast Me!", variant="stop")
            out_roast = gr.Textbox(label="Your Roast", lines=3, interactive=False)

    # --- UI Listeners ---
    def update_cls_ui(task):
        """Show the custom classification inputs only for the "custom" task."""
        is_custom = task == "custom"
        if is_custom:
            # Re-show the field and reset it to its default of 10
            num_classes_update = gr.update(visible=True, value=10)
        elif task == "image_classification (1000 class)":
            # Keep it hidden but set the value to the fixed class count
            num_classes_update = gr.update(visible=False, value=1000)
        else:  # sentiment
            num_classes_update = gr.update(visible=False)
        return {
            num_classes_cls: num_classes_update,
            classes_cls: gr.update(visible=is_custom),
            dist_cls: gr.update(visible=is_custom),
        }

    task_cls.change(
        fn=update_cls_ui,
        inputs=task_cls,
        outputs=[num_classes_cls, classes_cls, dist_cls],
    )

    def update_seq_ui(task):
        """Show the custom sequence inputs only for the "custom" task."""
        is_custom = task == "custom"  # anything else is chess_moves
        return {
            seq_len_seq: gr.update(visible=is_custom),
            vocab_seq: gr.update(visible=is_custom),
        }

    task_seq.change(
        fn=update_seq_ui,
        inputs=task_seq,
        outputs=[seq_len_seq, vocab_seq],
    )

    # Button click handlers: each output list matches the order of the tuple
    # returned by the corresponding handler.
    cls_outputs = [
        out_task_cls,
        out_btype_cls,
        out_metrics_cls,
        out_preds_cls,
        out_reality_cls,
        out_advice_cls,
    ]
    btn_cls.click(
        fn=handle_classification,
        inputs=[task_cls, dataset_size_cls, num_classes_cls, classes_cls, dist_cls],
        outputs=cls_outputs,
    )

    reg_outputs = [
        out_task_reg,
        out_btype_reg,
        out_metrics_reg,
        out_preds_reg,
        out_reality_reg,
        out_advice_reg,
    ]
    btn_reg.click(
        fn=handle_regression,
        inputs=[dataset_size_reg],
        outputs=reg_outputs,
    )

    seq_outputs = [
        out_task_seq,
        out_btype_seq,
        out_metrics_seq,
        out_preds_seq,
        out_reality_seq,
        out_advice_seq,
    ]
    btn_seq.click(
        fn=handle_sequence,
        inputs=[task_seq, dataset_size_seq, seq_len_seq, vocab_seq],
        outputs=seq_outputs,
    )

    btn_roast.click(fn=get_roast, inputs=None, outputs=out_roast)
if __name__ == "__main__":
    # Print a startup hint, then start the Gradio app.
    startup_notice = "Starting Gradio app... Access it at http://127.0.0.1:7860 (or the URL shown below)"
    print(startup_notice)
    demo.launch()