import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import traceback
import logging
import torch

# --- logging setup: emit ERROR and above so failures appear in HF logs ---
logging.basicConfig(level=logging.ERROR)

# --- Model paths ---
# One checkpoint per test item, in order: classify_csv pairs model_names[0]
# with column "test-1", model_names[1] with "test-2", and so on.
# NOTE(review): the checkpoint suffixes jump from 4 to 6 (there is no ToM5
# entry) - confirm this matches the published checkpoints.
_TOM_CHECKPOINT_SUFFIXES = (0, 1, 2, 3, 4, 6, 7, 8, 9)
model_names = [
    f"ActiveYixiao/roberta-large-ToM{suffix}"
    for suffix in _TOM_CHECKPOINT_SUFFIXES
]

def classify_csv(file, progress=gr.Progress()):
    """Score every response column of an uploaded CSV with its paired model.

    The CSV must contain one ``test-N`` column per entry of ``model_names``
    (``test-1`` is scored by ``model_names[0]``, and so on). Each response is
    classified on its own and mapped to ``1`` when the predicted label is one
    of ``LABEL_1`` / ``1`` / ``positive`` / ``POSITIVE``, otherwise to ``0``.
    Missing responses are passed to the model as an empty string.

    Args:
        file: The upload handed over by ``gr.File``: either a filesystem path
            or an object exposing that path as ``.name``.
        progress: Gradio progress tracker; called with a fraction between 0
            and 1 and a status message.

    Returns:
        A ``(csv_path, message)`` tuple. On success ``csv_path`` points to the
        scored CSV (first input column as ``ID``, then each ``test-N``
        followed by its ``score-N``) and ``message`` is ``None`` - or, when a
        model or individual rows could not be scored and were filled with
        ``0``, a warning text naming them. On failure ``csv_path`` is ``None``
        and ``message`` holds the error text and traceback.
    """
    # Function-scope imports: only this function needs them.
    import os
    import tempfile

    positive_labels = {"LABEL_1", "1", "positive", "POSITIVE"}

    try:
        if file is None:
            raise ValueError("No file uploaded. Please upload a CSV file.")

        # Always read by path so the latin1 retry starts from byte 0 instead
        # of wherever a partially consumed file handle stopped.
        csv_path = getattr(file, "name", file)
        try:
            df = pd.read_csv(csv_path, encoding="utf-8")
        except UnicodeDecodeError:
            df = pd.read_csv(csv_path, encoding="latin1")

        # Verify required columns exist (one per model, kept in lockstep
        # with model_names rather than hard-coded).
        required_columns = [f"test-{k}" for k in range(1, len(model_names) + 1)]
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            raise ValueError(f"Missing columns: {missing_columns} in input CSV")

        # Drop rows whose cells are all missing or whitespace-only. NaN is
        # tested explicitly because astype(str) would turn it into the
        # non-blank text "nan" and keep the row.
        if not df.empty:
            blank_cells = df.apply(
                lambda col: col.isna() | (col.astype(str).str.strip() == "")
            )
            df = df[~blank_cells.all(axis=1)].reset_index(drop=True)

        if df.empty:
            # Fail before loading any model when there is nothing to score.
            raise ValueError("The uploaded CSV contains no non-empty rows")

        # Prepare output data
        output_data = {"ID": df.iloc[:, 0]}  # First column as ID

        n_models = len(model_names)
        n_rows = len(df)
        total_steps = n_models * n_rows
        current_step = 0
        notices = []  # user-facing warnings about zero-filled scores
        device = 0 if torch.cuda.is_available() else -1

        for i, model_name in enumerate(model_names, start=1):
            test_col = f"test-{i}"
            score_col = f"score-{i}"

            output_data[test_col] = df[test_col]

            progress((i - 1) / n_models, f"Loading model {i}/{n_models}")

            # Bind the names up front so the cleanup below never has to
            # probe locals() for objects that failed to load.
            clf = model = tokenizer = None
            try:
                # Load model and tokenizer
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                model = AutoModelForSequenceClassification.from_pretrained(model_name)
                clf = pipeline(
                    "text-classification",
                    model=model,
                    tokenizer=tokenizer,
                    return_all_scores=False,
                    device=device,
                )

                # Replace missing values *before* stringifying; doing it
                # afterwards would send the literal text "nan" to the model.
                texts = ["" if pd.isna(value) else str(value) for value in df[test_col]]

                preds = []
                failed_rows = []
                for j, text in enumerate(texts, start=1):
                    current_step += 1
                    progress(
                        current_step / total_steps,
                        f"Processing {test_col}, row {j}/{n_rows}",
                    )

                    try:
                        label = clf(text)[0]["label"]
                        preds.append(1 if label in positive_labels else 0)
                    except Exception as e:
                        # Best effort: keep going, but remember the row so
                        # the user can tell this 0 from a real score.
                        logging.error(f"Error processing row {j} in {test_col}: {str(e)}")
                        preds.append(0)
                        failed_rows.append(j)

                output_data[score_col] = preds

                if failed_rows:
                    shown = ", ".join(str(row) for row in failed_rows[:10])
                    if len(failed_rows) > 10:
                        shown += ", ..."
                    notices.append(
                        f"{test_col}: {len(failed_rows)} row(s) could not be scored "
                        f"and were set to 0 (rows {shown})"
                    )

            except Exception as e:
                logging.error(f"Error loading model {model_name}: {str(e)}")
                # Fill with zeros if model fails
                output_data[score_col] = [0] * n_rows
                notices.append(
                    f"{score_col}: model {model_name} failed, "
                    f"all scores were set to 0 ({e})"
                )

            finally:
                # Drop the references so the weights can be reclaimed before
                # the next model is loaded.
                clf = model = tokenizer = None
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

        out_df = pd.DataFrame(output_data)
        # Write into a fresh temp directory per request: a fixed path in the
        # working directory would let concurrent requests overwrite each
        # other's results. The download keeps its original file name.
        out_dir = tempfile.mkdtemp(prefix="classified_")
        out_path = os.path.join(out_dir, "classified_output.csv")
        out_df.to_csv(out_path, index=False)

        progress(1, "Done! File ready for download.")
        warning_text = "Warning:\n" + "\n".join(notices) if notices else None
        return out_path, warning_text

    except Exception as e:
        # Top-level boundary: report the failure to the UI instead of raising.
        err_msg = f"❌ Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
        logging.error(err_msg)
        return None, err_msg

# --- Gradio UI ---
# Markdown/HTML shown above the upload widget: instructions plus an example
# of the expected CSV layout.
_DESCRIPTION_MD = "".join((
    "### 📋 Thank-you for accessing the online marking system for the Birmingham Advanced Mindreading Stories. ",
    "Please follow the instructions below to prepare your dataset and then click ‘Submit’ to have your participant responses marked by our fine-tuned language models.\n\n",
    "Upload Instructions\n",
    "- The file must be a **CSV** with exactly **10 columns**\n",
    "- The **first column** must contain participant IDs\n",
    "- The **next 9 columns** must be named `test-1`, `test-2`, ..., `test-9`\n\n",
    "**Example format:**\n",
    "<pre style='background-color:#f8f9fa;border:1px solid #ccc;border-radius:8px;padding:10px;font-family:monospace;overflow-x:auto;white-space:pre;'>",
    "ID,test-1,test-2,test-3,test-4,test-5,test-6,test-7,test-8,test-9\n",
    "1,response,response,response,response,response,response,response,response,response\n",
    "2,response,response,response,response,response,response,response,response,response\n",
    "3,response,response,response,response,response,response,response,response,response\n",
    "...\n",
    "N,response,response,response,response,response,response,response,response,response",
    "</pre>",
))

# HTML passed as the interface's `article`: output format note and citation.
# The leading whitespace is part of the string and is kept as-is.
_ARTICLE_HTML = """
        <p>The output CSV will include the original test responses plus <code>score-1</code> ... <code>score-9</code> columns.</p>
        <hr>
        <p><strong>Reference:</strong></p>
        <pre style="background-color:#f8f9fa;border:1px solid #ccc;border-radius:8px;padding:10px;font-family:monospace;overflow-x:auto;white-space:pre;">
        @inproceedings{wang2025automatic,
        title={Automatic Scoring of an Open-Response Measure of Advanced Mind-Reading Using Large Language Models},
          author={Wang, Yixiao and Dsouza, Russel and Lee, Robert and Apperly, Ian and Devine, Rory T and van der Kleij, Sanne W and Lee, Mark},
          booktitle={The 10th Workshop on Computational Linguistics and Clinical Psychology},
          pages={79},
          year={2025}
        }
        </pre>
        """

# Wire classify_csv to a single CSV upload; its two return values feed the
# download widget and the message box, in that order.
demo = gr.Interface(
    fn=classify_csv,
    inputs=gr.File(file_types=[".csv"], label="Upload CSV"),
    outputs=[
        gr.File(label="Download Scored CSV"),
        gr.Textbox(label="Error Message", lines=5),
    ],
    title="Mind-Reading Response Classification System (9 Test Items)",
    description=_DESCRIPTION_MD,
    article=_ARTICLE_HTML,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)