Uploading biomarker-agnostic model for Neurodiagnoses

Browse files

Files changed (2) hide show

data/sample_data.csv +6 -0
run_pipeline.py +76 -0

data/sample_data.csv ADDED Viewed

	@@ -0,0 +1,6 @@

+patient_id,age,sex,MMSE,plasma_biomarker_1,plasma_biomarker_2,MRI_region_1,MRI_region_2,clinical_score
+1,68,M,27,0.45,0.30,1200,1300,5
+2,73,F,25,0.50,0.35,1150,1280,7
+3,70,M,28,0.40,0.28,1220,1350,4
+4,65,F,30,0.38,0.25,1300,1400,3
+5,72,M,26,0.55,0.40,1180,1320,6

run_pipeline.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import pandas as pd
+def load_and_clean_data(file_path):
+    # Read the CSV file
+    data = pd.read_csv(file_path)
+    # Simple cleaning: For example, check for missing values
+    data = data.dropna()
+    return data
+def select_features(data):
+    # Detectar dinámicamente qué columnas pertenecen a cada categoría
+    feature_blocks = {
+        "clinical": [],
+        "plasma": [],
+        "MRI": [],
+        "PET": [],
+        "functional": []
+    }
+    for column in data.columns:
+        col_name = column.lower()
+        if "plasma" in col_name:
+            feature_blocks["plasma"].append(column)
+        elif "mri" in col_name:
+            feature_blocks["MRI"].append(column)
+        elif "pet" in col_name:
+            feature_blocks["PET"].append(column)
+        elif "clinical" in col_name or column in ["age", "sex", "education"]:
+            feature_blocks["clinical"].append(column)
+        elif "fmri" in col_name or "eeg" in col_name:
+            feature_blocks["functional"].append(column)
+    # Seleccionar automáticamente todas las variables detectadas como X (entrada del modelo)
+    X = data[feature_blocks["plasma"] + feature_blocks["MRI"] + feature_blocks["PET"] + feature_blocks["functional"]]
+    # Usamos la última columna como variable objetivo (y), asegurándonos de que no sea parte de X
+    y = data.iloc[:, -1]
+    return X, y
+def train_model(X, y):
+    from sklearn.linear_model import LinearRegression
+    model = LinearRegression()
+    model.fit(X, y)
+    return model
+def evaluate_model(model, X, y):
+    from sklearn.metrics import mean_squared_error, r2_score
+    predictions = model.predict(X)
+    mse = mean_squared_error(y, predictions)
+    r2 = r2_score(y, predictions)
+    print("Mean Squared Error:", mse)
+    print("R-squared:", r2)
+def run_pipeline():
+    # Update the file path to where you saved sample_data.csv
+    file_path = "data/sample_data.csv"
+    # Step 1: Load and clean data
+    data = load_and_clean_data(file_path)
+    print("Data Loaded:")
+    print(data.head())
+    # Step 2: Select features
+    X, y = select_features(data)
+    # Step 3: Train the model
+    model = train_model(X, y)
+    # Step 4: Evaluate the model
+    evaluate_model(model, X, y)
+if __name__ == "__main__":
+    run_pipeline()