ManMenGon committed on
Commit
58bb819
1 Parent(s): afdaffd

Uploading biomarker-agnostic model for Neurodiagnoses

Browse files
Files changed (2) hide show
  1. data/sample_data.csv +6 -0
  2. run_pipeline.py +76 -0
data/sample_data.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ patient_id,age,sex,MMSE,plasma_biomarker_1,plasma_biomarker_2,MRI_region_1,MRI_region_2,clinical_score
2
+ 1,68,M,27,0.45,0.30,1200,1300,5
3
+ 2,73,F,25,0.50,0.35,1150,1280,7
4
+ 3,70,M,28,0.40,0.28,1220,1350,4
5
+ 4,65,F,30,0.38,0.25,1300,1400,3
6
+ 5,72,M,26,0.55,0.40,1180,1320,6
run_pipeline.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def load_and_clean_data(file_path):
    """Read a CSV file and discard every row that has a missing value.

    Args:
        file_path: Path (or any source accepted by ``pandas.read_csv``) of
            the CSV file to load.

    Returns:
        A DataFrame holding only the rows with no missing values. The
        surviving rows keep their original index labels.
    """
    # Cleaning is intentionally minimal: incomplete rows are simply dropped.
    return pd.read_csv(file_path).dropna()
11
+
12
def select_features(data):
    """Split a DataFrame into model inputs ``X`` and target ``y`` by column name.

    The last column is taken as the target. Every other column is assigned to
    a modality block by a case-insensitive substring match on its name, and
    ``X`` is assembled from the plasma, MRI, PET and functional blocks, in
    that order. Columns that match no block are ignored.

    Args:
        data: DataFrame whose last column is the target variable.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a DataFrame with the detected
        biomarker columns and ``y`` is the last column of ``data`` as a Series.
    """
    # Dynamically detect which columns belong to each category.
    feature_blocks = {
        "clinical": [],
        "plasma": [],
        "MRI": [],
        "PET": [],
        "functional": [],
    }

    # Skip the last column: it is the target, and letting it into a feature
    # block would leak y into X whenever its name looks like a biomarker.
    for column in data.columns[:-1]:
        col_name = str(column).lower()
        if "plasma" in col_name:
            block = "plasma"
        elif "fmri" in col_name:
            # Must be tested before "mri": "mri" is a substring of "fmri", so
            # the reverse order would file every fMRI column under MRI.
            block = "functional"
        elif "mri" in col_name:
            block = "MRI"
        elif "pet" in col_name:
            block = "PET"
        elif "clinical" in col_name or col_name in ("age", "sex", "education"):
            block = "clinical"
        elif "eeg" in col_name:
            block = "functional"
        else:
            continue
        feature_blocks[block].append(column)

    # Use every detected biomarker column as model input.
    # NOTE(review): clinical columns are categorized but not fed into X --
    # confirm this is intended.
    X = data[
        feature_blocks["plasma"]
        + feature_blocks["MRI"]
        + feature_blocks["PET"]
        + feature_blocks["functional"]
    ]

    # The last column is the target variable.
    y = data.iloc[:, -1]

    return X, y
42
+
43
def train_model(X, y):
    """Fit a scikit-learn ``LinearRegression`` on ``X`` and ``y``.

    Args:
        X: Feature matrix passed to ``LinearRegression.fit``.
        y: Target values passed to ``LinearRegression.fit``.

    Returns:
        The fitted ``LinearRegression`` estimator.
    """
    from sklearn.linear_model import LinearRegression

    # ``fit`` returns the estimator itself, so the fitted model can be
    # returned straight from the call.
    return LinearRegression().fit(X, y)
48
+
49
def evaluate_model(model, X, y):
    """Print the mean squared error and R-squared of ``model`` on ``X``/``y``.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: Feature matrix to predict on.
        y: True target values to compare the predictions against.

    Returns:
        None. The two metrics are written to standard output.
    """
    from sklearn.metrics import mean_squared_error, r2_score

    y_pred = model.predict(X)

    # Compute both metrics first, then report them in a fixed order.
    report = (
        ("Mean Squared Error:", mean_squared_error(y, y_pred)),
        ("R-squared:", r2_score(y, y_pred)),
    )
    for label, value in report:
        print(label, value)
56
+
57
def run_pipeline(file_path="data/sample_data.csv"):
    """Run the whole workflow: load data, select features, train, evaluate.

    Args:
        file_path: Location of the input CSV file. Defaults to the bundled
            ``data/sample_data.csv`` so a zero-argument call behaves as before.

    Returns:
        None. A preview of the loaded data and the evaluation metrics are
        printed to standard output.
    """
    # Step 1: Load and clean data
    data = load_and_clean_data(file_path)
    print("Data Loaded:")
    print(data.head())

    # Step 2: Select features
    X, y = select_features(data)

    # Step 3: Train the model
    model = train_model(X, y)

    # Step 4: Evaluate the model
    # NOTE(review): the model is scored on the same rows it was fitted on, so
    # the printed metrics are in-sample and say nothing about generalization.
    evaluate_model(model, X, y)


if __name__ == "__main__":
    run_pipeline()