Luigi committed on
Commit
0f1c73d
·
1 Parent(s): 15cfec8

Add standalone Python API

Browse files
Files changed (1) hide show
  1. api.py +226 -0
api.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import csv
3
+ import pickle
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from sklearn.preprocessing import PolynomialFeatures
8
+ import statsmodels.api as sm
9
+ import scipy.optimize as opt
10
+
11
+
12
def hex_to_int(x):
    """
    Convert a hexadecimal string to an integer, returning NaN on failure.

    The input is passed through ``str()`` and stripped of surrounding
    whitespace before being parsed in base 16, so both ``'0x1A'`` and
    ``'1A'`` are accepted. Because of the ``str()`` step, non-string inputs
    are parsed from their text form (the integer ``10`` is read as
    ``0x10 == 16``; ``None`` and float NaN yield NaN).

    Args:
        x: Value to convert, normally a hex string.

    Returns:
        The parsed ``int``, or ``np.nan`` when the text is not valid base 16.
    """
    try:
        return int(str(x).strip(), 16)
    except ValueError:
        # int(<str>, 16) only raises ValueError for unparseable text; catching
        # just that keeps unrelated failures visible instead of hiding them.
        return np.nan
20
+
21
+
22
def load_data(file_path: str) -> pd.DataFrame:
    """
    Load calibration data from an Excel (.xlsx/.xls) or CSV file.

    Excel: only sheets whose name starts with 'T3' are read. In each sheet
    the rows at positions 1 and 2 are combined into the column names (row 1
    is forward-filled so a label spanning several columns applies to all of
    them), data rows start at position 3, and rows without a numeric
    'EA-4000 Power' reading are dropped.

    CSV: the file must already contain every output column. ``power_dec``
    and ``er_dec`` are recomputed from the hex columns and the measurement
    columns are coerced to numeric; rows are not filtered.

    Returned columns:
        - Device: device name (the sheet name for Excel input)
        - power_hex, er_hex: settings as hexadecimal strings
        - power_dec, er_dec: settings as decimal integers (NaN if unparseable)
        - power_meas, er_meas: measured values (float, NaN if unparseable)

    Args:
        file_path: Path to the calibration file; the extension selects the
            parser.

    Returns:
        DataFrame with the columns listed above (CSV input may carry extra
        columns through unchanged).

    Raises:
        ValueError: If the extension is unsupported, the workbook has no
            sheet starting with 'T3', or the CSV lacks a required column.
    """
    ext = os.path.splitext(file_path)[1].lower()

    if ext in ('.xlsx', '.xls'):
        all_rows = []

        # Open the workbook in a context manager so the file handle is
        # released even when a sheet fails to parse.
        with pd.ExcelFile(file_path) as xls:
            # Only process sheets whose name starts with 'T3'.
            for sheet in xls.sheet_names:
                if not sheet.startswith('T3'):
                    continue

                df = pd.read_excel(xls, sheet_name=sheet, header=None)
                # Build the column names from the two header rows.
                header0 = df.iloc[1].ffill()
                header1 = df.iloc[2].fillna('')
                cols = [
                    f"{a} {b}".strip() if b else str(a).strip()
                    for a, b in zip(header0, header1)
                ]
                df.columns = cols

                raw = df.iloc[3:][
                    ['Setting Power', 'Setting ER', 'EA-4000 Power', 'EA-4000 ER']
                ].copy()
                # The power setting is only filled on some rows; carry it
                # down to the rows that follow.
                raw['Setting Power'] = raw['Setting Power'].ffill()

                # Convert and rename the columns.
                raw['power_hex'] = raw['Setting Power']
                raw['er_hex'] = raw['Setting ER']
                raw['power_dec'] = raw['power_hex'].apply(hex_to_int)
                raw['er_dec'] = raw['er_hex'].apply(hex_to_int)
                raw['power_meas'] = pd.to_numeric(raw['EA-4000 Power'], errors='coerce')
                raw['er_meas'] = pd.to_numeric(raw['EA-4000 ER'], errors='coerce')
                raw['Device'] = sheet

                # Keep only rows that have a numeric power measurement.
                valid = raw[raw['power_meas'].notna()]
                all_rows.append(
                    valid[['Device', 'power_hex', 'er_hex', 'power_dec',
                           'er_dec', 'power_meas', 'er_meas']]
                )

        if not all_rows:
            raise ValueError("找不到有效的 'T3' 分頁。")

        return pd.concat(all_rows, ignore_index=True)

    elif ext == '.csv':
        df = pd.read_csv(file_path, quoting=csv.QUOTE_ALL, escapechar='\\')
        required_cols = {'Device', 'power_hex', 'er_hex', 'power_dec',
                         'er_dec', 'power_meas', 'er_meas'}
        missing = required_cols - set(df.columns)
        if missing:
            raise ValueError(f"CSV 檔缺少必要欄位: {missing}")

        # Re-derive the decimal settings from hex and force numeric types.
        df['power_dec'] = df['power_hex'].apply(hex_to_int)
        df['er_dec'] = df['er_hex'].apply(hex_to_int)
        df['power_meas'] = pd.to_numeric(df['power_meas'], errors='coerce')
        df['er_meas'] = pd.to_numeric(df['er_meas'], errors='coerce')
        return df

    else:
        raise ValueError(f"不支援的檔案格式: {ext}")
88
+
89
+
90
def train_model(data_df: pd.DataFrame, model_file: str) -> dict:
    """
    Fit second-order response-surface models for power and ER and save them.

    Two mixed-effects models (grouped by ``Device``) are fitted on degree-2
    polynomial features of ``(power_dec, er_dec)``: one for ``power_meas``
    and one for ``er_meas``. The fitted models, the polynomial transformer
    and the min/max of each setting are pickled to ``model_file``.

    Rows with a missing value in any of the columns used for fitting are
    dropped first, because the models are fitted without missing-value
    handling.

    Args:
        data_df: Calibration data with columns ``Device``, ``power_dec``,
            ``er_dec``, ``power_meas`` and ``er_meas``.
        model_file: Output path of the pickled model (.pkl).

    Returns:
        Dict with ``r2_power``, ``rmse_power``, ``r2_er`` and ``rmse_er``
        computed on the training data, all as plain floats.

    Raises:
        KeyError: If a required column is absent from ``data_df``.
        ValueError: If no complete row remains after dropping missing values.
    """
    required = ['Device', 'power_dec', 'er_dec', 'power_meas', 'er_meas']
    clean = data_df.dropna(subset=required)
    if clean.empty:
        # Fail early with a clear message instead of an opaque fitting error.
        raise ValueError("訓練資料中沒有完整的有效資料列。")

    # Features and targets.
    X = clean[['power_dec', 'er_dec']].to_numpy(dtype=float)
    y_p = clean['power_meas'].to_numpy(dtype=float)
    y_e = clean['er_meas'].to_numpy(dtype=float)
    groups = clean['Device'].to_numpy()

    # Degree-2 polynomial features (bias column included).
    poly = PolynomialFeatures(degree=2, include_bias=True)
    Xp = poly.fit_transform(X)

    # Mixed-effects fits, one per target.
    model_power = sm.MixedLM(endog=y_p, exog=Xp, groups=groups).fit()
    model_er = sm.MixedLM(endog=y_e, exog=Xp, groups=groups).fit()

    # R^2 and RMSE on the training data.
    resid_p = y_p - np.asarray(model_power.fittedvalues)
    resid_e = y_e - np.asarray(model_er.fittedvalues)
    r2p = float(1 - np.sum(resid_p ** 2) / np.sum((y_p - y_p.mean()) ** 2))
    r2e = float(1 - np.sum(resid_e ** 2) / np.sum((y_e - y_e.mean()) ** 2))
    rmse_p = float(np.sqrt(np.mean(resid_p ** 2)))
    rmse_e = float(np.sqrt(np.mean(resid_e ** 2)))

    # Range of the training settings, used later as optimisation bounds.
    bounds = {
        'p_min': int(clean['power_dec'].min()),
        'p_max': int(clean['power_dec'].max()),
        'e_min': int(clean['er_dec'].min()),
        'e_max': int(clean['er_dec'].max()),
    }

    # Persist everything prediction needs.
    model_dict = {
        'poly': poly,
        'model_power': model_power,
        'model_er': model_er,
        'bounds': bounds,
    }
    with open(model_file, 'wb') as f:
        pickle.dump(model_dict, f)

    return {
        'r2_power': r2p, 'rmse_power': rmse_p,
        'r2_er': r2e, 'rmse_er': rmse_e,
    }
140
+
141
+
142
def calibrate_and_predict(
    calib_df: pd.DataFrame,
    target_power: float,
    target_er: float,
    model_file: str
) -> dict:
    """
    Predict the hex settings that best hit a target power and ER.

    The pickled model is loaded, a constant per-output offset is estimated
    from the calibration samples (mean of measured minus predicted), and a
    bounded minimisation searches for the settings whose offset-corrected
    predictions are closest, in squared error, to the targets. The
    continuous optimum is rounded to the nearest integers.

    Args:
        calib_df: Calibration samples with columns ``power_hex``, ``er_hex``,
            ``power_meas`` and ``er_meas``. Rows with a missing or
            unparseable value are skipped; with no usable rows the offsets
            are zero.
        target_power: Target power (decimal float).
        target_er: Target ER (decimal float).
        model_file: Path to the pickled model file (.pkl) holding the keys
            ``poly``, ``model_power``, ``model_er`` and ``bounds``.

    Returns:
        ``{'Power Setting (hex)': ..., 'ER Setting (hex)': ...}`` with the
        settings formatted by ``hex()``.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load model
    # files that come from a trusted source.
    with open(model_file, 'rb') as f:
        md = pickle.load(f)
    poly = md['poly']
    model_power = md['model_power']
    model_er = md['model_er']
    b = md['bounds']

    # Collect the calibration rows in which every value parses.
    samples = []
    for _, row in calib_df.iterrows():
        p_set = hex_to_int(row.get('power_hex', None))
        e_set = hex_to_int(row.get('er_hex', None))
        p_meas = pd.to_numeric(row.get('power_meas', None), errors='coerce')
        e_meas = pd.to_numeric(row.get('er_meas', None), errors='coerce')
        if not any(np.isnan(v) for v in (p_set, e_set, p_meas, e_meas)):
            samples.append((p_set, e_set, p_meas, e_meas))

    if samples:
        # Columns: power setting, ER setting, measured power, measured ER.
        arr = np.asarray(samples, dtype=float)
        feats = poly.transform(arr[:, :2])
        pred_p = np.asarray(model_power.predict(exog=feats))
        pred_e = np.asarray(model_er.predict(exog=feats))
        # Mean offset between measurement and model prediction.
        offset_p = float(np.mean(arr[:, 2] - pred_p))
        offset_e = float(np.mean(arr[:, 3] - pred_e))
    else:
        offset_p = offset_e = 0.0

    # Objective: squared distance of the corrected predictions to the targets.
    def objective(settings):
        x = np.asarray(settings, dtype=float).reshape(1, -1)
        xp = poly.transform(x)
        p0 = model_power.predict(exog=xp)[0] + offset_p
        e0 = model_er.predict(exog=xp)[0] + offset_e
        return (p0 - target_power) ** 2 + (e0 - target_er) ** 2

    # Bounded optimisation, started from the centre of the training range.
    res = opt.minimize(
        objective,
        x0=[(b['p_min'] + b['p_max']) / 2, (b['e_min'] + b['e_max']) / 2],
        bounds=[(b['p_min'], b['p_max']), (b['e_min'], b['e_max'])],
    )
    ph, eh = map(int, np.round(res.x))

    return {
        'Power Setting (hex)': hex(ph),
        'ER Setting (hex)': hex(eh),
    }
209
+
210
+
211
if __name__ == '__main__':
    # Example usage: load training data, fit and save the model, then ask
    # for the settings that reach a target power / ER.
    model_path = 'calibration_model.pkl'

    df = load_data('path/to/your_training_file.xlsx')
    print(f'載入 {len(df)} 筆校正資料')

    metrics = train_model(df, model_path)
    print('訓練完成,效能:')
    print(f" Power → R²={metrics['r2_power']:.3f}, RMSE={metrics['rmse_power']:.3f}")
    print(f" ER → R²={metrics['r2_er']:.3f}, RMSE={metrics['rmse_er']:.3f}")

    # Two hand-written calibration samples for the device being tuned.
    calibration_rows = [
        {'power_hex': '0x1A', 'er_hex': '0x0F', 'power_meas': 3.2, 'er_meas': 11.5},
        {'power_hex': '0x2B', 'er_hex': '0x1C', 'power_meas': 4.8, 'er_meas': 13.0},
    ]
    prediction = calibrate_and_predict(
        pd.DataFrame(calibration_rows),
        target_power=2.5,
        target_er=12.75,
        model_file=model_path,
    )
    print('預測設定值:', prediction)