Add standalone Python API
Browse files
api.py
ADDED
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import csv
|
3 |
+
import pickle
|
4 |
+
|
5 |
+
import numpy as np
|
6 |
+
import pandas as pd
|
7 |
+
from sklearn.preprocessing import PolynomialFeatures
|
8 |
+
import statsmodels.api as sm
|
9 |
+
import scipy.optimize as opt
|
10 |
+
|
11 |
+
|
12 |
+
def hex_to_int(x):
    """Convert a hexadecimal value to an integer, or NaN when it cannot be parsed.

    The value is passed through ``str()`` and stripped of surrounding
    whitespace, then parsed in base 16, so an optional ``0x`` prefix is
    accepted. Because of the ``str()`` step, an ``int`` such as ``10`` is read
    as the hex digits ``"10"`` (i.e. 16), and a ``float`` such as ``26.0`` is
    not parseable and yields NaN.

    Args:
        x: Value to convert; typically a string such as ``"0x1A"`` or ``"1A"``.

    Returns:
        The parsed ``int``, or ``numpy.nan`` if the text is not valid base 16.
    """
    try:
        return int(str(x).strip(), 16)
    except (TypeError, ValueError):
        # Best-effort by design: unparseable cells (None, NaN, blanks, free
        # text) are reported as NaN so callers can filter them out later.
        return np.nan
|
20 |
+
|
21 |
+
|
22 |
+
def load_data(file_path: str) -> pd.DataFrame:
    """Load calibration data from an Excel workbook (.xlsx/.xls) or a CSV file.

    Excel input: only sheets whose name starts with ``'T3'`` are read. Row
    index 1 (forward-filled) and row index 2 are joined to form the column
    names, and data is taken from row index 3 onward. ``'Setting Power'`` is
    forward-filled down the rows, and only rows with a numeric
    ``'EA-4000 Power'`` measurement are kept. The sheet name becomes the
    ``Device`` value.

    CSV input: the file must already contain the columns listed below.
    ``power_dec`` / ``er_dec`` are recomputed from the hex columns and the
    measurement columns are coerced to numeric. All columns present in the
    file are returned.

    Columns produced:
        - Device: device name
        - power_hex, er_hex: setting values as hexadecimal text
        - power_dec, er_dec: setting values as integers (NaN if unparseable)
        - power_meas, er_meas: measured values (NaN if not numeric)

    Args:
        file_path: Path to the calibration file; the extension selects the
            parser (case-insensitive).

    Returns:
        A DataFrame with the columns described above.

    Raises:
        ValueError: If no ``'T3'`` sheet is found, if the CSV lacks a required
            column, or if the file extension is not supported.
    """
    ext = os.path.splitext(file_path)[1].lower()

    if ext in ('.xlsx', '.xls'):
        all_rows = []

        # The context manager closes the workbook handle even if a sheet
        # fails to parse.
        with pd.ExcelFile(file_path) as xls:
            # Only sheets whose name starts with 'T3' are processed.
            for sheet in xls.sheet_names:
                if not sheet.startswith('T3'):
                    continue

                df = pd.read_excel(xls, sheet_name=sheet, header=None)
                # Join the two header rows into the final column names; the
                # upper row is forward-filled so each group label covers the
                # sub-columns beneath it.
                header0 = df.iloc[1].ffill()
                header1 = df.iloc[2].fillna('')
                cols = [f"{a} {b}".strip() if b else str(a).strip() for a, b in zip(header0, header1)]
                df.columns = cols

                raw = df.iloc[3:][['Setting Power', 'Setting ER', 'EA-4000 Power', 'EA-4000 ER']].copy()
                # The power setting is only filled in on some rows; carry it
                # down to the rows below.
                raw['Setting Power'] = raw['Setting Power'].ffill()

                # Convert and rename the columns.
                raw['power_hex'] = raw['Setting Power']
                raw['er_hex'] = raw['Setting ER']
                raw['power_dec'] = raw['power_hex'].apply(hex_to_int)
                raw['er_dec'] = raw['er_hex'].apply(hex_to_int)
                raw['power_meas'] = pd.to_numeric(raw['EA-4000 Power'], errors='coerce')
                raw['er_meas'] = pd.to_numeric(raw['EA-4000 ER'], errors='coerce')
                raw['Device'] = sheet

                # Keep only rows that have a power measurement.
                valid = raw[raw['power_meas'].notna()]
                all_rows.append(valid[['Device','power_hex','er_hex','power_dec','er_dec','power_meas','er_meas']])

        if not all_rows:
            raise ValueError("找不到有效的 'T3' 分頁。")

        return pd.concat(all_rows, ignore_index=True)

    elif ext == '.csv':
        # Read the hex columns as text. With type inference, an all-digit
        # column (or one with blanks, or values like "1e2") is parsed as
        # int/float, and e.g. 10.0 -> "10.0" can no longer be parsed as hex.
        df = pd.read_csv(
            file_path,
            quoting=csv.QUOTE_ALL,
            escapechar='\\',
            dtype={'power_hex': str, 'er_hex': str},
        )
        required_cols = {'Device','power_hex','er_hex','power_dec','er_dec','power_meas','er_meas'}
        missing = required_cols - set(df.columns)
        if missing:
            raise ValueError(f"CSV 檔缺少必要欄位: {missing}")

        # Recompute the decimal settings from hex and force numeric types.
        df['power_dec'] = df['power_hex'].apply(hex_to_int)
        df['er_dec'] = df['er_hex'].apply(hex_to_int)
        df['power_meas'] = pd.to_numeric(df['power_meas'], errors='coerce')
        df['er_meas'] = pd.to_numeric(df['er_meas'], errors='coerce')
        return df

    else:
        raise ValueError(f"不支援的檔案格式: {ext}")
|
88 |
+
|
89 |
+
|
90 |
+
def train_model(data_df: pd.DataFrame, model_file: str) -> dict:
    """Fit second-order response-surface models for power and ER and save them.

    Degree-2 polynomial features of (``power_dec``, ``er_dec``) are used as the
    fixed-effect design matrix of two ``statsmodels`` mixed linear models (one
    for ``power_meas``, one for ``er_meas``), grouped by ``Device``. The
    polynomial transformer, both fitted models and the min/max of the setting
    columns are pickled to ``model_file``.

    Rows with a missing value in any of the columns used for fitting are
    dropped first, because the feature transform and the fit cannot handle
    NaN.

    Args:
        data_df: Calibration data with columns ``Device``, ``power_dec``,
            ``er_dec``, ``power_meas`` and ``er_meas``.
        model_file: Output path for the pickled model bundle (.pkl).

    Returns:
        A dict with float entries ``r2_power``, ``rmse_power``, ``r2_er`` and
        ``rmse_er``, computed on the training data from the models' fitted
        values.

    Raises:
        KeyError: If a required column is absent from ``data_df``.
        ValueError: If no complete row remains after dropping missing values.
    """
    required = ['Device', 'power_dec', 'er_dec', 'power_meas', 'er_meas']
    clean = data_df.dropna(subset=required)
    if clean.empty:
        # Message: "No complete calibration rows are available for training."
        raise ValueError("沒有完整的校正資料可供訓練。")

    # Features, targets and grouping variable.
    X = clean[['power_dec', 'er_dec']].to_numpy(dtype=float)
    y_p = clean['power_meas'].to_numpy(dtype=float)
    y_e = clean['er_meas'].to_numpy(dtype=float)
    groups = clean['Device'].to_numpy()

    # Second-order polynomial features (bias column included).
    poly = PolynomialFeatures(degree=2, include_bias=True)
    Xp = poly.fit_transform(X)

    # Mixed-effects fits, one per target, grouped by device.
    model_power = sm.MixedLM(endog=y_p, exog=Xp, groups=groups).fit()
    model_er = sm.MixedLM(endog=y_e, exog=Xp, groups=groups).fit()

    # R² and RMSE on the training data.
    pred_p = model_power.fittedvalues
    pred_e = model_er.fittedvalues
    r2p = float(1 - np.sum((y_p - pred_p)**2) / np.sum((y_p - y_p.mean())**2))
    r2e = float(1 - np.sum((y_e - pred_e)**2) / np.sum((y_e - y_e.mean())**2))
    rmse_p = float(np.sqrt(np.mean((y_p - pred_p)**2)))
    rmse_e = float(np.sqrt(np.mean((y_e - pred_e)**2)))

    # Range of the settings seen in training; stored with the model so that
    # prediction can restrict its search to this range.
    bounds = {
        'p_min': int(clean['power_dec'].min()),
        'p_max': int(clean['power_dec'].max()),
        'e_min': int(clean['er_dec'].min()),
        'e_max': int(clean['er_dec'].max()),
    }

    # Persist everything needed for prediction.
    model_dict = {'poly': poly, 'model_power': model_power, 'model_er': model_er, 'bounds': bounds}
    with open(model_file, 'wb') as f:
        pickle.dump(model_dict, f)

    return {
        'r2_power': r2p, 'rmse_power': rmse_p,
        'r2_er': r2e, 'rmse_er': rmse_e
    }
|
140 |
+
|
141 |
+
|
142 |
+
def calibrate_and_predict(
    calib_df: pd.DataFrame,
    target_power: float,
    target_er: float,
    model_file: str
) -> dict:
    """Predict the hex settings that best reach a target power and ER.

    The pickled model bundle is loaded from ``model_file``. Calibration
    samples are used to estimate a constant offset per target (mean of
    measured minus model-predicted value), and a bounded minimisation then
    searches for the (power, ER) setting whose offset-corrected prediction is
    closest, in squared error, to the targets. The optimum is rounded to
    integers and returned as hex strings.

    Args:
        calib_df: Calibration samples with columns ``power_hex``, ``er_hex``,
            ``power_meas`` and ``er_meas``. Rows with an unparseable or
            missing value are ignored; if a column is absent or no usable row
            remains, both offsets are 0.
        target_power: Desired measured power.
        target_er: Desired measured ER.
        model_file: Path to the model bundle (.pkl), a dict with keys
            ``poly``, ``model_power``, ``model_er`` and ``bounds``.

    Returns:
        ``{'Power Setting (hex)': ..., 'ER Setting (hex)': ...}`` with values
        formatted by ``hex()``.
    """
    # Load the model bundle.
    # NOTE: pickle.load can execute arbitrary code; only load model files
    # from a trusted source.
    with open(model_file, 'rb') as f:
        md = pickle.load(f)
    poly = md['poly']
    model_power = md['model_power']
    model_er = md['model_er']
    b = md['bounds']

    # Estimate the calibration offsets. Samples are extracted column-wise:
    # row-wise iteration upcasts an all-numeric row to float, which would turn
    # an integer hex code such as 26 into 26.0 and make it unparseable.
    offset_p = offset_e = 0.0
    needed = ('power_hex', 'er_hex', 'power_meas', 'er_meas')
    if all(col in calib_df.columns for col in needed):
        p_dec = calib_df['power_hex'].map(hex_to_int).to_numpy(dtype=float)
        e_dec = calib_df['er_hex'].map(hex_to_int).to_numpy(dtype=float)
        p_meas = pd.to_numeric(calib_df['power_meas'], errors='coerce').to_numpy(dtype=float)
        e_meas = pd.to_numeric(calib_df['er_meas'], errors='coerce').to_numpy(dtype=float)

        usable = ~(np.isnan(p_dec) | np.isnan(e_dec) | np.isnan(p_meas) | np.isnan(e_meas))
        if usable.any():
            Xc = np.column_stack([p_dec[usable], e_dec[usable]])
            Xcp = poly.transform(Xc)
            pred_p = np.asarray(model_power.predict(exog=Xcp))
            pred_e = np.asarray(model_er.predict(exog=Xcp))
            # Mean offset between measurement and model prediction.
            offset_p = float(np.mean(p_meas[usable] - pred_p))
            offset_e = float(np.mean(e_meas[usable] - pred_e))

    # Objective: squared distance between corrected prediction and target.
    def objective(settings):
        x = np.array(settings).reshape(1, -1)
        xp = poly.transform(x)
        p0 = model_power.predict(exog=xp)[0] + offset_p
        e0 = model_er.predict(exog=xp)[0] + offset_e
        return (p0 - target_power)**2 + (e0 - target_er)**2

    # Bounded optimisation, started from the centre of the stored range.
    res = opt.minimize(
        objective,
        x0=[(b['p_min']+b['p_max'])/2, (b['e_min']+b['e_max'])/2],
        bounds=[(b['p_min'], b['p_max']), (b['e_min'], b['e_max'])]
    )
    ph, eh = map(int, np.round(res.x))

    return {
        'Power Setting (hex)': hex(ph),
        'ER Setting (hex)'   : hex(eh)
    }
|
209 |
+
|
210 |
+
|
211 |
+
if __name__ == '__main__':
    # Example usage: load training data, fit and save the model, then ask
    # for the settings that reach a target power/ER using two calibration
    # samples.
    training_df = load_data('path/to/your_training_file.xlsx')
    print(f'載入 {len(training_df)} 筆校正資料')

    scores = train_model(training_df, 'calibration_model.pkl')
    print('訓練完成,效能:')
    print(f" Power → R²={scores['r2_power']:.3f}, RMSE={scores['rmse_power']:.3f}")
    print(f" ER → R²={scores['r2_er']:.3f}, RMSE={scores['rmse_er']:.3f}")

    # Two calibration samples, given column by column.
    calibration_samples = pd.DataFrame({
        'power_hex': ['0x1A', '0x2B'],
        'er_hex': ['0x0F', '0x1C'],
        'power_meas': [3.2, 4.8],
        'er_meas': [11.5, 13.0],
    })
    prediction = calibrate_and_predict(
        calibration_samples,
        target_power=2.5,
        target_er=12.75,
        model_file='calibration_model.pkl',
    )
    print('預測設定值:', prediction)
|