import os import csv import pickle import numpy as np import pandas as pd from sklearn.preprocessing import PolynomialFeatures import statsmodels.api as sm import scipy.optimize as opt def hex_to_int(x): """ 安全地將十六進位字串(帶或不帶 '0x')轉換為整數,失敗時回傳 NaN。 """ try: return int(str(x).strip(), 16) except Exception: return np.nan def load_data(file_path: str) -> pd.DataFrame: """ 載入校正資料,支援 Excel(.xlsx/.xls) 或 CSV 檔案。 回傳欄位: - Device: 裝置名稱 - power_hex, er_hex: 設定值 (十六進位字串) - power_dec, er_dec: 設定值 (十進位整數) - power_meas, er_meas: 實際量測值 (浮點) """ ext = os.path.splitext(file_path)[1].lower() if ext in ('.xlsx', '.xls'): xls = pd.ExcelFile(file_path) all_rows = [] # 只處理工作表名稱以 'T3' 開頭的分頁 for sheet in xls.sheet_names: if not sheet.startswith('T3'): continue df = pd.read_excel(xls, sheet_name=sheet, header=None) # 組合前兩列成為正確欄位名稱 header0 = df.iloc[1].ffill() header1 = df.iloc[2].fillna('') cols = [f"{a} {b}".strip() if b else str(a).strip() for a, b in zip(header0, header1)] df.columns = cols raw = df.iloc[3:][['Setting Power', 'Setting ER', 'EA-4000 Power', 'EA-4000 ER']].copy() raw['Setting Power'] = raw['Setting Power'].ffill() # 轉換並命名欄位 raw['power_hex'] = raw['Setting Power'] raw['er_hex'] = raw['Setting ER'] raw['power_dec'] = raw['power_hex'].apply(hex_to_int) raw['er_dec'] = raw['er_hex'].apply(hex_to_int) raw['power_meas'] = pd.to_numeric(raw['EA-4000 Power'], errors='coerce') raw['er_meas'] = pd.to_numeric(raw['EA-4000 ER'], errors='coerce') raw['Device'] = sheet # 篩選有效資料 valid = raw[raw['power_meas'].notna()] all_rows.append(valid[['Device','power_hex','er_hex','power_dec','er_dec','power_meas','er_meas']]) if not all_rows: raise ValueError("找不到有效的 'T3' 分頁。") return pd.concat(all_rows, ignore_index=True) elif ext == '.csv': df = pd.read_csv(file_path, quoting=csv.QUOTE_ALL, escapechar='\\') required_cols = {'Device','power_hex','er_hex','power_dec','er_dec','power_meas','er_meas'} missing = required_cols - set(df.columns) if missing: raise ValueError(f"CSV 檔缺少必要欄位: {missing}") # 轉換 hex 與強制類型 df = df.copy() df['power_dec'] = df['power_hex'].apply(hex_to_int) df['er_dec'] = df['er_hex'].apply(hex_to_int) df['power_meas'] = pd.to_numeric(df['power_meas'], errors='coerce') df['er_meas'] = pd.to_numeric(df['er_meas'], errors='coerce') return df else: raise ValueError(f"不支援的檔案格式: {ext}") def train_model(data_df: pd.DataFrame, model_file: str) -> dict: """ 訓練二階回應面模型(Response Surface Model)。 輸入: data_df: 校正資料 DataFrame model_file: 模型輸出檔路徑 (.pkl) 輸出: r2_power, rmse_power, r2_er, rmse_er 四項效能指標。 """ # 特徵與目標 X = data_df[['power_dec','er_dec']].values y_p = data_df['power_meas'].values y_e = data_df['er_meas'].values groups = data_df['Device'] # 產生二階多項式特徵 poly = PolynomialFeatures(degree=2, include_bias=True) Xp = poly.fit_transform(X) # 混合效果模型擬合 model_power = sm.MixedLM(endog=y_p, exog=Xp, groups=groups).fit() model_er = sm.MixedLM(endog=y_e, exog=Xp, groups=groups).fit() # 計算 R² 與 RMSE pred_p = model_power.fittedvalues pred_e = model_er.fittedvalues r2p = 1 - np.sum((y_p - pred_p)**2) / np.sum((y_p - y_p.mean())**2) r2e = 1 - np.sum((y_e - pred_e)**2) / np.sum((y_e - y_e.mean())**2) rmse_p = float(np.sqrt(np.mean((y_p - pred_p)**2))) rmse_e = float(np.sqrt(np.mean((y_e - pred_e)**2))) # 資料範圍,用於後續預測時的邊界 bounds = { 'p_min': int(data_df['power_dec'].min()), 'p_max': int(data_df['power_dec'].max()), 'e_min': int(data_df['er_dec'].min()), 'e_max': int(data_df['er_dec'].max()), } # 儲存模型物件 model_dict = {'poly': poly, 'model_power': model_power, 'model_er': model_er, 'bounds': bounds} with open(model_file, 'wb') as f: pickle.dump(model_dict, f) return { 'r2_power': r2p, 'rmse_power': rmse_p, 'r2_er': r2e, 'rmse_er': rmse_e } def calibrate_and_predict( calib_df: pd.DataFrame, target_power: float, target_er: float, model_file: str ) -> dict: """ 根據已訓練模型,針對目標 Power/ER 預測最佳十六進位設定值。 輸入: calib_df: 最多 N 筆校正樣本 (含 hex 與量測值) target_power: 目標功率 (十進位浮點) target_er: 目標 ER 值 (十進位浮點) model_file: 訓練後模型檔 (.pkl) 回傳: {'Power Setting (hex)': ..., 'ER Setting (hex)': ...} """ # 載入模型 with open(model_file, 'rb') as f: md = pickle.load(f) poly = md['poly'] model_power = md['model_power'] model_er = md['model_er'] b = md['bounds'] # 擷取校正樣本並計算偏移量 samples = [] for _, row in calib_df.iterrows(): p_hex = hex_to_int(row.get('power_hex', None)) e_hex = hex_to_int(row.get('er_hex', None)) pm = pd.to_numeric(row.get('power_meas', None), errors='coerce') em = pd.to_numeric(row.get('er_meas', None), errors='coerce') if not np.isnan(p_hex) and not np.isnan(e_hex) and not np.isnan(pm) and not np.isnan(em): samples.append((p_hex, e_hex, pm, em)) if samples: Xc = np.array([[p,e] for p,e,_,_ in samples]) Xcp = poly.transform(Xc) pred_p = model_power.predict(exog=Xcp) pred_e = model_er.predict(exog=Xcp) # 平均偏移 offset_p = float(np.mean([pm - p for (_,_,pm,_), p in zip(samples, pred_p)])) offset_e = float(np.mean([em - e for (_,_,_,em), e in zip(samples, pred_e)])) else: offset_p = offset_e = 0.0 # 最小化目標函數: (預測值 - 目標值)^2 def objective(vars): x = np.array(vars).reshape(1, -1) xp = poly.transform(x) p0 = model_power.predict(exog=xp)[0] + offset_p e0 = model_er.predict(exog=xp)[0] + offset_e return (p0 - target_power)**2 + (e0 - target_er)**2 # 有界優化 res = opt.minimize( objective, x0=[(b['p_min']+b['p_max'])/2, (b['e_min']+b['e_max'])/2], bounds=[(b['p_min'], b['p_max']), (b['e_min'], b['e_max'])] ) ph, eh = map(int, np.round(res.x)) return { 'Power Setting (hex)': hex(ph), 'ER Setting (hex)' : hex(eh) } if __name__ == '__main__': # 範例用法 df = load_data('path/to/your_training_file.xlsx') print(f'載入 {len(df)} 筆校正資料') metrics = train_model(df, 'calibration_model.pkl') print('訓練完成,效能:') print(f" Power → R²={metrics['r2_power']:.3f}, RMSE={metrics['rmse_power']:.3f}") print(f" ER → R²={metrics['r2_er']:.3f}, RMSE={metrics['rmse_er']:.3f}") sample_df = pd.DataFrame([ {'power_hex':'0x1A','er_hex':'0x0F','power_meas':3.2,'er_meas':11.5}, {'power_hex':'0x2B','er_hex':'0x1C','power_meas':4.8,'er_meas':13.0}, ]) result = calibrate_and_predict(sample_df, target_power=2.5, target_er=12.75, model_file='calibration_model.pkl') print('預測設定值:', result)