#%%
"""Predict brain age for every subject feature file found under a directory.

Usage: python <script> <features_dir>

The script loads a stacking model from ``/model/stacking_enigmaModel.pkl``,
searches ``<features_dir>`` recursively for ``*features.csv`` files, and writes
one ``<name>_prediction.csv`` next to each feature file.
"""
import os.path as op
import sys
from glob import glob

import joblib
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import WhiteKernel, DotProduct

# model_path = "/data/project/BrainAge4AD/features_new"
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("usage: python <script> <features_dir>")
dir_path = sys.argv[1]
model_path = "/model"

# Regex selecting the feature columns that are passed to the model.
# NOTE(review): inside [...] a comma is a literal character, so `[1,2]` also
# matches ','; the trailing `_*` matches zero or more underscores. The pattern
# is left unchanged because it decides which columns reach the model.
FEATURE_COLUMN_PATTERN = 'p[1,2]_r[4,8]s[4,8]_*'


# NOTE(review): this class is never instantiated in this file; presumably it
# must stay defined here under this name so that the pickled stacking model
# can be resolved by joblib.load -- confirm before moving or renaming it.
class EnigmaGaussianProcessRegressor(RegressorMixin, BaseEstimator):
    """Gaussian process regressor with a fixed constant mean.

    The target is shifted by ``constant_mean`` before fitting the wrapped
    ``GaussianProcessRegressor`` and shifted back when predicting.

    Parameters
    ----------
    noise_level : float, default=-1
        Stored on the instance. NOTE(review): ``create_kernel`` does not read
        it; the kernel always uses its own hard-coded noise level.
    optimizer : str, default="fmin_l_bfgs_b"
        Passed through to ``GaussianProcessRegressor`` in ``fit``.
    """

    def __init__(self, *, noise_level=-1, optimizer="fmin_l_bfgs_b"):
        # Constant subtracted from y in fit() and added back in predict().
        self.constant_mean = 100
        self.noise_level = noise_level
        self.kernel = self.create_kernel()
        self.optimizer = optimizer

    def create_kernel(self):
        """Return the default kernel: ``DotProduct() + WhiteKernel(...)``.

        The constant mean is not part of the kernel; it is handled by the
        shift applied in ``fit``/``predict``.
        """
        likhyp = np.log10(0.1)  # log10 of the noise hyperparameter
        # 10 ** (2 * likhyp) turns the hyperparameter back into the noise
        # level handed to WhiteKernel.
        kernel = DotProduct() + WhiteKernel(noise_level=10 ** (2 * likhyp))
        return kernel

    def fit(self, X, y):
        """Fit the wrapped Gaussian process on ``y - constant_mean``.

        Returns
        -------
        self
            The fitted estimator, as the scikit-learn estimator API expects.
        """
        self.gpr = GaussianProcessRegressor(
            kernel=self.kernel, optimizer=self.optimizer
        )
        self.gpr.fit(X, y - self.constant_mean)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions shifted by ``constant_mean``."""
        return self.gpr.predict(X) + self.constant_mean


#%%
# SECURITY: joblib.load unpickles arbitrary Python objects; only point
# model_path at a trusted model file.
stacking_model = joblib.load(op.join(model_path, "stacking_enigmaModel.pkl"))
subjs = glob(dir_path + "/**/*features.csv", recursive=True)
if not subjs:
    # Make an empty run visible instead of exiting silently.
    print(f"no '*features.csv' files found under {dir_path}", file=sys.stderr)

#%%
for sub in subjs:
    subj_name = op.basename(sub).removesuffix('_features.csv')
    subj_path = op.dirname(sub)
    # Name used both in the report and in the output file name; splitext
    # drops whatever follows the last '.' left in the subject name.
    report_name = op.splitext(subj_name)[0]

    sub_feats = pd.read_csv(sub, index_col=0)
    col_feats = sub_feats.columns[
        sub_feats.columns.str.contains(FEATURE_COLUMN_PATTERN)
    ]
    pred = stacking_model.predict(sub_feats[col_feats])

    # Only the first prediction is reported for each feature file.
    sub_report = [{'subj-path': sub, 'name': report_name, 'pred-age': pred[0]}]
    df_res = pd.DataFrame(sub_report)
    out_path = op.join(subj_path, report_name + '_prediction.csv')
    df_res.to_csv(out_path)
#%%