123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- #%%
- import os.path as op
- import pandas as pd
- import numpy as np
- import joblib
- import sys
- from glob import glob
- from sklearn.gaussian_process import GaussianProcessRegressor
- from sklearn.base import BaseEstimator, RegressorMixin
- from sklearn.gaussian_process.kernels import WhiteKernel, DotProduct
# Root directory that is searched recursively for "*features.csv" files.
# It is taken from the first command-line argument; fail with a usage hint
# instead of a bare IndexError when the argument is missing.
if len(sys.argv) < 2:
    raise SystemExit(f"usage: {sys.argv[0]} <features_dir>")
dir_path = sys.argv[1]
# Directory from which the serialized stacking model is loaded.
model_path = "/model"
class EnigmaGaussianProcessRegressor(RegressorMixin, BaseEstimator):
    """Gaussian-process regressor with a fixed constant mean.

    The wrapped ``GaussianProcessRegressor`` is trained on ``y - constant_mean``
    and ``constant_mean`` is added back to every prediction, so the model
    effectively uses a constant prior mean of 100.

    Parameters
    ----------
    noise_level : default=-1
        Stored on the instance. NOTE: ``create_kernel`` does not read this
        value; the white-noise level of the kernel is hard-coded there.
    optimizer : default="fmin_l_bfgs_b"
        Passed unchanged to ``GaussianProcessRegressor(optimizer=...)``.

    NOTE(review): the class name and the attribute names (``constant_mean``,
    ``kernel``, ``gpr``) are presumably referenced by previously pickled
    models - keep them stable unless that is confirmed not to be the case.
    """

    def __init__(self, *, noise_level=-1, optimizer="fmin_l_bfgs_b"):
        # Constant subtracted from the targets in fit() and added in predict().
        self.constant_mean = 100
        self.noise_level = noise_level
        self.kernel = self.create_kernel()
        self.optimizer = optimizer

    def create_kernel(self):
        """Return the default kernel: ``DotProduct() + WhiteKernel(...)``.

        The white-noise level is ``10 ** (2 * log10(0.1))``, i.e. 0.1 squared.
        """
        # log10 of 0.1; doubling the exponent below squares that value.
        likhyp = np.log10(0.1)
        kernel = DotProduct() + WhiteKernel(noise_level=10 ** (2 * likhyp))
        return kernel

    def fit(self, X, y):
        """Fit the underlying GP on the mean-centred targets.

        Parameters
        ----------
        X : training features, forwarded to ``GaussianProcessRegressor.fit``.
        y : training targets; must support ``y - self.constant_mean``.

        Returns
        -------
        self
            The fitted estimator, as the scikit-learn estimator API expects
            (the previous implementation returned ``None``).
        """
        self.gpr = GaussianProcessRegressor(
            kernel=self.kernel, optimizer=self.optimizer
        )
        self.gpr.fit(X, y - self.constant_mean)
        return self

    def predict(self, X):
        """Predict targets for ``X``, adding the constant mean back.

        Requires a prior call to ``fit`` (or an unpickled, fitted instance),
        because it reads ``self.gpr``.
        """
        return self.gpr.predict(X) + self.constant_mean
#%%
# Load the stacking model and collect every feature table below dir_path.
# NOTE(review): joblib.load unpickles arbitrary objects - only point
# model_path at trusted files.
stacking_model = joblib.load(op.join(model_path, "stacking_enigmaModel.pkl"))
subjs = glob(dir_path + "/**/*features.csv", recursive=True)
#%%
# For each feature table: predict with the stacking model and write a
# one-row "<name>_prediction.csv" next to the input file.
for sub in subjs:
    subj_path, file_name = op.split(sub)
    subj_name = file_name.removesuffix('_features.csv')
    # splitext drops whatever follows the last dot in the remaining name;
    # this also removes ".csv" when the "_features.csv" suffix was absent.
    stem = op.splitext(subj_name)[0]

    sub_feats = pd.read_csv(sub, index_col=0)
    # Keep only the columns whose name matches the feature-name pattern.
    # NOTE(review): the commas inside the character classes are matched
    # literally and the trailing "_*" also matches zero underscores -
    # confirm this is the intended pattern.
    is_model_feature = sub_feats.columns.str.contains('p[1,2]_r[4,8]s[4,8]_*')
    col_feats = sub_feats.columns[is_model_feature]
    pred = stacking_model.predict(sub_feats[col_feats])

    # Only the first prediction (pred[0]) is written to the report.
    df_res = pd.DataFrame(
        [{'subj-path': sub, 'name': stem, 'pred-age': pred[0]}]
    )
    out_path = op.join(subj_path, stem + '_prediction.csv')
    df_res.to_csv(out_path)
- #%%
|