# predict_age_stacking.py
  1. #%%
  2. import os.path as op
  3. import pandas as pd
  4. import numpy as np
  5. import joblib
  6. import sys
  7. from glob import glob
  8. from sklearn.gaussian_process import GaussianProcessRegressor
  9. from sklearn.base import BaseEstimator, RegressorMixin
  10. from sklearn.gaussian_process.kernels import WhiteKernel, DotProduct
  11. # model_path = "/data/project/BrainAge4AD/features_new"
  12. dir_path = sys.argv[1]
  13. model_path = "/model"
  14. class EnigmaGaussianProcessRegressor(RegressorMixin, BaseEstimator):
  15. def __init__(self, *, noise_level=-1, optimizer="fmin_l_bfgs_b"):
  16. # default values
  17. self.constant_mean = 100
  18. self.noise_level = noise_level
  19. self.kernel = self.create_kernel()
  20. self.optimizer = optimizer
  21. def create_kernel(self):
  22. # creates a kernel using these default parameters.
  23. # ConstantKernel for the constant mean function,
  24. # WhiteKernel for the noise level function with the specified default parameters.
  25. likhyp = np.log10(0.1) # Log of noise variance hyperparameter
  26. # Mean hyperparameter
  27. kernel = DotProduct() + WhiteKernel(noise_level=10 ** (2 * likhyp))
  28. return kernel
  29. def fit(self, X, y):
  30. self.gpr = GaussianProcessRegressor(
  31. kernel=self.kernel, optimizer=self.optimizer
  32. )
  33. self.gpr.fit(X, y - self.constant_mean)
  34. def predict(self, X):
  35. return self.gpr.predict(X) + self.constant_mean
  36. #%%
  37. stacking_model = joblib.load(op.join(model_path, "stacking_enigmaModel.pkl"))
  38. subjs = glob(dir_path + "/**/*features.csv", recursive=True)
  39. #%%
  40. for sub in subjs:
  41. subj_name = op.basename(sub).removesuffix('_features.csv')
  42. subj_path = op.dirname(sub)
  43. sub_feats = pd.read_csv(sub, index_col=0)
  44. col_feats = sub_feats.columns[sub_feats.columns.str.contains('p[1,2]_r[4,8]s[4,8]_*')]
  45. pred = stacking_model.predict(sub_feats[col_feats])
  46. sub_report = [{'subj-path': sub, 'name': op.splitext(subj_name)[0], 'pred-age': pred[0]}]
  47. df_res = pd.DataFrame(sub_report)
  48. out_path = op.join(subj_path, op.splitext(subj_name)[0] + '_prediction.csv')
  49. df_res.to_csv(out_path)
  50. #%%