# Standard library
import argparse
import os
import pickle
import re
import sys
from pathlib import Path

# Third-party
import pandas as pd

# Local
from brainage.calculate_features import calculate_voxelwise_features
def model_pred(test_df, model_file, feature_space_str):
    """Predict age with every model stored in a trained-model file.

    Args:
        test_df (pd.DataFrame): test data, one row per subject; passed
            directly to each model's ``predict``.
        model_file (str): path to a pickle file containing a dict that maps
            model name -> fitted model object.
        feature_space_str (str): feature-space name, used to label the
            prediction columns.

    Returns:
        pd.DataFrame: one column per model, named
        ``'<feature_space_str>+<model_name>'``, holding the flattened
        predictions.

    Raises:
        FileNotFoundError: if ``model_file`` does not exist.
    """
    # Context manager so the model file handle is closed deterministically.
    with open(model_file, 'rb') as f:
        model = pickle.load(f)

    pred = pd.DataFrame()
    for key, model_value in model.items():
        # BUGFIX: the original read column names from a global `data_df`
        # (not the `test_df` parameter) and called `preprocess` with results
        # that were never used; `predict` always received the raw test_df.
        y_pred = model_value.predict(test_df).ravel()
        pred[feature_space_str + '+' + key] = y_pred
    return pred
if __name__ == "__main__":
    # CLI: paths to the test data and outputs, plus the preprocessing
    # parameters (smoothing FWHM, resampling size) that must match the
    # feature space the model was trained on.
    parser = argparse.ArgumentParser()
    parser.add_argument("--features_path", type=str, help="path to features dir")  # eg '../data/ADNI'
    parser.add_argument("--data_dir", type=str, help="path to data dir")
    parser.add_argument("--subject_filepaths", type=str, help="path to csv or txt file with subject filepaths")  # eg: '../data/ADNI/ADNI.paths_cat12.8.csv'
    parser.add_argument("--output_path", type=str, help="path to output_dir")  # eg '../results/ADNI'
    parser.add_argument("--output_prefix", type=str, help="prefix added to features filename and results (predictions) file name")  # eg: 'ADNI'
    parser.add_argument("--mask_file", type=str, help="path to GM mask nii file",
                        default='../masks/brainmask_12.8.nii')
    parser.add_argument("--smooth_fwhm", type=int, help="smoothing FWHM", default=4)
    parser.add_argument("--resample_size", type=int, help="resampling kernel size", default=4)
    parser.add_argument("--model_file", type=str, help="Trained model to be used to predict",
                        default='../trained_models/4sites.S4_R4_pca.gauss.models')
    # For testing
    # python3 predict_age.py --features_path ../data/ADNI --subject_filepaths ../data/ADNI/ADNI.paths_cat12.8.csv --output_path ../results/ADNI --output_prefix ADNI --mask_file ../masks/brainmask_12.8.nii --smooth_fwhm 4 --resample_size 4 --model_file ../trained_models/4sites.S4_R4_pca.gauss.models
    args = parser.parse_args()

    features_path = args.features_path
    data_dir = args.data_dir
    subject_filepaths = args.subject_filepaths
    output_path = args.output_path
    output_prefix = args.output_prefix
    smooth_fwhm = args.smooth_fwhm
    resample_size = args.resample_size
    mask_file = args.mask_file
    model_file = args.model_file

    print('\nBrain-age trained model used: ', model_file)
    print('Data directory (test data): ', data_dir)
    print('Subjects filepaths (test data): ', subject_filepaths)
    print('Directory to features path: ', features_path)
    print('Results directory: ', output_path)
    print('Results filename prefix: ', output_prefix)
    print('GM mask used: ', mask_file)

    # BUGFIX: create the output directories *before* any file is written.
    # The original created them only after writing subject_full_filepaths.csv
    # into features_path, which crashed on a fresh run.
    Path(output_path).mkdir(exist_ok=True, parents=True)
    Path(features_path).mkdir(exist_ok=True, parents=True)

    # Create full filenames for the subjects' nii files (prefix every path
    # with data_dir via pandas broadcasting) and save as csv in features_path.
    subject_filepaths_nii = pd.read_csv(subject_filepaths, header=None)
    subject_filepaths_nii = data_dir + '/' + subject_filepaths_nii
    print(subject_filepaths_nii)
    subject_full_filepaths = os.path.join(features_path, 'subject_full_filepaths.csv')
    print(subject_full_filepaths)
    subject_filepaths_nii.to_csv(subject_full_filepaths, header=False, index=False)

    # Derive feature-space and model names from the model filename,
    # e.g. '4sites.S4_R4_pca.gauss.models' -> 'S4_R4_pca' and 'gauss'.
    pipeline_name1 = model_file.split('/')[-1]
    feature_space = pipeline_name1.split('.')[1]
    model_name = pipeline_name1.split('.')[2]
    pipeline_name = feature_space + '.' + model_name

    # Rebuild the feature-space name from the CLI smoothing/resampling values.
    if 'pca' in feature_space:
        feature_space_str = 'S' + str(smooth_fwhm) + '_R' + str(resample_size) + '_pca'
    else:
        feature_space_str = 'S' + str(smooth_fwhm) + '_R' + str(resample_size)

    # The two names must agree. Raise explicitly instead of `assert`,
    # which is silently stripped when Python runs with -O.
    if feature_space_str != feature_space:
        raise ValueError(f"Mismatch in feature parameters entered ({feature_space_str}) & features used for model training ({feature_space})")
    print('Feature space: ', feature_space)
    print('Model name: ', model_name)

    # Reuse cached features when present, otherwise extract and cache them.
    features_filename = str(output_prefix) + '.S' + str(smooth_fwhm) + '_R' + str(resample_size)
    features_fullfile = os.path.join(features_path, features_filename)
    print('\nfilename for features created: ', features_fullfile)
    if os.path.isfile(features_fullfile):
        print('\n----File exists')
        with open(features_fullfile, 'rb') as f:  # closes handle even on error
            data_df = pickle.load(f)
        print('Features loaded')
    else:
        print('\n-----Extracting features')
        data_df = calculate_voxelwise_features(subject_full_filepaths, mask_file, smooth_fwhm=smooth_fwhm, resample_size=resample_size)
        # Save features both as a pickle (fast reload) and a csv (inspection).
        with open(features_fullfile, "wb") as f:
            pickle.dump(data_df, f, protocol=4)
        data_df.to_csv(features_fullfile + '.csv', index=False)
        print('Feature extraction done and saved')

    # Get predictions and save them.
    try:
        predictions_df = model_pred(data_df, model_file, feature_space_str)
        predictions_filename = str(output_prefix) + '.' + pipeline_name + '.prediction.csv'
        predictions_fullfile = os.path.join(output_path, predictions_filename)
        print('\nfilename for predictions created: ', predictions_fullfile)
        predictions_df.to_csv(predictions_fullfile, index=False)
        print(predictions_df)
    except FileNotFoundError:
        # Best-effort: a missing model file is reported, not fatal.
        print(f'{model_file} is not present')
|