"""Plot random-forest feature importances from saved dill sessions.

For every patient the script restores a session file and reads the fitted
models (`best_model`, `best_model_touch`, `best_model_drag`), the
permutation-importance results (`result`, `result_touch`, `result_drag`),
the accuracy scores (`acc_score*`) and the feature frame (`df_vel_acc` for
the "3d" files, `df_big` for the "2d" files) that the session defines.

Two kinds of figures are produced for each of the GW / CY x 3d / 2d
combinations:

* per patient: top-20 impurity-based and permutation importances;
* over all patients: top-20 of the accuracy-weighted sum of both measures.

NOTE(review): the original file had lost its line breaks and indentation.
Loop bodies were reconstructed from the code's meaning (everything whose
output file name contains ``z`` is inside the patient loop, the ``*_total_*``
figures are drawn once after it) -- confirm against the author's copy.
"""
# The import order is kept as in the original on purpose: the star import
# sits in the middle, so regrouping could change which binding wins for a
# name that master_funcs also exports.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from master_funcs import *
from collections import Counter
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
import dill
import warnings

warnings.filterwarnings("ignore")

# Equivalent of the IPython magic `%matplotlib qt`, written as plain Python
# so the file stays parsable outside IPython.
try:
    get_ipython().run_line_magic('matplotlib', 'qt')
except NameError:
    # Not running under IPython: keep matplotlib's default backend.
    pass

# dill.load_session() overwrites module globals with whatever the session
# contains, so the loops iterate over this constant rather than a variable
# that a session might also define.
PATIENTS = ('PB_T2_3_1', 'PB_T2_3_2', 'PB_T2_3_3', 'PB_T2_4_2', 'PB_T2_5_1',
            'PB_T2_6_1', 'PB_T2_6_2', 'PB_T3_23_1', 'PB_T3_23_2',
            'PB_T3_23_3', 'PB_T3_24_1', 'PB_T3_24_2', 'PB_T3_24_3')
patients = list(PATIENTS)  # original name, kept for interactive use

DATA_DIR = '/home/user/Documents/Master/'
FIGURE_DIR = '/home/user/owncloud/thesis_figures/'
TOP_N = 20
MDI_LABEL = 'Mean decrease in impurity'
PERM_LABEL = 'Mean accuracy decrease'

# Applied in this order to shorten the tick labels.
_ABBREVIATIONS = (
    ('acceleration', 'acc'),
    ('velocity', 'vel'),
    ('angle', 'ang'),
    ('right', 'r'),
    ('left', 'l'),
    ('top', 't'),
    ('bottom', 'b'),
)


def _shorten_feature_names(columns):
    """Return the column names with the long words replaced by abbreviations."""
    names = list(columns)
    for long_form, short_form in _ABBREVIATIONS:
        names = [name.replace(long_form, short_form) for name in names]
    return names


def _top_features(values, names, errors=None, top_n=TOP_N):
    """Return the ``top_n`` largest values and the errors of the same features.

    The ranking is positional, so ``errors`` stays aligned with ``values``
    even when two features share a (shortened) name.

    Returns:
        (top, top_errors): a Series indexed by feature name in descending
        order, and the matching error array (``None`` if no errors given).

    Raises:
        ValueError: if ``names`` and ``values`` differ in length.
    """
    values = np.asarray(values)
    if len(names) != len(values):
        raise ValueError(
            f'{len(names)} feature names for {len(values)} importance values')
    order = pd.Series(values).sort_values(ascending=False).index[:top_n].to_numpy()
    top = pd.Series(values[order], index=[names[i] for i in order])
    top_errors = None if errors is None else np.asarray(errors)[order]
    return top, top_errors


def _bar_plot(values, ylabel, filename, yerr=None, save=False):
    """Draw ``values`` as a 5x8 inch bar chart, optionally saving it.

    ``filename`` is relative to FIGURE_DIR and is only used when ``save`` is
    true; it is always passed so the intended output name stays documented.
    """
    fig, ax = plt.subplots()
    fig.set_size_inches(5, 8)
    values.plot.bar(yerr=yerr, ax=ax)
    ax.set_ylabel(ylabel)
    fig.tight_layout()
    if save:
        fig.savefig(FIGURE_DIR + filename, dpi=200)
    plt.show()


def _plot_forest_importance(model, names, filename, save=False):
    """Plot the top impurity-based importances of a fitted forest."""
    per_tree_std = np.std(
        [tree.feature_importances_ for tree in model.estimators_], axis=0)
    top, top_std = _top_features(model.feature_importances_, names,
                                 errors=per_tree_std)
    # NOTE(review): the original divides the per-tree std by 10 before
    # plotting; presumably a standard error for a 100-tree forest -- confirm.
    _bar_plot(top, MDI_LABEL, filename, yerr=top_std / 10, save=save)
    return top


def _plot_permutation_importance(perm_result, names, filename, save=False):
    """Plot the top permutation importances with their own std as error bars.

    The original passed ``importances_std[:20]`` (first 20 columns) next to
    the 20 *largest* means, so the error bars belonged to other features.
    """
    top, top_std = _top_features(perm_result.importances_mean, names,
                                 errors=perm_result.importances_std)
    _bar_plot(top, PERM_LABEL, filename, yerr=top_std, save=save)
    return top


def _add_weighted(total, values, names, weight):
    """Add ``values * weight`` to the running ``total`` (``None`` starts at 0).

    The accumulator is sized from ``names`` itself; the original sized it
    from ``df_vel_acc`` even where the names came from ``df_big``.
    """
    if total is None:
        total = pd.Series(np.zeros(len(names)), index=names)
    return total + pd.Series(values, index=names) * weight


def _plot_total(total, ylabel, filename, save=False):
    """Sort ``total`` descending, plot its top entries, return (sorted, top)."""
    ranked = total.sort_values(ascending=False)
    top = ranked[:TOP_N]
    _bar_plot(top, ylabel, filename, save=save)
    return ranked, top


# ---------------------------------------------------------------- GW, 3d ---
# NOTE: dill sessions are pickles; only load files you created yourself.
for z in PATIENTS:
    dill.load_session(f'{DATA_DIR}GW_{z}_data.pkl')
    names = _shorten_feature_names(df_vel_acc.columns.tolist())
    forest_importances = _plot_forest_importance(
        best_model, names, f'GW_FI_{z}_3d.png')
    forest_importances = _plot_permutation_importance(
        result, names, f'GW_FI_PM_{z}_3d.png')

total_imp = None
total_imp_perm = None
for z in PATIENTS:
    dill.load_session(f'{DATA_DIR}GW_{z}_data.pkl')
    names = _shorten_feature_names(df_vel_acc.columns.tolist())
    total_imp = _add_weighted(
        total_imp, best_model.feature_importances_, names, acc_score)
    total_imp_perm = _add_weighted(
        total_imp_perm, result.importances_mean, names, acc_score)

total_imp_sort, total_imp_sort_20 = _plot_total(
    total_imp, MDI_LABEL, 'GW_FI_total_3d.png')
# The permutation totals were labelled 'Mean decrease in impurity' in the
# original (copy-paste slip); they now carry the permutation label.
total_imp_perm_sort, total_imp_perm_sort_20 = _plot_total(
    total_imp_perm, PERM_LABEL, 'GW_FI_PM_total_3d.png')

# ---------------------------------------------------------------- CY, 3d ---
for z in PATIENTS:
    dill.load_session(f'{DATA_DIR}CY_{z}_data.pkl')
    names = _shorten_feature_names(df_vel_acc.columns.tolist())
    forest_importances = _plot_forest_importance(
        best_model_touch, names, f'CY_touch_FI_{z}_3d.png')
    forest_importances = _plot_permutation_importance(
        result_touch, names, f'CY_touch_FI_PM_{z}_3d.png')
    # Only the drag figures were written to disk in the original.
    forest_importances = _plot_forest_importance(
        best_model_drag, names, f'CY_drag_FI_{z}_3d.png', save=True)
    forest_importances = _plot_permutation_importance(
        result_drag, names, f'CY_drag_FI_PM_{z}_3d.png', save=True)

total_imp_touch = None
total_imp_perm_touch = None
total_imp_drag = None
total_imp_perm_drag = None
for z in PATIENTS:
    dill.load_session(f'{DATA_DIR}CY_{z}_data.pkl')
    names = _shorten_feature_names(df_vel_acc.columns.tolist())
    total_imp_touch = _add_weighted(
        total_imp_touch, best_model_touch.feature_importances_, names,
        acc_score_touch)
    total_imp_perm_touch = _add_weighted(
        total_imp_perm_touch, result_touch.importances_mean, names,
        acc_score_touch)
    total_imp_drag = _add_weighted(
        total_imp_drag, best_model_drag.feature_importances_, names,
        acc_score_drag)
    total_imp_perm_drag = _add_weighted(
        total_imp_perm_drag, result_drag.importances_mean, names,
        acc_score_drag)

total_imp_touch_sort, total_imp_touch_sort_20 = _plot_total(
    total_imp_touch, MDI_LABEL, 'CY_touch_FI_total_3d.png', save=True)
total_imp_perm_touch_sort, total_imp_perm_touch_sort_20 = _plot_total(
    total_imp_perm_touch, PERM_LABEL, 'CY_touch_FI_PM_total_3d.png', save=True)
total_imp_drag_sort, total_imp_drag_sort_20 = _plot_total(
    total_imp_drag, MDI_LABEL, 'CY_drag_FI_total_3d.png', save=True)
total_imp_perm_drag_sort, total_imp_perm_drag_sort_20 = _plot_total(
    total_imp_perm_drag, PERM_LABEL, 'CY_drag_FI_PM_total_3d.png', save=True)

# ---------------------------------------------------------------- GW, 2d ---
for z in PATIENTS:
    dill.load_session(f'{DATA_DIR}GW_2D_{z}_data.pkl')
    names = _shorten_feature_names(df_big.columns.tolist())
    forest_importances = _plot_forest_importance(
        best_model, names, f'GW_FI_{z}_2d.png')
    forest_importances = _plot_permutation_importance(
        result, names, f'GW_FI_PM_{z}_2d.png')

total_imp = None
total_imp_perm = None
for z in PATIENTS:
    dill.load_session(f'{DATA_DIR}GW_2D_{z}_data.pkl')
    names = _shorten_feature_names(df_big.columns.tolist())
    total_imp = _add_weighted(
        total_imp, best_model.feature_importances_, names, acc_score)
    total_imp_perm = _add_weighted(
        total_imp_perm, result.importances_mean, names, acc_score)

total_imp_sort, total_imp_sort_20 = _plot_total(
    total_imp, MDI_LABEL, 'GW_FI_total_2d.png')
total_imp_perm_sort, total_imp_perm_sort_20 = _plot_total(
    total_imp_perm, PERM_LABEL, 'GW_FI_PM_total_2d.png')

# ---------------------------------------------------------------- CY, 2d ---
for z in PATIENTS:
    dill.load_session(f'{DATA_DIR}CY_2D_{z}_data.pkl')
    names = _shorten_feature_names(df_big.columns.tolist())
    forest_importances = _plot_forest_importance(
        best_model_touch, names, f'CY_touch_FI_{z}_2d.png')
    forest_importances = _plot_permutation_importance(
        result_touch, names, f'CY_touch_FI_PM_{z}_2d.png')
    forest_importances = _plot_forest_importance(
        best_model_drag, names, f'CY_drag_FI_{z}_2d.png')
    forest_importances = _plot_permutation_importance(
        result_drag, names, f'CY_drag_FI_PM_{z}_2d.png')

total_imp_touch = None
total_imp_perm_touch = None
total_imp_drag = None
total_imp_perm_drag = None
for z in PATIENTS:
    dill.load_session(f'{DATA_DIR}CY_2D_{z}_data.pkl')
    names = _shorten_feature_names(df_big.columns.tolist())
    total_imp_touch = _add_weighted(
        total_imp_touch, best_model_touch.feature_importances_, names,
        acc_score_touch)
    total_imp_perm_touch = _add_weighted(
        total_imp_perm_touch, result_touch.importances_mean, names,
        acc_score_touch)
    total_imp_drag = _add_weighted(
        total_imp_drag, best_model_drag.feature_importances_, names,
        acc_score_drag)
    total_imp_perm_drag = _add_weighted(
        total_imp_perm_drag, result_drag.importances_mean, names,
        acc_score_drag)

total_imp_touch_sort, total_imp_touch_sort_20 = _plot_total(
    total_imp_touch, MDI_LABEL, 'CY_touch_FI_total_2d.png')
total_imp_perm_touch_sort, total_imp_perm_touch_sort_20 = _plot_total(
    total_imp_perm_touch, PERM_LABEL, 'CY_touch_FI_PM_total_2d.png')
total_imp_drag_sort, total_imp_drag_sort_20 = _plot_total(
    total_imp_drag, MDI_LABEL, 'CY_drag_FI_total_2d.png')
total_imp_perm_drag_sort, total_imp_perm_drag_sort_20 = _plot_total(
    total_imp_perm_drag, PERM_LABEL, 'CY_drag_FI_PM_total_2d.png')