"""Fit per-recording random-forest classifiers on GridWalk 3D pose features.

For every recording in ``patients`` the script:

1. loads the 3D tracking CSV and passes it through ``transform``;
2. derives features: ``vel_acc`` output, angles for body-part triples,
   distances for body-part pairs, plus first and second row-wise differences
   of the angles and distances;
3. sets the label to 1 at every row index listed (value > 0) by any of the
   three manual-rater tables for that recording;
4. rebalances the classes (SMOTE followed by random under-sampling);
5. trains 100 random forests on random train/test splits and keeps the one
   with the highest test accuracy;
6. computes permutation importance for that model and dumps the complete
   interpreter session with ``dill``.

NOTE: all work stays at module level on purpose. ``dill.dump_session``
persists module globals, so the intermediate results must remain globals
(with their existing names) to end up in the saved session.
"""
# Import order is kept exactly as in the original file: the star import below
# can rebind any name imported before it, and the imports after it take
# precedence over whatever it exports.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from master_funcs import *  # provides transform, vel_acc, angle_all, distance_all
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
import dill


def _keyed_frame(values, columns, key):
    """Wrap *values* in a DataFrame whose columns are ``(key, column)`` pairs."""
    frame = pd.DataFrame(values)
    frame.columns = columns
    frame = pd.concat([frame], keys=[key], axis=1, names=['coords'])
    frame.columns.names = ['coords', 'bodyparts']
    return frame


def _pad_zero_rows(values, n_rows):
    """Append *n_rows* rows of zeros to the bottom of a 2-D array."""
    return np.pad(values, ((0, n_rows), (0, 0)), mode='constant')


# Manual rater tables: no header row, first column is the index that is
# looked up by recording name below.
mr_gw_1 = pd.read_csv(
    "/mnt/DLC/DLC/3D_videos/manual_raters/Manual_GridWalk_Analyse_Jan.csv",
    index_col=0, delimiter=',', header=None)
mr_gw_2 = pd.read_csv(
    "/mnt/DLC/DLC/3D_videos/manual_raters/Manual_GridWalk_Analyse_Nicole.csv",
    index_col=0, delimiter=',', header=None)
mr_gw_3 = pd.read_csv(
    "/mnt/DLC/DLC/3D_videos/manual_raters/Manual_GridWalk_Analyse_Jule.csv",
    index_col=0, delimiter=',', header=None)

patients = ['PB_T2_3_1', 'PB_T2_3_2', 'PB_T2_3_3', 'PB_T2_4_2', 'PB_T2_5_1',
            'PB_T2_6_1', 'PB_T2_6_2', 'PB_T3_23_1', 'PB_T3_23_2',
            'PB_T3_23_3', 'PB_T3_24_1', 'PB_T3_24_2', 'PB_T3_24_3']

for z in patients:
    # ------------------------------------------------------------------
    # Load the tracking data and derive velocity / acceleration.
    # ------------------------------------------------------------------
    GW_3D = '/mnt/DLC/DLC/3D_videos/GW_3D/' + z + '_P3_GW_DLC_3D.csv'
    data = pd.read_csv(GW_3D, delimiter=',', skiprows=0, index_col=0,
                       header=[1, 2])
    coln = data.columns  # not used below; kept because the session dump stores it

    # Reference markers handed to transform(); `y` is re-bound to the label
    # vector further down.
    zero = 'grid_top_left'
    x = 'grid_top_right'
    y = 'grid_bottom_left'
    data = transform(data, zero, x, y)

    vel, acc = vel_acc(data, '3D')

    # Every third column starts a new body part; take its top-level label.
    names = [col[0] for col in data.columns[::3]]

    df_vel = _keyed_frame(vel, names, 'velocity')
    df_acc = _keyed_frame(acc, names, 'acceleration')

    # ------------------------------------------------------------------
    # Angles for body-part triples.
    # ------------------------------------------------------------------
    # The (0, 1, 2) triple seeds the table; the loops then start at index 1,
    # exactly as before, so names[0] appears in no other triple.
    nnames = [names[0] + '_' + names[1] + '_' + names[2]]
    angle_columns = [angle_all(data, names[0], names[1], names[2])]
    count = 0  # progress counter (was `sum`, which shadowed the builtin)
    for i in range(1, len(names)):
        for j in range(i + 1, len(names)):
            for k in range(i + 2, len(names)):
                # i < j and i < k follow from the ranges, so only j == k
                # has to be excluded (the original wrote `i!=j!=k`).
                if j != k:
                    angle_columns.append(
                        angle_all(data, names[i], names[j], names[k]))
                    nnames.append(names[i] + '_' + names[j] + '_' + names[k])
                    count += 1
                    print(count, ' of 2870 angles')
    # Stack once instead of growing the array column by column.
    angle = np.column_stack(angle_columns)
    df_angle = _keyed_frame(angle, nnames, 'angle')

    # First difference, zero-padded by one row to keep the row count.
    angle_vel = _pad_zero_rows(np.diff(angle, axis=0), 1)
    df_angle_vel = _keyed_frame(angle_vel, nnames, 'angle_velocity')

    # Second difference of the raw angles, zero-padded by two rows.
    # The previous code differenced the already padded velocity (N rows ->
    # N-1) and then added two rows, producing N+1 rows and a spurious last
    # value derived from the padding zero.
    angle_acc = _pad_zero_rows(np.diff(angle, n=2, axis=0), 2)
    df_angle_acc = _keyed_frame(angle_acc, nnames, 'angle_acceleration')

    # ------------------------------------------------------------------
    # Distances for body-part pairs (same seeding scheme as the angles).
    # ------------------------------------------------------------------
    nnames = [names[0] + '_' + names[1]]
    distance_columns = [distance_all(data, names[0], names[1])]
    count = 0
    for i in range(1, len(names)):
        for j in range(i + 1, len(names)):
            distance_columns.append(distance_all(data, names[i], names[j]))
            nnames.append(names[i] + '_' + names[j])
            count += 1
            print(count, 'of 231')
    dis = np.column_stack(distance_columns)
    df_dis = _keyed_frame(dis, nnames, 'distance')

    dis_vel = _pad_zero_rows(np.diff(dis, axis=0), 1)
    df_dis_vel = _keyed_frame(dis_vel, nnames, 'distance_velocity')

    # Same row-count fix as for angle_acc above.
    dis_acc = _pad_zero_rows(np.diff(dis, n=2, axis=0), 2)
    df_dis_acc = _keyed_frame(dis_acc, nnames, 'distance_acceleration')

    # ------------------------------------------------------------------
    # Assemble the feature table with flat "<coords>_<bodyparts>" names.
    # ------------------------------------------------------------------
    df_vel_acc = pd.concat(
        (df_vel, df_acc, df_angle, df_angle_vel, df_angle_acc,
         df_dis, df_dis_vel, df_dis_acc),
        axis=1)
    names = [coords + '_' + bodyparts
             for coords, bodyparts in df_vel_acc.columns]
    df_vel_acc.columns = names

    # ------------------------------------------------------------------
    # Labels: every positive entry from column 2 onwards in a rater's row
    # for this recording is used as a row index of the positive class.
    # ------------------------------------------------------------------
    vid1 = []
    for rater in (mr_gw_1, mr_gw_2, mr_gw_3):
        df = rater.fillna(0)
        vid1.extend(entry for entry in df.loc[z, 2:] if entry > 0)

    y = np.zeros(shape=(len(df_vel_acc),))
    for i in vid1:
        y[int(i)] = 1

    # Fill gaps linearly; whatever is still missing becomes 0.
    df_vel_acc = df_vel_acc.interpolate(method='polynomial', order=1)
    df_vel_acc = df_vel_acc.fillna(0)

    # ------------------------------------------------------------------
    # Rebalance the classes.
    # NOTE(review): resampling happens before the train/test split, so
    # synthetic samples can land in the test sets -- the reported accuracy
    # is likely optimistic. Left unchanged; confirm whether this is intended.
    # ------------------------------------------------------------------
    over = SMOTE(sampling_strategy=0.1)
    under = RandomUnderSampler(sampling_strategy=0.2)
    steps = [('o', over), ('u', under)]
    pipeline = Pipeline(steps=steps)
    X, y = pipeline.fit_resample(df_vel_acc, y)

    # ------------------------------------------------------------------
    # Train 100 forests on random splits and keep the most accurate one
    # together with the scaler and test split it was evaluated on.
    # ------------------------------------------------------------------
    acc_score = 0
    best_model = None
    for i in range(100):
        if (i + 1) % 10 == 0:
            print("iterations: {}/100".format(i + 1))

        X_train, X_test, y_train, y_test = train_test_split(X, y)
        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)

        rfc = RandomForestClassifier(bootstrap=True).fit(X_train, y_train)
        pred_rfc = rfc.predict(X_test)

        score = accuracy_score(y_test, pred_rfc)
        # Accept the first model unconditionally so best_model is always set.
        if best_model is None or score > acc_score:
            best_model = rfc
            acc_score = score
            best_sc = sc
            best_X_test, best_y_test = X_test, y_test

    # Score the best model on its own held-out split. Using the X_test /
    # y_test left over from the final iteration would mix in rows the best
    # model was trained on and apply a different scaler.
    result = permutation_importance(best_model, best_X_test, best_y_test,
                                    n_repeats=10, random_state=42)

    dill.dump_session('GW_' + z + '_data.pkl')
    print(z + ' ready!')