"""Per-patient touch/drag frame classifiers for the two-camera cylinder task.

For every patient the script

1. loads the two camera tracking CSVs with ``prep_dlc`` and derives
   velocity/acceleration with ``vel_acc`` (both from ``master_funcs``),
2. builds angle and distance features (plus their frame-to-frame differences)
   for each camera,
3. picks, frame by frame, the feature row of the camera whose summed
   likelihood is higher,
4. labels frames from three manual rater tables (touch and drag),
5. resamples (SMOTE + random under-sampling), fits 100 random forests on
   random splits, keeps the most accurate one and computes its permutation
   importance,
6. dumps the interpreter session with ``dill``.

The per-patient results are deliberately kept as module-level names because
``dill.dump_session`` pickles the globals of ``__main__``.
"""
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from master_funcs import *
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
import dill


def _read_rater_table(path):
    """Read one manual-rater CSV (no header row, first column used as index)."""
    return pd.read_csv(path, index_col=0, delimiter=',', header=None)


def _fill_gaps_and_sum_likelihood(data, bodyparts):
    """Interpolate x/y per bodypart, sum the likelihood columns, then drop them.

    Returns ``(data_without_likelihood, summed_likelihood)`` where the second
    item is a 1-D array with one value per row of ``data``.
    """
    likelihood = np.zeros(len(data))
    for part in bodyparts:
        for axis in ('x', 'y'):
            data[part, axis] = data[part, axis].interpolate(method='polynomial', order=1)
        likelihood += data[part, 'likelihood'].to_numpy()
        data = data.drop([(part, 'likelihood')], axis=1)
    return data, likelihood


def _with_group(values, labels, group):
    """Wrap ``values`` in a DataFrame with two-level columns ``(group, label)``."""
    frame = pd.DataFrame(values)
    frame.columns = labels
    frame = pd.concat([frame], keys=[group], axis=1, names=['coords'])
    frame.columns.names = ['coords', 'bodyparts']
    return frame


def _frame_diff(values):
    """Row-to-row difference, padded with one zero row to keep the row count."""
    values = np.asarray(values)
    return np.r_[np.diff(values, axis=0), np.zeros((1, values.shape[1]))]


def _camera_features(data, vel, acc, track_names, bodyparts):
    """Build the flat feature table of one camera.

    ``vel``/``acc`` are labelled with ``track_names``; angles and distances are
    computed from ``data`` for combinations of ``bodyparts``.
    """
    n_parts = len(bodyparts)

    # NOTE(review): apart from the seed combination, bodyparts[0] never takes
    # part (all loops start at index 1). This reproduces the original
    # enumeration; confirm it is intended.
    triples = [
        (i, j, k)
        for i in range(1, n_parts)
        for j in range(i + 1, n_parts)
        for k in range(i + 2, n_parts)
        if j != k
    ]
    angle_columns = [angle_all_2D(data, bodyparts[0], bodyparts[1], bodyparts[2])]
    angle_names = [bodyparts[0] + '_' + bodyparts[1] + '_' + bodyparts[2]]
    for done, (i, j, k) in enumerate(triples, start=1):
        angle_columns.append(angle_all_2D(data, bodyparts[i], bodyparts[j], bodyparts[k]))
        angle_names.append(bodyparts[i] + '_' + bodyparts[j] + '_' + bodyparts[k])
        print(done, ' of {} angles'.format(len(triples)))
    # Stack once instead of re-copying the growing matrix for every column.
    angle = np.column_stack(angle_columns)
    angle_vel = _frame_diff(angle)
    # The second difference is taken of the already padded velocity block and
    # padded once more, so all blocks share the same number of rows.
    angle_acc = _frame_diff(angle_vel)

    pairs = [(i, j) for i in range(1, n_parts) for j in range(i + 1, n_parts)]
    dis_columns = [distance_all_2D(data, bodyparts[0], bodyparts[1])]
    dis_names = [bodyparts[0] + '_' + bodyparts[1]]
    for done, (i, j) in enumerate(pairs, start=1):
        dis_columns.append(distance_all_2D(data, bodyparts[i], bodyparts[j]))
        dis_names.append(bodyparts[i] + '_' + bodyparts[j])
        print(done, 'of {}'.format(len(pairs)))
    dis = np.column_stack(dis_columns)
    dis_vel = _frame_diff(dis)
    dis_acc = _frame_diff(dis_vel)

    features = pd.concat(
        [
            _with_group(vel, track_names, 'velocity'),
            # 'accocity' is kept verbatim: it becomes part of the feature
            # column names that end up in the dumped sessions.
            _with_group(acc, track_names, 'accocity'),
            _with_group(angle, angle_names, 'angle'),
            _with_group(angle_vel, angle_names, 'angle_velocity'),
            _with_group(angle_acc, angle_names, 'angle_acceleration'),
            _with_group(dis, dis_names, 'distance'),
            _with_group(dis_vel, dis_names, 'distance_velocity'),
            _with_group(dis_acc, dis_names, 'distance_acceleration'),
        ],
        axis=1,
    )
    features.columns = [group + '_' + label for group, label in features.columns]
    features = features.interpolate(method='polynomial', order=1)
    return features.fillna(0)


def _fuse_cameras(features1, features2, likeli1, likeli2):
    """Pick, per frame, the feature row of the camera with the higher likelihood.

    Camera 2 is used only where its summed likelihood is strictly higher;
    ties (and NaN comparisons) fall back to camera 1.
    """
    n_frames = len(likeli1)
    if len(likeli2) < n_frames:
        raise IndexError('camera 2 has fewer frames than camera 1')
    use_second = np.asarray(likeli2)[:n_frames] > np.asarray(likeli1)
    first = features1.iloc[:n_frames].to_numpy()
    second = features2.iloc[:n_frames].to_numpy()
    fused = pd.DataFrame(np.where(use_second[:, None], second, first))
    fused.columns = features1.columns
    return fused


def _event_labels(rater_tables, patient, n_frames):
    """Return a 0/1 vector of length ``n_frames`` marking rater-annotated frames.

    Every positive entry in the ``patient`` row of each rater table is used as
    a frame index.
    """
    labels = np.zeros(shape=(n_frames,))
    for table in rater_tables:
        for frame in table.fillna(0).loc[patient, :]:
            if frame > 0:
                labels[int(frame)] = 1
    return labels


def _train_event_classifier(features, labels, n_rounds=100, min_events=6):
    """Resample, fit ``n_rounds`` random forests and keep the most accurate one.

    Returns the tuple ``(X, y, model, accuracy, scaler, X_test, y_test,
    importance)``. ``X``/``y`` are the resampled data; ``scaler``, ``X_test``
    and ``y_test`` belong to the round that produced ``model``; ``importance``
    is its permutation importance on that same test split. With fewer than
    ``min_events`` positive labels nothing is fitted and every item except
    ``y`` (the unchanged ``labels``) is ``None``.
    """
    if np.count_nonzero(labels) < min_events:
        return None, labels, None, None, None, None, None, None

    # NOTE(review): resampling happens before the train/test split, so
    # synthetic samples in the training set can be derived from test rows.
    resampler = Pipeline(steps=[
        ('o', SMOTE(sampling_strategy=0.1)),
        ('u', RandomUnderSampler(sampling_strategy=0.2)),
    ])
    X, y = resampler.fit_resample(features, labels)

    best = None
    best_score = 0
    for round_no in range(1, n_rounds + 1):
        if round_no % 10 == 0:
            print("iterations: {}/{}".format(round_no, n_rounds))
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        model = RandomForestClassifier(bootstrap=True).fit(X_train, y_train)
        score = accuracy_score(y_test, model.predict(X_test))
        # `best is None` guarantees a model is retained even if every round
        # scores 0.
        if best is None or score > best_score:
            best_score = score
            # Keep the scaler and test split of this round: the importance
            # below must be measured on data the retained model did not see
            # and that was scaled the same way.
            best = (model, scaler, X_test, y_test)

    model, scaler, X_test, y_test = best
    importance = permutation_importance(model, X_test, y_test, n_repeats=10, random_state=42)
    return X, y, model, best_score, scaler, X_test, y_test, importance


mr_cyl_touch_1 = _read_rater_table("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Jan_touch.csv")
mr_cyl_touch_2 = _read_rater_table("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Nicole_touch.csv")
mr_cyl_touch_3 = _read_rater_table("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Jule_touch.csv")
mr_cyl_drag_1 = _read_rater_table("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Jan_drag.csv")
mr_cyl_drag_2 = _read_rater_table("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Nicole_drag.csv")
mr_cyl_drag_3 = _read_rater_table("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Jule_drag.csv")

patients = ['PB_T2_3_1', 'PB_T2_3_2', 'PB_T2_3_3', 'PB_T2_4_2', 'PB_T2_5_1',
            'PB_T2_6_1', 'PB_T2_6_2', 'PB_T3_23_1', 'PB_T3_23_2', 'PB_T3_23_3',
            'PB_T3_24_1', 'PB_T3_24_2', 'PB_T3_24_3']

for z in patients:
    path1 = '/mnt/DLC/DLC/3D_videos/CY_2D/' + z + '_P3_CY_camera-1DLC_resnet50_Cylinder_camera-1Jun15shuffle1_250000_filtered.csv'
    path2 = '/mnt/DLC/DLC/3D_videos/CY_2D/' + z + '_P3_CY_camera-2DLC_resnet50_Cylinder_camera-2Jun15shuffle1_250000_filtered.csv'
    # NOTE(review): 0.9 and 1080 are presumably a likelihood cut-off and the
    # image height; confirm against master_funcs.prep_dlc.
    data1 = prep_dlc(path1, 0.9, 1080)
    data2 = prep_dlc(path2, 0.9, 1080)

    # Velocity/acceleration are taken before the x/y gaps are interpolated
    # below, as in the original statement order.
    vel1, acc1 = vel_acc(data1, '2D')
    vel2, acc2 = vel_acc(data2, '2D')

    # Every third column starts a new bodypart (x, y, likelihood are read per
    # bodypart below).
    bodyparts = [column[0] for column in data1.columns[::3]]
    data1, likeli1 = _fill_gaps_and_sum_likelihood(data1, bodyparts)
    data2, likeli2 = _fill_gaps_and_sum_likelihood(data2, bodyparts)

    df_vel_acc1 = _camera_features(data1, vel1, acc1, bodyparts, bodyparts)
    # With the likelihood columns dropped, every second column starts a bodypart.
    bodyparts2 = [column[0] for column in data2.columns[::2]]
    df_vel_acc2 = _camera_features(data2, vel2, acc2, bodyparts, bodyparts2)
    del vel1, acc1, vel2, acc2

    # The original script left `names` holding the flat camera-2 feature names
    # when the session was dumped; keep that for consumers of the pickles.
    names = list(df_vel_acc2.columns)

    df_big = _fuse_cameras(df_vel_acc1, df_vel_acc2, likeli1, likeli2)

    # Results are rebound for every patient (None when too few labelled
    # frames), so a session never carries over the previous patient's model.
    (X_touch, y_touch, best_model_touch, acc_score_touch, sc_touch,
     X_touch_test, y_touch_test, result_touch) = _train_event_classifier(
        df_big,
        _event_labels([mr_cyl_touch_1, mr_cyl_touch_2, mr_cyl_touch_3], z, len(df_big)),
    )

    (X_drag, y_drag, best_model_drag, acc_score_drag, sc_drag,
     X_drag_test, y_drag_test, result_drag) = _train_event_classifier(
        df_big,
        _event_labels([mr_cyl_drag_1, mr_cyl_drag_2, mr_cyl_drag_3], z, len(df_big)),
    )

    dill.dump_session('CY_2D_' + z + '_data.pkl')
    print(z + ' ready!')