# FI_GW_3D.py
# NOTE: import order is significant. `from master_funcs import *` may rebind
# any name imported above it, and the imports that follow it rebind whatever
# it exported under the same name, so the original order is kept as-is.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from master_funcs import *
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
import dill
  16. mr_gw_1=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_GridWalk_Analyse_Jan.csv", index_col=0, delimiter=',', header=None)
  17. mr_gw_2=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_GridWalk_Analyse_Nicole.csv", index_col=0, delimiter=',', header=None)
  18. mr_gw_3=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_GridWalk_Analyse_Jule.csv", index_col=0, delimiter=',', header=None)
  19. patients=['PB_T2_3_1', 'PB_T2_3_2', 'PB_T2_3_3', 'PB_T2_4_2', 'PB_T2_5_1', 'PB_T2_6_1', 'PB_T2_6_2', 'PB_T3_23_1', 'PB_T3_23_2', 'PB_T3_23_3', 'PB_T3_24_1', 'PB_T3_24_2', 'PB_T3_24_3']
  20. for z in patients:
  21. GW_3D='/mnt/DLC/DLC/3D_videos/GW_3D/'+z+'_P3_GW_DLC_3D.csv'
  22. data=pd.read_csv(GW_3D, delimiter=',', skiprows=0, index_col=0, header=[1,2])
  23. coln=data.columns
  24. zero='grid_top_left'
  25. x='grid_top_right'
  26. y='grid_bottom_left'
  27. data=transform(data, zero, x, y)
  28. vel, acc=vel_acc(data, '3D')
  29. names=[i[0] for i in data.columns[::3]]
  30. df_vel=pd.DataFrame(vel)
  31. df_vel.columns=names
  32. df_vel=pd.concat([df_vel], keys=['velocity'], axis=1, names=['coords'])
  33. df_vel.columns.names=['coords', 'bodyparts']
  34. df_acc=pd.DataFrame(acc)
  35. df_acc.columns=names
  36. df_acc=pd.concat([df_acc], keys=['acceleration'], axis=1, names=['coords'])
  37. df_acc.columns.names=['coords', 'bodyparts']
  38. nnames=[]
  39. angle=angle_all(data, names[0], names[1], names[2])
  40. nnames.append(names[0]+'_'+names[1]+'_'+names[2])
  41. sum=0
  42. for i in range(1, len(names)):
  43. for j in range(i+1, len(names)):
  44. for k in range(i+2, len(names)):
  45. if i!=j!=k:
  46. angle=np.c_[angle,angle_all(data, names[i], names[j], names[k])]
  47. nnames.append(names[i]+'_'+names[j]+'_'+names[k])
  48. sum+=1
  49. print(sum, ' of 2870 angles')
  50. df_angle=pd.DataFrame(angle)
  51. df_angle.columns=nnames
  52. df_angle=pd.concat([df_angle], keys=['angle'], axis=1, names=['coords'])
  53. df_angle.columns.names=['coords', 'bodyparts']
  54. angle_vel=np.diff(df_angle[df_angle.columns[0]])
  55. for i in df_angle.columns[1:]:
  56. angle_vel=np.c_[angle_vel, np.diff(df_angle[i])]
  57. angle_vel=np.r_[angle_vel, [[0]*angle_vel.shape[1]]]
  58. df_angle_vel=pd.DataFrame(angle_vel)
  59. df_angle_vel.columns=nnames
  60. df_angle_vel=pd.concat([df_angle_vel], keys=['angle_velocity'], axis=1, names=['coords'])
  61. df_angle_vel.columns.names=['coords', 'bodyparts']
  62. angle_acc=np.diff(df_angle_vel[df_angle_vel.columns[0]])
  63. for i in df_angle_vel.columns[1:]:
  64. angle_acc=np.c_[angle_acc, np.diff(df_angle_vel[i])]
  65. angle_acc=np.r_[angle_acc, [[0]*angle_acc.shape[1], [0]*angle_acc.shape[1]]]
  66. df_angle_acc=pd.DataFrame(angle_acc)
  67. df_angle_acc.columns=nnames
  68. df_angle_acc=pd.concat([df_angle_acc], keys=['angle_acceleration'], axis=1, names=['coords'])
  69. df_angle_acc.columns.names=['coords', 'bodyparts']
  70. nnames=[]
  71. dis=distance_all(data, names[0], names[1])
  72. nnames.append(names[0]+'_'+names[1])
  73. sum=0
  74. for i in range(1, len(names)):
  75. for j in range(i+1, len(names)):
  76. dis=np.c_[dis, distance_all(data, names[i], names[j])]
  77. nnames.append(names[i]+'_'+names[j])
  78. sum+=1
  79. print(sum, 'of 231')
  80. df_dis=pd.DataFrame(dis)
  81. df_dis.columns=nnames
  82. df_dis=pd.concat([df_dis], keys=['distance'], axis=1, names=['coords'])
  83. df_dis.columns.names=['coords', 'bodyparts']
  84. dis_vel=np.diff(df_dis[df_dis.columns[0]])
  85. for i in df_dis.columns[1:]:
  86. dis_vel=np.c_[dis_vel, np.diff(df_dis[i])]
  87. dis_vel=np.r_[dis_vel, [[0]*dis_vel.shape[1]]]
  88. df_dis_vel=pd.DataFrame(dis_vel)
  89. df_dis_vel.columns=nnames
  90. df_dis_vel=pd.concat([df_dis_vel], keys=['distance_velocity'], axis=1, names=['coords'])
  91. df_dis_vel.columns.names=['coords', 'bodyparts']
  92. dis_acc=np.diff(df_dis_vel[df_dis_vel.columns[0]])
  93. for i in df_dis_vel.columns[1:]:
  94. dis_acc=np.c_[dis_acc, np.diff(df_dis_vel[i])]
  95. dis_acc=np.r_[dis_acc, [[0]*dis_acc.shape[1], [0]*dis_acc.shape[1]]]
  96. df_dis_acc=pd.DataFrame(dis_acc)
  97. df_dis_acc.columns=nnames
  98. df_dis_acc=pd.concat([df_dis_acc], keys=['distance_acceleration'], axis=1, names=['coords'])
  99. df_dis_acc.columns.names=['coords', 'bodyparts']
  100. df_vel_acc=pd.concat((df_vel, df_acc, df_angle, df_angle_vel, df_angle_acc, df_dis, df_dis_vel, df_dis_acc), axis=1)
  101. names=[]
  102. for i in range(0, len(df_vel_acc.columns)):
  103. names.append(df_vel_acc.columns[i][0]+'_'+df_vel_acc.columns[i][1])
  104. df_vel_acc.columns=names
  105. vid1=[]
  106. for j in [mr_gw_1, mr_gw_2, mr_gw_3]:
  107. df=j.fillna(0)
  108. for i in df.loc[z, 2:]:
  109. if i>0:
  110. vid1.append(i)
  111. y=np.zeros(shape=(len(df_vel_acc),))
  112. for i in vid1:
  113. y[int(i)]=1
  114. df_vel_acc=df_vel_acc.interpolate(method='polynomial', order=1)
  115. df_vel_acc=df_vel_acc.fillna(0)
  116. over = SMOTE(sampling_strategy=0.1)
  117. under = RandomUnderSampler(sampling_strategy=0.2)
  118. steps = [('o', over), ('u', under)]
  119. pipeline = Pipeline(steps=steps)
  120. X, y = pipeline.fit_resample(df_vel_acc, y)
  121. acc_score = 0
  122. for i in range(100):
  123. if (i+1) % 10 == 0:
  124. print("iterations: {}/100".format(i+1))
  125. X_train, X_test, y_train, y_test = train_test_split(X, y)
  126. sc = StandardScaler()
  127. X_train = sc.fit_transform(X_train)
  128. X_test = sc.transform(X_test)
  129. rfc = RandomForestClassifier(bootstrap=True).fit(X_train, y_train)
  130. pred_rfc = rfc.predict(X_test)
  131. if accuracy_score(y_test, pred_rfc) > acc_score:
  132. best_model = rfc
  133. acc_score = accuracy_score(y_test, pred_rfc)
  134. result = permutation_importance(best_model, X_test, y_test, n_repeats=10, random_state=42)
  135. dill.dump_session('GW_'+z+'_data.pkl')
  136. print(z+' ready!')