# FI_GW_2D.py
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
# NOTE: the star import is kept at its original position; moving it could
# change which definition wins if master_funcs re-exports a name used here.
from master_funcs import *
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
import dill
  16. mr_gw_1=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_GridWalk_Analyse_Jan.csv", index_col=0, delimiter=',', header=None)
  17. mr_gw_2=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_GridWalk_Analyse_Nicole.csv", index_col=0, delimiter=',', header=None)
  18. mr_gw_3=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_GridWalk_Analyse_Jule.csv", index_col=0, delimiter=',', header=None)
  19. patients=['PB_T2_3_1', 'PB_T2_3_2', 'PB_T2_3_3', 'PB_T2_4_2', 'PB_T2_5_1', 'PB_T2_6_1', 'PB_T2_6_2', 'PB_T3_23_1', 'PB_T3_23_2', 'PB_T3_23_3', 'PB_T3_24_1', 'PB_T3_24_2', 'PB_T3_24_3']
  20. for z in patients:
  21. path1='/mnt/DLC/DLC/3D_videos/GW_2D/'+z+'_P3_GW_camera-1DLC_resnet50_GridWalk_camera-1Jun15shuffle1_250000.csv'
  22. path2='/mnt/DLC/DLC/3D_videos/GW_2D/'+z+'_P3_GW_camera-2DLC_resnet50_GridWalk_camera-2Jun15shuffle1_250000.csv'
  23. data1=prep_dlc(path1, 0.9, 1080)
  24. data2=prep_dlc(path2, 0.9, 1080)
  25. vel1, acc1=vel_acc(data1, '2D')
  26. vel2, acc2=vel_acc(data2, '2D')
  27. names=[i[0] for i in data1.columns[::3]]
  28. likeli1=np.array(np.zeros(shape=(len(data1,))))
  29. likeli2=np.array(np.zeros(shape=(len(data2,))))
  30. for j in names:
  31. data1[j, 'x']=data1[j, 'x'].interpolate(method='polynomial', order=1)
  32. data1[j, 'y']=data1[j, 'y'].interpolate(method='polynomial', order=1)
  33. likeli1+=data1[j, 'likelihood']
  34. data1=data1.drop([(j, 'likelihood')], axis=1)
  35. data2[j, 'x']=data2[j, 'x'].interpolate(method='polynomial', order=1)
  36. data2[j, 'y']=data2[j, 'y'].interpolate(method='polynomial', order=1)
  37. likeli2+=data2[j, 'likelihood']
  38. data2=data2.drop([(j, 'likelihood')], axis=1)
  39. df_vel1=pd.DataFrame(vel1)
  40. df_vel1.columns=names
  41. df_vel1=pd.concat([df_vel1], keys=['velocity'], axis=1, names=['coords'])
  42. df_vel1.columns.names=['coords', 'bodyparts']
  43. df_vel2=pd.DataFrame(vel2)
  44. df_vel2.columns=names
  45. df_vel2=pd.concat([df_vel2], keys=['velocity'], axis=1, names=['coords'])
  46. df_vel2.columns.names=['coords', 'bodyparts']
  47. df_acc1=pd.DataFrame(acc1)
  48. df_acc1.columns=names
  49. df_acc1=pd.concat([df_acc1], keys=['accocity'], axis=1, names=['coords'])
  50. df_acc1.columns.names=['coords', 'bodyparts']
  51. df_acc2=pd.DataFrame(acc2)
  52. df_acc2.columns=names
  53. df_acc2=pd.concat([df_acc2], keys=['accocity'], axis=1, names=['coords'])
  54. df_acc2.columns.names=['coords', 'bodyparts']
  55. nnames=[]
  56. angle1=angle_all_2D(data1, names[0], names[1], names[2])
  57. nnames.append(names[0]+'_'+names[1]+'_'+names[2])
  58. sum=0
  59. for i in range(1, len(names)):
  60. for j in range(i+1, len(names)):
  61. for k in range(i+2, len(names)):
  62. if i!=j!=k:
  63. angle1=np.c_[angle1,angle_all_2D(data1, names[i], names[j], names[k])]
  64. nnames.append(names[i]+'_'+names[j]+'_'+names[k])
  65. sum+=1
  66. print(sum, ' of 2870 angles')
  67. df_angle1=pd.DataFrame(angle1)
  68. df_angle1.columns=nnames
  69. df_angle1=pd.concat([df_angle1], keys=['angle'], axis=1, names=['coords'])
  70. df_angle1.columns.names=['coords', 'bodyparts']
  71. angle_vel1=np.diff(df_angle1[df_angle1.columns[0]])
  72. for i in df_angle1.columns[1:]:
  73. angle_vel1=np.c_[angle_vel1, np.diff(df_angle1[i])]
  74. angle_vel1=np.r_[angle_vel1, [[0]*angle_vel1.shape[1]]]
  75. df_angle_vel1=pd.DataFrame(angle_vel1)
  76. df_angle_vel1.columns=nnames
  77. df_angle_vel1=pd.concat([df_angle_vel1], keys=['angle_velocity'], axis=1, names=['coords'])
  78. df_angle_vel1.columns.names=['coords', 'bodyparts']
  79. angle_acc1=np.diff(df_angle_vel1[df_angle_vel1.columns[0]])
  80. for i in df_angle_vel1.columns[1:]:
  81. angle_acc1=np.c_[angle_acc1, np.diff(df_angle_vel1[i])]
  82. angle_acc1=np.r_[angle_acc1, [[0]*angle_acc1.shape[1], [0]*angle_acc1.shape[1]]]
  83. df_angle_acc1=pd.DataFrame(angle_acc1)
  84. df_angle_acc1.columns=nnames
  85. df_angle_acc1=pd.concat([df_angle_acc1], keys=['angle_acceleration'], axis=1, names=['coords'])
  86. df_angle_acc1.columns.names=['coords', 'bodyparts']
  87. nnames=[]
  88. dis1=distance_all_2D(data1, names[0], names[1])
  89. nnames.append(names[0]+'_'+names[1])
  90. sum=0
  91. for i in range(1, len(names)):
  92. for j in range(i+1, len(names)):
  93. dis1=np.c_[dis1, distance_all_2D(data1, names[i], names[j])]
  94. nnames.append(names[i]+'_'+names[j])
  95. sum+=1
  96. print(sum, 'of 231')
  97. df_dis1=pd.DataFrame(dis1)
  98. df_dis1.columns=nnames
  99. df_dis1=pd.concat([df_dis1], keys=['distance'], axis=1, names=['coords'])
  100. df_dis1.columns.names=['coords', 'bodyparts']
  101. dis_vel1=np.diff(df_dis1[df_dis1.columns[0]])
  102. for i in df_dis1.columns[1:]:
  103. dis_vel1=np.c_[dis_vel1, np.diff(df_dis1[i])]
  104. dis_vel1=np.r_[dis_vel1, [[0]*dis_vel1.shape[1]]]
  105. df_dis_vel1=pd.DataFrame(dis_vel1)
  106. df_dis_vel1.columns=nnames
  107. df_dis_vel1=pd.concat([df_dis_vel1], keys=['distance_velocity'], axis=1, names=['coords'])
  108. df_dis_vel1.columns.names=['coords', 'bodyparts']
  109. dis_acc1=np.diff(df_dis_vel1[df_dis_vel1.columns[0]])
  110. for i in df_dis_vel1.columns[1:]:
  111. dis_acc1=np.c_[dis_acc1, np.diff(df_dis_vel1[i])]
  112. dis_acc1=np.r_[dis_acc1, [[0]*dis_acc1.shape[1], [0]*dis_acc1.shape[1]]]
  113. df_dis_acc1=pd.DataFrame(dis_acc1)
  114. df_dis_acc1.columns=nnames
  115. df_dis_acc1=pd.concat([df_dis_acc1], keys=['distance_acceleration'], axis=1, names=['coords'])
  116. df_dis_acc1.columns.names=['coords', 'bodyparts']
  117. df_vel_acc1=pd.concat((df_vel1, df_acc1, df_angle1, df_angle_vel1, df_angle_acc1, df_dis1, df_dis_vel1, df_dis_acc1), axis=1)
  118. names=[]
  119. for i in range(0, len(df_vel_acc1.columns)):
  120. names.append(df_vel_acc1.columns[i][0]+'_'+df_vel_acc1.columns[i][1])
  121. df_vel_acc1.columns=names
  122. df_vel_acc1=df_vel_acc1.interpolate(method='polynomial', order=1)
  123. df_vel_acc1=df_vel_acc1.fillna(0)
  124. del df_vel1
  125. del df_acc1
  126. del acc1
  127. del vel1
  128. del angle1
  129. del df_angle1
  130. del angle_vel1
  131. del df_angle_vel1
  132. del angle_acc1
  133. del df_angle_acc1
  134. del dis1
  135. del df_dis1
  136. del dis_vel1
  137. del df_dis_vel1
  138. del dis_acc1
  139. del df_dis_acc1
  140. names=[i[0] for i in data2.columns[::2]]
  141. nnames=[]
  142. angle2=angle_all_2D(data2, names[0], names[1], names[2])
  143. nnames.append(names[0]+'_'+names[1]+'_'+names[2])
  144. sum=0
  145. for i in range(1, len(names)):
  146. for j in range(i+1, len(names)):
  147. for k in range(i+2, len(names)):
  148. if i!=j!=k:
  149. angle2=np.c_[angle2,angle_all_2D(data2, names[i], names[j], names[k])]
  150. nnames.append(names[i]+'_'+names[j]+'_'+names[k])
  151. sum+=1
  152. print(sum, ' of 2870 angles')
  153. df_angle2=pd.DataFrame(angle2)
  154. df_angle2.columns=nnames
  155. df_angle2=pd.concat([df_angle2], keys=['angle'], axis=1, names=['coords'])
  156. df_angle2.columns.names=['coords', 'bodyparts']
  157. angle_vel2=np.diff(df_angle2[df_angle2.columns[0]])
  158. for i in df_angle2.columns[1:]:
  159. angle_vel2=np.c_[angle_vel2, np.diff(df_angle2[i])]
  160. angle_vel2=np.r_[angle_vel2, [[0]*angle_vel2.shape[1]]]
  161. df_angle_vel2=pd.DataFrame(angle_vel2)
  162. df_angle_vel2.columns=nnames
  163. df_angle_vel2=pd.concat([df_angle_vel2], keys=['angle_velocity'], axis=1, names=['coords'])
  164. df_angle_vel2.columns.names=['coords', 'bodyparts']
  165. angle_acc2=np.diff(df_angle_vel2[df_angle_vel2.columns[0]])
  166. for i in df_angle_vel2.columns[1:]:
  167. angle_acc2=np.c_[angle_acc2, np.diff(df_angle_vel2[i])]
  168. angle_acc2=np.r_[angle_acc2, [[0]*angle_acc2.shape[1], [0]*angle_acc2.shape[1]]]
  169. df_angle_acc2=pd.DataFrame(angle_acc2)
  170. df_angle_acc2.columns=nnames
  171. df_angle_acc2=pd.concat([df_angle_acc2], keys=['angle_acceleration'], axis=1, names=['coords'])
  172. df_angle_acc2.columns.names=['coords', 'bodyparts']
  173. nnames=[]
  174. dis2=distance_all_2D(data2, names[0], names[1])
  175. nnames.append(names[0]+'_'+names[1])
  176. sum=0
  177. for i in range(1, len(names)):
  178. for j in range(i+1, len(names)):
  179. dis2=np.c_[dis2, distance_all_2D(data2, names[i], names[j])]
  180. nnames.append(names[i]+'_'+names[j])
  181. sum+=1
  182. print(sum, 'of 231')
  183. df_dis2=pd.DataFrame(dis2)
  184. df_dis2.columns=nnames
  185. df_dis2=pd.concat([df_dis2], keys=['distance'], axis=1, names=['coords'])
  186. df_dis2.columns.names=['coords', 'bodyparts']
  187. dis_vel2=np.diff(df_dis2[df_dis2.columns[0]])
  188. for i in df_dis2.columns[1:]:
  189. dis_vel2=np.c_[dis_vel2, np.diff(df_dis2[i])]
  190. dis_vel2=np.r_[dis_vel2, [[0]*dis_vel2.shape[1]]]
  191. df_dis_vel2=pd.DataFrame(dis_vel2)
  192. df_dis_vel2.columns=nnames
  193. df_dis_vel2=pd.concat([df_dis_vel2], keys=['distance_velocity'], axis=1, names=['coords'])
  194. df_dis_vel2.columns.names=['coords', 'bodyparts']
  195. dis_acc2=np.diff(df_dis_vel2[df_dis_vel2.columns[0]])
  196. for i in df_dis_vel2.columns[1:]:
  197. dis_acc2=np.c_[dis_acc2, np.diff(df_dis_vel2[i])]
  198. dis_acc2=np.r_[dis_acc2, [[0]*dis_acc2.shape[1], [0]*dis_acc2.shape[1]]]
  199. df_dis_acc2=pd.DataFrame(dis_acc2)
  200. df_dis_acc2.columns=nnames
  201. df_dis_acc2=pd.concat([df_dis_acc2], keys=['distance_acceleration'], axis=1, names=['coords'])
  202. df_dis_acc2.columns.names=['coords', 'bodyparts']
  203. df_vel_acc2=pd.concat((df_vel2, df_acc2, df_angle2, df_angle_vel2, df_angle_acc2, df_dis2, df_dis_vel2, df_dis_acc2), axis=1)
  204. names=[]
  205. for i in range(0, len(df_vel_acc2.columns)):
  206. names.append(df_vel_acc2.columns[i][0]+'_'+df_vel_acc2.columns[i][1])
  207. df_vel_acc2.columns=names
  208. df_vel_acc2=df_vel_acc2.interpolate(method='polynomial', order=1)
  209. df_vel_acc2=df_vel_acc2.fillna(0)
  210. del df_vel2
  211. del df_acc2
  212. del acc2
  213. del vel2
  214. del angle2
  215. del df_angle2
  216. del angle_vel2
  217. del df_angle_vel2
  218. del angle_acc2
  219. del df_angle_acc2
  220. del dis2
  221. del df_dis2
  222. del dis_vel2
  223. del df_dis_vel2
  224. del dis_acc2
  225. del df_dis_acc2
  226. if likeli1[0]>likeli2[0]:
  227. df_big=df_vel_acc1.iloc[[0]]
  228. else:
  229. df_big=df_vel_acc2.iloc[[0]]
  230. for i in range(1, len(likeli1)):
  231. if (i<=df_vel_acc1.shape[0]-1) and (i<=df_vel_acc2.shape[0]-1):
  232. if likeli1[i]>likeli2[i]:
  233. df_big=np.r_[df_big, df_vel_acc1.iloc[[i]]]
  234. elif likeli2[i]>likeli1[i]:
  235. df_big=np.r_[df_big, df_vel_acc2.iloc[[i]]]
  236. else:
  237. df_big=np.r_[df_big, df_vel_acc1.iloc[[i]]]
  238. df_big=pd.DataFrame(df_big)
  239. df_big.columns=df_vel_acc1.columns
  240. vid1=[]
  241. for j in [mr_gw_1, mr_gw_2, mr_gw_3]:
  242. df=j.fillna(0)
  243. for i in df.loc[z, :]:
  244. if i>0:
  245. vid1.append(i)
  246. y=np.zeros(shape=(len(df_big),))
  247. for i in vid1:
  248. y[int(i)]=1
  249. if np.count_nonzero(y)>=6:
  250. over = SMOTE(sampling_strategy=0.1)
  251. under = RandomUnderSampler(sampling_strategy=0.2)
  252. steps = [('o', over), ('u', under)]
  253. pipeline = Pipeline(steps=steps)
  254. X, y = pipeline.fit_resample(df_big, y)
  255. acc_score = 0
  256. for i in range(100):
  257. if (i+1) % 10 == 0:
  258. print("iterations: {}/100".format(i+1))
  259. X_train, X_test, y_train, y_test = train_test_split(X, y)
  260. sc = StandardScaler()
  261. X_train = sc.fit_transform(X_train)
  262. X_test = sc.transform(X_test)
  263. rfc = RandomForestClassifier(bootstrap=True).fit(X_train, y_train)
  264. pred_rfc = rfc.predict(X_test)
  265. if accuracy_score(y_test, pred_rfc) > acc_score:
  266. best_model = rfc
  267. acc_score = accuracy_score(y_test, pred_rfc)
  268. result = permutation_importance(best_model, X_test, y_test, n_repeats=10, random_state=42)
  269. dill.dump_session('GW_2D_'+z+'_data.pkl')
  270. print(z+' ready!')