FI_CY_2D.py

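# Per-patient feature extraction and classification for the 2-D cylinder test.
# Overview (inferred from the code below): DeepLabCut tracking from two camera
# views is converted into kinematic features (velocities, accelerations, joint
# angles, pairwise distances and their first/second differences), each frame is
# taken from whichever camera has the higher summed likelihood, and touch/drag
# events labelled by three manual raters are used to train RandomForest
# classifiers whose permutation feature importances are saved with dill.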
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from master_funcs import *  # provides prep_dlc, vel_acc, angle_all_2D, distance_all_2D
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
import dill
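# Manual ratings: one CSV per rater for the "touch" and "drag" events. Judging
# by their use further down, rows are indexed by patient ID and the non-zero
# entries are frame indices at which the event was scored.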
mr_cyl_touch_1=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Jan_touch.csv", index_col=0, delimiter=',', header=None)
mr_cyl_touch_2=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Nicole_touch.csv", index_col=0, delimiter=',', header=None)
mr_cyl_touch_3=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Jule_touch.csv", index_col=0, delimiter=',', header=None)
mr_cyl_drag_1=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Jan_drag.csv", index_col=0, delimiter=',', header=None)
mr_cyl_drag_2=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Nicole_drag.csv", index_col=0, delimiter=',', header=None)
mr_cyl_drag_3=pd.read_csv("/mnt/DLC/DLC/3D_videos/manual_raters/Manual_Cylinder_Analyse_Jule_drag.csv", index_col=0, delimiter=',', header=None)

patients=['PB_T2_3_1', 'PB_T2_3_2', 'PB_T2_3_3', 'PB_T2_4_2', 'PB_T2_5_1', 'PB_T2_6_1', 'PB_T2_6_2', 'PB_T3_23_1', 'PB_T3_23_2', 'PB_T3_23_3', 'PB_T3_24_1', 'PB_T3_24_2', 'PB_T3_24_3']
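# Everything below runs once per patient: load the filtered DeepLabCut output
# of both cameras, build the kinematic feature matrix, and train the
# classifiers. prep_dlc comes from master_funcs; its arguments 0.9 and 1080
# are presumably the likelihood cutoff and the frame height in pixels.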
for z in patients:
    path1='/mnt/DLC/DLC/3D_videos/CY_2D/'+z+'_P3_CY_camera-1DLC_resnet50_Cylinder_camera-1Jun15shuffle1_250000_filtered.csv'
    path2='/mnt/DLC/DLC/3D_videos/CY_2D/'+z+'_P3_CY_camera-2DLC_resnet50_Cylinder_camera-2Jun15shuffle1_250000_filtered.csv'
    data1=prep_dlc(path1, 0.9, 1080)
    data2=prep_dlc(path2, 0.9, 1080)
    vel1, acc1=vel_acc(data1, '2D')
    vel2, acc2=vel_acc(data2, '2D')
    names=[i[0] for i in data1.columns[::3]]  # body part names (x, y, likelihood per part)
    likeli1=np.zeros(len(data1))
    likeli2=np.zeros(len(data2))
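    # Per body part: linearly interpolate gaps in x/y, accumulate the
    # likelihood column into a per-frame sum (used later to pick the better
    # camera), then drop the likelihood column so only coordinates remain.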
    for j in names:
        data1[j, 'x']=data1[j, 'x'].interpolate(method='polynomial', order=1)
        data1[j, 'y']=data1[j, 'y'].interpolate(method='polynomial', order=1)
        likeli1+=data1[j, 'likelihood']
        data1=data1.drop([(j, 'likelihood')], axis=1)
        data2[j, 'x']=data2[j, 'x'].interpolate(method='polynomial', order=1)
        data2[j, 'y']=data2[j, 'y'].interpolate(method='polynomial', order=1)
        likeli2+=data2[j, 'likelihood']
        data2=data2.drop([(j, 'likelihood')], axis=1)
    df_vel1=pd.DataFrame(vel1)
    df_vel1.columns=names
    df_vel1=pd.concat([df_vel1], keys=['velocity'], axis=1, names=['coords'])
    df_vel1.columns.names=['coords', 'bodyparts']
    df_vel2=pd.DataFrame(vel2)
    df_vel2.columns=names
    df_vel2=pd.concat([df_vel2], keys=['velocity'], axis=1, names=['coords'])
    df_vel2.columns.names=['coords', 'bodyparts']
    df_acc1=pd.DataFrame(acc1)
    df_acc1.columns=names
    df_acc1=pd.concat([df_acc1], keys=['acceleration'], axis=1, names=['coords'])
    df_acc1.columns.names=['coords', 'bodyparts']
    df_acc2=pd.DataFrame(acc2)
    df_acc2.columns=names
    df_acc2=pd.concat([df_acc2], keys=['acceleration'], axis=1, names=['coords'])
    df_acc2.columns.names=['coords', 'bodyparts']
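    # Angle features for camera 1: angles over triplets of body parts
    # (angle_all_2D from master_funcs), plus their frame-to-frame first and
    # second differences as angular velocity and acceleration. The appended
    # zero rows keep the arrays the same length as the coordinate data.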
    nnames=[]
    angle1=angle_all_2D(data1, names[0], names[1], names[2])
    nnames.append(names[0]+'_'+names[1]+'_'+names[2])
    count=0
    for i in range(1, len(names)):
        for j in range(i+1, len(names)):
            for k in range(i+2, len(names)):
                if i!=j!=k:
                    angle1=np.c_[angle1, angle_all_2D(data1, names[i], names[j], names[k])]
                    nnames.append(names[i]+'_'+names[j]+'_'+names[k])
                    count+=1
                    print(count, ' of 2870 angles')
    df_angle1=pd.DataFrame(angle1)
    df_angle1.columns=nnames
    df_angle1=pd.concat([df_angle1], keys=['angle'], axis=1, names=['coords'])
    df_angle1.columns.names=['coords', 'bodyparts']
    angle_vel1=np.diff(df_angle1[df_angle1.columns[0]])
    for i in df_angle1.columns[1:]:
        angle_vel1=np.c_[angle_vel1, np.diff(df_angle1[i])]
    angle_vel1=np.r_[angle_vel1, [[0]*angle_vel1.shape[1]]]
    df_angle_vel1=pd.DataFrame(angle_vel1)
    df_angle_vel1.columns=nnames
    df_angle_vel1=pd.concat([df_angle_vel1], keys=['angle_velocity'], axis=1, names=['coords'])
    df_angle_vel1.columns.names=['coords', 'bodyparts']
    angle_acc1=np.diff(df_angle_vel1[df_angle_vel1.columns[0]])
    for i in df_angle_vel1.columns[1:]:
        angle_acc1=np.c_[angle_acc1, np.diff(df_angle_vel1[i])]
    angle_acc1=np.r_[angle_acc1, [[0]*angle_acc1.shape[1], [0]*angle_acc1.shape[1]]]
    df_angle_acc1=pd.DataFrame(angle_acc1)
    df_angle_acc1.columns=nnames
    df_angle_acc1=pd.concat([df_angle_acc1], keys=['angle_acceleration'], axis=1, names=['coords'])
    df_angle_acc1.columns.names=['coords', 'bodyparts']
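    # Distance features for camera 1: distances between pairs of body parts
    # (distance_all_2D), again with first and second differences.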
    nnames=[]
    dis1=distance_all_2D(data1, names[0], names[1])
    nnames.append(names[0]+'_'+names[1])
    count=0
    for i in range(1, len(names)):
        for j in range(i+1, len(names)):
            dis1=np.c_[dis1, distance_all_2D(data1, names[i], names[j])]
            nnames.append(names[i]+'_'+names[j])
            count+=1
            print(count, 'of 231')
    df_dis1=pd.DataFrame(dis1)
    df_dis1.columns=nnames
    df_dis1=pd.concat([df_dis1], keys=['distance'], axis=1, names=['coords'])
    df_dis1.columns.names=['coords', 'bodyparts']
    dis_vel1=np.diff(df_dis1[df_dis1.columns[0]])
    for i in df_dis1.columns[1:]:
        dis_vel1=np.c_[dis_vel1, np.diff(df_dis1[i])]
    dis_vel1=np.r_[dis_vel1, [[0]*dis_vel1.shape[1]]]
    df_dis_vel1=pd.DataFrame(dis_vel1)
    df_dis_vel1.columns=nnames
    df_dis_vel1=pd.concat([df_dis_vel1], keys=['distance_velocity'], axis=1, names=['coords'])
    df_dis_vel1.columns.names=['coords', 'bodyparts']
    dis_acc1=np.diff(df_dis_vel1[df_dis_vel1.columns[0]])
    for i in df_dis_vel1.columns[1:]:
        dis_acc1=np.c_[dis_acc1, np.diff(df_dis_vel1[i])]
    dis_acc1=np.r_[dis_acc1, [[0]*dis_acc1.shape[1], [0]*dis_acc1.shape[1]]]
    df_dis_acc1=pd.DataFrame(dis_acc1)
    df_dis_acc1.columns=nnames
    df_dis_acc1=pd.concat([df_dis_acc1], keys=['distance_acceleration'], axis=1, names=['coords'])
    df_dis_acc1.columns.names=['coords', 'bodyparts']
    # Combine all camera-1 feature blocks into one DataFrame with flat column
    # names, interpolate and zero-fill remaining gaps, then free intermediates.
    df_vel_acc1=pd.concat((df_vel1, df_acc1, df_angle1, df_angle_vel1, df_angle_acc1, df_dis1, df_dis_vel1, df_dis_acc1), axis=1)
    names=[]
    for i in range(0, len(df_vel_acc1.columns)):
        names.append(df_vel_acc1.columns[i][0]+'_'+df_vel_acc1.columns[i][1])
    df_vel_acc1.columns=names
    df_vel_acc1=df_vel_acc1.interpolate(method='polynomial', order=1)
    df_vel_acc1=df_vel_acc1.fillna(0)
    del df_vel1
    del df_acc1
    del acc1
    del vel1
    del angle1
    del df_angle1
    del angle_vel1
    del df_angle_vel1
    del angle_acc1
    del df_angle_acc1
    del dis1
    del df_dis1
    del dis_vel1
    del df_dis_vel1
    del dis_acc1
    del df_dis_acc1
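    # Same feature construction, repeated for camera 2 (the likelihood columns
    # were already dropped above, hence the step of 2 when re-reading the body
    # part names).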
    names=[i[0] for i in data2.columns[::2]]
    nnames=[]
    angle2=angle_all_2D(data2, names[0], names[1], names[2])
    nnames.append(names[0]+'_'+names[1]+'_'+names[2])
    count=0
    for i in range(1, len(names)):
        for j in range(i+1, len(names)):
            for k in range(i+2, len(names)):
                if i!=j!=k:
                    angle2=np.c_[angle2, angle_all_2D(data2, names[i], names[j], names[k])]
                    nnames.append(names[i]+'_'+names[j]+'_'+names[k])
                    count+=1
                    print(count, ' of 2870 angles')
    df_angle2=pd.DataFrame(angle2)
    df_angle2.columns=nnames
    df_angle2=pd.concat([df_angle2], keys=['angle'], axis=1, names=['coords'])
    df_angle2.columns.names=['coords', 'bodyparts']
    angle_vel2=np.diff(df_angle2[df_angle2.columns[0]])
    for i in df_angle2.columns[1:]:
        angle_vel2=np.c_[angle_vel2, np.diff(df_angle2[i])]
    angle_vel2=np.r_[angle_vel2, [[0]*angle_vel2.shape[1]]]
    df_angle_vel2=pd.DataFrame(angle_vel2)
    df_angle_vel2.columns=nnames
    df_angle_vel2=pd.concat([df_angle_vel2], keys=['angle_velocity'], axis=1, names=['coords'])
    df_angle_vel2.columns.names=['coords', 'bodyparts']
    angle_acc2=np.diff(df_angle_vel2[df_angle_vel2.columns[0]])
    for i in df_angle_vel2.columns[1:]:
        angle_acc2=np.c_[angle_acc2, np.diff(df_angle_vel2[i])]
    angle_acc2=np.r_[angle_acc2, [[0]*angle_acc2.shape[1], [0]*angle_acc2.shape[1]]]
    df_angle_acc2=pd.DataFrame(angle_acc2)
    df_angle_acc2.columns=nnames
    df_angle_acc2=pd.concat([df_angle_acc2], keys=['angle_acceleration'], axis=1, names=['coords'])
    df_angle_acc2.columns.names=['coords', 'bodyparts']
    nnames=[]
    dis2=distance_all_2D(data2, names[0], names[1])
    nnames.append(names[0]+'_'+names[1])
    count=0
    for i in range(1, len(names)):
        for j in range(i+1, len(names)):
            dis2=np.c_[dis2, distance_all_2D(data2, names[i], names[j])]
            nnames.append(names[i]+'_'+names[j])
            count+=1
            print(count, 'of 231')
    df_dis2=pd.DataFrame(dis2)
    df_dis2.columns=nnames
    df_dis2=pd.concat([df_dis2], keys=['distance'], axis=1, names=['coords'])
    df_dis2.columns.names=['coords', 'bodyparts']
    dis_vel2=np.diff(df_dis2[df_dis2.columns[0]])
    for i in df_dis2.columns[1:]:
        dis_vel2=np.c_[dis_vel2, np.diff(df_dis2[i])]
    dis_vel2=np.r_[dis_vel2, [[0]*dis_vel2.shape[1]]]
    df_dis_vel2=pd.DataFrame(dis_vel2)
    df_dis_vel2.columns=nnames
    df_dis_vel2=pd.concat([df_dis_vel2], keys=['distance_velocity'], axis=1, names=['coords'])
    df_dis_vel2.columns.names=['coords', 'bodyparts']
    dis_acc2=np.diff(df_dis_vel2[df_dis_vel2.columns[0]])
    for i in df_dis_vel2.columns[1:]:
        dis_acc2=np.c_[dis_acc2, np.diff(df_dis_vel2[i])]
    dis_acc2=np.r_[dis_acc2, [[0]*dis_acc2.shape[1], [0]*dis_acc2.shape[1]]]
    df_dis_acc2=pd.DataFrame(dis_acc2)
    df_dis_acc2.columns=nnames
    df_dis_acc2=pd.concat([df_dis_acc2], keys=['distance_acceleration'], axis=1, names=['coords'])
    df_dis_acc2.columns.names=['coords', 'bodyparts']
    df_vel_acc2=pd.concat((df_vel2, df_acc2, df_angle2, df_angle_vel2, df_angle_acc2, df_dis2, df_dis_vel2, df_dis_acc2), axis=1)
    names=[]
    for i in range(0, len(df_vel_acc2.columns)):
        names.append(df_vel_acc2.columns[i][0]+'_'+df_vel_acc2.columns[i][1])
    df_vel_acc2.columns=names
    df_vel_acc2=df_vel_acc2.interpolate(method='polynomial', order=1)
    df_vel_acc2=df_vel_acc2.fillna(0)
    del df_vel2
    del df_acc2
    del acc2
    del vel2
    del angle2
    del df_angle2
    del angle_vel2
    del df_angle_vel2
    del angle_acc2
    del df_angle_acc2
    del dis2
    del df_dis2
    del dis_vel2
    del df_dis_vel2
    del dis_acc2
    del df_dis_acc2
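    # Frame-wise camera selection: for each frame present in both feature
    # tables, keep the row from the camera with the higher summed likelihood
    # (ties go to camera 1).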
    if likeli1[0]>likeli2[0]:
        df_big=df_vel_acc1.iloc[[0]]
    else:
        df_big=df_vel_acc2.iloc[[0]]
    for i in range(1, len(likeli1)):
        if (i<df_vel_acc1.shape[0]-1) and (i<df_vel_acc2.shape[0]-1):
            if likeli1[i]>likeli2[i]:
                df_big=np.r_[df_big, df_vel_acc1.iloc[[i]]]
            elif likeli2[i]>likeli1[i]:
                df_big=np.r_[df_big, df_vel_acc2.iloc[[i]]]
            else:
                df_big=np.r_[df_big, df_vel_acc1.iloc[[i]]]
    df_big=pd.DataFrame(df_big)
    df_big.columns=df_vel_acc1.columns
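    # "Touch" labels: collect the frame indices marked by any of the three
    # raters and set those frames to 1. If the patient has at least 6 labelled
    # frames, rebalance with SMOTE oversampling plus random undersampling,
    # train 100 RandomForests on random splits, keep the one with the best
    # test accuracy, and compute its permutation feature importances.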
    vid1=[]
    for j in [mr_cyl_touch_1, mr_cyl_touch_2, mr_cyl_touch_3]:
        df=j.fillna(0)
        for i in df.loc[z, :]:
            if i>0:
                vid1.append(i)
    y_touch=np.zeros(shape=(len(df_big),))
    for i in vid1:
        y_touch[int(i)]=1
    if np.count_nonzero(y_touch)>=6:
        over = SMOTE(sampling_strategy=0.1)
        under = RandomUnderSampler(sampling_strategy=0.2)
        steps = [('o', over), ('u', under)]
        pipeline = Pipeline(steps=steps)
        X_touch, y_touch = pipeline.fit_resample(df_big, y_touch)
        acc_score_touch = 0
        for i in range(100):
            if (i+1) % 10 == 0:
                print("iterations: {}/100".format(i+1))
            X_touch_train, X_touch_test, y_touch_train, y_touch_test = train_test_split(X_touch, y_touch)
            sc = StandardScaler()
            X_touch_train = sc.fit_transform(X_touch_train)
            X_touch_test = sc.transform(X_touch_test)
            rfc_touch = RandomForestClassifier(bootstrap=True).fit(X_touch_train, y_touch_train)
            pred_rfc_touch = rfc_touch.predict(X_touch_test)
            if accuracy_score(y_touch_test, pred_rfc_touch) > acc_score_touch:
                best_model_touch = rfc_touch
                acc_score_touch = accuracy_score(y_touch_test, pred_rfc_touch)
        result_touch = permutation_importance(best_model_touch, X_touch_test, y_touch_test, n_repeats=10, random_state=42)
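    # Same labelling, resampling, and training procedure for the "drag" events.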
    vid2=[]
    for j in [mr_cyl_drag_1, mr_cyl_drag_2, mr_cyl_drag_3]:
        df=j.fillna(0)
        for i in df.loc[z, :]:
            if i>0:
                vid2.append(i)
    y_drag=np.zeros(shape=(len(df_big),))
    for i in vid2:
        y_drag[int(i)]=1
    if np.count_nonzero(y_drag)>=6:
        over = SMOTE(sampling_strategy=0.1)
        under = RandomUnderSampler(sampling_strategy=0.2)
        steps = [('o', over), ('u', under)]
        pipeline = Pipeline(steps=steps)
        X_drag, y_drag = pipeline.fit_resample(df_big, y_drag)
        acc_score_drag = 0
        for i in range(100):
            if (i+1) % 10 == 0:
                print("iterations: {}/100".format(i+1))
            X_drag_train, X_drag_test, y_drag_train, y_drag_test = train_test_split(X_drag, y_drag)
            sc = StandardScaler()
            X_drag_train = sc.fit_transform(X_drag_train)
            X_drag_test = sc.transform(X_drag_test)
            rfc_drag = RandomForestClassifier(bootstrap=True).fit(X_drag_train, y_drag_train)
            pred_rfc_drag = rfc_drag.predict(X_drag_test)
            if accuracy_score(y_drag_test, pred_rfc_drag) > acc_score_drag:
                best_model_drag = rfc_drag
                acc_score_drag = accuracy_score(y_drag_test, pred_rfc_drag)
        result_drag = permutation_importance(best_model_drag, X_drag_test, y_drag_test, n_repeats=10, random_state=42)
    # Save the full interpreter session for this patient (features, models,
    # permutation importances) so it can be reloaded later with dill.
    dill.dump_session('CY_2D_'+z+'_data.pkl')
    print(z+' ready!')