QC.py 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806
  1. #%%
  2. """
  3. Version 1.0
  4. Name: Aref Kalantari
  5. Email: aref.kalantari-sarcheshmeh@uk-koeln.de
  6. Date: 24.08.2021 - 02.03.2022
  7. -----------------------------
  8. Code Describtion: Quality Control Toolbox. Every tool (function) needed can be found here and be modified.
  9. -----------------------------
  10. Lab: AG Neuroimaging and neuroengineering of experimental stroke
  11. Supervisor: Dr. rer. nat. Markus Aswendt (markus.aswendt@uk-koeln.de)
  12. """
  13. #%% Loading nececcery libraries
  14. from sklearn.covariance import EllipticEnvelope
  15. from sklearn.ensemble import IsolationForest
  16. from sklearn.neighbors import LocalOutlierFactor
  17. from sklearn.svm import OneClassSVM
  18. import numpy as np
  19. import os
  20. import pandas as pd
  21. import glob
  22. import matplotlib.patches as mpatches
  23. import time
  24. import matplotlib.pyplot as plt
  25. from scipy import ndimage
  26. from scipy import signal
  27. import changSNR as ch
  28. from matplotlib.ticker import MaxNLocator
  29. from matplotlib import font_manager as fm
  30. from matplotlib.font_manager import FontProperties
  31. #%% Tic Toc Timer
  32. def TicTocGenerator():
  33. # Generator that returns time differences
  34. ti = 0 # initial time
  35. tf = time.time() # final time
  36. while True:
  37. ti = tf
  38. tf = time.time()
  39. yield tf-ti # returns the time difference
  40. TicToc = TicTocGenerator() # create an instance of the TicTocGen generator
  41. # This will be the main function through which we define both tic() and toc()
  42. def toc(tempBool=True):
  43. # Prints the time difference yielded by generator instance TicToc
  44. tempTimeInterval = next(TicToc)
  45. if tempBool:
  46. print( "Elapsed time: %f seconds.\n" %tempTimeInterval )
  47. def tic():
  48. # Records a time in TicToc, marks the beginning of a time interval
  49. toc(False)
  50. #%% Goasing
  51. def GoastCheck(input_file):
  52. #input_file= nib.load(tf)
  53. img = input_file
  54. img_data = img.get_fdata()
  55. img_shape = np.shape(img_data)
  56. MI_vec = []
  57. n = 1
  58. Mmos = []
  59. while (img_shape[1]%(2**n)) == 0:
  60. Mmos.append(img_shape[1]/2**n)
  61. n = n+1
  62. Mmos = np.asarray(Mmos)
  63. if len(img_shape)>3:
  64. img_data = np.mean(img_data,axis=-1)
  65. Im_ref = img_data[:,:,int(img_shape[2]/2)]
  66. for ii in range(0,int(img_shape[1])):
  67. Im_rol = np.roll(Im_ref,ii)
  68. MI_vec.append(mutualInfo(Im_rol,Im_ref))
  69. peaks_strong, prop = signal.find_peaks(MI_vec, height = 0.25*max(MI_vec))
  70. peaks_weak, prop = signal.find_peaks(MI_vec)
  71. StrongGoast = np.sum(np.isin(peaks_strong,Mmos))
  72. WeekGoast = np.sum(np.isin(peaks_weak,Mmos))
  73. if WeekGoast > 2 or StrongGoast > 0:
  74. GMetric = True
  75. else:
  76. GMetric = False
  77. #plt.plot((MI_vec))
  78. #plt.show()
  79. return GMetric
  80. #%% Res function
  81. def Image_Selection(input_file):
  82. img = input_file
  83. img_data=img.get_fdata()
  84. img_shape=img_data.shape
  85. middle=int(img_shape[2]/2)
  86. if len(img_shape) == 3:
  87. selected_img= img_data[:, :,middle]
  88. elif len(img_shape) > 3:
  89. time_middle=int(img_shape[3]/2)
  90. selected_img= img_data[:, :,middle,time_middle]
  91. selected_img= np.rot90(selected_img,k=-1)
  92. return selected_img
  93. #%%
  94. def ResCalculator(input_file):
  95. HDR = input_file.header
  96. Spati_Res = HDR['pixdim'][1:4]
  97. return Spati_Res
  98. #%% SNR function
  99. def snrCalclualtor_chang(input_file):
  100. imgData = input_file
  101. IM = np.asanyarray(imgData.dataobj)
  102. imgData = np.squeeze(np.ndarray.astype(IM, 'float64'))
  103. mm = imgData.mean()
  104. if mm == 0:
  105. snrCh = np.nan
  106. return snrCh
  107. """
  108. cc = 0
  109. while mm < 1:
  110. mm = mm *10
  111. cc = cc+1
  112. imgData = imgData * (10**cc)
  113. """
  114. Sone = len(imgData.shape)
  115. if Sone < 3:
  116. snrCh = np.nan
  117. return snrCh
  118. snr_chang_slice_vec = []
  119. ns = imgData.shape[2] # Number of slices
  120. n_dir = imgData.shape[-1] # Number of directions if dti dataset
  121. if len(imgData.shape) > 3:
  122. if n_dir < 10 :
  123. fff = 0
  124. print()
  125. print("Warning: Be aware that the size of the 4th dimension (difusion direction or timepoints) is less than 10. This might result in unstable values")
  126. print()
  127. else:
  128. fff = 5
  129. nd = imgData.ndim
  130. if ns > 4:
  131. ns_lower = int(np.floor(ns/2) - 2)
  132. ns_upper = int(np.floor(ns/2) + 2)
  133. else:
  134. ns_lower=0
  135. ns_upper=ns
  136. #print('/NewData/',end=" ")
  137. #for slc in range(ns_lower,ns_upper):
  138. for slc in range(ns_lower,ns_upper):
  139. # Print % of progress
  140. #print('S' + str(slc + 1), end=",")
  141. # Decision if the input data is DTI type or T2w
  142. if nd == 3:
  143. Slice = imgData[:, :, slc]
  144. try:
  145. curSnrCHMap, estStdChang, estStdChangNorm = ch.calcSNR(Slice, 0, 1)
  146. except ValueError:
  147. estStdChang = np.nan
  148. snr_chang_slice = 20 * np.log10(np.mean(Slice)/estStdChang)
  149. snr_chang_slice_vec.append(snr_chang_slice)
  150. else:
  151. for bb in range(fff,n_dir-1):
  152. Slice = imgData[:, :,slc,bb]
  153. try:
  154. curSnrCHMap, estStdChang, estStdChangNorm = ch.calcSNR(Slice, 0, 1)
  155. except ValueError:
  156. estStdChang = np.nan
  157. snr_chang_slice = 20 * np.log10(np.mean(Slice)/estStdChang)
  158. snr_chang_slice_vec.append(snr_chang_slice)
  159. snr_chang_slice_vec = np.array(snr_chang_slice_vec)
  160. snrCh = np.mean(snr_chang_slice_vec[~np.isinf(snr_chang_slice_vec) * ~np.isnan(snr_chang_slice_vec)])
  161. return snrCh
  162. #%% SNR function 2
  163. def snrCalclualtor_normal(input_file):
  164. IM = np.asanyarray(input_file.dataobj)
  165. imgData = np.squeeze(np.ndarray.astype(IM, 'float64'))
  166. Sone = len(imgData.shape)
  167. if Sone < 3:
  168. imgData = np.tile(imgData[:, :, np.newaxis], (1, 1, 10))
  169. Data = imgData
  170. S = np.shape(np.squeeze(Data))
  171. #print(S)
  172. if len(S) == 3:
  173. imgData = np.squeeze(Data)
  174. if len(S) == 4:
  175. imgData = np.squeeze(Data[:,:,:,0]) #int((S[-1]/2))
  176. S = np.shape(np.squeeze(imgData))
  177. #local thresholding
  178. #imgData_new = np.zeros(S[0:3]);
  179. # =============================================================================
  180. # for ii in range(0,S[2]):
  181. # temp_image = imgData[:,:,ii]
  182. # global_thresh = threshold_isodata(temp_image)
  183. # binary_global = temp_image > global_thresh
  184. # imgData_new[:,:,ii] = binary_global
  185. # =============================================================================
  186. COM=[int(i) for i in (ndimage.measurements.center_of_mass(imgData))]
  187. r = np.floor(0.10*(np.mean(S)))
  188. if r > S[2]:
  189. r = S[2]
  190. Mask = sphere(S, int(r) , COM)
  191. Singal = np.mean(imgData[Mask])
  192. x = int(np.ceil(S[0]*0.15))
  193. y = int(np.ceil(S[1]*0.15))
  194. z = int(np.ceil(S[2]*0.15))
  195. MaskN = np.zeros(S[0:3]);
  196. MaskN[:x,:y,:z] = 2
  197. MaskN[:x,-y:,:z] = 2
  198. MaskN[-x:,:y,:z] = 2
  199. MaskN[-x:,-y:,:z] = 2
  200. MaskN[:x,:y,-z:] = 2
  201. MaskN[:x,-y:,-z:] = 2
  202. MaskN[-x:,:y,-z:] = 2
  203. MaskN[-x:,-y:,-z:] = 2
  204. n1 = np.squeeze(imgData[:x,:y,:z])
  205. n2 = np.squeeze(imgData[:x,-y:,:z])
  206. n3 = np.squeeze(imgData[-x:,:y,:z])
  207. n4 = np.squeeze(imgData[-x:,-y:,:z])
  208. n5 = np.squeeze(imgData[:x,:y,-z:])
  209. n6 = np.squeeze(imgData[:x,-y:,-z:])
  210. n7 = np.squeeze(imgData[-x:,:y,-z:])
  211. n8 = np.squeeze(imgData[-x:,-y:,-z:])
  212. Noise_std = np.std(np.array([n1,n2,n3,n4,n5,n6,n7,n8]))
  213. #show_slices([n8[:,:,3],np.squeeze(imgData[:,:,3])])
  214. #plt.show()
  215. SNR = 20 * np.log10(Singal/Noise_std)
  216. if np.isinf(SNR):
  217. SNR = np.nan
  218. print("Impossible: Infinite values were the result of SNR")
  219. print("Possible reason: already ROI extracted/preprocessed data with zeros around the ROI. S/0=inf'")
  220. print("for continuity, inf is replaced with NaN ...")
  221. return SNR
  222. def show_slices(slices):
  223. """ Function to display row of image slices """
  224. fig, axes = plt.subplots(1, len(slices))
  225. for i, Slice in enumerate(slices):
  226. axes[i].imshow(Slice.T, cmap="gray", origin="lower")
  227. def sphere(shape, radius, position):
  228. """Generate an n-dimensional spherical mask."""
  229. # assume shape and position have the same length and contain ints
  230. # the units are pixels / voxels (px for short)
  231. # radius is a int or float in px
  232. assert len(position) == len(shape)
  233. n = len(shape)
  234. semisizes = (radius,) * len(shape)
  235. # genereate the grid for the support points
  236. # centered at the position indicated by position
  237. grid = [slice(-x0, dim - x0) for x0, dim in zip(position, shape)]
  238. position = np.ogrid[grid]
  239. # calculate the distance of all points from `position` center
  240. # scaled by the radius
  241. arr = np.zeros(shape, dtype=float)
  242. for x_i, semisize in zip(position, semisizes):
  243. # this can be generalized for exponent != 2
  244. # in which case `(x_i / semisize)`
  245. # would become `np.abs(x_i / semisize)`
  246. arr += (x_i / semisize) ** 2
  247. # the inner part of the sphere will have distance below or equal to 1
  248. return arr <= 1.0
  249. #%% TSNR function
  250. def TsnrCalclualtor(input_file):
  251. imgData = input_file
  252. IM = np.asanyarray(imgData.dataobj)
  253. S=IM.shape
  254. if len(S) == 3:
  255. IM = IM.reshape((S[0],S[1],1,S[2]))
  256. imgData = np.ndarray.astype(IM, 'float64')
  257. if IM.shape[-1] < 10:
  258. fff = 0
  259. else:
  260. fff = 10
  261. signal_averge_over_time = imgData[:,:,:,fff:].mean(axis=-1)
  262. signal_std_over_time = imgData[:,:,:,fff:].std(axis=-1)
  263. tSNR_map = 20 * np.log10(signal_averge_over_time/signal_std_over_time)
  264. S = np.shape(IM)
  265. #local thresholding
  266. #imgData_new = np.zeros(S[0:3])
  267. imgData_average = np.mean(imgData,axis=-1)
  268. # =============================================================================
  269. # for ii in range(0,S[2]):
  270. # temp_image = imgData_average[:,:,ii]
  271. # global_thresh = threshold_isodata(temp_image)
  272. # binary_global = temp_image > global_thresh
  273. # imgData_new[:,:,ii] = binary_global
  274. #
  275. # =============================================================================
  276. COM=[int(i) for i in (ndimage.measurements.center_of_mass(imgData_average))]
  277. r = np.floor(0.10*(np.mean([S[0:2]])))
  278. Mask = sphere(S[0:3], int(r) , COM)
  279. tSNR = np.mean(tSNR_map[Mask])
  280. return tSNR
  281. #%% Calculating Mutual Information: based on https://matthew-brett.github.io/teaching/mutual_information.html
  282. def mutualInfo(Im1,Im2):
  283. t1_slice = Im1
  284. t2_slice = Im2
  285. hist_2d, x_edges, y_edges = np.histogram2d(t1_slice.ravel(),t2_slice.ravel(),bins=20)
  286. hist_2d_log = np.zeros(hist_2d.shape)
  287. non_zeros = hist_2d != 0
  288. hist_2d_log[non_zeros] = np.log(hist_2d[non_zeros])
  289. pxy = hist_2d / float(np.sum(hist_2d))
  290. px = np.sum(pxy, axis=1) # marginal for x over y
  291. py = np.sum(pxy, axis=0) # marginal for y over x
  292. px_py = px[:, None] * py[None, :] # Broadcast to multiply marginals
  293. # Now we can do the calculation using the pxy, px_py 2D arrays
  294. nzs = pxy > 0 # Only non-zero pxy values contribute to the sum
  295. MI = np.sum(pxy[nzs] * np.log(pxy[nzs] / px_py[nzs]))
  296. return MI
  297. #%% Motion detection of rsFRI function (based on mutual information)
  298. def Ismotion(input_file):
  299. GMV=[]
  300. imgData = input_file
  301. IM = np.asanyarray(imgData.dataobj)
  302. S = IM.shape
  303. if len(S) == 3:
  304. IM = IM.reshape((S[0],S[1],1,S[2]))
  305. if IM.shape[-1] < 11 :
  306. fff = 0
  307. else:
  308. fff = 10
  309. imgData = np.ndarray.astype(IM[:,:,:,fff:], 'float64')
  310. S = np.shape(imgData)
  311. temp_mean = imgData.mean(axis=(0,1,3))
  312. temp_max = temp_mean.argmax()
  313. temp_Data = imgData[:,:,temp_max,:]
  314. Im_fix = temp_Data[:,:,0]
  315. Im_rot = temp_Data
  316. MI_all = []
  317. for z in range(1,S[-1]):
  318. MI = mutualInfo(Im_fix,Im_rot[:,:,z])
  319. MI_all.append(MI)
  320. Final = np.asarray(MI_all)
  321. Max_mov_between = str([Final.argmin()+10,Final.argmax()+10])
  322. GMV = getrange(Final)
  323. LMV = np.std(Final)
  324. return Final,Max_mov_between,GMV,LMV
  325. #%% Getting range
  326. def getrange(numbers):
  327. return max(numbers) - min(numbers)
  328. #%% Plotting QC Histogram and etc.
  329. def QCPlot(Path):
  330. saving_path = (Path)
  331. QC_fig_path = os.path.join( (Path) , "QCfigures")
  332. if not os.path.isdir(QC_fig_path):
  333. os.mkdir(QC_fig_path)
  334. Abook = []
  335. Names =[]
  336. for file in glob.glob(os.path.join(Path, '*caculated_features*.csv')) :
  337. if "diff" in file:
  338. dti_path= file
  339. dti_features= pd.read_csv(dti_path)
  340. Abook.append(dti_features)
  341. Names.append("diff")
  342. elif "func" in file:
  343. fmri_path= file
  344. fmri_features= pd.read_csv(fmri_path)
  345. Abook.append(fmri_features)
  346. Names.append("func")
  347. elif "anat" in file:
  348. t2w_path= file
  349. t2w_features= pd.read_csv(t2w_path)
  350. Abook.append(t2w_features)
  351. Names.append("anat")
  352. ST = []
  353. COE = []
  354. AvV = []
  355. V = []
  356. Pathes = []
  357. Med = []
  358. MaX = []
  359. MiN= []
  360. hh = 1
  361. rr = 1
  362. # Set font properties
  363. title_font = {'family': 'serif', 'fontname': 'Times New Roman', 'weight': 'bold', 'size': 10}
  364. label_font = {'family': 'serif', 'fontname': 'Times New Roman', 'weight': 'normal', 'size': 8}
  365. tick_font = {'family': 'serif', 'fontname': 'Times New Roman', 'weight': 'normal', 'size': 8}
  366. for nn, N in enumerate(Names):
  367. COL = list(Abook[nn].columns)
  368. COL.pop(0)
  369. D = Abook[nn]
  370. for cc, C in enumerate(COL):
  371. Data = list(D[C])
  372. if C == 'SNR Chang' or C == 'tSNR (Averaged Brain ROI)' or C == 'SNR Normal' or C == 'Displacement factor (std of Mutual information)':
  373. # Plot histogram
  374. cm = 1/2.54 # centimeters in inches
  375. plt.figure(hh, figsize=(9, 5), dpi=300)
  376. ax2 = plt.subplot(1, 1, 1, label="histogram")
  377. for dd, DD in enumerate(Data): # If tSNR and SNR chang are also adjusted, this section can be eliminated
  378. if DD == np.inf:
  379. Data[dd] = np.nan
  380. q75, q25 = np.nanpercentile(Data, [75, 25])
  381. iqr = q75 - q25
  382. B = round((np.nanmax(Data) - np.nanmin(Data)) / (2 * iqr / (len(Data) ** (1/3))))
  383. if B * 5 > 22:
  384. XX = 22
  385. else:
  386. XX = B * 5
  387. y, x, bars = plt.hist(Data, bins=B * 7, histtype='bar', edgecolor='white')
  388. plt.xlabel(N + ': ' + C + ' [a.u.]', fontdict=label_font)
  389. plt.ylabel("Frequency", fontdict=label_font)
  390. ax2.spines['right'].set_visible(False)
  391. ax2.spines['top'].set_visible(False)
  392. plt.locator_params(axis='x', nbins=XX)
  393. ax2.yaxis.set_major_locator(MaxNLocator(integer=True))
  394. # Calculate interquartile range of values in the 'points' column
  395. if C == 'Displacement factor (std of Mutual information)':
  396. ll = q75 + 1.5 * iqr
  397. plt.text(1.07 * ll, 2 * max(y) / 3, 'Q3 + 1.5*IQ', color='grey', fontdict=label_font)
  398. for b, bar in enumerate(bars):
  399. if bar.get_x() > ll:
  400. bar.set_facecolor("red")
  401. else:
  402. ll = q25 - 1.5 * iqr
  403. plt.text(1.001 * ll, 2 * max(y) / 3, 'Q1 - 1.5*IQ', color='grey', fontdict=label_font)
  404. for b, bar in enumerate(bars):
  405. if bar.get_x() < ll:
  406. bar.set_facecolor("red")
  407. plt.axvline(ll, color='grey', linestyle='--')
  408. plt.suptitle(N + ': ' + C, fontdict=title_font)
  409. red_patch = mpatches.Patch(color='red', label='Discard')
  410. blue_patch = mpatches.Patch(color='tab:blue', label='Keep')
  411. # Modify the legend with smaller font size and Times New Roman font
  412. legend = plt.legend(handles=[blue_patch, red_patch], fontsize=8)
  413. #legend.get_frame().set_linewidth(0.0) # Remove legend border
  414. # Set Times New Roman font for legend text
  415. # Set Times New Roman font for legend text
  416. for text in legend.get_texts():
  417. text.set_fontfamily('serif')
  418. text.set_fontsize(8)
  419. # Set the font for axis ticks
  420. ax2.xaxis.set_tick_params(labelsize=8)
  421. ax2.yaxis.set_tick_params(labelsize=8)
  422. plt.savefig(os.path.join(QC_fig_path, C + N + ".png"), dpi=300)
  423. plt.close()
  424. hh = hh + 1
  425. plt.figure(hh, figsize=(9, 5), dpi=300)
  426. for nn, N in enumerate(Names):
  427. COL = list(Abook[nn].columns)
  428. COL.pop(0)
  429. D = Abook[nn]
  430. for cc, C in enumerate(COL):
  431. Data = list(D[C])
  432. if C == 'SpatRx' or C == 'SpatRy' or C == 'Slicethick':
  433. # Plot pie plots
  434. labels = list(set(Data))
  435. sizes = [Data.count(l) for l in labels]
  436. labels = list(np.round(labels, 3))
  437. labels2 = [str(l) + ' mm' for l in labels]
  438. ax1 = plt.subplot(len(Names), 3, rr)
  439. ax1.pie(sizes, labels=labels2, autopct='%1.0f%%', startangle=180)
  440. ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
  441. ax1.set_title(N + ':' + C, fontdict=title_font)
  442. plt.suptitle('Resolution homogeneity between data', weight="bold")
  443. # Set the font for axis ticks
  444. ax1.xaxis.set_tick_params(labelsize=8)
  445. ax1.yaxis.set_tick_params(labelsize=8)
  446. rr = rr + 1
  447. plt.savefig(os.path.join(QC_fig_path, "Spatial_Resolution.png"), dpi=300)
  448. plt.close()
  449. #%%
  450. # machine learning methods
  451. def ML(Path, format_type) :
  452. result=[]
  453. for N, csv in enumerate(glob.glob(os.path.join(Path, '*_features_*.csv'))):
  454. csv_path = csv
  455. csv_path=os.path.join(Path,csv)
  456. Abook= pd.read_csv(csv_path)
  457. if np.any(Abook.isnull().all()[:]):
  458. print("The following csv file contains NaN values for one or more of its features:")
  459. print(csv_path)
  460. print("Voting can not be conducted.")
  461. print("Analyzing next sequence...")
  462. continue
  463. Abook= Abook.dropna(how='all',axis='columns')
  464. Abook= Abook.dropna(how='any')
  465. address= [i for i in Abook.iloc[:,1]]
  466. if format_type == "raw":
  467. sequence_name = [i for i in Abook.iloc[:,2]]
  468. img_name = [i for i in Abook.iloc[:,3]]
  469. X = Abook.iloc[:,7:]
  470. elif format_type == "nifti":
  471. img_name = [i for i in Abook.iloc[:,2]]
  472. X = Abook.iloc[:,6:]
  473. #X=preprocessing.normalize(X)
  474. ############## Fit the One-Class SVM
  475. nu = 0.05
  476. gamma = 2.0
  477. clf = OneClassSVM(gamma="auto", kernel="poly", nu=nu,shrinking=False).fit(X)
  478. svm_pre =clf.predict(X)
  479. ############## EllipticEnvelope
  480. elpenv = EllipticEnvelope(contamination=0.025, random_state=1)
  481. ell_pred = elpenv.fit_predict(X)
  482. ############## IsolationForest
  483. iforest = IsolationForest(n_estimators=100, max_samples='auto',
  484. contamination=0.05, max_features=1.0,
  485. bootstrap=False, n_jobs=-1, random_state=1)
  486. iso_pred = iforest.fit_predict(X)
  487. ############## LocalOutlierFactor
  488. lof = LocalOutlierFactor(n_neighbors=20, algorithm='auto',
  489. metric='minkowski', contamination=0.04,
  490. novelty=False, n_jobs=-1)
  491. local_pred = lof.fit_predict(X)
  492. ############## saving result
  493. algorythms=[svm_pre,ell_pred,iso_pred,local_pred]
  494. result.append(algorythms)
  495. result[N]= np.dstack((result[N][0], result[N][1],result[N][2],result[N][3]))
  496. result[N]= result[N][0]
  497. result[N]= pd.DataFrame(result[N], columns = ['One_class_SVM',' EllipticEnvelope','IsolationForest',"LocalOutlierFactor"])
  498. if "diff" in csv:
  499. dti=["diff"]*len(result[N])
  500. result[N]["sequence_type"] = dti
  501. elif "func" in csv:
  502. fmri=["func"]*len(result[N])
  503. result[N]["sequence_type"] = fmri
  504. elif "anat" in csv :
  505. t2w=["anat"]*len(result[N])
  506. result[N]["sequence_type"] = t2w
  507. result[N]["Pathes"] = address
  508. if format_type == "raw":
  509. result[N]["sequence_name"] = sequence_name
  510. result[N]["corresponding_img"] = img_name
  511. return(result)
  512. #%% Adjusting the existing feature table by adding a new sheet to it with the data that need to be discarded
  513. def QCtable(Path, format_type):
  514. ML_algorythms= ML(Path, format_type)
  515. ML_algorythms=pd.concat(ML_algorythms)
  516. ML_algorythms[['One_class_SVM',' EllipticEnvelope','IsolationForest',"LocalOutlierFactor"]]=ML_algorythms[['One_class_SVM',' EllipticEnvelope','IsolationForest',"LocalOutlierFactor"]]==-1
  517. Abook = []
  518. Names =[]
  519. for file in glob.glob(os.path.join(Path, '*caculated_features*.csv')) :
  520. if "diff" in file:
  521. dti_path= file
  522. dti_features= pd.read_csv(dti_path)
  523. Abook.append(dti_features)
  524. Names.append("diff")
  525. elif "func" in file :
  526. fmri_path= file
  527. fmri_features= pd.read_csv(fmri_path)
  528. Abook.append(fmri_features)
  529. Names.append("func")
  530. elif "anat" in file :
  531. t2w_path= file
  532. t2w_features= pd.read_csv(t2w_path)
  533. Abook.append(t2w_features)
  534. Names.append("anat")
  535. ST = []
  536. COE = []
  537. AvV = []
  538. V = []
  539. Pathes = []
  540. Med = []
  541. MaX = []
  542. MiN= []
  543. for nn,N in enumerate(Names):
  544. d= Abook[nn]
  545. COL = Abook[nn].columns
  546. for cc,C in enumerate(COL):
  547. D = d[C]
  548. if C == 'SNR Chang' or C == 'tSNR (Averaged Brain ROI)' or C =='SNR Normal':
  549. for dd,DD in enumerate(D):
  550. if DD == np.inf:
  551. D[dd] = np.nan
  552. q75, q25 = np.nanpercentile(D, [75 ,25])
  553. iqr = q75 - q25
  554. ll = q25-1.5*iqr #lower limit
  555. Index = D<ll
  556. P = d[COL[1]][Index]
  557. M = D.mean()
  558. Me = D.median()
  559. Mi = D.min()
  560. Ma = D.max()
  561. Pathes.extend(P)
  562. ST.extend([N]*len(P))
  563. COE.extend([C]*len(P))
  564. AvV.extend([M]*len(P))
  565. V.extend(D[Index])
  566. Med.extend([Me]*len(P))
  567. MiN.extend([Mi]*len(P))
  568. MaX.extend([Ma]*len(P))
  569. if C == 'Displacement factor (std of Mutual information)':
  570. q75, q25 = np.nanpercentile(D, [75 ,25])
  571. iqr = q75 - q25
  572. ul = q75+1.5*iqr #upper limit
  573. Index = D>ul
  574. P = d[COL[1]][Index]
  575. M = D.mean()
  576. Me = D.median()
  577. Mi = D.min()
  578. Ma = D.max()
  579. Pathes.extend(P)
  580. ST.extend([N]*len(P))
  581. COE.extend([C]*len(P))
  582. AvV.extend([M]*len(P))
  583. V.extend(D[Index])
  584. Med.extend([Me]*len(P))
  585. MiN.extend([Mi]*len(P))
  586. MaX.extend([Ma]*len(P))
  587. if N == 'ErrorData':
  588. Pathes.extend(D)
  589. S = 'Faulty Data'
  590. ST.extend([S]*len(D))
  591. COE.extend(['-']*len(D))
  592. AvV.extend(['-']*len(D))
  593. V.extend('-'*len(D))
  594. Med.extend('-'*len(D))
  595. MiN.extend('-'*len(D))
  596. MaX.extend('-'*len(D))
  597. #prepare outliers
  598. statiscal=[True if path in Pathes else False for path in ML_algorythms["Pathes"] ]
  599. ML_algorythms["statistical_method"]= statiscal
  600. ML_number=list(ML_algorythms[["One_class_SVM" ,'IsolationForest',"LocalOutlierFactor",' EllipticEnvelope',"statistical_method"]].sum(axis=1))
  601. if format_type == "raw":
  602. ML_algorythms= ML_algorythms[["Pathes","sequence_name", "corresponding_img","sequence_type","One_class_SVM" ,'IsolationForest',"LocalOutlierFactor",' EllipticEnvelope',"statistical_method"]]
  603. elif format_type == "nifti":
  604. ML_algorythms= ML_algorythms[["Pathes","corresponding_img","sequence_type","One_class_SVM" ,'IsolationForest',"LocalOutlierFactor",' EllipticEnvelope',"statistical_method"]]
  605. ML_algorythms["Voting outliers (from 5)"]= ML_number
  606. ML_algorythms= ML_algorythms[ML_algorythms["Voting outliers (from 5)"]>=1]
  607. final_result = os.path.join(Path,"votings.csv")
  608. ML_algorythms.to_csv( final_result)
  609. #%%
  610. #%% For Questions please Contact: aref.kalantari-sarcheshmeh@uk-koeln.de