# online_microstate_sequence_analysis.py
'''
Microstate sequence analysis, applied after the per-trial sequences have been obtained
through the fit-back process (microstate_to_sequence.py).
Designed for analyzing Flight Simulator data collected at CTA, Montreal in November 2021.
Adapted from Jia's algorithm.
Prepared by Mengting Zhao, on Sep 16 2024.
'''
import numpy as np
import copy
from scipy.stats import chi2, chi2_contingency, entropy, t, sem, norm
from scipy import signal
from utilis import load_data, read_subject_info, write_info, read_xlsx, to_string
from multiprocessing import Pool
import codecs, json
import itertools
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from itertools import combinations
from matplotlib.ticker import ScalarFormatter
from sklearn.preprocessing import normalize
import string
import matplotlib.gridspec as gridspec
from scipy import stats
from statsmodels.stats.multitest import multipletests
from math_utilis import nCr
import mne
from utilis import create_info
from scipy.io import loadmat, savemat
import os
from collections import OrderedDict
from os.path import join as pjoin
import math
import statsmodels.stats.multitest as smt


def ceil_decimal(num, n):
    # round each p-value up to n decimal places
    q = len(num)
    pvalues = []
    for i in range(q):
        p = math.ceil(num[i] * 10**n) / (10**n)
        pvalues.append(p)
    return pvalues


def pval_corrected(pvals, method='bonferroni'):
    # multiple-comparison-corrected p-values from statsmodels
    return smt.multipletests(pvals, method=method)[1]
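

# Illustrative usage sketch for the two helpers above, on hypothetical toy
# p-values (not part of the pipeline); wrapped in a function so nothing runs
# at import time.
def _demo_pvalue_helpers():
    raw_p = [0.012, 0.034, 0.21]
    print(ceil_decimal(raw_p, 2))   # -> [0.02, 0.04, 0.21]
    print(pval_corrected(raw_p))    # Bonferroni: each p multiplied by 3, capped at 1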


class MicrostateParameter:
    def __init__(self, sequence, n_microstate):
        self.sequence = sequence
        self.n_microstate = n_microstate
        self.n_sequence = len(sequence)

    def calculate_duration(self, window=None):
        def duration(sequence):
            res = []
            label = sequence[0]
            count = 1
            res_temp = {}
            for i in range(self.n_microstate):
                res_temp[i] = []
            for j in range(1, len(sequence)):
                if label == sequence[j]:
                    count += 1
                else:
                    label = sequence[j]
                    res_temp[sequence[j - 1]].append(count)
                    count = 1
            for i in range(self.n_microstate):
                res.append(np.sum(res_temp[i]) / len(res_temp[i]) if len(res_temp[i]) > 0 else 1)
            return res

        if window:
            n = int(self.n_sequence / window)
            temp = []
            for i in range(n):
                res = duration(self.sequence[i * window:(i + 1) * window])
                temp.append(res)
            return np.asarray(temp).mean(axis=0).tolist()
        else:
            return duration(self.sequence)

    def calculate_frequency(self, window):
        res = []
        res_temp = {}
        for i in range(self.n_microstate):
            res_temp[i] = []
        n_block = int(self.n_sequence / window)
        for i in range(n_block):
            label = self.sequence[i * window]
            temp = {}
            for j in range(i * window + 1, (i + 1) * window):
                if label != self.sequence[j]:
                    if label in temp:
                        temp[label] += 1
                    else:
                        temp[label] = 1
                    label = self.sequence[j]
            for key, value in temp.items():
                res_temp[key].append(value)
        for i in range(self.n_microstate):
            res.append(np.mean(res_temp[i]) if res_temp[i] else 0.)  # guard against states never observed
        return res

    def calculate_coverage(self):
        res = []
        for i in range(self.n_microstate):
            res.append(np.argwhere(np.asarray(self.sequence) == i).shape[0] / self.n_sequence)
        return res
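

# Illustrative usage sketch (hypothetical toy sequence; the real pipeline loads
# fitted-back sequences from *_seq.mat files); wrapped in a function so nothing
# runs at import time.
def _demo_temporal_parameters():
    rng = np.random.default_rng(0)
    toy_seq = rng.integers(0, 7, size=5000).tolist()   # 7 states, 5000 samples
    mp = MicrostateParameter(toy_seq, 7)
    print(mp.calculate_duration())      # mean run length per state, in samples
    print(mp.calculate_frequency(250))  # occurrences per 250-sample window, per state
    print(mp.calculate_coverage())      # fraction of samples spent in each state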


class MicrostateLongRangeDependence:
    def __init__(self, sequence, n_microstate):
        self.sequence = sequence
        self.n_microstate = n_microstate
        self.n_sequence = len(sequence)

    def partition_state(self, k):
        # enumerate the k-element subsets of the microstate labels;
        # for even k, only the first half of the subsets is kept
        comb = combinations([i for i in range(self.n_microstate)], k)
        length = nCr(self.n_microstate, k)
        res = []
        for index, item in enumerate(comb):
            if k % 2 == 0:
                if index == length / 2:
                    break
                else:
                    res.append(item)
            else:
                res.append(item)
        return res

    def embed_random_walk(self, k):
        # map the symbolic sequence to a +/-1 random walk for each bipartition:
        # samples whose state falls in the subset get +1, all others -1
        partitions = self.partition_state(k)
        np_sequence = np.asarray(self.sequence)
        res = {}
        for item in partitions:
            temp_x = np.ones(self.n_sequence) * -1
            for state in item:
                temp_x[np.where(np_sequence == state)[0]] = 1
            res[to_string(item)] = temp_x
        return res

    @staticmethod
    def detrend(embed_sequence, window_size):
        ## commented by Mengting
        # split the walk into non-overlapping windows and compute the RMS of the
        # residuals around a linear trend fitted within each window
        shape = (embed_sequence.shape[0] // window_size, window_size)
        temp = embed_sequence[:shape[0] * window_size].reshape(shape)
        window_size_index = np.arange(window_size)
        res = np.zeros(shape[0])
        for index, y in enumerate(temp):
            coeff = np.polyfit(window_size_index, y, 1)  # finding the trend through fitting
            y_hat = np.polyval(coeff, window_size_index)
            res[index] = np.sqrt(np.mean((y - y_hat)**2))  # RMS after subtracting the 'trend'
        return res

    @staticmethod
    def dfa(embed_sequence, segment_range, segment_density):
        ## this function is called in the dfa() function defined outside the class
        ## commented by Mengting
        y = np.cumsum(embed_sequence - np.mean(embed_sequence))
        scales = (2**np.arange(segment_range[0], segment_range[1], segment_density)).astype(int)
        f = np.zeros(len(scales))
        for index, window_size in enumerate(scales):
            f[index] = np.sqrt(np.mean(MicrostateLongRangeDependence.detrend(y, window_size)**2))
        coeff = np.polyfit(np.log2(scales), np.log2(f), 1)  # the core idea: the slope of log2(F) vs. log2(scale)
        return {'slope': coeff[0], 'fluctuation': f.tolist(), 'scales': scales.tolist()}

    @staticmethod
    def shanon_entropy(x, nx, ns):
        ## in our case, base 2 is applied
        p = np.zeros(ns)
        for t in range(nx):
            p[x[t]] += 1.0
        p /= nx
        return -np.sum(p[p > 0] * np.log2(p[p > 0]))

    @staticmethod
    def shanon_joint_entropy(x, y, nx, ny, ns):
        n = min(nx, ny)
        p = np.zeros((ns, ns))
        for t in range(n):
            p[x[t], y[t]] += 1.0
        p /= n
        return -np.sum(p[p > 0] * np.log2(p[p > 0]))

    @staticmethod
    def shanon_joint_entropy_k(x, nx, ns, k):
        # joint entropy of k consecutive symbols (ns = n_k)
        p = np.zeros(tuple(k * [ns]))
        for t in range(nx - k):
            p[tuple(x[t:t + k])] += 1.0
        p /= (nx - k)
        h = -np.sum(p[p > 0] * np.log2(p[p > 0]))
        return h

    def mutual_information(self, lag):
        ### this part corresponds to the last line of Eq. (5) in Wenjun's paper
        lag = min(self.n_sequence, lag)
        res = np.zeros(lag)
        for time_lag in range(lag):
            nmax = self.n_sequence - time_lag  # number of samples usable at this lag (the window slides forward by 'time_lag')
            ## h represents H(X_t)
            h = self.shanon_entropy(self.sequence[:nmax], nmax, self.n_microstate)
            ## h_lag represents H(X_{t+tau})
            h_lag = self.shanon_entropy(self.sequence[time_lag:time_lag + nmax], nmax, self.n_microstate)
            ## h_h_lag represents the joint entropy of the two sequence segments above
            h_h_lag = self.shanon_joint_entropy(self.sequence[:nmax], self.sequence[time_lag:time_lag + nmax],
                                                nmax, nmax, self.n_microstate)
            res[time_lag] = h + h_lag - h_h_lag
        return res

    def partial_mutual_information(self, lag):
        # partial autoinformation: p[k] = 2*H(k) - H(k-1) - H(k+1),
        # where H(k) is the joint entropy of k consecutive symbols
        p = np.zeros(lag)
        a = self.mutual_information(2)
        p[0], p[1] = a[0], a[1]
        for k in range(2, lag):
            h1 = MicrostateLongRangeDependence.shanon_joint_entropy_k(self.sequence, self.n_sequence, self.n_microstate, k)
            h2 = MicrostateLongRangeDependence.shanon_joint_entropy_k(self.sequence, self.n_sequence, self.n_microstate, k - 1)
            h3 = MicrostateLongRangeDependence.shanon_joint_entropy_k(self.sequence, self.n_sequence, self.n_microstate, k + 1)
            p[k] = 2 * h1 - h2 - h3
        return p

    def excess_entropy_rate(self, kmax):
        # linear fit of the joint-entropy growth H(k) ~ a*k + b:
        # slope a = entropy rate, intercept b = excess entropy
        h = np.zeros(kmax)
        for k in range(kmax):
            h[k] = MicrostateLongRangeDependence.shanon_joint_entropy_k(self.sequence, self.n_sequence, self.n_microstate, k + 1)
        ks = np.arange(1, kmax + 1)
        entropy_rate, excess_entropy = np.polyfit(ks, h, 1)  # least squares polynomial fit
        return entropy_rate, excess_entropy, ks

    @staticmethod
    def plot_mutual_information(data, title, subject, index, f):
        plt.semilogy(np.arange(0, data.shape[0] * f, f), data)
        plt.xlabel("time lag (1ms)")
        plt.ylabel("AIF (bits)")
        plt.title(subject + "_" + title + "_" + str(index))
        plt.show()

    # @staticmethod
    # def plot_mutual_information(data, title):
    #     for i in range(len(data)):
    #         temp = np.asarray(data[i])
    #         plt.semilogy([j for j in range(2000)], temp[0:2000]/temp[0])
    #         # plt.semilogy([j for j in range(len(data[i][0:2000]))], data[i][0:2000])
    #         plt.xlabel("time lag (2ms)")
    #         plt.ylabel("AIF (bits)")
    #         plt.title(title)
    #         plt.show()
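

# Illustrative usage sketch for the long-range-dependence tools above, on a
# hypothetical i.i.d. toy sequence (so the AIF should decay almost immediately
# and the DFA slope should sit near 0.5); wrapped in a function so nothing runs
# at import time.
def _demo_long_range_dependence():
    rng = np.random.default_rng(0)
    toy_seq = rng.integers(0, 7, size=4096).tolist()
    lrd = MicrostateLongRangeDependence(toy_seq, 7)
    aif_curve = lrd.mutual_information(100)         # AIF for lags 0..99
    rate, excess, ks = lrd.excess_entropy_rate(6)   # history length 6, as used below
    walks = lrd.embed_random_walk(3)                # one +/-1 walk per 3-state subset
    one_walk = next(iter(walks.values()))
    dfa_res = MicrostateLongRangeDependence.dfa(one_walk, [2, int(np.log2(len(one_walk)))], 0.25)
    print(aif_curve[:5], rate, dfa_res['slope'])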


class MicrostateMarkov:
    def __init__(self, sequence, n_microstate):
        self.sequence = sequence
        self.n_microstate = n_microstate
        self.n_sequence = len(sequence)

    def transition_matrix_hat(self, k):
        ## When executing this function in Markov_probability, k is set to 1 & 2
        ## the output condition_distribution matrix saves the conditional probabilities of the Markov process,
        ## where a row represents the current length-k history and a column represents the status at (t+k)
        joint_distribution = np.zeros(self.n_microstate**k)
        condition_distribution = np.zeros((self.n_microstate**k, self.n_microstate))  # a 7x7 matrix for k=1
        sh = tuple(k * [self.n_microstate])
        d = {}
        for i, index in enumerate(np.ndindex(sh)):  # maps each length-k history (tuple) to a row index
            d[index] = i
        for t in range(self.n_sequence - k):  # counting the transitions from sequence[t:t+k] to sequence[t+k]
            idx = tuple(self.sequence[t:t + k])  # for k=1, the element at sequence[t], e.g. 3
            i = d[idx]
            j = self.sequence[t + k]  # the element at sequence[t+k], e.g. 4
            joint_distribution[i] += 1.  # histogram of the length-k histories
            condition_distribution[i, j] += 1.
        joint_distribution /= joint_distribution.sum()
        p_row = condition_distribution.sum(axis=1, keepdims=True)
        p_row[p_row == 0] = 1.
        condition_distribution /= p_row
        return joint_distribution, condition_distribution

    @staticmethod
    def surrogate_markov_chain(joint_distribution, condition_distribution, k, n_microstate, n_surrogate):
        sh = tuple(k * [n_microstate])
        d = {}
        dinv = np.zeros((n_microstate**k, k))
        for i, idx in enumerate(np.ndindex(sh)):
            d[idx] = i
            dinv[i] = idx
        joint_distribution_sum = np.cumsum(joint_distribution)
        x = np.zeros(n_surrogate)
        x[:k] = dinv[np.min(np.argwhere(np.random.rand() < joint_distribution_sum))]
        condition_distribution_sum = np.cumsum(condition_distribution, axis=1)
        for t in range(n_surrogate - k):
            idx = tuple(x[t:t + k])
            i = d[idx]
            # inverse-transform sampling: draw the next state j from the cumulative
            # conditional distribution of row i (the history x[t:t+k])
            j = np.min(np.argwhere(np.random.rand() < condition_distribution_sum[i]))
            x[t + k] = j
        return x.astype('int')

    def test_markov(self, order):
        if order == 0:
            return self.test_markov0()
        n = len(self.sequence)
        df = (np.power(self.n_microstate, 2 + order) - 2 * np.power(self.n_microstate, 1 + order)
              + np.power(self.n_microstate, order))
        temp = np.zeros(self.n_microstate)
        frequency = []
        # f_ijk..(n+2), f_ij..(n+1)*, f_*jk..(n+2), f_*jk...(n+1)*
        frequency.append(np.tile(temp, [self.n_microstate for i in range(order + 1)] + [1]))
        frequency.append(np.tile(temp, [self.n_microstate for i in range(order)] + [1]))
        frequency.append(np.tile(temp, [self.n_microstate for i in range(order)] + [1]))
        frequency.append(np.tile(temp, [self.n_microstate for i in range(order - 1)] + [1]))
        for t in range(n - order - 1):
            frequency_index = []
            for j in range(order + 2):
                frequency_index.append(self.sequence[t + j])
            frequency[0][tuple(frequency_index)] += 1
            frequency[1][tuple(frequency_index[:-1])] += 1
            frequency[2][tuple(frequency_index[1::])] += 1
            frequency[3][tuple(frequency_index[1:-1])] += 1
        T = 0.0
        for frequency_index in np.ndindex(frequency[0].shape):
            f = frequency[0][tuple(frequency_index)] * frequency[1][tuple(frequency_index[:-1])] * \
                frequency[2][tuple(frequency_index[1::])] * frequency[3][tuple(frequency_index[1:-1])]
            if f > 0:
                T += frequency[0][tuple(frequency_index)] * \
                     np.log((frequency[0][tuple(frequency_index)] * frequency[3][tuple(frequency_index[1:-1])])
                            / (frequency[1][tuple(frequency_index[:-1])] * frequency[2][tuple(frequency_index[1::])]))
        T *= 2.0
        p = chi2.sf(T, df)
        # print(T, df, p)
        return p

    def test_markov0(self):
        n = len(self.sequence)
        f_ij = np.zeros((self.n_microstate, self.n_microstate))
        f_i = np.zeros(self.n_microstate)
        f_j = np.zeros(self.n_microstate)
        for t in range(n - 1):
            i = self.sequence[t]
            j = self.sequence[t + 1]
            f_ij[i, j] += 1.0
            f_i[i] += 1.0
            f_j[j] += 1.0
        T = 0.0
        for i, j in np.ndindex(f_ij.shape):
            f = f_ij[i, j] * f_i[i] * f_j[j]
            if f > 0:
                T += (f_ij[i, j] * np.log((n * f_ij[i, j]) / (f_i[i] * f_j[j])))
        T *= 2.0
        df = (self.n_microstate - 1) * (self.n_microstate - 1)
        p = chi2.sf(T, df)
        return p

    def test_conditional_homogeneity(self, block_size, order, s=None):
        if s is None:
            n = len(self.sequence)
            s = int(np.floor(float(n) / float(block_size)))
        df = (s - 1.) * (np.power(self.n_microstate, order + 1) - np.power(self.n_microstate, order))
        frequency = []
        # f_ijk..(n+2), f_ij..(n+1)*, f_*jk..(n+2), f_*jk...(n+1)*
        frequency.append(np.zeros(([s] + [self.n_microstate for i in range(order + 1)])))
        frequency.append(np.zeros(([s] + [self.n_microstate for i in range(order)])))
        frequency.append(np.zeros(([self.n_microstate for i in range(order + 1)])))
        frequency.append(np.zeros(([self.n_microstate for i in range(order)])))
        l_total = 0
        for i in range(s):
            if isinstance(block_size, list):
                l = block_size[i]
            else:
                l = block_size
            for j in range(l - order):
                frequency_index = []
                frequency_index.append(i)
                for k in range(order + 1):
                    frequency_index.append(self.sequence[l_total + j + k])
                frequency[0][tuple(frequency_index)] += 1.
                frequency[1][tuple(frequency_index[:-1])] += 1.
                frequency[2][tuple(frequency_index[1::])] += 1.
                frequency[3][tuple(frequency_index[1:-1])] += 1.
            l_total += l
        T = 0.0
        for frequency_index in np.ndindex(frequency[0].shape):
            f = frequency[0][tuple(frequency_index)] * frequency[1][tuple(frequency_index[:-1])] * \
                frequency[2][tuple(frequency_index[1::])] * frequency[3][tuple(frequency_index[1:-1])]
            if f > 0:
                T += frequency[0][tuple(frequency_index)] * \
                     np.log((frequency[0][tuple(frequency_index)] * frequency[3][tuple(frequency_index[1:-1])])
                            / (frequency[1][tuple(frequency_index[:-1])] * frequency[2][tuple(frequency_index[1::])]))
        T *= 2.0
        p = chi2.sf(T, df, loc=0, scale=1.)
        return p

    def conditionalHomogeneityTest(self, l):
        n = len(self.sequence)
        ns = self.n_microstate
        X = self.sequence
        r = int(np.floor(float(n) / float(l)))  # number of blocks
        nl = r * l
        f_ijk = np.zeros((r, ns, ns))
        f_ij = np.zeros((r, ns))
        f_jk = np.zeros((ns, ns))
        f_i = np.zeros(r)
        f_j = np.zeros(ns)
        # calculate f_ijk (time / block dep. transition matrix)
        for i in range(r):  # block index
            for ii in range(l - 1):  # position inside the current block
                j = X[i * l + ii]
                k = X[i * l + ii + 1]
                f_ijk[i, j, k] += 1.0
                f_ij[i, j] += 1.0
                f_jk[j, k] += 1.0
                f_i[i] += 1.0
                f_j[j] += 1.0
        # conditional homogeneity (stationarity of the Markov transitions across blocks)
        T = 0.0
        for i, j, k in np.ndindex(f_ijk.shape):
            f = f_ijk[i, j, k] * f_j[j] * f_ij[i, j] * f_jk[j, k]
            if f > 0:
                T += (f_ijk[i, j, k] * np.log((f_ijk[i, j, k] * f_j[j]) / (f_ij[i, j] * f_jk[j, k])))
        T *= 2.0
        df = (r - 1) * (ns - 1) * ns
        # p = chi2test(T, df, alpha)
        p = chi2.sf(T, df, loc=0, scale=1)
        return p
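

# Illustrative usage sketch for the Markov tools above (hypothetical toy
# sequence; the real pipeline feeds fitted-back microstate sequences); wrapped
# in a function so nothing runs at import time.
def _demo_markov_tools():
    rng = np.random.default_rng(0)
    toy_seq = rng.integers(0, 7, size=5000).tolist()
    markov = MicrostateMarkov(toy_seq, 7)
    joint_p, condition_p = markov.transition_matrix_hat(1)  # order-1 estimates
    surrogate = MicrostateMarkov.surrogate_markov_chain(joint_p, condition_p, 1, 7, 5000)
    print(markov.test_markov(0))                            # zero-order Markov test
    print(markov.test_conditional_homogeneity(500, 1))      # stationarity across 500-sample blocks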


def paired_t_test_condition_parameter(path, savepath, sheets, n_microstates, conditions):
    n_conditions = len(conditions)
    alphabet_string = list(string.ascii_uppercase)
    for sheet in sheets:
        res = []
        res_correct = []
        data = np.asarray(read_xlsx(path, sheet))
        for i in range(n_microstates):
            data_temp = data[:, i * n_conditions:(i + 1) * n_conditions]
            correct_p = []
            count = 0
            for comb in itertools.combinations([j for j in range(n_conditions)], 2):
                p = stats.ttest_rel(data_temp[:, comb[0]], data_temp[:, comb[1]])
                res.append([alphabet_string[i], conditions[comb[0]], conditions[comb[1]], p[0], p[1]])
                res_correct.append([alphabet_string[i], conditions[comb[0]], conditions[comb[1]]])
                correct_p.append(p[1])
                count += 1
            temp = multipletests(correct_p, method='bonferroni')[1].tolist()
            for j in range(len(res) - 1, len(res) - count - 1, -1):
                res_correct[j].append(temp[count - 1])
                count -= 1
        write_info(savepath, sheet, res)
        write_info(savepath, sheet + '_' + 'bonferroni', res_correct)


def paired_t_test_run_parameter(path, savepath, sheets, n_microstates, n_conditions, n_runs):
    for sheet in sheets:
        res = []
        res_correct = []
        data = np.asarray(read_xlsx(path, sheet))[:, n_microstates::]
        for i in range(0, n_microstates * n_conditions * n_runs, n_runs):
            data_temp = data[:, i:i + n_runs]  # one block of n_runs columns per microstate/condition
            correct_p = []
            for comb in itertools.combinations([j for j in range(n_runs)], 2):
                p = stats.ttest_rel(data_temp[:, comb[0]], data_temp[:, comb[1]])
                res.append(p)
                correct_p.append(p)
            # res_correct.extend(multipletests(correct_p, method='bonferroni')[1])
        write_info(savepath, sheet, res)
        # write_info(savepath, sheet + '_' + 'bonferroni', res_correct)


def plot_run_parameter(path, sheets, row, n_microstates, n_runs, conditions):
    alphabet_string = list(string.ascii_uppercase)
    column = int(math.ceil(n_microstates / row))
    n_conditions = len(conditions)
    title = ['Microstate' + alphabet_string[i] for i in range(n_microstates)]
    block_size = n_microstates * n_runs
    for sheet in sheets:
        data = np.asarray(read_xlsx(path, sheet))[n_microstates::]
        for n_condition in range(n_conditions):
            fig, ax = plt.subplots(row, column)
            count = 0
            for i in range(row):
                for j in range(column):
                    # one block of n_runs rows per subplot; x-axis is the run index
                    # (the sheet layout is assumed, as in the original draft)
                    block = data[count:count + n_runs]
                    ax[i][j].errorbar(np.arange(block.shape[0]), np.mean(block, axis=1),
                                      yerr=stats.sem(block, axis=1))
                    count += n_runs


def label_diff(i, j, yoffset, text, y, ax):
    y = max(y[i], y[j]) + yoffset
    props = {'arrowstyle': '-', 'linewidth': 2}
    ax.annotate(text, xy=(i, y + 0.003), zorder=1)
    ax.annotate('', xy=(i, y), xytext=(j, y), arrowprops=props)


def plot_block(mean, yerr, conditions, ax, title, ylabe, p_value=None, p_bar=None):
    n_conditions = len(conditions)
    if p_value is None:
        for index, value in enumerate(conditions):
            ax.errorbar(index, mean[index], yerr[index], fmt='.', marker='.', color='black', capsize=5, capthick=2)
    else:
        for index, value in enumerate(conditions):
            if index == 0:
                ax.errorbar(index, mean[index], yerr[index], fmt='.', marker='.', color='black', capsize=5, capthick=2)
            else:
                # marker color encodes the significance level of the comparison
                p = float(p_value[index - 1])
                if p > 0.05:
                    ax.errorbar(index, mean[index], yerr=yerr[index], fmt='.', marker='.', color='black', capsize=5, capthick=2)
                if 0.01 <= p <= 0.05:
                    ax.errorbar(index, mean[index], yerr=yerr[index], fmt='.', marker='*', mfc='red', mec='red', color='black', capsize=5, capthick=2)
                if 0.005 <= p < 0.01:
                    ax.errorbar(index, mean[index], yerr=yerr[index], fmt='.', marker='*', mfc='green', mec='green', color='black', capsize=5, capthick=2)
                if p < 0.005:
                    ax.errorbar(index, mean[index], yerr=yerr[index], fmt='.', marker='*', mfc='blue', mec='blue', color='black', capsize=5, capthick=2)
        for i in range(len(conditions) - 1, len(p_value)):
            p = float(p_value[i])
            if p < 0.05:
                p_str = "p=" + format(p, '.3f')
                label_diff(p_bar[i][0], p_bar[i][1], p_bar[i][2], p_str, mean + yerr, ax)
    ax.set_title(title)
    ax.set_xticks(list(range(n_conditions)))
    ax.set_xticklabels(conditions)
    ax.set_ylabel(ylabe)
    ax.set_ylim(top=max(mean + yerr) + 0.01)
    ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.2f'))


def plot_condition_paramter(path, savepath, sheets, row, n_microstates, conditions, ylabe, path_t_test=None, p_value_bar=None):
    alphabet_string = list(string.ascii_uppercase)
    column = int(math.ceil(n_microstates / row))
    n_conditions = len(conditions)
    n_cr = int(nCr(n_conditions, 2))
    row_num = column * n_conditions
    title = ['Microstate' + alphabet_string[i] for i in range(n_microstates)]
    for sheet in sheets:
        data = np.asarray(read_xlsx(path, sheet))
        p_value = np.asarray(read_xlsx(path_t_test, sheet))[:, -1] if path_t_test else None
        fig, ax = plt.subplots(row, column, figsize=(14, 8))
        for i in range(row):
            for j in range(column):
                if column * i + j == n_microstates:
                    ax[i][j].axis('off')
                    break
                # plot_block expects the per-condition mean and SEM of the block
                block = data[:, row_num * i + j * n_conditions:row_num * i + (j + 1) * n_conditions]
                plot_block(block.mean(axis=0), stats.sem(block, axis=0), conditions, ax[i][j],
                           title[column * i + j], ylabe[sheet],
                           p_value[n_cr * (column * i + j):n_cr * (column * i + j + 1)] if p_value is not None else None,
                           p_value_bar)
        plt.subplots_adjust(hspace=0.4, wspace=0.5)
        plt.savefig(savepath + "//" + sheet + ".png", dpi=1200)
        plt.show()


def reorder_run_parameter(path, savepath, sheets, n_microstates, n_runs, n_conditions, order):
    for sheet in sheets:
        data = np.asarray(read_xlsx(path, sheet))
        data_ordered = np.zeros((data.shape[0], data.shape[1]))
        data_ordered[:, 0:n_microstates] = data[:, np.asarray(order)]
        block_size = n_microstates * n_runs
        for i in range(n_conditions):
            index = []
            for j in order:
                index.extend(list(range(j * n_runs + n_microstates + i * block_size, (j + 1) * n_runs + n_microstates + i * block_size)))
            data_ordered[:, i * block_size + n_microstates:(i + 1) * block_size + n_microstates] = data[:, index]
        write_info(savepath, sheet, data_ordered)


def reorder_condition_parameter(path, savepath, sheets, n_conditions, order):
    for sheet in sheets:
        data = np.asarray(read_xlsx(path, sheet))
        data_ordered = np.zeros((data.shape[0], data.shape[1]))
        for i, i_value in enumerate(order):
            data_ordered[:, i * n_conditions:(i + 1) * n_conditions] = data[:, i_value * n_conditions:(i_value + 1) * n_conditions]
        write_info(savepath, sheet, data_ordered.tolist())


def formulate_run_parameter(path, save_path, sheets, n_microstates, n_conditions, n_runs):
    block_size = n_microstates * n_runs
    for sheet in sheets:
        res = []
        data = read_xlsx(path, sheet)
        for row in data:
            temp_res = row[0:n_microstates]
            np_data = np.asarray(row[n_microstates::])
            for n_condition in range(n_conditions):
                index = []
                for j in range(n_microstates):
                    for i in range(n_condition * block_size, (n_condition + 1) * block_size, n_microstates):
                        index.append(i + j)
                temp_res.extend(np_data[index])
            res.append(temp_res)
        write_info(save_path, sheet, res)


def formulate_condition_parameter(path, save_path, sheets, n_microstates, n_conditions, n_runs):
    for sheet in sheets:
        res = []
        data = read_xlsx(path, sheet)
        for row in data:
            temp = []
            temp_res = []
            for i in range(n_microstates, len(row), n_runs):
                temp.append(np.mean(row[i:i + n_runs]))
            for i in range(n_microstates):
                temp_res.append(row[i])
                for j in range(n_conditions):
                    temp_res.append(temp[i + j * n_microstates])
            res.append(temp_res)
        write_info(save_path, sheet, res)


def batch_test_homogeneity_within_condition_within_subject(data, comb, order, n_microstate):
    res = {}
    multi_res = []
    pool = Pool(8)
    for combination in comb:
        data_merged = []
        block_size = []
        for item in combination:
            data_merged.extend(data[item])
            block_size.append(len(data[item]))
        multi_res.append(
            pool.apply_async(test_homogeneity, ([data_merged, n_microstate, block_size, order, len(combination)],)))
    pool.close()
    pool.join()
    for i in range(len(multi_res)):
        temp = '*'.join(comb[i])
        res[temp] = multi_res[i].get()
    return res


def batch_test_homogeneity_within_task_across_subject(comb, task, order, n_microstate, subject_path):
    res = {}
    multi_res = []
    pool = Pool(8)
    for combination in comb:
        data_merged = []
        block_size = []
        for item in combination:
            data_temp = load_data(subject_path + "\\" + item + "_1_30_microstate_labels.json")[task]
            data_merged.extend(data_temp)
            block_size.append(len(data_temp))
        multi_res.append(
            pool.apply_async(test_homogeneity, ([data_merged, n_microstate, block_size, order, len(combination)],)))
    pool.close()
    pool.join()
    for i in range(len(multi_res)):
        temp = '*'.join(comb[i])
        res[temp] = multi_res[i].get()
    return res


## updated by Mengting Zhao on March 24 2022 for CTA analysis
## the function is ready for use
def batch_surrogate_aif(joint_p, condition_p, n_markov, n_microstate, n_surrogate, n_lag, n_repetition):
    res = []
    multi_res = []
    # pool = Pool(7)
    for i in range(n_repetition):
        multi_res.append(surrogate_aif([joint_p, condition_p, n_markov, n_microstate, n_surrogate, n_lag]))
        # multi_res.append(
        #     pool.apply_async(surrogate_aif, ([joint_p, condition_p, n_markov, n_microstate, n_surrogate, n_lag],))
        # )
    # pool.close()
    # pool.join()
    for i in range(len(multi_res)):
        # res.append(multi_res[i].get().tolist())
        res.append(multi_res[i].tolist())
    return res


def surrogate_aif(para):
    # para = [joint_p, condition_p, n_markov, n_microstate, n_surrogate, n_lag]
    j_p = para[0]
    c_p = para[1]
    n_markov = para[2]
    n_microstate = para[3]
    n_surrogate = para[4]
    n_lag = para[5]
    surrogate_data = MicrostateMarkov.surrogate_markov_chain(j_p, c_p, n_markov, n_microstate, n_surrogate)
    lrd = MicrostateLongRangeDependence(surrogate_data, n_microstate)
    return lrd.mutual_information(n_lag)


### updated by Mengting Zhao for CTA analysis on March 2
### function ready for use on CTA data analysis
def batch_calculate_aif(data_path, tasks, n_microstate, lag, window_size, window_step, method='aif'):
    # note: relies on the module-level path_name list defined in the __main__ block
    nb_trials = len(tasks)
    res = OrderedDict()
    multi_res = []
    if window_size == -1:
        # pool = Pool(11)
        for q in range(nb_trials):
            task = tasks[q]
            res[task] = []
            data = loadmat(data_path + "\\" + path_name[q] + "\\" + task + "_seq.mat")['EEG'].flatten()
            multi_res.append(aif([data, n_microstate, lag, method]))
            # multi_res.append(pool.apply_async(aif, ([data, n_microstate, lag, method],)))
        for i in range(len(multi_res)):
            # res[tasks[i]].append(multi_res[i].get().tolist())
            res[tasks[i]].append(multi_res[i].tolist())
    return res


def aif(para):
    ## switch between AIF and partial-AIF with the fourth parameter
    ## the key function for AIF computation is mutual_information()
    lrd = MicrostateLongRangeDependence(para[0], para[1])
    if para[3] == 'aif':
        res_aif = lrd.mutual_information(para[2])
    elif para[3] == 'paif':
        res_aif = lrd.partial_mutual_information(para[2])
    return res_aif
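

# Illustrative usage sketch for the aif() parameter-list convention, on a
# hypothetical toy sequence; wrapped in a function so nothing runs at import
# time. A small lag is used for 'paif' because its joint-entropy table grows
# as n_microstate**(lag+1).
def _demo_aif_convention():
    rng = np.random.default_rng(0)
    toy_seq = rng.integers(0, 7, size=2000).tolist()
    aif_curve = aif([toy_seq, 7, 50, 'aif'])    # [sequence, n_microstate, lag, method]
    paif_curve = aif([toy_seq, 7, 4, 'paif'])
    print(aif_curve[:3], paif_curve[:3])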


## updated by Mengting Zhao on March 24 2022 for CTA analysis
## function ready for use on preprocessed CTA data
def batch_calculate_dfa(data, n_microstate, n_partitions, segment_range, segment_density):
    res = []
    # res = OrderedDict()
    multi_res = []
    # pool = Pool(11)
    lrd = MicrostateLongRangeDependence(data, n_microstate)
    # embed_random_walk() maps the label sequence to one +/-1 walk per bipartition
    embeding = lrd.embed_random_walk(n_partitions)
    for embed_sequence_index, embed_sequence in embeding.items():
        # v_test = dfa([embed_sequence, embed_sequence_index, segment_range, segment_density])
        multi_res.append(dfa([embed_sequence, embed_sequence_index, segment_range, segment_density]))
        # multi_res.append(
        #     pool.apply_async(dfa, ([embed_sequence, embed_sequence_index, segment_range, segment_density],))
        # )
    # pool.close()
    # pool.join()
    for i in range(len(multi_res)):
        # res.append(multi_res[i].get())
        res.append(multi_res[i])
    return res


def dfa(para):
    # para = [embed_sequence, embed_sequence_index, segment_range, segment_density]
    embed_sequence = para[0]
    embed_sequence_index = para[1]
    segment_range = para[2]
    segment_density = para[3]
    res = {}
    res[embed_sequence_index] = MicrostateLongRangeDependence.dfa(embed_sequence, segment_range, segment_density)
    return res


def test_homogeneity(para):
    # para = [sequence, n_microstate, block_size, order, s]
    sequence = MicrostateMarkov(para[0], para[1])
    p = sequence.test_conditional_homogeneity(para[2], para[3], para[4])
    return p


def conditions_tasks(conditions, tasks):
    # group task names by the condition prefix they start with
    res = {}
    for condition in conditions:
        res[condition] = []
        for task in tasks:
            if task.startswith(condition):
                res[condition].append(task)
    return res
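

# Illustrative usage sketch for conditions_tasks(), with toy names mirroring
# the __main__ block below:
def _demo_conditions_tasks():
    groups = conditions_tasks(['training', 'practiceA'],
                              ['training_1', 'training_2', 'practiceA_1'])
    print(groups)  # {'training': ['training_1', 'training_2'], 'practiceA': ['practiceA_1']}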


def read_TLX_ratings(input_fname, sheet_name):
    # try_path = r'F:\FlightSim_experiment_2021\Segmentation_NRC\4. Subjective Ratings\Inlook_rating_zhao.xlsx'
    ddd = read_xlsx(input_fname, sheet_name, row=True)
    # # nb_trials = 22
    # lis_score = np.zeros(nb_trials)
    # id = 0
    res_score = []
    for row in ddd[1:]:  # skip the header row
        data_temp = row[1:7]
        score = np.sum(data_temp) / 6  # mean of the six TLX sub-scales
        # lis_score[id] = score
        # id += 1
        res_score.append(score)
    return res_score


if __name__ == '__main__':
    n_k = 7
    n_chs = 64
    data_dir = r'F:\FlightSim_experiment_2021\Concordia_preprocessed'
    n_conditions = 3
    condition_name = ['training', 'practiceA', 'practiceB']
    subjects = ['P01', 'P02', 'P03', 'P04', 'P05', 'P06', 'P07', 'P08', 'P09', 'P10', 'P11', 'P12',
                'P13', 'P14', 'P15', 'P16', 'P17', 'P18', 'P19', 'P20', 'P21', 'P22', 'P23', 'P24']
    path_name = ['trial_1', 'trial_2', 'trial_3', 'trial_4', 'trial_5', 'trial_6', 'trial_7',
                 'trial_8', 'trial_9', 'trial_10', 'trial_11', 'trial_12', 'trial_13', 'trial_14', 'trial_15',
                 'trial_16', 'trial_17', 'trial_18', 'trial_19', 'trial_20', 'trial_21', 'trial_22']
    lis_name = ['trial_1.set', 'trial_2.set', 'trial_3.set', 'trial_4.set', 'trial_5.set', 'trial_6.set', 'trial_7.set',
                'trial_8.set', 'trial_9.set', 'trial_10.set', 'trial_11.set', 'trial_12.set', 'trial_13.set', 'trial_14.set', 'trial_15.set',
                'trial_16.set', 'trial_17.set', 'trial_18.set', 'trial_19.set', 'trial_20.set', 'trial_21.set', 'trial_22.set']
    tasks = ['training_1', 'training_2', 'training_3', 'training_4', 'training_5', 'training_6', 'training_7',
             'practiceA_1', 'practiceA_2', 'practiceA_3', 'practiceA_4', 'practiceA_5', 'practiceA_6', 'practiceA_7', 'practiceA_8',
             'practiceB_1', 'practiceB_2', 'practiceB_3', 'practiceB_4', 'practiceB_5', 'practiceB_6', 'practiceB_7']
    nb_trials = len(path_name)

    ################ the computation of temporal parameters is independent from the AIF, DFA, and entropy rate analysis #################
    ################ calculate temporal parameters
    ##### tested with success on reordered sequences ###
    save_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters'
    save_path_excel = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters\condition_parameters.xlsx'
    f = 250
    f_m = 1000 / f
    n_k = 7
    res = {}
    res_xlsx = {'duration': [], 'frequency': [], 'coverage': []}
    for subject in subjects:
        print(subject)
        subject_path = data_dir + "\\" + subject + "\\" + r'clean_data_matlab' + "\\" + r'data'
        res[subject] = {}
        # data = load_data(subject_path + "\\" + subject + "_microstate_labels.json")
        temp = [[], [], []]
        for q in range(nb_trials):
            task = tasks[q]
            # res[task] = []
            data = loadmat(subject_path + "\\" + path_name[q] + "\\" + task + "_seq.mat")['EEG'].flatten()
            sequence = MicrostateParameter(data, n_k)
            duration = sequence.calculate_duration()     # during each iteration, the size of duration is 7
            frequency = sequence.calculate_frequency(f)  # during each iteration, the size of frequency is 7
            coverage = sequence.calculate_coverage()     # during each iteration, the size of coverage is 7
            duration = [i * f_m for i in duration]       # samples -> ms
            coverage = [i * 100 for i in coverage]       # fraction -> percent
            res[subject][task] = {'duration': duration, 'frequency': frequency, 'coverage': coverage}
            temp[0].extend(duration)
            temp[1].extend(frequency)
            temp[2].extend(coverage)
        res_xlsx['duration'].append(temp[0])   # already scaled to ms above
        res_xlsx['frequency'].append(temp[1])
        res_xlsx['coverage'].append(temp[2])   # already scaled to percent above
    print('tested with success')

    ################ formulate task-wise and condition-wise parameters ###########################
    ### tested with success on microstate sequences ###############
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters\parameters_subjects.json'
    save_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters'
    n_k = 7
    paras = ['duration', 'frequency', 'coverage']
    for pa in paras:
        res = np.zeros((len(condition_name), n_k * len(subjects)))
        for index, condition in enumerate(condition_name):
            temp_res = []
            for task in tasks:
                temp = []
                for subject in subjects:
                    if task.startswith(condition):
                        data = load_data(read_path_json)[subject][task][pa]
                        temp.extend(data)
                if temp:
                    temp_res.append(temp)
            np_data = np.asarray(temp_res)
            res[index, :] = np_data.mean(axis=0)
    print('finish')

    ################ formulate t-test data files ##########################
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters'
    save_path_json = pjoin(read_path_json, r't-test')  # results saved under the folder "t-test"
    paras = ['duration', 'frequency', 'coverage']
    n_k = 7
    length = n_k * len(subjects)
    m_index = []  # column indices for each EEG map: e.g. 'A' corresponds to [0, 7, 14, ...], 'B' to [1, 8, 15, ...]
    for m in range(n_k):
        temp = []
        for n in range(m, length, n_k):
            temp.append(n)
        m_index.append(temp)
    for pa in paras:
        # for condition in condition_name:
        #     read_path = read_path_json + '\\' + condition + '_' + pa + '.mat'
        read_path = read_path_json + '\\' + 'condition' + '_' + pa + '.mat'  # switch between 'condition' and condition (within the condition loop)
        data = loadmat(read_path)['EEG']
        for index, value in enumerate(m_index):
            # save_path = save_path_json + "\\" + condition + '_' + pa + '_m' + str(index) + '.mat'
            save_path = save_path_json + "\\" + 'condition' + '_' + pa + '_m' + str(index) + '.mat'
            savemat(save_path, {'EEG': data[:, value]})
    print('print')

    ################ conduct t-test on the formulated data ##########################
    ### tested with success on microstate sequences ###########################
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters\t-test'
    save_path_json = pjoin(read_path_json, r'results')
    n_k = 7
    paras = ['duration', 'frequency', 'coverage']
    alpha = 0.95
    sample_size = len(subjects)
    for pa in paras:
        for m in range(n_k):
            res = []
            res_t = []
            res_interval = [[], []]
            save_path_t = save_path_json + "\\" + 'condition' + '_' + pa + "_mt" + str(m) + '.mat'
            save_path_c = save_path_json + "\\" + 'condition' + '_' + pa + "_mc" + str(m) + '.mat'
            read_path = read_path_json + "\\" + 'condition' + '_' + pa + '_m' + str(m) + '.mat'
            data = loadmat(read_path)['EEG']
            for comb in itertools.combinations([i for i in range(data.shape[0])], 2):
                p = stats.ttest_rel(data[comb[0], :], data[comb[1], :])
                res.append(p[1])
                res_t.append(p[0])
                diff = data[comb[0], :] - data[comb[1], :]
                diff_mean = np.mean(diff)
                diff_std = stats.sem(diff)
                ci = stats.t.interval(alpha, sample_size - 1, loc=diff_mean, scale=diff_std)
                res_interval[0].append(ci[0])
                res_interval[1].append(ci[1])
            savemat(save_path_t, {'EEG': np.asarray(res_t)})
            savemat(save_path_c, {'EEG': np.asarray(res_interval)})
    print('right')

    ################ calculate temporal parameters and write to Excel spreadsheets ##########################
    # note: this block expects `paras` to be a list of per-batch config dicts carrying the
    # keys read below ('tasks', 'subject_path', 'save_path_json', 'save_path_excel'),
    # not the list of parameter names used in the blocks above
    for para in paras:
        f = 250
        f_m = 1000 / f
        n_microstate = 7
        res = {}
        res_xlsx = {'duration': [], 'frequency': [], 'coverage': []}
        tasks = para['tasks']
        subject_path = para['subject_path']
        save_path_json = para['save_path_json']
        save_path_excel = para['save_path_excel']
        for subject in subjects:
            print(subject)
            res[subject] = {}
            # data = load_data(subject_path + "\\" + subject + "_microstate_labels.json")
            temp = [[], [], []]
            for task in tasks:
                data = loadmat(subject_path + "\\" + subject + "\\" + task + "_seq.mat")['EEG'].flatten()
                sequence = MicrostateParameter(data, n_microstate)
                duration = sequence.calculate_duration()
                frequency = sequence.calculate_frequency(f)
                coverage = sequence.calculate_coverage()
                duration = [i * f_m for i in duration]
                coverage = [i * 100 for i in coverage]
                res[subject][task] = {'duration': duration, 'frequency': frequency, 'coverage': coverage}
        json.dump(res, codecs.open(save_path_json, 'w', encoding='utf-8'), separators=(',', ':'), sort_keys=True)

    ########### Part I: entropy rate analysis #########################################
    for lag in range(4, 7):
        for subject in subjects:
            subject_path = data_dir + "\\" + subject
            print(subject)
            res = {}
            for q in range(nb_trials):
                task = tasks[q]
                trial_path = subject_path + "\\" + r'clean_data_matlab' + "\\" + r'data' + "\\" + path_name[q]
                # data_path = pjoin(trial_path, lis_name[q])
                # EEG_epochs = mne.io.read_epochs_eeglab(data_path)
                data_path_save = trial_path + "\\" + tasks[q] + '_seq.mat'
                data = loadmat(trial_path + "\\" + tasks[q] + "_seq.mat")['EEG'].flatten()
                lrd = MicrostateLongRangeDependence(data, n_k)
                entropy_rate, excess_entropy, index = lrd.excess_entropy_rate(lag)
                res[task] = {'entropy_rate': entropy_rate.tolist(), 'excess_entropy': excess_entropy.tolist(), 'index': index.tolist()}
            json.dump(res, codecs.open(data_dir + "\\microstates_sequences" + "\\entropy_rate" + "\\history_" + str(lag) + '\\'
                                       + subject + ".json", 'w', encoding='utf-8'), separators=(',', ':'), sort_keys=True)

    ################ condition-wise entropy rate computation on the microstate sequences ##########################
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\entropy_rate\history_6'  # using history_6 as mentioned in the paper
    save_path_json = pjoin(read_path_json, r'condition_acrosstrials')
    conditions_res = np.zeros((len(condition_name), len(subjects)))
    for index, condition in enumerate(condition_name):
        res = []
        for task in tasks:
            temp = []
            if task.startswith(condition):
                for subject in subjects:
                    data = load_data(read_path_json + "\\" + subject + ".json")[task]['entropy_rate']
                    temp.append(data)
            if temp:
                res.append(temp)
        np_res = np.asarray(res)
        conditions_res[index, :] = np_res.mean(axis=0)
        savemat(save_path_json + "\\" + condition + ".mat", {'EEG': np_res})  # the entropy rates for one specific condition
    savemat(save_path_json + "\\" + 'condition' + ".mat", {'EEG': conditions_res})  # the entropy rates for all conditions

    ##################### Part III: DFA (detrended fluctuation analysis) computation #######################################
    save_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\DFA'
    n_k = 7
    n_partitions = 3  # one part has 3 states, the other part has 4
    for subject in subjects:
        print(subject)
        subject_path = data_dir + "\\" + subject
        data_path = subject_path + "\\" + r'clean_data_matlab' + "\\" + r'data'
        res = {}
        for q in range(nb_trials):
            task = tasks[q]
            data = loadmat(data_path + "\\" + path_name[q] + "\\" + task + "_seq.mat")['EEG'].flatten()
            segment_range = [2, int(np.log2(len(data)))]
            segment_density = 0.25
            res[task] = batch_calculate_dfa(data, n_k, n_partitions, segment_range, segment_density)
        json.dump(res, codecs.open(save_path_json + "\\" + subject + "_dfa.json", 'w', encoding='utf-8'), separators=(',', ':'), sort_keys=True)
    print('stop')

    ################# Section 2: generating surrogate data ######################
    ### tested with success on microstate sequences #########
    # # Markov probability computation, tested with success ####
    save_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\markovianity.json'
    res = {}
    n_k = 7
    for subject in subjects:
        print(subject)
        subject_path = data_dir + "\\" + subject
        data = load_data(subject_path + "\\" + r'microstates' + "\\" + subject + "_microstate_labels.json")
        res[subject] = {}
        for task in tasks:
            res[subject][task] = {}
            markov = MicrostateMarkov(data[task], n_k)
            for k in range(1, 6):
                joint_p, condition_p = markov.transition_matrix_hat(k)
                res[subject][task][k] = {'joint_p': joint_p.tolist(), 'condition_p': condition_p.tolist()}
    json.dump(res, codecs.open(save_path_json, 'w', encoding='utf-8'), separators=(',', ':'), sort_keys=True, indent=4)
    print(999)

    ######################### generating surrogate data, tested with success #################################################
    n_k = 7
    for subject in subjects:
        print(subject)
        res = {}
        subject_path = data_dir + "\\" + subject + "\\" + "microstates"
        data_path = subject_path + "\\" + subject + "_microstate_labels.json"
        data = load_data(data_path)
        save_path = subject_path + "\\" + subject + "_surrogate_microstate_labels.json"  # the results are saved under each subject's folder
        matrix = load_data(r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\markovianity.json')
        for task in tasks:
            n_surrogate = len(data[task])
            res[task] = {}
            for k in range(1, 6):
                condition_p = matrix[subject][task][str(k)]['condition_p']
                joint_p = matrix[subject][task][str(k)]['joint_p']
                res[task][k] = MicrostateMarkov.surrogate_markov_chain(joint_p, condition_p, k, n_k, n_surrogate).tolist()
        json.dump(res, codecs.open(save_path, 'w', encoding='utf-8'), separators=(',', ':'), sort_keys=True, indent=4)
    print('check results')

    #################### DFA related computation ##################################################
    ###### tested with success
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\DFA'
    save_path_json = pjoin(read_path_json, 'formalated_empirical')
    n_k = 7
    n_paritions = 3
    comb = combinations([i for i in range(n_k)], n_paritions)
    for index, item in enumerate(comb):
        res = np.zeros((len(condition_name), len(subjects)))
        for c_index, condition in enumerate(condition_name):
            temp = []
            for task in tasks:
                slope = []
                if task.startswith(condition):
                    for subject in subjects:
                        data = load_data(read_path_json + "\\" + subject + "_dfa.json")
                        slope.append(data[task][index][to_string(item)]['slope'])
                if slope:
                    temp.append(slope)
            np_temp = np.asarray(temp)
            res[c_index, :] = np_temp.mean(axis=0)  # c_index: 0 (training), 1 (practiceA), 2 (practiceB)
            savemat(save_path_json + "\\" + condition + "_" + to_string(item) + ".mat", {'EEG': np_temp})  # the results for one condition (one partition)
        savemat(save_path_json + "\\" + "condition" + "_" + to_string(item) + ".mat", {'EEG': res})  # the results for all conditions (one partition)
    print(999)

    ################### plot the across-subjects DFA results: Hurst exponent on empirical data (group level) #################
    # note: read_path_excel and p_read_path_excel are expected to come from a config
    # dict `para`, as in the Excel block above
    read_path_excel = para['read_path_excel']
    p_read_path_excel = para['p_read_path_excel']
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\DFA\formalated_empirical'
    save_path_fig = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\DFA\figures'
    c_t = conditions_tasks(condition_name, tasks)
    n_k = 7
    n_condition = len(condition_name)
    n_paritions = 3
    row = 2
    column = 5
    all_comb = set([i for i in range(n_k)])
    comb = [item for item in combinations([i for i in range(n_k)], n_paritions)]
    label_str = ['_1_10.png', '_11_20.png', '_21_30.png', '_31_35.png']
    label_index = [0, 10, 20, 30]
    for index, index_item in enumerate(label_index):
        for condition in condition_name:
            fig, ax = plt.subplots(row, column, figsize=(20, 8), dpi=600)
            for i in range(row):
                for j in range(column):
                    if i == 1 and index == len(label_index) - 1:
                        ax[i][j].axis('off')  # only 35 partitions: the last page has a single filled row
                        continue
                    item = comb[i * column + j + index_item]
                    data = loadmat(read_path_json + "\\" + 'condition' + "_" + to_string(item) + ".mat")['EEG']
                    data = np.asarray(read_xlsx(read_path_excel, to_string(item)))
                    p_value = np.asarray(read_xlsx(p_read_path_excel, to_string(item)))[:, 1]
                    title = to_string(item) + " Vs. " + to_string(all_comb - set(item))
                    if row == 1:
                        plot_block(data.mean(axis=0), stats.sem(data, axis=0), condition_name, ax[j], title, 'H')
                        # plot_block(data.mean(axis=0), stats.sem(data, axis=0), c_t[condition], ax[j], title, 'H')
                    else:
                        # plot_block(data.mean(axis=0), stats.sem(data, axis=0), c_t[condition], ax[i][j], title, 'H')
                        plot_block(data.mean(axis=1), stats.sem(data, axis=1), condition_name, ax[i][j], title, 'H')
            plt.subplots_adjust(hspace=0.4, wspace=0.5)
            # plt.savefig(save_path_fig + "\\" + condition + "\\" + condition + "_1_10" + ".png", dpi=600)
            plt.savefig(save_path_fig + "\\" + 'condition' + label_str[index], dpi=600)
            # plt.show()
            plt.clf()
            plt.close()
    ss = 8