'''
For microstate sequence analysis, after obtaining the sequences for each trial
through the fit-back process (microstate_to_sequence.py).
Designed for analyzing Flight Simulator data collected at CTA, Montreal in November 2021.
Adapted from Jia's algorithm.
Prepared by Mengting Zhao, on Sep 16 2024
'''

import numpy as np
import copy
from scipy.stats import chi2, chi2_contingency, entropy, t, sem, norm
from scipy import signal
from utilis import load_data, read_subject_info, write_info, read_xlsx, to_string
from multiprocessing import Pool
import codecs, json
import itertools
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from itertools import combinations
from matplotlib.ticker import ScalarFormatter
from sklearn.preprocessing import normalize
import string
import matplotlib.gridspec as gridspec
from scipy import stats
from statsmodels.stats.multitest import multipletests
from math_utilis import nCr
import mne
from utilis import create_info
from scipy.io import loadmat, savemat
import os
from collections import OrderedDict
from os.path import join as pjoin
import math
import statsmodels.stats.multitest as smt


def ceil_decimal(num, n):
    """Round each p-value in `num` up to `n` decimal places."""
    q = len(num)
    pvalues = []
    for i in range(q):
        p = math.ceil(num[i] * 10**n) / (10**n)
        pvalues.append(p)
    return pvalues


def pval_corrected(pvals, method='bonferroni'):
    """Return multiple-comparison-corrected p-values."""
    return smt.multipletests(pvals, method=method)[1]


class MicrostateParameter:

    def __init__(self, sequence, n_microstate):
        self.sequence = sequence
        self.n_microstate = n_microstate
        self.n_sequence = len(sequence)

    def calculate_duration(self, window=None):
        """Mean duration (in samples) of each microstate; if `window` is given,
        durations are computed per non-overlapping window and averaged."""

        def duration(sequence):
            res = []
            label = sequence[0]
            count = 1
            res_temp = {}
            for i in range(self.n_microstate):
                res_temp[i] = []
            for j in range(1, len(sequence)):
                if label == sequence[j]:
                    count += 1
                else:
                    label = sequence[j]
                    res_temp[sequence[j - 1]].append(count)
                    count = 1
            for i in range(self.n_microstate):
                res.append(np.sum(res_temp[i]) / len(res_temp[i]) if len(res_temp[i]) > 0 else 1)
            return res

        if window:
            n = int(self.n_sequence / window)
            temp = []
            for i in range(n):
                res = duration(self.sequence[i * window:(i + 1) * window])
                temp.append(res)
            return np.asarray(temp).mean(axis=0).tolist()
        else:
            return duration(self.sequence)

    def calculate_frequency(self, window):
        """Mean number of occurrences of each microstate per block of `window`
        samples (window = sampling rate gives occurrences per second)."""
        res = []
        res_temp = {}
        for i in range(self.n_microstate):
            res_temp[i] = []
        n_block = int(self.n_sequence / window)
        for i in range(n_block):
            label = self.sequence[i * window]
            temp = {}
            for j in range(i * window + 1, (i + 1) * window):
                if label != self.sequence[j]:
                    if label in temp:
                        temp[label] += 1
                    else:
                        temp[label] = 1
                    label = self.sequence[j]
            for key, value in temp.items():
                res_temp[key].append(value)
        for i in range(self.n_microstate):
            # 0 if the state never occurs, to avoid a NaN from np.mean([])
            res.append(np.mean(res_temp[i]) if len(res_temp[i]) > 0 else 0.)
        return res

    def calculate_coverage(self):
        """Fraction of samples assigned to each microstate."""
        res = []
        for i in range(self.n_microstate):
            res.append(np.argwhere(np.asarray(self.sequence) == i).shape[0] / self.n_sequence)
        return res
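
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustration only, not part of the original pipeline):
# temporal parameters of a synthetic 4-state sequence. The 250-sample window
# standing in for a 250 Hz sampling rate is an assumption for this example,
# not a value taken from the experiment.
def _demo_microstate_parameters():
    rng = np.random.default_rng(0)
    seq = rng.integers(0, 4, size=5000).tolist()  # synthetic label sequence
    mp = MicrostateParameter(seq, 4)
    print('duration (samples):', mp.calculate_duration())
    print('frequency (/window):', mp.calculate_frequency(250))
    print('coverage (fraction):', mp.calculate_coverage())
# ---------------------------------------------------------------------------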


class MicrostateLongRangeDependence:

    def __init__(self, sequence, n_microstate):
        self.sequence = sequence
        self.n_microstate = n_microstate
        self.n_sequence = len(sequence)

    def partition_state(self, k):
        """Enumerate the k-element subsets of the microstate labels. For even k,
        only the first half is kept, since a subset and its complement yield the
        same (sign-flipped) random walk."""
        comb = combinations([i for i in range(self.n_microstate)], k)
        length = nCr(self.n_microstate, k)
        res = []
        for index, item in enumerate(comb):
            if k % 2 == 0:
                if index == length / 2:
                    break
                else:
                    res.append(item)
            else:
                res.append(item)
        return res

    def embed_random_walk(self, k):
        """Map the label sequence to +/-1 random walks: +1 where the current
        label belongs to the k-element partition, -1 otherwise."""
        partitions = self.partition_state(k)
        np_sequence = np.asarray(self.sequence)
        res = {}
        for item in partitions:
            temp_x = np.ones(self.n_sequence) * -1
            for state in item:
                temp_x[np.where(np_sequence == state)[0]] = 1
            res[to_string(item)] = temp_x
        return res

    @staticmethod
    def detrend(embed_sequence, window_size):
        ## commented by Mengting
        # split into non-overlapping windows, remove the linear trend inside
        # each window, and return the RMS residual per window
        shape = (embed_sequence.shape[0] // window_size, window_size)
        temp = embed_sequence[:shape[0] * window_size].reshape(shape)
        window_size_index = np.arange(window_size)
        res = np.zeros(shape[0])
        for index, y in enumerate(temp):
            coeff = np.polyfit(window_size_index, y, 1)  # finding the trend through fitting
            y_hat = np.polyval(coeff, window_size_index)
            res[index] = np.sqrt(np.mean((y - y_hat)**2))  # subtracting the 'trend'
        return res

    @staticmethod
    def dfa(embed_sequence, segment_range, segment_density):
        ## this function is called in the dfa() function defined outside the class
        ## commented by Mengting
        y = np.cumsum(embed_sequence - np.mean(embed_sequence))
        scales = (2**np.arange(segment_range[0], segment_range[1], segment_density)).astype(int)
        f = np.zeros(len(scales))
        for index, window_size in enumerate(scales):
            f[index] = np.sqrt(np.mean(MicrostateLongRangeDependence.detrend(y, window_size)**2))
        # the core idea is to fit log2(f) against log2(scales); the slope is the Hurst exponent
        coeff = np.polyfit(np.log2(scales), np.log2(f), 1)
        return {'slope': coeff[0], 'fluctuation': f.tolist(), 'scales': scales.tolist()}

    @staticmethod
    def shanon_entropy(x, nx, ns):
        ## in our case, base 2 is applied
        p = np.zeros(ns)
        for t in range(nx):
            p[x[t]] += 1.0
        p /= nx
        return -np.sum(p[p > 0] * np.log2(p[p > 0]))

    @staticmethod
    def shanon_joint_entropy(x, y, nx, ny, ns):
        n = min(nx, ny)
        p = np.zeros((ns, ns))
        for t in range(n):
            p[x[t], y[t]] += 1.0
        p /= n
        return -np.sum(p[p > 0] * np.log2(p[p > 0]))

    @staticmethod
    def shanon_joint_entropy_k(x, nx, ns, k):
        # joint Shannon entropy of k consecutive samples (ns = n_k)
        p = np.zeros(tuple(k * [ns]))
        for t in range(nx - k):
            p[tuple(x[t:t + k])] += 1.0
        p /= (nx - k)
        h = -np.sum(p[p > 0] * np.log2(p[p > 0]))
        return h

    def mutual_information(self, lag):
        ### this part corresponds to the last line of Eq. (5) in Wenjun's paper
        lag = min(self.n_sequence, lag)
        res = np.zeros(lag)
        for time_lag in range(lag):
            nmax = self.n_sequence - time_lag  # number of samples usable at this time lag
            ## h represents H(Xt)
            h = self.shanon_entropy(self.sequence[:nmax], nmax, self.n_microstate)
            ## h_lag represents H(X(t+tau))
            h_lag = self.shanon_entropy(self.sequence[time_lag:time_lag + nmax], nmax, self.n_microstate)
            ## h_h_lag represents the joint entropy of the two segments above
            h_h_lag = self.shanon_joint_entropy(self.sequence[:nmax],
                                                self.sequence[time_lag:time_lag + nmax],
                                                nmax, nmax, self.n_microstate)
            res[time_lag] = h + h_lag - h_h_lag
        return res

    def partial_mutual_information(self, lag):
        p = np.zeros(lag)
        a = self.mutual_information(2)
        p[0], p[1] = a[0], a[1]
        for k in range(2, lag):
            # PAIF at lag k: 2*H(k) - H(k-1) - H(k+1), using joint entropies of
            # k consecutive samples (k, not the constant 'lag', as the order)
            h1 = MicrostateLongRangeDependence.shanon_joint_entropy_k(self.sequence, self.n_sequence, self.n_microstate, k)
            h2 = MicrostateLongRangeDependence.shanon_joint_entropy_k(self.sequence, self.n_sequence, self.n_microstate, k - 1)
            h3 = MicrostateLongRangeDependence.shanon_joint_entropy_k(self.sequence, self.n_sequence, self.n_microstate, k + 1)
            p[k] = 2 * h1 - h2 - h3
        return p

    def excess_entropy_rate(self, kmax):
        # linear fit of H(k) against k: the slope is the entropy rate and the
        # intercept is the excess entropy
        h = np.zeros(kmax)
        for k in range(kmax):
            h[k] = MicrostateLongRangeDependence.shanon_joint_entropy_k(self.sequence, self.n_sequence, self.n_microstate, k + 1)
        ks = np.arange(1, kmax + 1)
        entropy_rate, excess_entropy = np.polyfit(ks, h, 1)  # least-squares polynomial fit
        return entropy_rate, excess_entropy, ks

    @staticmethod
    def plot_mutual_information(data, title, subject, index, f):
        plt.semilogy(np.arange(0, data.shape[0] * f, f), data)
        plt.xlabel("time lag (ms)")
        plt.ylabel("AIF (bits)")
        plt.title(subject + "_" + title + "_" + str(index))
        plt.show()

    # @staticmethod
    # def plot_mutual_information(data, title):
    #     for i in range(len(data)):
    #         temp = np.asarray(data[i])
    #         plt.semilogy([j for j in range(2000)], temp[0:2000]/temp[0])
    #         # plt.semilogy([j for j in range(len(data[i][0:2000]))], data[i][0:2000])
    #         plt.xlabel("time lag (2ms)")
    #         plt.ylabel("AIF (bits)")
    #         plt.title(title)
    #         plt.show()
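
# ---------------------------------------------------------------------------
# Minimal sketch (illustration only): entropy rate and AIF of a synthetic
# i.i.d. 4-state sequence. For an i.i.d. sequence the entropy rate should
# approach log2(4) = 2 bits/sample and the AIF should drop to ~0 beyond lag 0.
# kmax=6 mirrors the history length used in the main script below.
def _demo_long_range_dependence():
    rng = np.random.default_rng(0)
    seq = rng.integers(0, 4, size=20000).tolist()
    lrd = MicrostateLongRangeDependence(seq, 4)
    entropy_rate, excess_entropy, ks = lrd.excess_entropy_rate(6)
    print('entropy rate (bits/sample):', entropy_rate)
    print('AIF, lags 0-4 (bits):', lrd.mutual_information(5))
# ---------------------------------------------------------------------------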


class MicrostateMarkov:

    def __init__(self, sequence, n_microstate):
        self.sequence = sequence
        self.n_microstate = n_microstate
        self.n_sequence = len(sequence)

    def transition_matrix_hat(self, k):
        ## In the Markov-probability section below, k is set to 1..5.
        ## condition_distribution has shape (n_microstate**k, n_microstate) and
        ## saves the conditional probabilities of the Markov process, where a
        ## row is the current length-k history and a column the (t+k)-th state.
        joint_distribution = np.zeros(self.n_microstate**k)
        condition_distribution = np.zeros((self.n_microstate**k, self.n_microstate))  # 7x7 for k=1
        sh = tuple(k * [self.n_microstate])
        d = {}
        # map every length-k label tuple to a row index
        for i, index in enumerate(np.ndindex(sh)):
            d[index] = i
        # count the combinations of sequence[t:t+k] and sequence[t+k]
        for t in range(self.n_sequence - k):
            idx = tuple(self.sequence[t:t + k])  # for k=1, e.g. sequence[t] = 3
            i = d[idx]
            j = self.sequence[t + k]             # e.g. sequence[t+k] = 4
            joint_distribution[i] += 1.          # histogram of length-k histories
            condition_distribution[i, j] += 1.
        joint_distribution /= joint_distribution.sum()
        p_row = condition_distribution.sum(axis=1, keepdims=True)
        p_row[p_row == 0] = 1.
        condition_distribution /= p_row
        return joint_distribution, condition_distribution

    @staticmethod
    def surrogate_markov_chain(joint_distribution, condition_distribution, k, n_microstate, n_surrogate):
        sh = tuple(k * [n_microstate])
        d = {}
        dinv = np.zeros((n_microstate**k, k))
        for i, idx in enumerate(np.ndindex(sh)):
            d[idx] = i
            dinv[i] = idx
        joint_distribution_sum = np.cumsum(joint_distribution)
        x = np.zeros(n_surrogate)
        # draw the initial k samples from the joint distribution of histories
        x[:k] = dinv[np.min(np.argwhere(np.random.rand() < joint_distribution_sum))]
        condition_distribution_sum = np.cumsum(condition_distribution, axis=1)
        for t in range(n_surrogate - k):
            idx = tuple(x[t:t + k])
            i = d[idx]
            # inverse-transform sampling: draw the next state from the
            # conditional CDF of row i
            j = np.min(np.argwhere(np.random.rand() < condition_distribution_sum[i]))
            x[t + k] = j
        return x.astype('int')

    def test_markov(self, order):
        """Chi-squared test of the Markov property of the given order
        (order 0 is delegated to test_markov0)."""
        if order == 0:
            return self.test_markov0()
        n = len(self.sequence)
        df = np.power(self.n_microstate, 2 + order) - 2 * np.power(self.n_microstate, 1 + order) \
             + np.power(self.n_microstate, order)
        temp = np.zeros(self.n_microstate)
        frequency = []
        # f_ijk..(n+2), f_ij..(n+1)*, f_*jk..(n+2), f_*jk...(n+1)*
        frequency.append(np.tile(temp, [self.n_microstate for i in range(order + 1)] + [1]))
        frequency.append(np.tile(temp, [self.n_microstate for i in range(order)] + [1]))
        frequency.append(np.tile(temp, [self.n_microstate for i in range(order)] + [1]))
        frequency.append(np.tile(temp, [self.n_microstate for i in range(order - 1)] + [1]))
        for t in range(n - order - 1):
            frequency_index = []
            for j in range(order + 2):
                frequency_index.append(self.sequence[t + j])
            frequency[0][tuple(frequency_index)] += 1
            frequency[1][tuple(frequency_index[:-1])] += 1
            frequency[2][tuple(frequency_index[1::])] += 1
            frequency[3][tuple(frequency_index[1:-1])] += 1
        T = 0.0
        for frequency_index in np.ndindex(frequency[0].shape):
            f = frequency[0][tuple(frequency_index)] * frequency[1][tuple(frequency_index[:-1])] * \
                frequency[2][tuple(frequency_index[1::])] * frequency[3][tuple(frequency_index[1:-1])]
            if f > 0:
                T += frequency[0][tuple(frequency_index)] * \
                     np.log((frequency[0][tuple(frequency_index)] * frequency[3][tuple(frequency_index[1:-1])])
                            / (frequency[1][tuple(frequency_index[:-1])] * frequency[2][tuple(frequency_index[1::])]))
        T *= 2.0
        p = chi2.sf(T, df)
        # print(T, df, p)
        return p

    def test_markov0(self):
        n = len(self.sequence)
        f_ij = np.zeros((self.n_microstate, self.n_microstate))
        f_i = np.zeros(self.n_microstate)
        f_j = np.zeros(self.n_microstate)
        for t in range(n - 1):
            i = self.sequence[t]
            j = self.sequence[t + 1]
            f_ij[i, j] += 1.0
            f_i[i] += 1.0
            f_j[j] += 1.0
        T = 0.0
        for i, j in np.ndindex(f_ij.shape):
            f = f_ij[i, j] * f_i[i] * f_j[j]
            if f > 0:
                T += (f_ij[i, j] * np.log((n * f_ij[i, j]) / (f_i[i] * f_j[j])))
        T *= 2.0
        df = (self.n_microstate - 1) * (self.n_microstate - 1)
        p = chi2.sf(T, df)
        return p

    def test_conditional_homogeneity(self, block_size, order, s=None):
        """Chi-squared test of conditional homogeneity across s blocks
        (block_size may also be a list of per-block lengths)."""
        if s is None:
            n = len(self.sequence)
            s = int(np.floor(float(n) / float(block_size)))
        df = (s - 1.) * (np.power(self.n_microstate, order + 1) - np.power(self.n_microstate, order))
        frequency = []
        # f_ijk..(n+2), f_ij..(n+1)*, f_*jk..(n+2), f_*jk...(n+1)*
        frequency.append(np.zeros(([s] + [self.n_microstate for i in range(order + 1)])))
        frequency.append(np.zeros(([s] + [self.n_microstate for i in range(order)])))
        frequency.append(np.zeros(([self.n_microstate for i in range(order + 1)])))
        frequency.append(np.zeros(([self.n_microstate for i in range(order)])))
        l_total = 0
        for i in range(s):
            if isinstance(block_size, list):
                l = block_size[i]
            else:
                l = block_size
            for j in range(l - order):
                frequency_index = [i]
                for k in range(order + 1):
                    frequency_index.append(self.sequence[l_total + j + k])
                frequency[0][tuple(frequency_index)] += 1.
                frequency[1][tuple(frequency_index[:-1])] += 1.
                frequency[2][tuple(frequency_index[1::])] += 1.
                frequency[3][tuple(frequency_index[1:-1])] += 1.
            l_total += l
        T = 0.0
        for frequency_index in np.ndindex(frequency[0].shape):
            f = frequency[0][tuple(frequency_index)] * frequency[1][tuple(frequency_index[:-1])] * \
                frequency[2][tuple(frequency_index[1::])] * frequency[3][tuple(frequency_index[1:-1])]
            if f > 0:
                T += frequency[0][tuple(frequency_index)] * \
                     np.log((frequency[0][tuple(frequency_index)] * frequency[3][tuple(frequency_index[1:-1])])
                            / (frequency[1][tuple(frequency_index[:-1])] * frequency[2][tuple(frequency_index[1::])]))
        T *= 2.0
        p = chi2.sf(T, df, loc=0, scale=1.)
        return p

    def conditionalHomogeneityTest(self, l):
        n = len(self.sequence)
        ns = self.n_microstate
        X = self.sequence
        r = int(np.floor(float(n) / float(l)))  # number of blocks
        nl = r * l
        f_ijk = np.zeros((r, ns, ns))
        f_ij = np.zeros((r, ns))
        f_jk = np.zeros((ns, ns))
        f_i = np.zeros(r)
        f_j = np.zeros(ns)
        # calculate f_ijk (time / block dependent transition matrix)
        for i in range(r):           # block index
            for ii in range(l - 1):  # position inside the current block
                j = X[i * l + ii]
                k = X[i * l + ii + 1]
                f_ijk[i, j, k] += 1.0
                f_ij[i, j] += 1.0
                f_jk[j, k] += 1.0
                f_i[i] += 1.0
                f_j[j] += 1.0
        # conditional homogeneity (Markov stationarity)
        T = 0.0
        for i, j, k in np.ndindex(f_ijk.shape):
            f = f_ijk[i, j, k] * f_j[j] * f_ij[i, j] * f_jk[j, k]
            if f > 0:
                T += (f_ijk[i, j, k] * np.log((f_ijk[i, j, k] * f_j[j]) / (f_ij[i, j] * f_jk[j, k])))
        T *= 2.0
        df = (r - 1) * (ns - 1) * ns
        # p = chi2test(T, df, alpha)
        p = chi2.sf(T, df, loc=0, scale=1)
        return p
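
# ---------------------------------------------------------------------------
# Minimal sketch (illustration only): fit a first-order transition matrix to a
# synthetic sequence, then generate a Markov surrogate of the same length.
# The surrogate preserves the k-th order transition probabilities but destroys
# longer-range structure, which is what the AIF comparison below relies on.
def _demo_markov_surrogate():
    rng = np.random.default_rng(0)
    seq = rng.integers(0, 4, size=10000).tolist()
    markov = MicrostateMarkov(seq, 4)
    joint_p, condition_p = markov.transition_matrix_hat(1)
    surrogate = MicrostateMarkov.surrogate_markov_chain(joint_p, condition_p, 1, 4, len(seq))
    print('Markovianity p-value (order 0):', markov.test_markov0())
    print('surrogate head:', surrogate[:10])
# ---------------------------------------------------------------------------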


def paired_t_test_condition_parameter(path, savepath, sheets, n_microstates, conditions):
    n_conditions = len(conditions)
    alphabet_string = list(string.ascii_uppercase)
    for sheet in sheets:
        res = []
        res_correct = []
        data = np.asarray(read_xlsx(path, sheet))
        for i in range(n_microstates):
            data_temp = data[:, i * n_conditions:(i + 1) * n_conditions]
            correct_p = []
            count = 0
            for comb in itertools.combinations([j for j in range(n_conditions)], 2):
                p = stats.ttest_rel(data_temp[:, comb[0]], data_temp[:, comb[1]])
                res.append([alphabet_string[i], conditions[comb[0]], conditions[comb[1]], p[0], p[1]])
                res_correct.append([alphabet_string[i], conditions[comb[0]], conditions[comb[1]]])
                correct_p.append(p[1])
                count += 1
            # Bonferroni-correct this microstate's pairwise p-values and append
            # them to the matching rows
            temp = multipletests(correct_p, method='bonferroni')[1].tolist()
            for j in range(len(res) - 1, len(res) - count - 1, -1):
                res_correct[j].append(temp[count - 1])
                count -= 1
        write_info(savepath, sheet, res)
        write_info(savepath, sheet + '_' + 'bonferroni', res_correct)


def paired_t_test_run_parameter(path, savepath, sheets, n_microstates, n_conditions, n_runs):
    for sheet in sheets:
        res = []
        res_correct = []
        data = np.asarray(read_xlsx(path, sheet))[:, n_microstates::]
        for i in range(0, n_microstates * n_conditions * n_runs, n_runs):
            # i already steps by n_runs, so the block is data[:, i:i+n_runs]
            data_temp = data[:, i:i + n_runs]
            correct_p = []
            for comb in itertools.combinations([j for j in range(n_runs)], 2):
                p = stats.ttest_rel(data_temp[:, comb[0]], data_temp[:, comb[1]])
                res.append(p)
                correct_p.append(p)
            # res_correct.extend(multipletests(correct_p, method='bonferroni')[1])
        write_info(savepath, sheet, res)
        # write_info(savepath, sheet+'_'+'bonferroni', res_correct)
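
# ---------------------------------------------------------------------------
# Minimal sketch (illustration only): paired t-test with Bonferroni correction
# on synthetic per-subject values, mirroring what the two functions above do
# sheet by sheet. All numbers are made up for the example.
def _demo_paired_t_test():
    rng = np.random.default_rng(0)
    cond_a = rng.normal(50, 5, size=24)           # e.g. mean duration, condition A
    cond_b = cond_a + rng.normal(2, 5, size=24)   # condition B, shifted
    t_stat, p_raw = stats.ttest_rel(cond_a, cond_b)
    p_corr = multipletests([p_raw, 0.04, 0.2], method='bonferroni')[1]
    print('t =', t_stat, 'raw p =', p_raw, 'corrected:', p_corr)
# ---------------------------------------------------------------------------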


def plot_run_parameter(path, sheets, row, n_microstates, n_runs, conditions):
    alphabet_string = list(string.ascii_uppercase)
    column = int(math.ceil(n_microstates / row))
    n_conditions = len(conditions)
    title = ['Microstate' + alphabet_string[i] for i in range(n_microstates)]
    block_size = n_microstates * n_runs
    for sheet in sheets:
        data = np.asarray(read_xlsx(path, sheet))[n_microstates::]
        for n_condition in range(n_conditions):
            fig, ax = plt.subplots(row, column)
            count = 0
            for i in range(row):
                for j in range(column):
                    ax[i][j].errorbar(data[count:count + n_runs],
                                      np.mean(data[count:count + n_runs], axis=0),
                                      yerr=stats.sem(data, axis=0))
                    count += n_runs


def label_diff(i, j, yoffset, text, y, ax):
    """Draw a significance bar labelled `text` between x-positions i and j."""
    y = max(y[i], y[j]) + yoffset
    props = {'arrowstyle': '-', 'linewidth': 2}
    ax.annotate(text, xy=(i, y + 0.003), zorder=1)
    ax.annotate('', xy=(i, y), xytext=(j, y), arrowprops=props)


def plot_block(mean, yerr, conditions, ax, title, ylabe, p_value=None, p_bar=None):
    """Errorbar plot of one microstate block; markers are colour-coded by the
    p-value of the comparison against the first condition (red: p <= 0.05,
    green: p < 0.01, blue: p < 0.005)."""
    n_conditions = len(conditions)
    if p_value is None:
        for index, value in enumerate(conditions):
            ax.errorbar(index, mean[index], yerr[index], fmt='.', marker='.',
                        color='black', capsize=5, capthick=2)
    else:
        for index, value in enumerate(conditions):
            if index == 0:
                ax.errorbar(index, mean[index], yerr[index], fmt='.', marker='.',
                            color='black', capsize=5, capthick=2)
            else:
                p = float(p_value[index - 1])
                if p > 0.05:
                    ax.errorbar(index, mean[index], yerr=yerr[index], fmt='.', marker='.',
                                color='black', capsize=5, capthick=2)
                if 0.01 <= p <= 0.05:
                    ax.errorbar(index, mean[index], yerr=yerr[index], fmt='.', marker='*',
                                mfc='red', mec='red', color='black', capsize=5, capthick=2)
                if 0.005 <= p < 0.01:
                    ax.errorbar(index, mean[index], yerr=yerr[index], fmt='.', marker='*',
                                mfc='green', mec='green', color='black', capsize=5, capthick=2)
                if p < 0.005:
                    ax.errorbar(index, mean[index], yerr=yerr[index], fmt='.', marker='*',
                                mfc='blue', mec='blue', color='black', capsize=5, capthick=2)
        # remaining entries of p_value are drawn as significance bars via p_bar
        for i in range(len(conditions) - 1, len(p_value)):
            p = float(p_value[i])
            if p < 0.05:
                p_str = "p=" + format(p, '.3f')
                label_diff(p_bar[i][0], p_bar[i][1], p_bar[i][2], p_str, mean + yerr, ax)
    ax.set_title(title)
    ax.set_xticks(list(range(n_conditions)))
    ax.set_xticklabels(conditions)
    ax.set_ylabel(ylabe)
    ax.set_ylim(top=max(mean + yerr) + 0.01)
    ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.2f'))
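
# ---------------------------------------------------------------------------
# Minimal sketch (illustration only) of the plot_block calling convention:
# `mean` and `yerr` are per-condition arrays; p_value holds the comparisons
# against the first condition followed by the pairs drawn as significance bars
# through p_bar (start index, end index, y offset). All values are made up.
def _demo_plot_block():
    fig, ax = plt.subplots()
    mean = np.array([0.30, 0.35, 0.33])
    yerr = np.array([0.02, 0.02, 0.02])
    plot_block(mean, yerr, ['training', 'practiceA', 'practiceB'],
               ax, 'MicrostateA', 'coverage (%)',
               p_value=['0.04', '0.20', '0.03'],
               p_bar=[None, None, (0, 2, 0.01)])
    plt.show()
# ---------------------------------------------------------------------------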


def plot_condition_paramter(path, savepath, sheets, row, n_microstates, conditions, ylabe,
                            path_t_test=None, p_value_bar=None):
    alphabet_string = list(string.ascii_uppercase)
    column = int(math.ceil(n_microstates / row))
    n_conditions = len(conditions)
    n_cr = int(nCr(n_conditions, 2))
    row_num = column * n_conditions
    title = ['Microstate' + alphabet_string[i] for i in range(n_microstates)]
    for sheet in sheets:
        data = np.asarray(read_xlsx(path, sheet))
        p_value = np.asarray(read_xlsx(path_t_test, sheet))[:, -1] if path_t_test else None
        fig, ax = plt.subplots(row, column, figsize=(14, 8))
        for i in range(row):
            for j in range(column):
                if column * i + j == n_microstates:
                    ax[i][j].axis('off')
                    break
                # pass mean and SEM across subjects, matching plot_block's signature
                block = data[:, row_num * i + j * n_conditions:row_num * i + (j + 1) * n_conditions]
                plot_block(block.mean(axis=0), stats.sem(block, axis=0), conditions,
                           ax[i][j], title[column * i + j], ylabe[sheet],
                           p_value[n_cr * (column * i + j):n_cr * (column * i + j + 1)] if p_value is not None else None,
                           p_value_bar)
        plt.subplots_adjust(hspace=0.4, wspace=0.5)
        plt.savefig(savepath + "//" + sheet + ".png", dpi=1200)
        plt.show()


def reorder_run_parameter(path, savepath, sheets, n_microstates, n_runs, n_conditions, order):
    for sheet in sheets:
        data = np.asarray(read_xlsx(path, sheet))
        data_ordered = np.zeros((data.shape[0], data.shape[1]))
        data_ordered[:, 0:n_microstates] = data[:, np.asarray(order)]
        block_size = n_microstates * n_runs
        for i in range(n_conditions):
            index = []
            for j in order:
                index.extend(list(range(j * n_runs + n_microstates + i * block_size,
                                        (j + 1) * n_runs + n_microstates + i * block_size)))
            data_ordered[:, i * block_size + n_microstates:(i + 1) * block_size + n_microstates] = data[:, index]
        write_info(savepath, sheet, data_ordered)


def reorder_condition_parameter(path, savepath, sheets, n_conditions, order):
    for sheet in sheets:
        data = np.asarray(read_xlsx(path, sheet))
        data_ordered = np.zeros((data.shape[0], data.shape[1]))
        for i, i_value in enumerate(order):
            data_ordered[:, i * n_conditions:(i + 1) * n_conditions] = \
                data[:, i_value * n_conditions:(i_value + 1) * n_conditions]
        write_info(savepath, sheet, data_ordered.tolist())
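
# ---------------------------------------------------------------------------
# Layout illustration (assumed from the indexing above): with n_conditions=3
# and order=[2, 0, 1], reorder_condition_parameter moves the 3-column block of
# microstate 2 to the front:
#   input  columns: [m0c0 m0c1 m0c2 | m1c0 m1c1 m1c2 | m2c0 m2c1 m2c2]
#   output columns: [m2c0 m2c1 m2c2 | m0c0 m0c1 m0c2 | m1c0 m1c1 m1c2]
# where mX is a microstate and cX a condition.
# ---------------------------------------------------------------------------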


def formulate_run_parameter(path, save_path, sheets, n_microstates, n_conditions, n_runs):
    """Regroup run-wise columns so that, within each condition, the runs of
    each microstate are contiguous (see the layout illustration below)."""
    block_size = n_microstates * n_runs
    for sheet in sheets:
        res = []
        data = read_xlsx(path, sheet)
        for row in data:
            temp_res = row[0:n_microstates]
            np_data = np.asarray(row[n_microstates::])
            for n_condition in range(n_conditions):
                index = []
                for j in range(n_microstates):
                    for i in range(n_condition * block_size, (n_condition + 1) * block_size, n_microstates):
                        index.append(i + j)
                temp_res.extend(np_data[index])
            res.append(temp_res)
        write_info(save_path, sheet, res)


def formulate_condition_parameter(path, save_path, sheets, n_microstates, n_conditions, n_runs):
    """Average the run-wise values into one value per condition and microstate."""
    for sheet in sheets:
        res = []
        data = read_xlsx(path, sheet)
        for row in data:
            temp = []
            temp_res = []
            for i in range(n_microstates, len(row), n_runs):
                temp.append(np.mean(row[i:i + n_runs]))
            for i in range(n_microstates):
                temp_res.append(row[i])
                for j in range(n_conditions):
                    temp_res.append(temp[i + j * n_microstates])
            res.append(temp_res)
        write_info(save_path, sheet, res)
formulate_condition_parameter(path, save_path, sheets, n_microstates, n_conditions, n_runs): for sheet in sheets: res = [] data = read_xlsx(path, sheet) for row in data: temp = [] temp_res = [] for i in range(n_microstates, len(row), n_runs): temp.append(np.mean(row[i:i+n_runs])) for i in range(n_microstates): temp_res.append(row[i]) for j in range(n_conditions): temp_res.append(temp[i + j*n_microstates]) res.append(temp_res) write_info(save_path, sheet, res) def batch_test_homogeneity_within_condition_within_subject(data, comb, order, n_microstate): res = {} multi_res = [] pool = Pool(8) for combination in comb: data_merged = [] block_size = [] for item in combination: data_merged.extend(data[item]) block_size.append(len(data[item])) multi_res.append( pool.apply_async(test_homogeneity, ([data_merged, n_microstate, block_size, order, len(combination)],))) pool.close() pool.join() for i in range(len(multi_res)): temp = '*'.join(comb[i]) res[temp] = multi_res[i].get() return res def batch_test_homogeneity_within_task_across_subject(comb, task, order, n_microstate, subject_path): res = {} multi_res = [] pool = Pool(8) for combination in comb: data_merged = [] block_size = [] for item in combination: data_temp = load_data(subject_path + "\\" + item + "_1_30_microstate_labels.json")[task] data_merged.extend(data_temp) block_size.append(len(data_temp)) multi_res.append( pool.apply_async(test_homogeneity, ([data_merged, n_microstate, block_size, order, len(combination)],))) pool.close() pool.join() for i in range(len(multi_res)): temp = '*'.join(comb[i]) res[temp] = multi_res[i].get() return res ## updated by Mengting Zhao on March 24 2022 for CTA anlaysis ## the function is ready for use def batch_surrogate_aif(joint_p, condition_p, n_markov, n_microstate, n_surrogate, n_lag, n_repetition): res = [] multi_res = [] # pool = Pool(7) for i in range(n_repetition): multi_res.append(surrogate_aif([joint_p, condition_p, n_markov, n_microstate, n_surrogate, n_lag])) # multi_res.append( # pool.apply_async(surrogate_aif, ([joint_p, condition_p, n_markov, n_microstate, n_surrogate, n_lag],)) # ) # pool.close() # pool.join() for i in range(len(multi_res)): # res.append(multi_res[i].get().tolist()) res.append(multi_res[i].tolist()) return res def surrogate_aif(para): j_p = para[0] c_p = para[1] n_markov = para[2] n_microstate = para[3] n_surrogate = para[4] n_lag = para[5] surrogate_data = MicrostateMarkov.surrogate_markov_chain(j_p, c_p, n_markov, n_microstate, n_surrogate) lrd = MicrostateLongRangeDependence(surrogate_data, n_microstate) return lrd.mutual_information(n_lag) ### updated by Mengting Zhao for CTA analysis on March 2 ### function ready for use on CTA data analysis def batch_calculate_aif(data_path, tasks, n_microstate, lag, window_size, window_step, method='aif'): nb_trials = len(tasks) res = OrderedDict() multi_res = [] if window_size == -1: # pool = Pool(11) for q in range(nb_trials): task = tasks[q] # res[task] = [] data = loadmat(data_path + "\\" + path_name[q] + "\\" + task + "_seq.mat")['EEG'].flatten() multi_res.append(aif([data, n_microstate, lag, method])) # multi_res.append(pool.apply_async(aif, ([data, n_microstate, lag, method],))) for i in range(len(multi_res)): # res[tasks[i]].append(multi_res[i].get().tolist()) res[tasks[i]].append(multi_res[i].tolist()) return res def aif(para): ## change between AIF and partial-AIF with the fourth parameter ## the key function for AIF computation is mutual_information( ) lrd = MicrostateLongRangeDependence(para[0], para[1]) if para[3] == 
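
# ---------------------------------------------------------------------------
# Minimal sketch (illustration only): AIF of an empirical sequence versus
# Markov surrogates, the comparison batch_surrogate_aif() is built for. All
# sizes are kept small so the example runs quickly.
def _demo_surrogate_aif():
    rng = np.random.default_rng(0)
    seq = rng.integers(0, 4, size=5000).tolist()
    empirical = aif([seq, 4, 50, 'aif'])
    joint_p, condition_p = MicrostateMarkov(seq, 4).transition_matrix_hat(1)
    surrogates = batch_surrogate_aif(joint_p, condition_p, 1, 4, 5000, 50, n_repetition=10)
    print('empirical AIF[1]:', empirical[1],
          'surrogate mean AIF[1]:', np.mean([s[1] for s in surrogates]))
# ---------------------------------------------------------------------------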


## updated by Mengting Zhao on March 24 2022 for CTA analysis
## function ready for use on preprocessed CTA data
def batch_calculate_dfa(data, n_microstate, n_partitions, segment_range, segment_density):
    res = []
    multi_res = []
    # pool = Pool(11)
    lrd = MicrostateLongRangeDependence(data, n_microstate)
    # embed_random_walk() maps each n_partitions-element label subset to a +/-1 walk
    embeding = lrd.embed_random_walk(n_partitions)
    for embed_sequence_index, embed_sequence in embeding.items():
        multi_res.append(dfa([embed_sequence, embed_sequence_index, segment_range, segment_density]))
        # multi_res.append(
        #     pool.apply_async(dfa, ([embed_sequence, embed_sequence_index, segment_range, segment_density],))
        # )
    # pool.close()
    # pool.join()
    for i in range(len(multi_res)):
        # res.append(multi_res[i].get())
        res.append(multi_res[i])
    return res


def dfa(para):
    embed_sequence = para[0]
    embed_sequence_index = para[1]
    segment_range = para[2]
    segment_density = para[3]
    res = {}
    res[embed_sequence_index] = MicrostateLongRangeDependence.dfa(embed_sequence, segment_range, segment_density)
    return res


def test_homogeneity(para):
    sequence = MicrostateMarkov(para[0], para[1])
    p = sequence.test_conditional_homogeneity(para[2], para[3], para[4])
    return p
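
# ---------------------------------------------------------------------------
# Minimal sketch (illustration only): Hurst-exponent estimate via DFA on an
# i.i.d. 7-state sequence, where the slope should be close to 0.5 (no
# long-range dependence). The segment range follows the convention used in the
# main script below.
def _demo_dfa():
    rng = np.random.default_rng(0)
    seq = rng.integers(0, 7, size=2**12)
    segment_range = [2, int(np.log2(len(seq)))]
    res = batch_calculate_dfa(seq, 7, 3, segment_range, 0.25)
    first_partition, first_result = next(iter(res[0].items()))
    print('partition', first_partition, 'H =', first_result['slope'])
# ---------------------------------------------------------------------------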


def conditions_tasks(conditions, tasks):
    res = {}
    for condition in conditions:
        res[condition] = []
        for task in tasks:
            if task.startswith(condition):
                res[condition].append(task)
    return res


def read_TLX_ratings(input_fname, sheet_name):
    # e.g. input_fname = r'F:\FlightSim_experiment_2021\Segmentation_NRC\4. Subjective Ratings\Inlook_rating_zhao.xlsx'
    ddd = read_xlsx(input_fname, sheet_name, row=True)
    res_score = []
    for row in ddd[1:]:
        data_temp = row[1:7]
        score = np.sum(data_temp) / 6  # mean of the six TLX sub-scales
        res_score.append(score)
    return res_score


if __name__ == '__main__':

    n_k = 7
    n_chs = 64
    data_dir = r'F:\FlightSim_experiment_2021\Concordia_preprocessed'
    n_conditions = 3
    condition_name = ['training', 'practiceA', 'practiceB']
    subjects = ['P01', 'P02', 'P03', 'P04', 'P05', 'P06', 'P07', 'P08', 'P09', 'P10', 'P11', 'P12',
                'P13', 'P14', 'P15', 'P16', 'P17', 'P18', 'P19', 'P20', 'P21', 'P22', 'P23', 'P24']
    path_name = ['trial_1', 'trial_2', 'trial_3', 'trial_4', 'trial_5', 'trial_6', 'trial_7',
                 'trial_8', 'trial_9', 'trial_10', 'trial_11', 'trial_12', 'trial_13', 'trial_14',
                 'trial_15', 'trial_16', 'trial_17', 'trial_18', 'trial_19', 'trial_20', 'trial_21', 'trial_22']
    lis_name = ['trial_1.set', 'trial_2.set', 'trial_3.set', 'trial_4.set', 'trial_5.set', 'trial_6.set',
                'trial_7.set', 'trial_8.set', 'trial_9.set', 'trial_10.set', 'trial_11.set', 'trial_12.set',
                'trial_13.set', 'trial_14.set', 'trial_15.set', 'trial_16.set', 'trial_17.set', 'trial_18.set',
                'trial_19.set', 'trial_20.set', 'trial_21.set', 'trial_22.set']
    tasks = ['training_1', 'training_2', 'training_3', 'training_4', 'training_5', 'training_6', 'training_7',
             'practiceA_1', 'practiceA_2', 'practiceA_3', 'practiceA_4', 'practiceA_5', 'practiceA_6',
             'practiceA_7', 'practiceA_8',
             'practiceB_1', 'practiceB_2', 'practiceB_3', 'practiceB_4', 'practiceB_5', 'practiceB_6', 'practiceB_7']
    nb_trials = len(path_name)

    ######## The computation of temporal parameters is independent from the AIF, DFA, and entropy rate analysis ########
    ######## calculate temporal parameters ##### tested with success on reordered sequences ########
    save_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters'
    save_path_excel = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters\condition_parameters.xlsx'
    f = 250          # sampling rate (Hz)
    f_m = 1000 / f   # ms per sample
    n_k = 7
    res = {}
    res_xlsx = {'duration': [], 'frequency': [], 'coverage': []}
    for subject in subjects:
        print(subject)
        subject_path = data_dir + "\\" + subject + "\\" + r'clean_data_matlab' + "\\" + r'data'
        res[subject] = {}
        # data = load_data(subject_path + "\\" + subject + "_microstate_labels.json")
        temp = [[], [], []]
        for q in range(nb_trials):
            task = tasks[q]
            data = loadmat(subject_path + "\\" + path_name[q] + "\\" + task + "_seq.mat")['EEG'].flatten()
            sequence = MicrostateParameter(data, n_k)
            duration = sequence.calculate_duration()     # 7 values per iteration
            frequency = sequence.calculate_frequency(f)  # 7 values per iteration
            coverage = sequence.calculate_coverage()     # 7 values per iteration
            duration = [i * f_m for i in duration]       # samples -> ms
            coverage = [i * 100 for i in coverage]       # fraction -> %
            res[subject][task] = {'duration': duration, 'frequency': frequency, 'coverage': coverage}
            temp[0].extend(duration)
            temp[1].extend(frequency)
            temp[2].extend(coverage)
        # duration and coverage are already scaled above, so append them as-is
        res_xlsx['duration'].append(temp[0])
        res_xlsx['frequency'].append(temp[1])
        res_xlsx['coverage'].append(temp[2])
    print('tested with success')

    ######## formulate task-wise and condition-wise parameters ########
    ######## tested with success on microstate sequences ########
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters\parameters_subjects.json'
    save_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters'
    n_k = 7
    paras = ['duration', 'frequency', 'coverage']
    subject_data = load_data(read_path_json)  # load once instead of per subject
    for pa in paras:
        res = np.zeros((len(condition_name), n_k * len(subjects)))
        for index, condition in enumerate(condition_name):
            temp_res = []
            for task in tasks:
                temp = []
                for subject in subjects:
                    if task.startswith(condition):
                        data = subject_data[subject][task][pa]
                        temp.extend(data)
                if temp:
                    temp_res.append(temp)
            np_data = np.asarray(temp_res)
            res[index, :] = np_data.mean(axis=0)
    print('finish')

    ######## formulate t-test data files ########
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters'
    save_path_json = pjoin(read_path_json, r't-test')  # results saved under the folder "t-test"
    paras = ['duration', 'frequency', 'coverage']
    n_k = 7
    length = n_k * len(subjects)
    # column indices of each eegmap: e.g. 'A' -> [0, 7, 14, ...], 'B' -> [1, 8, 15, ...]
    m_index = []
    for m in range(n_k):
        temp = []
        for n in range(m, length, n_k):
            temp.append(n)
        m_index.append(temp)
    for pa in paras:
        # for condition in condition_name:
        #     read_path = read_path_json + '\\' + condition + '_' + pa + '.mat'
        read_path = read_path_json + '\\' + 'condition' + '_' + pa + '.mat'
        # switch between 'condition' and the per-condition loop above
        data = loadmat(read_path)['EEG']
        for index, value in enumerate(m_index):
            # save_path = save_path_json + "\\" + condition + '_' + pa + '_m' + str(index) + '.mat'
            save_path = save_path_json + "\\" + 'condition' + '_' + pa + '_m' + str(index) + '.mat'
            savemat(save_path, {'EEG': data[:, value]})
    print('print')

    ######## conduct t-test on the formulated data ########
    ######## tested with success on microstate sequences ########
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\parameters\t-test'
    save_path_json = pjoin(read_path_json, r'results')
    n_k = 7
    paras = ['duration', 'frequency', 'coverage']
    alpha = 0.95
    sample_size = len(subjects)
    for pa in paras:
        for m in range(n_k):
            res = []
            res_t = []
            res_interval = [[], []]
            save_path_t = save_path_json + "\\" + 'condition' + '_' + pa + "_mt" + str(m) + '.mat'
            save_path_c = save_path_json + "\\" + 'condition' + '_' + pa + "_mc" + str(m) + '.mat'
            read_path = read_path_json + "\\" + 'condition' + '_' + pa + '_m' + str(m) + '.mat'
            data = loadmat(read_path)['EEG']
            for comb in itertools.combinations([i for i in range(data.shape[0])], 2):
                p = stats.ttest_rel(data[comb[0], :], data[comb[1], :])
                res.append(p[1])
                res_t.append(p[0])
                # 95% confidence interval of the paired difference
                diff = data[comb[0], :] - data[comb[1], :]
                diff_mean = np.mean(diff)
                diff_std = stats.sem(diff)
                ci = stats.t.interval(alpha, sample_size - 1, loc=diff_mean, scale=diff_std)
                res_interval[0].append(ci[0])
                res_interval[1].append(ci[1])
            savemat(save_path_t, {'EEG': np.asarray(res_t)})
            savemat(save_path_c, {'EEG': np.asarray(res_interval)})
    print('right')

    ######## calculate temporal parameters and write to Excel spreadsheets ########
    # NOTE (assumption): this block expects a list of configuration dicts
    # (tasks/paths per batch), not the list of parameter names used above;
    # `batch_configs` below is a stand-in for the original, missing config.
    batch_configs = [{'tasks': tasks,
                      'subject_path': data_dir,
                      'save_path_json': save_path_json,
                      'save_path_excel': save_path_excel}]
    for para in batch_configs:
        f = 250
        f_m = 1000 / f
        n_microstate = 7
        res = {}
        res_xlsx = {'duration': [], 'frequency': [], 'coverage': []}
        tasks = para['tasks']
        subject_path = para['subject_path']
        save_path_json = para['save_path_json']
        save_path_excel = para['save_path_excel']
        for subject in subjects:
            print(subject)
            res[subject] = {}
            # data = load_data(subject_path + "\\" + subject + "_microstate_labels.json")
            temp = [[], [], []]
            for task in tasks:
                data = loadmat(subject_path + "\\" + subject + "\\" + task + "_seq.mat")['EEG'].flatten()
                sequence = MicrostateParameter(data, n_microstate)
                duration = sequence.calculate_duration()
                frequency = sequence.calculate_frequency(f)
                coverage = sequence.calculate_coverage()
                duration = [i * f_m for i in duration]
                coverage = [i * 100 for i in coverage]
                res[subject][task] = {'duration': duration, 'frequency': frequency, 'coverage': coverage}
        json.dump(res, codecs.open(save_path_json, 'w', encoding='utf-8'),
                  separators=(',', ':'), sort_keys=True)

    ######## Part I: entropy rate analysis ########
    for lag in range(4, 7):
        for subject in subjects:
            subject_path = data_dir + "\\" + subject
            print(subject)
            res = {}
            for q in range(nb_trials):
                task = tasks[q]
                trial_path = subject_path + "\\" + r'clean_data_matlab' + "\\" + r'data' + "\\" + path_name[q]
                # data_path = pjoin(trial_path, lis_name[q])
                # EEG_epochs = mne.io.read_epochs_eeglab(data_path)
                data_path_save = trial_path + "\\" + tasks[q] + '_seq.mat'
                data = loadmat(data_path_save)['EEG'].flatten()
                lrd = MicrostateLongRangeDependence(data, n_k)
                entropy_rate, excess_entropy, index = lrd.excess_entropy_rate(lag)
                res[task] = {'entropy_rate': entropy_rate.tolist(),
                             'excess_entropy': excess_entropy.tolist(),
                             'index': index.tolist()}
            json.dump(res, codecs.open(data_dir + "\\microstates_sequences" + "\\entropy_rate"
                                       + "\\history_" + str(lag) + '\\' + subject + ".json",
                                       'w', encoding='utf-8'),
                      separators=(',', ':'), sort_keys=True)

    ######## condition-wise entropy rate computation on the microstate sequences ########
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\entropy_rate\history_6'  # history_6 as mentioned in the paper
    save_path_json = pjoin(read_path_json, r'condition_acrosstrials')
    conditions_res = np.zeros((len(condition_name), len(subjects)))
    for index, condition in enumerate(condition_name):
        res = []
        for task in tasks:
            temp = []
            if task.startswith(condition):
                for subject in subjects:
                    data = load_data(read_path_json + "\\" + subject + ".json")[task]['entropy_rate']
                    temp.append(data)
            if temp:
                res.append(temp)
        np_res = np.asarray(res)
        conditions_res[index, :] = np_res.mean(axis=0)
        savemat(save_path_json + "\\" + condition + ".mat", {'EEG': np_res})  # entropy rate for one condition
    savemat(save_path_json + "\\" + 'condition' + ".mat", {'EEG': conditions_res})  # entropy rates for all conditions

    ######## Part III: DFA (detrended fluctuation analysis) computation ########
    save_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\DFA'
    n_k = 7
    n_partitions = 3  # one part has 3 states, the other has 4
    for subject in subjects:
        print(subject)
        subject_path = data_dir + "\\" + subject
        data_path = subject_path + "\\" + r'clean_data_matlab' + "\\" + r'data'
        res = {}
        for q in range(nb_trials):
            task = tasks[q]
            data = loadmat(data_path + "\\" + path_name[q] + "\\" + task + "_seq.mat")['EEG'].flatten()
            segment_range = [2, int(np.log2(len(data)))]
            segment_density = 0.25
            res[task] = batch_calculate_dfa(data, n_k, n_partitions, segment_range, segment_density)
        json.dump(res, codecs.open(save_path_json + "\\" + subject + "_dfa.json", 'w', encoding='utf-8'),
                  separators=(',', ':'), sort_keys=True)
    print('stop')
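
    # ----------------------------------------------------------------------
    # Note on the DFA scales above (assumed from the code): window sizes run
    # from 2^2 samples up to 2^log2(N) (the full trial) in steps of 2^0.25,
    # and the Hurst exponent is the slope of log2(fluctuation) vs log2(scale).
    # ----------------------------------------------------------------------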

    ######## Section 2: generating surrogate data ########
    ######## tested with success on microstate sequences ########
    ## Markov probability computation, tested with success
    save_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\markovianity.json'
    res = {}
    n_k = 7
    for subject in subjects:
        print(subject)
        subject_path = data_dir + "\\" + subject
        data = load_data(subject_path + "\\" + r'microstates' + "\\" + subject + "_microstate_labels.json")
        res[subject] = {}
        for task in tasks:
            res[subject][task] = {}
            markov = MicrostateMarkov(data[task], n_k)
            for k in range(1, 6):
                joint_p, condition_p = markov.transition_matrix_hat(k)
                res[subject][task][k] = {'joint_p': joint_p.tolist(), 'condition_p': condition_p.tolist()}
    json.dump(res, codecs.open(save_path_json, 'w', encoding='utf-8'),
              separators=(',', ':'), sort_keys=True, indent=4)
    print(999)

    ######## generating surrogate data, tested with success ########
    n_k = 7
    for subject in subjects:
        print(subject)
        res = {}
        subject_path = data_dir + "\\" + subject + "\\" + "microstates"
        data_path = subject_path + "\\" + subject + "_microstate_labels.json"
        data = load_data(data_path)
        # the results are saved under each subject's folder
        save_path = subject_path + "\\" + subject + "_surrogate_microstate_labels.json"
        matrix = load_data(r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\markovianity.json')
        for task in tasks:
            n_surrogate = len(data[task])
            res[task] = {}
            for k in range(1, 6):
                condition_p = matrix[subject][task][str(k)]['condition_p']
                joint_p = matrix[subject][task][str(k)]['joint_p']
                res[task][k] = MicrostateMarkov.surrogate_markov_chain(joint_p, condition_p, k, n_k, n_surrogate).tolist()
        json.dump(res, codecs.open(save_path, 'w', encoding='utf-8'),
                  separators=(',', ':'), sort_keys=True, indent=4)
    print('check results')

    ######## DFA-related computation ########
    ###### tested with success
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\DFA'
    save_path_json = pjoin(read_path_json, 'formalated_empirical')
    n_k = 7
    n_paritions = 3
    comb = combinations([i for i in range(n_k)], n_paritions)
    for index, item in enumerate(comb):
        res = np.zeros((len(condition_name), len(subjects)))
        for c_index, condition in enumerate(condition_name):
            temp = []
            for task in tasks:
                slope = []
                if task.startswith(condition):
                    for subject in subjects:
                        data = load_data(read_path_json + "\\" + subject + "_dfa.json")
                        slope.append(data[task][index][to_string(item)]['slope'])
                if slope:
                    temp.append(slope)
            np_temp = np.asarray(temp)
            res[c_index, :] = np_temp.mean(axis=0)  # c_index: 0 (training), 1 (practiceA), 2 (practiceB)
            # results for one condition (one partition combination)
            savemat(save_path_json + "\\" + condition + "_" + to_string(item) + ".mat", {'EEG': np_temp})
        # results for all conditions (one partition combination)
        savemat(save_path_json + "\\" + "condition" + "_" + to_string(item) + ".mat", {'EEG': res})
    print(999)

    ######## plot the across-subject DFA results: Hurst exponent on empirical data (group level) ########
    read_path_json = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\DFA\formalated_empirical'
    save_path_fig = r'F:\FlightSim_experiment_2021\Concordia_preprocessed\microstates_sequences\DFA\figures'
    # NOTE (assumption): the two spreadsheets below are produced outside this
    # script and the file names here are placeholders; point them at the
    # across-subject Hurst exponents and the matching t-test p-values.
    read_path_excel = pjoin(read_path_json, 'dfa_hurst.xlsx')
    p_read_path_excel = pjoin(read_path_json, 'dfa_hurst_p.xlsx')
    c_t = conditions_tasks(condition_name, tasks)
    n_k = 7
    n_condition = len(condition_name)
    n_paritions = 3
    row = 2
    column = 5
    all_comb = set([i for i in range(n_k)])
    comb = [item for item in combinations([i for i in range(n_k)], n_paritions)]
    label_str = ['_1_10.png', '_11_20.png', '_21_30.png', '_31_35.png']
    label_index = [0, 10, 20, 30]
    for index, index_item in enumerate(label_index):
        for condition in condition_name:
            fig, ax = plt.subplots(row, column, figsize=(20, 8), dpi=600)
            for i in range(row):
                for j in range(column):
                    # the last page only has 5 of the 35 combinations
                    if i == 1 and index == len(label_index) - 1:
                        ax[i][j].axis('off')
                        continue
                    item = comb[i * column + j + index_item]
                    data = loadmat(read_path_json + "\\" + 'condition' + "_" + to_string(item) + ".mat")['EEG']
                    data = np.asarray(read_xlsx(read_path_excel, to_string(item)))
                    p_value = np.asarray(read_xlsx(p_read_path_excel, to_string(item)))[:, 1]
                    title = to_string(item) + " Vs. " + to_string(all_comb - set(item))
                    if row == 1:
                        plot_block(data.mean(axis=0), stats.sem(data, axis=0), condition_name, ax[j], title, 'H')
                        # plot_block(data.mean(axis=0), stats.sem(data, axis=0), c_t[condition], ax[j], title, 'H')
                    else:
                        # plot_block(data.mean(axis=0), stats.sem(data, axis=0), c_t[condition], ax[i][j], title, 'H')
                        plot_block(data.mean(axis=1), stats.sem(data, axis=1), condition_name, ax[i][j], title, 'H')
            plt.subplots_adjust(hspace=0.4, wspace=0.5)
            # plt.savefig(save_path_fig + "\\" + condition + "\\" + condition + "_1_10" + ".png", dpi=600)
            plt.savefig(save_path_fig + "\\" + 'condition' + label_str[index], dpi=600)
            # plt.show()
            plt.clf()
            plt.close()