%% Setup
% Preprocesses picture-word EEG data subject by subject and writes one
% sample-level CSV per subject into 'sample_data_picture'.
% Requires EEGLAB (with the biosig, ICLabel and FastICA tools used below) on
% the MATLAB path; butter/filtfilt are called by eeglab_butterworth.

% paths to data
eeg_path = fullfile('raw_data', 'eeg-pc', 'pictureword');
beh_path = fullfile('raw_data', 'stim-pc', 'data', 'pictureword');

% import eeglab (assumes eeglab has been added to path), e.g. addpath('C:/EEGLAB/eeglab2020_0')
[ALLEEG, EEG, CURRENTSET, ALLCOM] = eeglab;

% This script uses fastica algorithm for ICA, so FastICA needs to be on the path, e.g. addpath('C:/EEGLAB/FastICA_25')

% region of interest for trial-level ROI average
% (not referenced elsewhere in this script)
roi = {'TP7', 'CP5', 'P7', 'P5', 'P9', 'PO7', 'PO3', 'O1'};

% cutoff probability for identifying eye and muscle related ICA components with ICLabel
icl_cutoff = 0.85;

% sigma parameter for ASR (only referenced by the commented-out ASR call below)
asr_sigma = 20;

%% Clear output folders
delete(fullfile('sample_data_picture', '*.csv'))

%% Import lab book

% handle commas in vectors: inside every [...] span, commas are replaced by
% full stops so that the file can be parsed as a comma-delimited table
lab_book_file = fullfile('raw_data', 'stim-pc', 'participants.csv');
lab_book_raw_dat = fileread(lab_book_file);
[regstart, regend] = regexp(lab_book_raw_dat, '\[.*?\]');
for regmatch_i = 1:numel(regstart)
    str_i = lab_book_raw_dat(regstart(regmatch_i):regend(regmatch_i));
    str_i(str_i==',') = '.';
    lab_book_raw_dat(regstart(regmatch_i):regend(regmatch_i)) = str_i;
end

% write the fixed text to a temporary file
lab_book_fixed_file = fullfile('raw_data', 'stim-pc', 'participants_tmp.csv');
lab_book_fixed_conn = fopen(lab_book_fixed_file, 'w');
if lab_book_fixed_conn == -1
    error('Could not open %s for writing', lab_book_fixed_file)
end
% write the text verbatim: passing it as the format string would interpret
% any '%' or '\' in the lab book as format/escape sequences
fprintf(lab_book_fixed_conn, '%s', lab_book_raw_dat);
fclose(lab_book_fixed_conn);

lab_book_readopts = detectImportOptions(lab_book_fixed_file, 'VariableNamesLine', 1, 'Delimiter', ',');
% read subject ids as class character
% (VariableTypes is aligned with VariableNames, so index via VariableNames)
lab_book_readopts.VariableTypes{strcmp(lab_book_readopts.VariableNames, 'subj_id')} = 'char';
lab_book = readtable(lab_book_fixed_file, lab_book_readopts);

delete(lab_book_fixed_file)

%% Count the total number of excluded electrodes
n_bads = 0;
% one entry per subject (1-by-N, like the other per-subject records below)
n_bads_per_s = zeros(1, size(lab_book, 1));
for subject_nr = 1:size(lab_book, 1)
    % convert the lab book's "[...]" text (commas were swapped for full stops
    % above) into a MATLAB cell-array literal and evaluate it
    % NOTE(review): eval executes whatever text is in the lab book - only use
    % with trusted lab book files
    bad_channels = eval(strrep(strrep(strrep(lab_book.pw_bad_channels{subject_nr}, '[', '{'), ']', '}'), '.', ','));
    n_bads_per_s(subject_nr) = numel(bad_channels);
    n_bads = n_bads + numel(bad_channels);
end
% percentage of bad channels out of 64 channels per subject
perc_bads = n_bads / (64 * size(lab_book, 1)) * 100;

%% Import max electrode info
% this contains participants' maximal electrodes for the N170 from the
% localisation task
max_elecs = readtable('max_elecs.csv');

%% Iterate over subjects

% record trial exclusions
total_excl_trials_incorr = zeros(1, size(lab_book, 1));
total_excl_trials_rt = zeros(1, size(lab_book, 1));
% record the number of removed ICA components per subject
n_bad_ica = zeros(1, size(lab_book, 1));

% NB: the sanity checks inside this loop print a message and `break`, which
% stops processing of ALL remaining subjects, not just the current one
for subject_nr = 1:size(lab_book, 1)

    subject_id = lab_book.subj_id{subject_nr};
    fprintf('\n\n Subject Iteration %g/%g, ID: %s\n', subject_nr, size(lab_book, 1), subject_id)

    %% get subject-specific info from lab book
    exclude = lab_book.exclude(subject_nr);
    % NOTE(review): eval on lab book text, as above - trusted input only
    bad_channels = eval(strrep(strrep(strrep(lab_book.pw_bad_channels{subject_nr}, '[', '{'), ']', '}'), '.', ','));
    bad_trigger_indices = eval(strrep(lab_book.pw_bad_trigger_indices{subject_nr}, '.', ','));

    % add PO4 to bad channels, which seems to be consistently noisy, even when not marked as bad
    if sum(strcmp('PO4', bad_channels))==0
        bad_channels(numel(bad_channels)+1) = {'PO4'};
    end

    %% abort if excluded
    if exclude
        fprintf('Subject %s excluded. Preprocessing aborted.\n', subject_id)
        fprintf('Lab book note: %s\n', lab_book.note{subject_nr})
        continue
    end

    %% load participant's data

    % load raw eeg
    raw_datapath = fullfile(eeg_path, append(subject_id, '.bdf'));

    % abort if no EEG data collected yet
    if ~isfile(raw_datapath)
        fprintf('Subject %s skipped: no EEG data found\n', subject_id)
        continue
    end

    EEG = pop_biosig(raw_datapath, 'importevent', 'on', 'rmeventchan', 'off');

    % load behavioural: find files named "<subject_id>_*.csv" (case-insensitive)
    all_beh_files = dir(beh_path);
    beh_regex_matches = regexpi({all_beh_files.name}, append('^', subject_id, '_.+\.csv$'), 'match');
    regex_emptymask = cellfun('isempty', beh_regex_matches);
    beh_regex_matches(regex_emptymask) = [];
    subj_beh_files = cellfun(@(x) x{:}, beh_regex_matches, 'UniformOutput', false);

    % exactly one behavioural file is expected per subject
    % (compare the element count: `size(...) > 1` yields a vector that is
    % never all-true, so that form of the check could never fire)
    if numel(subj_beh_files) > 1
        fprintf('%g behavioural files found?\n', numel(subj_beh_files))
        break
    end
    if isempty(subj_beh_files)
        error('No behavioural file found for subject %s in %s', subject_id, beh_path)
    end

    beh_datapath = fullfile(beh_path, subj_beh_files{1});
    beh = readtable(beh_datapath);

    %% Set data features

    % set channel locations
    orig_locs = EEG.chanlocs;
    EEG.chanlocs = pop_chanedit(EEG.chanlocs, 'load', {'BioSemi64.loc', 'filetype', 'loc'}); % doesn't match order for the data

    % set channel types
    for ch_nr = 1:64
        EEG.chanlocs(ch_nr).type = 'EEG';
    end
    for ch_nr = 65:72
        EEG.chanlocs(ch_nr).type = 'EOG';
    end
    for ch_nr = 73:79
        EEG.chanlocs(ch_nr).type = 'MISC';
    end

    % clear the location fields of the non-EEG channels
    for ch_nr = 65:79
        EEG.chanlocs(ch_nr).theta = [];
        EEG.chanlocs(ch_nr).radius = [];
        EEG.chanlocs(ch_nr).sph_theta = [];
        EEG.chanlocs(ch_nr).sph_phi = [];
        EEG.chanlocs(ch_nr).X = [];
        EEG.chanlocs(ch_nr).Y = [];
        EEG.chanlocs(ch_nr).Z = [];
    end

    % change the order of channels in EEG.data to match the new order in chanlocs
    data_reordered = EEG.data;
    for ch_nr = 1:64
        % make sure the new eeg data array matches the listed order
        % (labels are matched case-insensitively against the original order)
        ch_lab = EEG.chanlocs(ch_nr).labels;
        orig_locs_idx = find(strcmpi({orig_locs.labels}, ch_lab));
        data_reordered(ch_nr, :) = EEG.data(orig_locs_idx, :);
    end
    EEG.data = data_reordered;

    % remove unused channels
    EEG = pop_select(EEG, 'nochannel', 69:79);

    % remove bad channels
    ur_chanlocs = EEG.chanlocs;  % store a copy of the full channel locations before removing (for later interpolation)
    bad_channels_indices = find(ismember(lower({EEG.chanlocs.labels}), lower(bad_channels)));
    EEG = pop_select(EEG, 'nochannel', bad_channels_indices);

    %% Identify events (trials) - getting the picture as the trigger instead of the word

    % make the sopen function happy (move its folder to the front of the path)
    x = fileparts( which('sopen') );
    rmpath(x);
    addpath(x,'-begin');

    % build the events manually from the raw eeg file (pop_biosig removes event offsets)
    % NB: this assumes no resampling between reading the BDF file and now
    bdf_dat = sopen(raw_datapath, 'r', [0, Inf], 'OVERFLOWDETECTION:OFF');
    event_types = bdf_dat.BDF.Trigger.TYP;
    event_pos = bdf_dat.BDF.Trigger.POS;
    event_time = EEG.times(event_pos);
    sclose(bdf_dat);
    clear bdf_dat;

    triggers = struct(...
        'off', 0,...
        'A1', 1,...
        'A2', 2,...
        'practice', 25,...
        'image', 99);

    % add 61440 to each trigger value (because of number of bits in pp)
    trigger_labels = fieldnames(triggers);
    for field_nr = 1:numel(trigger_labels)
        triggers.(trigger_labels{field_nr}) = triggers.(trigger_labels{field_nr}) + 61440;
    end

    % remove the first trigger if it is at time 0 and has a value which isn't a recognised trigger
    if (event_time(1)==0 && ~ismember(event_types(1), [triggers.off, triggers.A1, triggers.A2, triggers.practice, triggers.image]))
        event_types(1) = [];
        event_pos(1) = [];
        event_time(1) = [];
    end

    % remove the new first trigger if it has a value of off
    if (event_types(1)==triggers.off)
        event_types(1) = [];
        event_pos(1) = [];
        event_time(1) = [];
    end

    % check every second trigger is an offset
    % (isequal also handles a differing number of triggers, where an
    % elementwise comparison would throw a size-mismatch error instead)
    offset_locs = find(event_types==triggers.off);
    if ~isequal(offset_locs(:)', 2:2:numel(event_types))
        fprintf('Expected each second trigger to be an off?')
        break
    end

    % check every first trigger is non-zero
    onset_locs = find(event_types~=triggers.off);
    if ~isequal(onset_locs(:)', 1:2:numel(event_types))
        fprintf('Expected each first trigger to be an event?')
        break
    end

    % create the events struct manually (one event per onset/offset pair)
    events_onset_types = event_types(onset_locs);
    events_onsets = event_pos(onset_locs);
    events_offsets = event_pos(offset_locs);
    events_durations = events_offsets - events_onsets;

    EEG.event = struct();
    for event_nr = 1:numel(events_onsets)
        EEG.event(event_nr).type = events_onset_types(event_nr);
        EEG.event(event_nr).latency = events_onsets(event_nr);
        EEG.event(event_nr).offset = events_offsets(event_nr);
        EEG.event(event_nr).duration = events_durations(event_nr);
    end

    % copy the details over to urevent
    EEG.urevent = EEG.event;

    % record the urevent
    for event_nr = 1:numel(events_onsets)
        EEG.event(event_nr).urevent = event_nr;
    end

    % remove bad events recorded in lab book (misfired triggers)
    EEG = pop_editeventvals(EEG, 'delete', find(ismember([EEG.event.urevent], bad_trigger_indices)));

    % remove practice trials
    EEG = pop_editeventvals(EEG, 'delete', find(ismember([EEG.event.type], triggers.practice)));

    % remove triggers for words
    EEG = pop_editeventvals(EEG, 'delete', find(ismember([EEG.event.type], [triggers.A1, triggers.A2])));

    % remove triggers for all but the last 200 triggers (i.e., remove the practice images)
    EEG = pop_editeventvals(EEG, 'delete', fliplr( 1:numel([EEG.event.type]) ) > 200);

    % check the events make sense
    if sum(~ismember([EEG.event.type], triggers.image)) > 0
        fprintf('Unexpected trial types?\n')
        break
    end
    if numel({EEG.event.type})~=200
        fprintf('%g trial triggers detected?\n', numel({EEG.event.type}))
        break
    end

    % add the trials' onsets, offsets, durations, and triggers to the behavioural data
    % NOTE(review): rows of beh are paired with EEG.event by position, so this
    % assumes beh has one row per remaining event, in the same order - confirm
    beh.event = zeros(size(beh, 1), 1);
    beh.latency = zeros(size(beh, 1), 1);
    for row_nr = 1:size(beh, 1)
        cond_i = beh.condition(row_nr);
        beh.event(row_nr) = triggers.(cond_i{:});
        beh.latency(row_nr) = EEG.event(row_nr).latency;
        beh.offset(row_nr) = EEG.event(row_nr).offset;
        beh.duration(row_nr) = EEG.event(row_nr).duration;
        beh.duration_ms(row_nr) = (EEG.event(row_nr).duration * 1000/EEG.srate) - 500;  % minus 500 as event timer starts at word presentation, but rt timer starts once word turns green
    end

    % record trial numbers in EEG.event
    for row_nr = 1:size(beh, 1)
        EEG.event(row_nr).trl_nr = beh.trl_nr(row_nr);
    end

    %% Remove segments of data that fall outside of blocks

    % record block starts (first row, and any row whose block_nr is one
    % higher than the previous row's)
    beh.is_block_start(1) = 1;
    for row_nr = 2:size(beh, 1)
        beh.is_block_start(row_nr) = beh.block_nr(row_nr) - beh.block_nr(row_nr-1) == 1;
    end

    % record block ends (last row, and any row followed by a row whose
    % block_nr is one higher)
    beh.is_block_end(size(beh, 1)) = 1;
    for row_nr = 1:(size(beh, 1)-1)
        beh.is_block_end(row_nr) = beh.block_nr(row_nr+1) - beh.block_nr(row_nr) == 1;
    end

    % record block boundaries (first start and last end point of each block, with 1 seconds buffer)
    beh.block_boundary = zeros(size(beh, 1), 1);
    for row_nr = 1:size(beh, 1)
        if beh.is_block_start(row_nr)
            beh.block_boundary(row_nr) = beh.latency(row_nr) - (EEG.srate * 1);
        elseif beh.is_block_end(row_nr)
            beh.block_boundary(row_nr) = beh.offset(row_nr) + (EEG.srate * 1);
        end
    end

    % get the boundary indices in required format (start1, end1; start2, end2; start3, end3)
    block_boundaries = reshape(beh.block_boundary(beh.block_boundary~=0), 2, [])';

    % remove anything outside of blocks (boundaries converted from samples to seconds)
    EEG = pop_select(EEG, 'time', (block_boundaries / EEG.srate));

    %% Trial selection

    % include only correct responses
    beh_filt_acc_only = beh(beh.acc==1, :);
    excl_trials_incorr = size(beh, 1)-size(beh_filt_acc_only, 1);
    total_excl_trials_incorr(subject_nr) = excl_trials_incorr;
    fprintf('Lost %g trials to incorrect responses\n', excl_trials_incorr)

    % include only responses with RTs of at most 1500 ms
    % NOTE(review): only the upper bound is applied here; no lower RT bound
    % is enforced
    beh_filt = beh_filt_acc_only(beh_filt_acc_only.rt<=1500, :);
    excl_trials_rt = size(beh_filt_acc_only, 1)-size(beh_filt, 1);
    total_excl_trials_rt(subject_nr) = excl_trials_rt;
    fprintf('Lost %g trials to RTs above 1500\n', excl_trials_rt)

    fprintf('Lost %g trials in total to behavioural data\n', size(beh, 1)-size(beh_filt, 1))

    % filter the events structure
    discarded_trls = beh.trl_nr(~ismember(beh.trl_nr, beh_filt.trl_nr));
    discarded_events_indices = [];
    % (collect in a for loop, as [EEG.event.trl_nr] would remove missing data)
    for event_nr = 1:size(EEG.event, 2)
        if ismember(EEG.event(event_nr).trl_nr, discarded_trls)
            discarded_events_indices = [discarded_events_indices, event_nr];
        end
    end

    EEG = pop_editeventvals(EEG, 'delete', discarded_events_indices);

    % check the discarded trials are the expected length
    if numel(discarded_trls) ~= size(beh, 1)-size(beh_filt, 1)
        fprintf('Mismatch between behavioural data and EEG events in the number of trials to discard?')
        break
    end

    % check the sizes match
    if numel([EEG.event.trl_nr]) ~= size(beh_filt, 1)
        fprintf('Inconsistent numbers of trials between events structure and behavioural data after discarding trials?')
        break
    end

    % check the trl numbers match
    if any([EEG.event.trl_nr]' ~= beh_filt.trl_nr)
        fprintf('Trial IDs mmismatch between events structure and behavioural data after discarding trials?')
        break
    end

    %% Rereference, downsample, and filter

    % rereference
    EEG = pop_reref(EEG, []);

    % downsample
    EEG = pop_resample(EEG, 512);

    % filter
    % EEG = eeglab_butterworth(EEG, 0.5, 40, 4, 1:size(EEG.chanlocs, 2));  % preregistered filter
    EEG = eeglab_butterworth(EEG, 0.1, 40, 4, 1:size(EEG.chanlocs, 2));  % filter with lower highpass

    %% ICA

    % apply ASR
    %EEG_no_asr = EEG;
    %EEG = clean_asr(EEG, asr_sigma, [], [], [], [], [], [], [], [], 1024);  % The last number is available memory in mb, needed for reproducibility
    % ASR is not used in this exploratory analysis

    rng(3101)  % set seed for reproducibility
    EEG = pop_runica(EEG, 'icatype', 'fastica', 'approach', 'symm');

    % classify components with ICLabel
    EEG = iclabel(EEG);

    % store results for easy indexing
    icl_res = EEG.etc.ic_classification.ICLabel.classifications;
    icl_classes = EEG.etc.ic_classification.ICLabel.classes;

    % identify and remove artefact components (Eye or Muscle probability at
    % or above the cutoff)
    artefact_comps = find(icl_res(:, strcmp(icl_classes, 'Eye')) >= icl_cutoff | icl_res(:, strcmp(icl_classes, 'Muscle')) >= icl_cutoff);
    fprintf('Removing %g artefact-related ICA components\n', numel(artefact_comps))
    n_bad_ica(subject_nr) = numel(artefact_comps);
    %EEG_no_iclabel = EEG;
    EEG = pop_subcomp(EEG, artefact_comps);

    %% Interpolate bad channels

    % give the original chanlocs structure so EEGLAB interpolates the missing electrode(s)
    if numel(bad_channels)>0
        EEG = pop_interp(EEG, ur_chanlocs);
    end

    %% Get sample level microvolts for exploratory analysis checking image ERPs

    disp('Getting sample-level results...')

    % resample to 256 Hz
    EEG_256 = pop_resample(EEG, 256);

    % get epochs of low-srate data
    EEG_epo_256 = pop_epoch(EEG_256, {triggers.image}, [-0.25, 1.8]);

    % remove baseline
    EEG_epo_256 = pop_rmbase(EEG_epo_256, [-200, 0]);

    % pre-allocate the table (one row per channel x timepoint x trial)
    var_names = {'subj_id', 'stim_grp', 'resp_grp', 'item_nr', 'ch_name', 'time', 'uV'};
    var_types = {'string', 'string', 'string', 'double', 'string', 'double', 'double'};
    nrows = 64 * size(EEG_epo_256.times, 2) * size(beh_filt, 1);
    sample_res = table('Size',[nrows, numel(var_names)], 'VariableTypes',var_types, 'VariableNames',var_names);

    % NOTE(review): these columns are tiled from the per-trial vectors, which
    % only lines up with the channel/time/trial ordering used below if the
    % values are constant within a subject - confirm
    sample_res.subj_id = repmat(beh_filt.subj_id, 64*size(EEG_epo_256.times, 2), 1);
    sample_res.stim_grp = repmat(beh_filt.stim_grp, 64*size(EEG_epo_256.times, 2), 1);
    sample_res.resp_grp = repmat(beh_filt.resp_grp, 64*size(EEG_epo_256.times, 2), 1);

    % get the 64 channel eeg data as an array
    eeg_arr = EEG_epo_256.data(1:64, :, :);

    % a vector of all eeg data
    eeg_vec = squeeze(reshape(eeg_arr, 1, 1, []));

    % array and vector of the channel labels for each value in EEG.data
    channel_labels_arr = cell(size(eeg_arr));
    channel_label_lookup = {EEG_epo_256.chanlocs.labels};
    for chan_nr = 1:size(eeg_arr, 1)
        channel_labels_arr(chan_nr, :, :) = repmat(channel_label_lookup(chan_nr), size(channel_labels_arr, 2), size(channel_labels_arr, 3));
    end
    channel_labels_vec = squeeze(reshape(channel_labels_arr, 1, 1, []));

    % array and vector of the timepoints for each value in EEG.data
    times_arr = zeros(size(eeg_arr));
    times_lookup = EEG_epo_256.times;
    for time_idx = 1:size(eeg_arr, 2)
        times_arr(:, time_idx, :) = repmat(times_lookup(time_idx), size(times_arr, 1), size(times_arr, 3));
    end
    times_vec = squeeze(reshape(times_arr, 1, 1, []));

    % array and vector of the item numbers (one per trial)
    trials_arr = zeros(size(eeg_arr));
    trials_lookup = beh_filt.item_nr;
    for trl_idx = 1:size(eeg_arr, 3)
        trials_arr(:, :, trl_idx) = repmat(trials_lookup(trl_idx), size(trials_arr, 1), size(trials_arr, 2));
    end
    trials_vec = squeeze(reshape(trials_arr, 1, 1, []));

    % store sample-level results in the table
    sample_res.ch_name = channel_labels_vec;
    sample_res.item_nr = trials_vec;
    sample_res.time = times_vec;
    sample_res.uV = eeg_vec;

    % look up and store some info about the trials
    trial_info_lookup = beh_filt(:, {'item_nr', 'condition', 'image', 'string'});
    sample_res = outerjoin(sample_res, trial_info_lookup, 'MergeKeys', true);

    % sort by time, channel, item_nr
    sample_res = sortrows(sample_res, {'time', 'ch_name', 'item_nr'});

    %% save the results
    disp('Saving results...')
    writetable(sample_res, fullfile('sample_data_picture', [subject_id, '.csv']));

end

fprintf('\nFinished preprocessing picture-word data!\n')

%% Functions

% custom function for applying a Butterworth filter to EEGLAB data
%   EEG     - EEGLAB dataset struct (reads EEG.srate, filters EEG.data)
%   low     - lower passband edge in Hz
%   high    - upper passband edge in Hz
%   order   - order argument passed to butter
%   chanind - indices of the rows (channels) of EEG.data to filter
% Returns the EEG struct with the selected rows of EEG.data replaced by the
% zero-phase (filtfilt) filtered data, stored as single precision.
function EEG = eeglab_butterworth(EEG, low, high, order, chanind)
    fprintf('Applying Butterworth filter between %g and %g Hz (order of %g)\n', low, high, order)
    % create filter (band edges normalised by the Nyquist frequency)
    [b, a] = butter(order, [low, high]/(EEG.srate/2));
    % apply to data (requires transposition for filtfilt)
    data_trans = single(filtfilt(b, a, double(EEG.data(chanind, :)')));
    EEG.data(chanind, :) = data_trans';
end

% custom function for finding the closest timepoint in an EEG dataset
%   EEG  - EEGLAB dataset struct (reads EEG.times)
%   time - target time, in the same units as EEG.times
% Returns the index into EEG.times of the closest timepoint (idx) and the
% time at that index (closesttime).
function [idx, closesttime] = eeglab_closest_time(EEG, time)
    dists = abs(EEG.times - time);
    idx = find(dists == min(dists));
    % in the unlikely case there are two equidistant times, select one randomly
    if numel(idx) > 1
        fprintf('Two equidistant times! Selecting one randomly.')
        idx = idx(randperm(numel(idx)));
        idx = idx(1);
    end
    closesttime = EEG.times(idx);
end