123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516 |
- %% Setup
- % paths to data
- eeg_path = fullfile('raw_data', 'eeg-pc', 'pictureword');
- beh_path = fullfile('raw_data', 'stim-pc', 'data', 'pictureword');
- % import eeglab (assumes eeglab has been added to path), e.g.
- addpath('C:/EEGLAB/eeglab2020_0')
- [ALLEEG, EEG, CURRENTSET, ALLCOM] = eeglab;
- % This script uses fastica algorithm for ICA, so FastICA needs to be on the path, e.g.
- addpath('C:/EEGLAB/FastICA_25')
- % region of interest for trial-level ROI average
- roi = {'TP7', 'CP5', 'P7', 'P5', 'P9', 'PO7', 'PO3', 'O1'};
- % cutoff probability for identifying eye and muscle related ICA components with ICLabel
- icl_cutoff = 0.85;
- % sigma parameter for ASR
- asr_sigma = 20;
- %% Clear output folders
- delete(fullfile('sample_data_picture', '*.csv'))
- %% Import lab book
- % handle commas in vectors
- lab_book_file = fullfile('raw_data', 'stim-pc', 'participants.csv');
- lab_book_raw_dat = fileread(lab_book_file);
- [regstart, regend] = regexp(lab_book_raw_dat, '\[.*?\]');
- for regmatch_i = 1:numel(regstart)
- str_i = lab_book_raw_dat(regstart(regmatch_i):regend(regmatch_i));
- str_i(str_i==',') = '.';
- lab_book_raw_dat(regstart(regmatch_i):regend(regmatch_i)) = str_i;
- end
- lab_book_fixed_file = fullfile('raw_data', 'stim-pc', 'participants_tmp.csv');
- lab_book_fixed_conn = fopen(lab_book_fixed_file, 'w');
- fprintf(lab_book_fixed_conn, lab_book_raw_dat);
- fclose(lab_book_fixed_conn);
- lab_book_readopts = detectImportOptions(lab_book_fixed_file, 'VariableNamesLine', 1, 'Delimiter', ',');
- % read subject ids as class character
- lab_book_readopts.VariableTypes{strcmp(lab_book_readopts.SelectedVariableNames, 'subj_id')} = 'char';
- lab_book = readtable(lab_book_fixed_file, lab_book_readopts);
- delete(lab_book_fixed_file)
- %% Count the total number of excluded electrodes
- n_bads = 0;
- n_bads_per_s = zeros(size(lab_book, 1), 0);
- for subject_nr = 1:size(lab_book, 1)
- bad_channels = eval(strrep(strrep(strrep(lab_book.pw_bad_channels{subject_nr}, '[', '{'), ']', '}'), '.', ','));
- n_bads_per_s(subject_nr) = numel(bad_channels);
- n_bads = n_bads + numel(bad_channels);
- end
- perc_bads = n_bads / (64 * size(lab_book, 1)) * 100;
- %% Import max electrode info
- % this contains participants' maximal electrodes for the N170 from the
- % localisation task
- max_elecs = readtable('max_elecs.csv');
- %% Iterate over subjects
- % record trial exclusions
- total_excl_trials_incorr = zeros(1, size(lab_book, 1));
- total_excl_trials_rt = zeros(1, size(lab_book, 1));
- n_bad_ica = zeros(size(lab_book, 1), 0);
- for subject_nr = 1:size(lab_book, 1)
-
- subject_id = lab_book.subj_id{subject_nr};
- fprintf('\n\n Subject Iteration %g/%g, ID: %s\n', subject_nr, size(lab_book, 1), subject_id)
-
- %% get subject-specific info from lab book
- exclude = lab_book.exclude(subject_nr);
- bad_channels = eval(strrep(strrep(strrep(lab_book.pw_bad_channels{subject_nr}, '[', '{'), ']', '}'), '.', ','));
- bad_trigger_indices = eval(strrep(lab_book.pw_bad_trigger_indices{subject_nr}, '.', ','));
-
- % add PO4 to bad channels, which seems to be consistently noisy, even when not marked as bad
- if sum(strcmp('PO4', bad_channels))==0
- bad_channels(numel(bad_channels)+1) = {'PO4'};
- end
- %% abort if excluded
-
- if exclude
- fprintf('Subject %s excluded. Preprocessing aborted.\n', subject_id)
- fprintf('Lab book note: %s\n', lab_book.note{subject_nr})
- continue
- end
-
- %% load participant's data
-
- % load raw eeg
- raw_datapath = fullfile(eeg_path, append(subject_id, '.bdf'));
-
- % abort if no EEG data collected yet
- if ~isfile(raw_datapath)
- fprintf('Subject %s skipped: no EEG data found\n', subject_id)
- continue
- end
-
- EEG = pop_biosig(raw_datapath, 'importevent', 'on', 'rmeventchan', 'off');
-
- % load behavioural
- all_beh_files = dir(beh_path);
- beh_regex_matches = regexpi({all_beh_files.name}, append('^', subject_id, '_.+\.csv$'), 'match');
- regex_emptymask = cellfun('isempty', beh_regex_matches);
- beh_regex_matches(regex_emptymask) = [];
- subj_beh_files = cellfun(@(x) x{:}, beh_regex_matches, 'UniformOutput', false);
-
- if size(subj_beh_files)>1
- fprintf('%g behavioural files found?\n', size(subj_beh_files))
- break
- end
-
- beh_datapath = fullfile(beh_path, subj_beh_files{1});
- beh = readtable(beh_datapath);
-
- %% Set data features
-
- % set channel locations
-
- orig_locs = EEG.chanlocs;
- EEG.chanlocs = pop_chanedit(EEG.chanlocs, 'load', {'BioSemi64.loc', 'filetype', 'loc'}); % doesn't match order for the data
-
- % set channel types
- for ch_nr = 1:64
- EEG.chanlocs(ch_nr).type = 'EEG';
- end
-
- for ch_nr = 65:72
- EEG.chanlocs(ch_nr).type = 'EOG';
- end
-
- for ch_nr = 73:79
- EEG.chanlocs(ch_nr).type = 'MISC';
- end
-
- for ch_nr = 65:79
- EEG.chanlocs(ch_nr).theta = [];
- EEG.chanlocs(ch_nr).radius = [];
- EEG.chanlocs(ch_nr).sph_theta = [];
- EEG.chanlocs(ch_nr).sph_phi = [];
- EEG.chanlocs(ch_nr).X = [];
- EEG.chanlocs(ch_nr).Y = [];
- EEG.chanlocs(ch_nr).Z = [];
- end
-
- % change the order of channels in EEG.data to match the new order in chanlocs
- data_reordered = EEG.data;
- for ch_nr = 1:64
- % make sure the new eeg data array matches the listed order
- ch_lab = EEG.chanlocs(ch_nr).labels;
- orig_locs_idx = find(strcmp(lower({orig_locs.labels}), lower(ch_lab)));
- data_reordered(ch_nr, :) = EEG.data(orig_locs_idx, :);
- end
- EEG.data = data_reordered;
-
- % remove unused channels
- EEG = pop_select(EEG, 'nochannel', 69:79);
-
- % remove bad channels
- ur_chanlocs = EEG.chanlocs; % store a copy of the full channel locations before removing (for later interpolation)
- bad_channels_indices = find(ismember(lower({EEG.chanlocs.labels}), lower(bad_channels)));
- EEG = pop_select(EEG, 'nochannel', bad_channels_indices);
-
- %% Identify events (trials) - getting the picture as the trigger instead of the word
-
- % make the sopen function happy
- x = fileparts( which('sopen') );
- rmpath(x);
- addpath(x,'-begin');
-
- % build the events manually from the raw eeg file (pop_biosig removes event offsets)
- % NB: this assumes no resampling between reading the BDF file and now
- bdf_dat = sopen(raw_datapath, 'r', [0, Inf], 'OVERFLOWDETECTION:OFF');
- event_types = bdf_dat.BDF.Trigger.TYP;
- event_pos = bdf_dat.BDF.Trigger.POS;
- event_time = EEG.times(event_pos);
- sclose(bdf_dat);
- clear bdf_dat;
-
- triggers = struct(...
- 'off', 0,...
- 'A1', 1,...
- 'A2', 2,...
- 'practice', 25,...
- 'image', 99);
-
- % add 61440 to each trigger value (because of number of bits in pp)
- trigger_labels = fieldnames(triggers);
- for field_nr = 1:numel(trigger_labels)
- triggers.(trigger_labels{field_nr}) = triggers.(trigger_labels{field_nr}) + 61440;
- end
-
- % remove the first trigger if it is at time 0 and has a value which isn't a recognised trigger
- if (event_time(1)==0 && ~ismember(event_types(1), [triggers.off, triggers.A1, triggers.A2, triggers.practice, triggers.image]))
- event_types(1) = [];
- event_pos(1) = [];
- event_time(1) = [];
- end
-
- % remove the new first trigger if it has a value of off
- if (event_types(1)==triggers.off)
- event_types(1) = [];
- event_pos(1) = [];
- event_time(1) = [];
- end
-
- % check every second trigger is an offset
- offset_locs = find(event_types==triggers.off);
- if any(offset_locs' ~= 2:2:numel(event_types))
- fprintf('Expected each second trigger to be an off?')
- break
- end
-
- % check every first trigger is non-zero
- onset_locs = find(event_types~=triggers.off);
- if any(onset_locs' ~= 1:2:numel(event_types))
- fprintf('Expected each first trigger to be an event?')
- break
- end
-
- % create the events struct manually
- events_onset_types = event_types(onset_locs);
- events_onsets = event_pos(onset_locs);
- events_offsets = event_pos(offset_locs);
- events_durations = events_offsets - events_onsets;
-
- EEG.event = struct();
- for event_nr = 1:numel(events_onsets)
- EEG.event(event_nr).type = events_onset_types(event_nr);
- EEG.event(event_nr).latency = events_onsets(event_nr);
- EEG.event(event_nr).offset = events_offsets(event_nr);
- EEG.event(event_nr).duration = events_durations(event_nr);
- end
-
- % copy the details over to urevent
- EEG.urevent = EEG.event;
-
- % record the urevent
- for event_nr = 1:numel(events_onsets)
- EEG.event(event_nr).urevent = event_nr;
- end
-
- % remove bad events recorded in lab book (misfired triggers)
- EEG = pop_editeventvals(EEG, 'delete', find(ismember([EEG.event.urevent], bad_trigger_indices)));
-
- % remove practice trials
- EEG = pop_editeventvals(EEG, 'delete', find(ismember([EEG.event.type], triggers.practice)));
-
- % remove triggers for words
- EEG = pop_editeventvals(EEG, 'delete', find(ismember([EEG.event.type], [triggers.A1, triggers.A2])));
- % remove triggers for all but the last 200 triggers (i.e., remove the practice images)
- EEG = pop_editeventvals(EEG, 'delete', fliplr( 1:numel([EEG.event.type]) ) > 200);
-
- % check the events make sense
- if sum(~ismember([EEG.event.type], triggers.image)) > 0
- fprintf('Unexpected trial types?\n')
- break
- end
-
- if numel({EEG.event.type})~=200
- fprintf('%g trial triggers detected?\n', numel({EEG.event.type}))
- break
- end
-
- % add the trials' onsets, offsets, durations, and triggers to the behavioural data
- beh.event = zeros(size(beh, 1), 1);
- beh.latency = zeros(size(beh, 1), 1);
- for row_nr = 1:size(beh, 1)
- cond_i = beh.condition(row_nr);
- beh.event(row_nr) = triggers.(cond_i{:});
- beh.latency(row_nr) = EEG.event(row_nr).latency;
- beh.offset(row_nr) = EEG.event(row_nr).offset;
- beh.duration(row_nr) = EEG.event(row_nr).duration;
- beh.duration_ms(row_nr) = (EEG.event(row_nr).duration * 1000/EEG.srate) - 500; % minus 500 as event timer starts at word presentation, but rt timer starts once word turns green
- end
-
- % record trial numbers in EEG.event
- for row_nr = 1:size(beh, 1)
- EEG.event(row_nr).trl_nr = beh.trl_nr(row_nr);
- end
-
- %% Remove segments of data that fall outside of blocks
-
- % record block starts
- beh.is_block_start(1) = 1;
- for row_nr = 2:size(beh, 1)
- beh.is_block_start(row_nr) = beh.block_nr(row_nr) - beh.block_nr(row_nr-1) == 1;
- end
- % record block ends
- beh.is_block_end(size(beh, 1)) = 1;
- for row_nr = 1:(size(beh, 1)-1)
- beh.is_block_end(row_nr) = beh.block_nr(row_nr+1) - beh.block_nr(row_nr) == 1;
- end
- % record block boundaries (first start and last end point of each block, with 1 seconds buffer)
- beh.block_boundary = zeros(size(beh, 1), 1);
- for row_nr = 1:size(beh, 1)
- if beh.is_block_start(row_nr)
- beh.block_boundary(row_nr) = beh.latency(row_nr) - (EEG.srate * 1);
- elseif beh.is_block_end(row_nr)
- beh.block_boundary(row_nr) = beh.offset(row_nr) + (EEG.srate * 1);
- end
- end
-
- % get the boundary indices in required format (start1, end1; start2, end2; start3, end3)
- block_boundaries = reshape(beh.block_boundary(beh.block_boundary~=0), 2, [])';
-
- % remove anything outside of blocks
- EEG = pop_select(EEG, 'time', (block_boundaries / EEG.srate));
-
- %% Trial selection
-
- % include only correct responses
- beh_filt_acc_only = beh(beh.acc==1, :);
- excl_trials_incorr = size(beh, 1)-size(beh_filt_acc_only, 1);
- total_excl_trials_incorr(subject_nr) = excl_trials_incorr;
- fprintf('Lost %g trials to incorrect responses\n', excl_trials_incorr)
-
- % include only responses between 100 and 1500 ms
- beh_filt = beh_filt_acc_only(beh_filt_acc_only.rt<=1500, :);
- excl_trials_rt = size(beh_filt_acc_only, 1)-size(beh_filt, 1);
- total_excl_trials_rt(subject_nr) = excl_trials_rt;
- fprintf('Lost %g trials to RTs above 1500\n', excl_trials_rt)
-
- fprintf('Lost %g trials in total to behavioural data\n', size(beh, 1)-size(beh_filt, 1))
-
- % filter the events structure
- discarded_trls = beh.trl_nr(~ismember(beh.trl_nr, beh_filt.trl_nr));
- discarded_events_indices = []; % (collect in a for loop, as [EEG.event.trl_nr] would remove missing data)
- for event_nr = 1:size(EEG.event, 2)
- if ismember(EEG.event(event_nr).trl_nr, discarded_trls)
- discarded_events_indices = [discarded_events_indices, event_nr];
- end
- end
- EEG = pop_editeventvals(EEG, 'delete', discarded_events_indices);
-
- % check the discarded trials are the expected length
- if numel(discarded_trls) ~= size(beh, 1)-size(beh_filt, 1)
- fprintf('Mismatch between behavioural data and EEG events in the number of trials to discard?')
- break
- end
-
- % check the sizes match
- if numel([EEG.event.trl_nr]) ~= size(beh_filt, 1)
- fprintf('Inconsistent numbers of trials between events structure and behavioural data after discarding trials?')
- break
- end
-
- % check the trl numbers match
- if any([EEG.event.trl_nr]' ~= beh_filt.trl_nr)
- fprintf('Trial IDs mmismatch between events structure and behavioural data after discarding trials?')
- break
- end
-
- %% Rereference, downsample, and filter
-
- % rereference
- EEG = pop_reref(EEG, []);
-
- % downsample
- EEG = pop_resample(EEG, 512);
-
- % filter
- % EEG = eeglab_butterworth(EEG, 0.5, 40, 4, 1:size(EEG.chanlocs, 2)); % preregistered filter
- EEG = eeglab_butterworth(EEG, 0.1, 40, 4, 1:size(EEG.chanlocs, 2)); % filter with lower highpass
-
- %% ICA
-
- % apply ASR
- %EEG_no_asr = EEG;
- %EEG = clean_asr(EEG, asr_sigma, [], [], [], [], [], [], [], [], 1024); % The last number is available memory in mb, needed for reproducibility
-
- % ASR is not used in this exploratory analysis
- rng(3101) % set seed for reproducibility
- EEG = pop_runica(EEG, 'icatype', 'fastica', 'approach', 'symm');
- % classify components with ICLabel
- EEG = iclabel(EEG);
- % store results for easy indexing
- icl_res = EEG.etc.ic_classification.ICLabel.classifications;
- icl_classes = EEG.etc.ic_classification.ICLabel.classes;
-
- % identify and remove artefact components
- artefact_comps = find(icl_res(:, strcmp(icl_classes, 'Eye')) >= icl_cutoff | icl_res(:, strcmp(icl_classes, 'Muscle')) >= icl_cutoff);
- fprintf('Removing %g artefact-related ICA components\n', numel(artefact_comps))
- n_bad_ica(subject_nr) = numel(artefact_comps);
- %EEG_no_iclabel = EEG;
- EEG = pop_subcomp(EEG, artefact_comps);
-
- %% Interpolate bad channels
-
- % give the original chanlocs structure so EEGLAB interpolates the missing electrode(s)
- if numel(bad_channels)>0
- EEG = pop_interp(EEG, ur_chanlocs);
- end
-
- %% Get sample level microvolts for exploratory analysis checking image ERPs
-
- disp('Getting sample-level results...')
-
- % resample to 256 Hz
- EEG_256 = pop_resample(EEG, 256);
-
- % get epochs of low-srate data
- EEG_epo_256 = pop_epoch(EEG_256, {triggers.image}, [-0.25, 1.8]);
-
- % remove baseline
- EEG_epo_256 = pop_rmbase(EEG_epo_256, [-200, 0]);
-
- % pre-allocate the table
- var_names = {'subj_id', 'stim_grp', 'resp_grp', 'item_nr', 'ch_name', 'time', 'uV'};
- var_types = {'string', 'string', 'string', 'double', 'string', 'double', 'double'};
- nrows = 64 * size(EEG_epo_256.times, 2) * size(beh_filt, 1);
- sample_res = table('Size',[nrows, numel(var_names)], 'VariableTypes',var_types, 'VariableNames',var_names);
-
- sample_res.subj_id = repmat(beh_filt.subj_id, 64*size(EEG_epo_256.times, 2), 1);
- sample_res.stim_grp = repmat(beh_filt.stim_grp, 64*size(EEG_epo_256.times, 2), 1);
- sample_res.resp_grp = repmat(beh_filt.resp_grp, 64*size(EEG_epo_256.times, 2), 1);
-
- % get the 64 channel eeg data as an array
- eeg_arr = EEG_epo_256.data(1:64, :, :);
-
- % a vector of all eeg data
- eeg_vec = squeeze(reshape(eeg_arr, 1, 1, []));
-
- % array and vector of the channel labels for each value in EEG.data
- channel_labels_arr = cell(size(eeg_arr));
- channel_label_lookup = {EEG_epo_256.chanlocs.labels};
- for chan_nr = 1:size(eeg_arr, 1)
- channel_labels_arr(chan_nr, :, :) = repmat(channel_label_lookup(chan_nr), size(channel_labels_arr, 2), size(channel_labels_arr, 3));
- end
-
- channel_labels_vec = squeeze(reshape(channel_labels_arr, 1, 1, []));
-
- % array and vector of the item numbers for each value in EEG.data
- times_arr = zeros(size(eeg_arr));
- times_lookup = EEG_epo_256.times;
- for time_idx = 1:size(eeg_arr, 2)
- times_arr(:, time_idx, :) = repmat(times_lookup(time_idx), size(times_arr, 1), size(times_arr, 3));
- end
-
- times_vec = squeeze(reshape(times_arr, 1, 1, []));
-
- % array and vector of the trial numbers
- trials_arr = zeros(size(eeg_arr));
- trials_lookup = beh_filt.item_nr;
- for trl_idx = 1:size(eeg_arr, 3)
- trials_arr(:, :, trl_idx) = repmat(trials_lookup(trl_idx), size(trials_arr, 1), size(trials_arr, 2));
- end
-
- trials_vec = squeeze(reshape(trials_arr, 1, 1, []));
-
- % store sample-level results in the table
- sample_res.ch_name = channel_labels_vec;
- sample_res.item_nr = trials_vec;
- sample_res.time = times_vec;
- sample_res.uV = eeg_vec;
-
- % look up and store some info about the trials
- trial_info_lookup = beh_filt(:, {'item_nr', 'condition', 'image', 'string'});
- sample_res = outerjoin(sample_res, trial_info_lookup, 'MergeKeys', true);
-
- % sort by time, channel, item_nr
- sample_res = sortrows(sample_res, {'time', 'ch_name', 'item_nr'});
-
- %% save the results
-
- disp('Saving results...')
- writetable(sample_res, fullfile('sample_data_picture', [subject_id, '.csv']));
-
- end
- fprintf('\nFinished preprocessing picture-word data!\n')
- %% Functions
- % custom function for applying a Butterworth filter to EEGLAB data
- function EEG = eeglab_butterworth(EEG, low, high, order, chanind)
- fprintf('Applying Butterworth filter between %g and %g Hz (order of %g)\n', low, high, order)
- % create filter
- [b, a] = butter(order, [low, high]/(EEG.srate/2));
- % apply to data (requires transposition for filtfilt)
- data_trans = single(filtfilt(b, a, double(EEG.data(chanind, :)')));
- EEG.data(chanind, :) = data_trans';
- end
- % custom function for finding the closest timepoint in an EEG dataset
- function [idx, closesttime] = eeglab_closest_time(EEG, time)
- dists = abs(EEG.times - time);
- idx = find(dists == min(dists));
- % in the unlikely case there are two equidistant times, select one randomly
- if numel(idx) > 1
- fprintf('Two equidistant times! Selecting one randomly.')
- idx = idx(randperm(numel(idx)));
- idx = idx(1);
- end
- closesttime = EEG.times(idx);
- end
|