123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220 |
function clean_datasets(files)
%% Convert double recording files to a more convenient structure
% If not found in the original dataset, an appropriate time vector and a
% stimulus vector are added. Only trials are copied to the new datasets
% that contain recordings of 30 seconds in length (300000 samples at
% 10 kHz), that were obtained with the standard stimulus protocol (or have
% no stimulus data at all) and that are not on the black list with trials
% showing unphysiological behaviour.
% -------------------------------------------------------------------------
% Input
% -----
% files         number(s) of the files to convert; indices into the
%               alphabetical list of '.mat' files in '../DoubleRecordings/'
%               (row or column vector)
%
% ------
% Output
% ------
% standardized files, data is stored in a structure named 'RecordingData'
%   recordingT      a matrix containing the recorded data of all trials
%                   (columns) for the T cell
%   recordingINT    a matrix containing the recorded data of all trials
%                   (columns) for the interneuron
%   stimulus        the standard stimulation protocol
%   timeVector      corresponding time vector for stimulus and the
%                   recordings
%   stimulatedT     a vector giving all trials in which the T cell was
%                   stimulated
%
% All files are stored in '../CleanDatasets'
%
% ------
% Errors
% ------
% clean_datasets:missingExperiment  the loaded file has no 'experiment'
%                                   variable
% clean_datasets:noDataColumns      no experiment in the file has two
%                                   recording columns with data
% -------------------------------------------------------------------------
% Program by: Bjarne Schultze       [last modified: 02.03.2022]
% -------------------------------------------------------------------------

%% Preparations
% remember the current directory and make sure it is restored when the
% function exits, no matter whether it finishes normally or with an error
old_cd = cd;
restore_cd = onCleanup(@() cd(old_cd)); %#ok<NASGU>
% change current directory
cd("../DoubleRecordings/")

% load a list which contains information about which data belongs to a
% T cell and which to an interneuron
aux = load("../AdditionalFiles/lineInfo.mat", 'lineInfo');
lineInfo = aux.lineInfo;
% load the standard stimulus
aux = load("../AdditionalFiles/standard_stimulus.mat", 'standard_stimulus');
standard_stimulus = aux.standard_stimulus;
% load a file giving single trials to sort out
aux = load("../AdditionalFiles/blackListTrials.mat", 'blackListTrials');
blackListTrials = aux.blackListTrials;

% get a list of '.mat' files in the current folder
mat_directory = dir('*.mat');

%% Data gathering and export
% length of a valid recording in samples and the sampling rate in Hz
rec_len = 300000;
samp_rate = 10000;

% iterate through the requested files (reshape so that column vectors are
% iterated element by element as well)
for file = reshape(files, 1, [])
    % load the requested file into a structure, so that variables stored
    % in the file cannot overwrite variables of this function
    file_name = mat_directory(file).name;
    loaded = load(file_name);
    if ~isfield(loaded, 'experiment')
        error('clean_datasets:missingExperiment', ...
            'File %s does not contain a variable ''experiment''.', ...
            file_name);
    end
    experiment = loaded.experiment;
    num_exp = numel(experiment);
    % print the file name to the command window
    fprintf("Loaded file: %s\n", file_name);

    %-- Stimulus data
    % searching for the stimulus data, which can either be missing or be
    % stored in one of four 'stimulus' columns
    if isequal(length(experiment(1).result.stimulus), rec_len)
        % select the column(s) that contain non-zero stimulus data
        stim_col = find(any(experiment(1).result.stimulus));
        stimulus = experiment(1).result.stimulus(:,stim_col);
    else
        % if the stimulus data is missing, a standard stimulus is used
        stimulus = standard_stimulus;
        fprintf("Notice: Because stimulus data for [%0.f, %0.f] is " + ...
            "missing, the standard stimulus is used.\n", file, 1);
    end

    %-- Time vector
    if isequal(length(experiment(1).result.timeVector), rec_len)
        % if a time vector is available in the data set it is used
        timeVector = experiment(1).result.timeVector;
    else
        % if the data set doesn't contain a time vector, it is generated
        % out of the number of items and the sampling rate
        timeVector = (1:rec_len)' / samp_rate;
    end

    %-- Recorded data
    % find the two out of four columns with the data: take the first
    % experiment in which at least two columns have a mean below -1 over
    % the first ten samples
    search_index = [];
    probe = 0;
    while length(search_index) < 2
        probe = probe + 1;
        if probe > num_exp
            % fail with a clear message instead of an indexing error
            error('clean_datasets:noDataColumns', ...
                'Could not find two data columns in file %s.', file_name);
        end
        search_index = ...
            find(mean(experiment(probe).result.recording(1:10,:)) < -1);
    end

    % decide once per experiment whether its trials are copied and how
    % many trials (pages of a 3D matrix) it contributes
    use_exp = false(1, num_exp);
    num_trials = zeros(1, num_exp);
    for exp_idx = 1:num_exp
        result = experiment(exp_idx).result;
        [rows, ~, zcolumns] = size(result.recording);
        % the recording must have the full length AND the stimulus must
        % either be missing or equal the standard stimulus; recordings
        % with test stimuli are neglected
        if isequal(rows, rec_len)
            if isempty(result.stimulus)
                use_exp(exp_idx) = true;
            else
                stim_col = find(any(result.stimulus));
                use_exp(exp_idx) = isequal( ...
                    result.stimulus(:,stim_col), standard_stimulus);
            end
        end
        if use_exp(exp_idx)
            num_trials(exp_idx) = zcolumns;
        end
    end
    % number of columns needed to store all selected trials
    counter = sum(num_trials);

    % pre-define matrices for the recording data
    recordingT = zeros(rec_len, counter);
    recordingINT = zeros(rec_len, counter);

    % take into account on which line the T cell data is stored
    if isequal(lineInfo.tLine(file), 1)
        lineT = search_index(1);
        lineINT = search_index(2);
    else
        lineT = search_index(2);
        lineINT = search_index(1);
    end

    % gather recording data; 2D matrices are handled as 3D matrices with
    % a single page
    write_col = 0;
    for exp_idx = find(use_exp)
        recording = experiment(exp_idx).result.recording;
        n_pages = num_trials(exp_idx);
        target_cols = write_col + (1:n_pages);
        recordingT(:,target_cols) = ...
            reshape(recording(:,lineT,:), rec_len, n_pages);
        recordingINT(:,target_cols) = ...
            reshape(recording(:,lineINT,:), rec_len, n_pages);
        write_col = write_col + n_pages;
    end

    % check whether everything went right and there is data in each column
    if sum(any(recordingT, 1)) ~= counter
        fprintf("Something went wrong! Please check the result for file %0.f!\n",...
            file);
    end

    %-- Adding information about which cell is stimulated
    stimulatedT = lineInfo{file,"tStim"}{1};

    % remove trials with bad quality or strange behaviour
    badTrials = blackListTrials{file,"badTrials"}{1};
    if ~isempty(badTrials)
        % remove the corresponding columns of the recording data
        recordingT(:,badTrials) = [];
        recordingINT(:,badTrials) = [];
        % remove the numbers of the bad trials in the 'stimulatedT' list
        stimulatedT(ismember(stimulatedT, badTrials)) = [];
        % adapt the other numbers in the 'stimulatedT' list: every trial
        % number moves down by one for each removed trial before it.
        % Going through the unique bad trials from the largest to the
        % smallest makes this independent of the order in the black list.
        for bad = sort(unique(badTrials(:)), 'descend')'
            shift_mask = stimulatedT > bad;
            stimulatedT(shift_mask) = stimulatedT(shift_mask) - 1;
        end
    end

    %-- Exporting data
    % create a structure containing the necessary data
    RecordingData = struct("recordingT", recordingT, ...
        "recordingINT", recordingINT,...
        "stimulus", stimulus,...
        "timeVector", timeVector,...
        "stimulatedT", stimulatedT); %#ok<NASGU>
    % set the filename + path
    filename = sprintf('../CleanDatasets/[%02.f]%s.mat',...
        file, experiment(1).result.experimentName);
    % save the structure as a file
    save(filename, 'RecordingData');
    % feedback message for the user
    fprintf("Done! Saved new version of file %0.f. \n", file);
% end of outer for-loop (file iteration)
end
% the original directory is restored by 'restore_cd' when the function ends
% end of function definition
end
|