clean_datasets.m 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  function clean_datasets(files)
  %% Convert double recording files to a more convenient structure
  % If not found in the original dataset, an appropriate time vector and a
  % stimulus vector are added. Only trials are copied to the new datasets
  % that contain recordings of 30 seconds in length (300000 samples at
  % 10 kHz), that were obtained with the standard stimulus protocol (or that
  % carry no stimulus data at all) and that are not on the black list with
  % trials showing unphysiological behaviour.
  % -------------------------------------------------------------------------
  % Input
  % -----
  % files         number(s) of the files to convert; indices into the
  %               alphabetical list of '*.mat' files in
  %               '../DoubleRecordings/' (row or column vector)
  %
  % ------
  % Output
  % ------
  % standardized files, data is stored in a structure named 'RecordingData'
  % recordingT    a matrix containing the recorded data of all trials
  %               (columns) for the T cell
  % recordingINT  a matrix containing the recorded data of all trials
  %               (columns) for the interneuron
  % stimulus      the standard stimulation protocol
  % timeVector    corresponding time vector for stimulus and the
  %               recordings
  % stimulatedT   a vector giving all trials in which the T cell was
  %               stimulated
  %
  % All files are stored in '../CleanDatasets'
  %
  % Errors
  % ------
  % clean_datasets:missingExperiment  the loaded file has no variable
  %                                   'experiment'
  % clean_datasets:noDataColumns      no experiment in the file shows two
  %                                   recording lines with data
  % -------------------------------------------------------------------------
  % Program by: Bjarne Schultze [last modified: 02.03.2022]
  % -------------------------------------------------------------------------
  %% Preparations
  % remember the current directory and make sure it is restored whenever
  % this function exits - also if an error occurs while processing a file
  old_cd = cd;
  restore_cd = onCleanup(@() cd(old_cd)); %#ok<NASGU>
  % change current directory
  cd("../DoubleRecordings/")

  % The additional files are loaded into structs instead of directly into
  % the workspace, so that a file can never silently overwrite a local
  % variable of this function.
  % list which contains information about which data belongs to a T cell
  % and which to an interneuron
  loaded = load("../AdditionalFiles/lineInfo.mat", 'lineInfo');
  lineInfo = loaded.lineInfo;
  % the standard stimulus
  loaded = load("../AdditionalFiles/standard_stimulus.mat", ...
      'standard_stimulus');
  standard_stimulus = loaded.standard_stimulus;
  % single trials to sort out
  loaded = load("../AdditionalFiles/blackListTrials.mat", ...
      'blackListTrials');
  blackListTrials = loaded.blackListTrials;

  % get a list of '.mat' files in the current folder
  mat_directory = dir('*.mat');

  %% Data gathering and export
  % length of a valid recording (samples) and sampling rate (Hz)
  rec_len = 300000;
  samp_rate = 10000;

  % iterate through the requested files; (:).' forces a row vector so that
  % a column vector of file numbers is processed element by element as well
  for file = files(:).'
      % load the requested file and print the file name to the command
      % window
      loaded = load(mat_directory(file).name);
      if ~isfield(loaded, 'experiment')
          error('clean_datasets:missingExperiment', ...
              'File "%s" does not contain a variable ''experiment''.', ...
              mat_directory(file).name);
      end
      experiment = loaded.experiment;
      fprintf("Loaded file: %s\n", mat_directory(file).name);

      first_result = experiment(1).result;

      %-- Stimulus data
      % the stimulus data can either be missing or be stored in one of
      % four 'stimulus' columns
      if isequal(length(first_result.stimulus), rec_len)
          % select the column(s) that contain non-zero stimulus data
          stim_col = find(any(first_result.stimulus));
          stimulus = first_result.stimulus(:, stim_col);
      else
          % if the stimulus data is missing, a standard stimulus is used
          stimulus = standard_stimulus;
          fprintf("Notice: Because stimulus data for [%0.f, %0.f] is " + ...
              "missing, the standard stimulus is used.\n", file, 1);
      end

      %-- Time vector
      if isequal(length(first_result.timeVector), rec_len)
          % if a time vector is available in the data set it is used
          timeVector = first_result.timeVector;
      else
          % otherwise it is generated from the number of samples and the
          % sampling rate (column vector, first sample at 1/samp_rate)
          timeVector = (1:rec_len)' / samp_rate;
      end

      %-- Recorded data
      % find the two out of four recording lines that hold the data: a
      % line counts as data if the mean of its first 10 samples is below
      % -1. Only the first trial (third dimension) is inspected so that the
      % indices always refer to recording lines, and the search stops with
      % a clear error if no experiment qualifies.
      search_index = [];
      h = 0;
      while numel(search_index) < 2
          h = h + 1;
          if h > numel(experiment)
              error('clean_datasets:noDataColumns', ...
                  'Could not find two recording lines with data in file %0.f.', ...
                  file);
          end
          search_index = ...
              find(mean(experiment(h).result.recording(1:10, :, 1)) < -1);
      end

      % take into account on which line the T cell data is stored
      if isequal(lineInfo.tLine(file), 1)
          lineT = search_index(1);
          lineINT = search_index(2);
      else
          lineT = search_index(2);
          lineINT = search_index(1);
      end

      % decide once per experiment whether it is copied and how many
      % trials it contributes (trials may be stacked in a 3D matrix);
      % test stimulus recordings are neglected
      num_exp = numel(experiment);
      keep = false(1, num_exp);
      num_trials = zeros(1, num_exp);
      for k = 1:num_exp
          [keep(k), num_trials(k)] = select_experiment( ...
              experiment(k).result, rec_len, standard_stimulus);
      end
      % number of columns needed to store the data of all kept trials
      counter = sum(num_trials(keep));

      % pre-define matrices for the recording data
      recordingT = zeros(rec_len, counter);
      recordingINT = zeros(rec_len, counter);

      % gather recording data; write_col is the last column written so far
      write_col = 0;
      for k = find(keep)
          rec = experiment(k).result.recording;
          cols = write_col + (1:num_trials(k));
          % rec(:, line, :) is rec_len x 1 x trials -> rec_len x trials
          recordingT(:, cols) = reshape(rec(:, lineT, :), rec_len, []);
          recordingINT(:, cols) = reshape(rec(:, lineINT, :), rec_len, []);
          write_col = write_col + num_trials(k);
      end

      % check whether everything went right and there is data in each
      % column
      if sum(any(recordingT)) ~= counter
          fprintf("Something went wrong! Please check the result for file %0.f!\n",...
              file);
      end

      %-- Adding information about which cell is stimulated
      stimulatedT = lineInfo{file,"tStim"}{1};

      % remove trials with bad quality or strange behaviour
      badTrials = blackListTrials{file,"badTrials"}{1};
      if ~isempty(badTrials)
          % sorted and duplicate-free, so that the renumbering below is
          % independent of the order in which the bad trials were listed
          badTrials = unique(badTrials);
          % remove the corresponding columns of the recording data
          recordingT(:, badTrials) = [];
          recordingINT(:, badTrials) = [];
          % remove the numbers of the bad trials in the 'stimulatedT' list
          stimulatedT = stimulatedT(~ismember(stimulatedT, badTrials));
          % adapt the other numbers in the 'stimulatedT' list: each trial
          % number moves down by the number of removed trials before it
          shift = arrayfun(@(t) nnz(badTrials < t), stimulatedT);
          stimulatedT = stimulatedT - shift;
      end

      %-- Exporting data
      % create a structure containing the necessary data
      RecordingData = struct("recordingT", recordingT, ...
          "recordingINT", recordingINT,...
          "stimulus", stimulus,...
          "timeVector", timeVector,...
          "stimulatedT", stimulatedT); %#ok<NASGU>
      % set the filename + path
      filename = sprintf('../CleanDatasets/[%02.f]%s.mat',...
          file, experiment(1).result.experimentName);
      % save the structure as a file
      save(filename, 'RecordingData');
      % feedback message for the user
      fprintf("Done! Saved new version of file %0.f. \n", file);
  % end of outer for-loop (file iteration)
  end
  % the directory from before the function call is restored by the
  % 'restore_cd' cleanup object when the function returns
  % end of function definition
  end

  function [keep, num_trials] = select_experiment(result, rec_len, standard_stimulus)
  % Decide whether the trials of one experiment are copied.
  %   result             'result' struct of one experiment with the fields
  %                      'recording' and 'stimulus'
  %   rec_len            required number of samples (rows) per recording
  %   standard_stimulus  the standard stimulation protocol
  %
  %   keep        true if the recording has rec_len rows AND the stimulus
  %               is either missing (empty) or equal to the standard
  %               stimulus
  %   num_trials  number of trials stored along the third dimension
  [rows, ~, num_trials] = size(result.recording);
  if isempty(result.stimulus)
      % no stimulus stored: treated like the standard protocol
      standard_protocol = true;
  else
      % compare the column(s) holding non-zero stimulus data
      stim_col = find(any(result.stimulus));
      standard_protocol = ...
          isequal(result.stimulus(:, stim_col), standard_stimulus);
  end
  keep = isequal(rows, rec_len) && standard_protocol;
  end