convert2nwb.m 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741
  1. % Convert to NWB
  2. %
  3. % Run this script to convert derived data generated by the Brain-wide
  4. % infra-slow dynamics study at UoL to the Neurodata Without Borders (NWB)
  5. % file format.
  6. %
  7. % The script works by loading general, animal, and recording session
  8. % metadata from nwbParams, nwbAnimalParams, nwbSessionParams, respectively.
  9. % It then locates the derived data MAT files for each animal and converts
  10. % them into derived data NWB files dividing the data per recording session.
  11. % The derived data include spiking, waveform, pupil area size fluctuation,
  12. % and total facial movement data.
  13. %
  14. % The conversion pipeline depends on the specific structure of derived data
  15. % MAT files used in this study. The way the pipeline is organised is
  16. % dictated by the fact that the conversion procedure was adopted late in
  17. % the study. Ideally NWB file format should be adopted early in the study.
  18. %
  19. % You can use this pipeline to get an idea of how to convert your own
  20. % ecephys data to the NWB file format to store spiking and behavioural
  21. % data combined with some metadata.
  22. cleanUp
  23. % Load general NWB parameters
  24. nwbParams
  25. % Load animal-specific parameters
  26. nwbAnimalParams
  27. % Load session-specific parameters
  28. nwbSessionParams
  29. % Generate Matlab classes from NWB core schema files
  30. generateCore;
  31. % Generate NWB files for every recording session
  32. if ~exist(animalDerivedDataFolderNWB, 'file')
  33. mkdir(animalDerivedDataFolderNWB) % Folder to store animal's converted NWB data
  34. end
  35. derivedData = animalDerivedDataFile;
  36. for iSess = 1:numel(sessionID)
  37. % Assign NWB file fields
  38. nwb = NwbFile( ...
  39. 'session_description', sessionDescription{iSess},...
  40. 'identifier', [animalID '_' sessionID{iSess}], ...
  41. 'session_start_time', sessionStartTime{iSess}, ...
  42. 'general_experimenter', experimenter, ... % optional
  43. 'general_session_id', sessionID{iSess}, ... % optional
  44. 'general_institution', institution, ... % optional
  45. 'general_related_publications', publications, ... % optional
  46. 'general_notes', sessionNotes{iSess}, ... % optional
  47. 'general_lab', lab); % optional
  48. % Create subject object
  49. subject = types.core.Subject( ...
  50. 'subject_id', animalID, ...
  51. 'age', age, ...
  52. 'description', description, ...
  53. 'species', species, ...
  54. 'sex', sex);
  55. nwb.general_subject = subject;
  56. % Create electrode tables: Info about each recording channel
  57. input.iElectrode = 1;
  58. input.electrodeDescription = electrodeDescription{iSess};
  59. input.electrodeManufacturer = electrodeManufacturer{iSess};
  60. input.nShanks = nShanks{iSess};
  61. input.nChannelsPerShank = nChannelsPerShank{iSess};
  62. input.electrodeLocation = electrodeLocation{iSess};
  63. input.electrodeCoordinates = electrodeCoordinates{iSess};
  64. input.sessionID = sessionID{iSess};
  65. input.electrodeLabel = electrodeLabel{iSess};
  66. if probeInserted{iSess}{input.iElectrode} && ~isempty(endCh{iSess}{1})
  67. tbl1 = createElectrodeTable(nwb, input);
  68. else
  69. tbl1 = [];
  70. end
  71. input.iElectrode = 2;
  72. if probeInserted{iSess}{input.iElectrode} && ~isempty(endCh{iSess}{2})
  73. tbl2 = createElectrodeTable(nwb, input);
  74. else
  75. tbl2 = [];
  76. end
  77. tbl = [tbl1; tbl2];
  78. electrode_table = util.table2nwb(tbl, 'all electrodes');
  79. nwb.general_extracellular_ephys_electrodes = electrode_table;
  80. % Load spike times from the MAT file
  81. [spikes, metadata, derivedData] = getSpikes(derivedData, animalID, sessionID{iSess}, tbl);
  82. [spike_times_vector, spike_times_index] = util.create_indexed_column(spikes);
  83. spike_times_vector.description = 'Session spike times';
  84. spike_times_index.description = 'Indices dividing spike times into units';
  85. % Load and reshape unit waveforms
  86. waveformsFile1 = [electrodeFolder{iSess}{1} filesep 'waveforms.mat'];
  87. if exist(waveformsFile1, 'file')
  88. waveformsProbe1 = load(waveformsFile1);
  89. else
  90. waveformsProbe1 = [];
  91. waveformsProbe1.maxWaveforms = [];
  92. end
  93. if probeInserted{iSess}{1} && ~isempty(spikes)
  94. [waveformMat1, waveformVecGrp1, waveformVec1, waveformMeans1, nWaveformSamples1] = reshapeWaveforms(waveformsProbe1, 1, metadata, nCh{iSess}{1});
  95. else
  96. waveformMat1 = []; waveformVecGrp1 = {}; waveformVec1 = {}; waveformMeans1 = {}; nWaveformSamples1 = 0;
  97. end
  98. waveformsFile2 = [electrodeFolder{iSess}{2} filesep 'waveforms.mat'];
  99. if exist(waveformsFile2, 'file')
  100. waveformsProbe2 = load(waveformsFile2);
  101. else
  102. waveformsProbe2 = [];
  103. waveformsProbe2.maxWaveforms = [];
  104. end
  105. if probeInserted{iSess}{2} && ~isempty(spikes)
  106. [waveformMat2, waveformVecGrp2, waveformVec2, waveformMeans2, nWaveformSamples2] = reshapeWaveforms(waveformsProbe2, 2, metadata, nCh{iSess}{2});
  107. else
  108. waveformMat2 = []; waveformVecGrp2 = {}; waveformVec2 = {}; waveformMeans2 = {}; nWaveformSamples2 = 0;
  109. end
  110. maxWaveforms = [waveformsProbe1.maxWaveforms; waveformsProbe2.maxWaveforms];
  111. waveformMat = [waveformMat1; waveformMat2];
  112. waveformVec = [waveformVec1; waveformVec2];
  113. waveformMeans = [waveformMeans1; waveformMeans2];
  114. nWaveformSamples = max([nWaveformSamples1 nWaveformSamples2]);
  115. for iWave = 1:numel(waveformMeans)
  116. if isempty(waveformMeans{iWave})
  117. waveformMeans{iWave} = nan(1,nWaveformSamples);
  118. end
  119. end
  120. % Create waveform indices
  121. % These indices are not used as our waveform array has a different form and meaning than the one used in the NWB file.
  122. % We only store mean waveforms on maximum amplitude channels.
  123. % More on ragged array indexing used in NWB files see https://nwb-schema.readthedocs.io/en/latest/format_description.html
  124. [waveforms, waveformIndex] = util.create_indexed_column(waveformVec');
  125. waveformIndexIndex = {};
  126. if ~isempty(waveformVec1)
  127. waveformIndexIndex1 = reshape(waveformIndex.data(1:size(waveformVec1,1)),nCh{iSess}{1},size(waveformVec1,1)/nCh{iSess}{1})';
  128. for iUnit = 1:size(waveformVecGrp1,1)
  129. waveformIndexIndex = [waveformIndexIndex; waveformIndexIndex1(iUnit,:)];
  130. end
  131. end
  132. if ~isempty(waveformVec2)
  133. waveformIndexIndex2 = reshape(waveformIndex.data(size(waveformVec1,1)+1:end),nCh{iSess}{2},size(waveformVec2,1)/nCh{iSess}{2})';
  134. for iUnit = 1:size(waveformVecGrp2,1)
  135. waveformIndexIndex = [waveformIndexIndex; waveformIndexIndex2(iUnit,:)];
  136. end
  137. end
  138. [inds, waveformIndexIndex] = util.create_indexed_column(waveformIndexIndex');
  139. % Store spiking and waveform data inside the nwb object
  140. % see https://neurodatawithoutborders.github.io/matnwb/doc/+types/+core/Units.html
  141. if ~isempty(spikes)
  142. nwb.units = types.core.Units( ...
  143. 'colnames', {'cluster_id','local_cluster_id','type',...
  144. 'peak_channel_index','peak_channel_id',... % Provide the column order. All column names have to be defined below
  145. 'local_peak_channel_id','rel_horz_pos','rel_vert_pos',...
  146. 'isi_violations','isolation_distance','area','probe_id',...
  147. 'spike_times','spike_times_index'}, ...
  148. 'description', 'Units table', ...
  149. 'id', types.hdmf_common.ElementIdentifiers( ...
  150. 'data', int64(0:length(spikes) - 1)), ...
  151. 'cluster_id', types.hdmf_common.VectorData( ...
  152. 'data', cell2mat(metadata{:,1}), ...
  153. 'description', 'Unique cluster id'), ...
  154. 'local_cluster_id', types.hdmf_common.VectorData( ...
  155. 'data', cell2mat(metadata{:,2}), ...
  156. 'description', 'Local cluster id on the probe'), ...
  157. 'type', types.hdmf_common.VectorData( ...
  158. 'data', metadata{:,3}, ...
  159. 'description', 'Cluster type: unit vs mua'), ...
  160. 'peak_channel_index', types.hdmf_common.VectorData( ...
  161. 'data', cell2mat(metadata{:,4}), ...
  162. 'description', 'Peak channel row index in the electrode table'), ...
  163. 'peak_channel_id', types.hdmf_common.VectorData( ...
  164. 'data', cell2mat(metadata{:,5}), ...
  165. 'description', 'Unique ID of the channel with the largest cluster waveform amplitude'), ...
  166. 'local_peak_channel_id', types.hdmf_common.VectorData( ...
  167. 'data', cell2mat(metadata{:,6}), ...
  168. 'description', 'Local probe channel with the largest cluster waveform amplitude'), ...
  169. 'rel_horz_pos', types.hdmf_common.VectorData( ...
  170. 'data', cell2mat(metadata{:,7})./1000, ...
  171. 'description', 'Probe-relative horizontal position in mm'), ...
  172. 'rel_vert_pos', types.hdmf_common.VectorData( ...
  173. 'data', cell2mat(metadata{:,8})./1000, ...
  174. 'description', 'Probe tip-relative vertical position in mm'), ...
  175. 'isi_violations', types.hdmf_common.VectorData( ...
  176. 'data', cell2mat(metadata{:,9}), ...
  177. 'description', 'Interstimulus interval violations (unit quality measure)'), ...
  178. 'isolation_distance', types.hdmf_common.VectorData( ...
  179. 'data', cell2mat(metadata{:,10}), ...
  180. 'description', 'Cluster isolation distance (unit quality measure)'), ...
  181. 'area', types.hdmf_common.VectorData( ...
  182. 'data', metadata{:,11}, ...
  183. 'description', ['Brain area where the unit is located. Internal thalamic' ...
  184. 'nuclei divisions are not precise, because they are derived from unit locations on the probe.']), ...
  185. 'probe_id', types.hdmf_common.VectorData( ...
  186. 'data', metadata{:,12}, ...
  187. 'description', 'Probe id where the unit is located'), ...
  188. 'spike_times', spike_times_vector, ...
  189. 'spike_times_index', spike_times_index, ...
  190. 'electrode_group', types.hdmf_common.VectorData( ...
  191. 'data', metadata{:,13}, ...
  192. 'description', 'Recording channel groups'), ...
  193. 'electrodes', types.hdmf_common.DynamicTableRegion('table', ...
  194. types.untyped.ObjectView('/general/extracellular_ephys/electrodes'), ...
  195. 'description', 'Probe recording channels', ...
  196. 'data', cell2mat(table2array(metadata(:,1)))), ...
  197. 'waveform_mean', types.hdmf_common.VectorData( ...
  198. 'data', cell2mat(waveformMeans), ...
  199. 'description', ['Mean waveforms on the probe channel with the largest waveform amplitude.' ...
  200. 'MUA waveforms are excluded. The order that waveforms are stored match the order of units in the unit table.']) ...
  201. );
  202. end
  203. % Add behavioural data: Pupil area size
  204. % see https://neurodatawithoutborders.github.io/matnwb/doc/+types/+core/TimeSeries.html
  205. % and https://neurodatawithoutborders.github.io/matnwb/doc/+types/+core/PupilTracking.html
  206. if isfield(derivedData.dataStruct, 'eyeData') && isfield(derivedData.dataStruct.eyeData, [animalID '_s' sessionID{iSess}])
  207. acceptablePeriod = derivedData.dataStruct.eyeData.([animalID '_s' sessionID{iSess}]).period; % Acceptable quality range in seconds
  208. videoFrameTimes = derivedData.dataStruct.eyeData.([animalID '_s' sessionID{iSess}]).frameTimes; % seconds
  209. acceptableSamples = markQualitySamples(acceptablePeriod, videoFrameTimes);
  210. pupilAreaSize = derivedData.dataStruct.eyeData.([animalID '_s' sessionID{iSess}]).pupilArea; % pixels^2
  211. pupilAreaSize = types.core.TimeSeries( ...
  212. 'data', pupilAreaSize, ...
  213. 'timestamps', videoFrameTimes, ...
  214. 'data_unit', 'pixels^2', ...
  215. 'starting_time_rate', videoFrameRate,...
  216. 'control', uint8(acceptableSamples),...
  217. 'control_description', {'low quality samples that should be excluded from analyses';...
  218. 'acceptable quality samples'},...
  219. 'description', ['Pupil area size over the recording session measured in pixels^2' ...
  220. 'Acceptable quality period starting and ending times are given by data_continuity parameter.' ...
  221. 'The full data range can be divided into multiple acceptable periods'] ...
  222. );
  223. pupilTracking = types.core.PupilTracking('TimeSeries', pupilAreaSize);
  224. behaviorModule = types.core.ProcessingModule('description', 'contains behavioral data');
  225. behaviorModule.nwbdatainterface.set('PupilTracking', pupilTracking);
  226. else
  227. behaviorModule = [];
  228. end
  229. % Add behavioural data: Total movement of the facial area
  230. % see https://neurodatawithoutborders.github.io/matnwb/doc/+types/+core/TimeSeries.html
  231. % and https://neurodatawithoutborders.github.io/matnwb/doc/+types/+core/BehavioralTimeSeries.html
  232. if isfield(derivedData.dataStruct, 'motionData') && isfield(derivedData.dataStruct.motionData, [animalID '_s' sessionID{iSess}])
  233. acceptablePeriod = derivedData.dataStruct.motionData.([animalID '_s' sessionID{iSess}]).period; % Acceptable quality range in seconds
  234. videoFrameTimes = derivedData.dataStruct.motionData.([animalID '_s' sessionID{iSess}]).frameTimes; % seconds
  235. acceptableSamples = markQualitySamples(acceptablePeriod, videoFrameTimes);
  236. totalFaceMovement = derivedData.dataStruct.motionData.([animalID '_s' sessionID{iSess}]).sa; % z-scored change in the frame pixels' content with respect to the previous frame
  237. totalFaceMovement = types.core.TimeSeries( ...
  238. 'data', totalFaceMovement, ...
  239. 'timestamps', videoFrameTimes, ...
  240. 'data_unit', 'a.u.', ...
  241. 'control', uint8(acceptableSamples),...
  242. 'control_description', {'low quality samples that should be excluded from analyses';...
  243. 'acceptable quality samples'},...
  244. 'description', ['Z-scored change in the frame pixels'' content with respect to the previous frame.' ...
  245. 'It measures the total movement of objects inside the video.'] ...
  246. );
  247. behavioralTimeSeries = types.core.BehavioralTimeSeries('TimeSeries', totalFaceMovement);
  248. if ~exist('behaviorModule', 'var') || isempty(behaviorModule)
  249. behaviorModule = types.core.ProcessingModule('description', 'contains behavioral data');
  250. end
  251. behaviorModule.nwbdatainterface.set('BehavioralTimeSeries', behavioralTimeSeries);
  252. end
  253. if exist('behaviorModule', 'var') && ~isempty(behaviorModule)
  254. nwb.processing.set('behavior', behaviorModule);
  255. end
  256. % Save the NWB file for the session
  257. % If you encounter an hdf5lib2 error, you may need make sure that you are
  258. % not storing cell arrays of numbers in VectorData and that you delete
  259. % old NWB files rather than try overwriting them. For more details see
  260. % https://github.com/NeurodataWithoutBorders/matnwb/issues/462
  261. if iSess < 10
  262. nwbExport(nwb, [animalDerivedDataFolderNWB filesep 'ecephys_session_0' num2str(iSess) '.nwb']);
  263. else
  264. nwbExport(nwb, [animalDerivedDataFolderNWB filesep 'ecephys_session_' num2str(iSess) '.nwb']);
  265. end
  266. end
  267. % Read the NWB file
  268. % nwb2 = nwbRead('ecephys_session_01.nwb');
  269. % a = nwb2.units.getRow(1);
  270. %% Local functions
  271. function tbl = createElectrodeTable(nwb, input)
  272. % tbl = createElectrodeTable(nwb, input)
  273. %
  274. % Function creates an electrode table with the following columns:
  275. % channel_id: a unnique probe channel ID formed by combining session ID,
  276. % probe reference number, and channel number relative to the
  277. % tip of the probe.
  278. % channel_local_index: channel index relative to the tip of the probe.
  279. % Channel indices are only unique within a probe.
  280. % x: channel AP brain surface coordinate (probe inisertion location; mm).
  281. % y: channel ML brain surface coordinate (probe inisertion location; mm).
  282. % z: channel location relative to the tip of the probe in mm.
  283. % imp: channel impedance.
  284. % location: channel brain area location.
  285. % filtering: type of LFP filtering applied.
  286. % group: channel electrode group (e.g., shank 1). NWB documentation on
  287. % ElectrodeGroup datatype is provided in the following links:
  288. % https://nwb-schema.readthedocs.io/en/latest/format.html#electrodegroup
  289. % https://nwb-schema.readthedocs.io/en/latest/format.html#sec-electrodegroup-src
  290. % channel_label
  291. % probe_label.
  292. % The rows of the table correspond to individual recording channels.
  293. %
  294. % Input: nwb - nwb object.
  295. % input - structure with the following fields:
  296. % iElectrode: electrode reference number.
  297. % electrodeDescription: a cell array (n probes) with probe
  298. % desciptions.
  299. % electrodeManufacturer: a cell array of electrode manufacturers.
  300. % nShanks: a cell array of number of shanks.
  301. % nChannelsPerShank: a cell array of electrode number of
  302. % recording channels per shank.
  303. % electrodeLocation: a cell array (n channels) of channel brain
  304. % area locations.
  305. % electrodeCoordinates: a cell array (n probes) with recording
  306. % channel coordinate arrays (n channels by
  307. % 3).
  308. % sessionID: a string with the session ID.
  309. % electrodeLabel: a cell array (n probes) with probe labels.
  310. %
  311. % Output: tbl - a Matlab table object with rows and columns as described
  312. % above.
  313. % Parse input
  314. iEl = input.iElectrode;
  315. nSh = input.nShanks;
  316. nCh = input.nChannelsPerShank;
  317. % Create an table with given column labels
  318. variables = {'channel_id', 'channel_local_index', 'x', 'y', 'z', 'imp', 'location', 'filtering', 'group', 'channel_label', 'probe_label'};
  319. tbl = cell2table(cell(0, length(variables)), 'VariableNames', variables);
  320. % Register the probe device
  321. device = types.core.Device(...
  322. 'description', input.electrodeDescription{iEl}, ...
  323. 'manufacturer', input.electrodeManufacturer{iEl} ...
  324. );
  325. nwb.general_devices.set(['probe' num2str(iEl)], device);
  326. for iShank = 1:nSh{iEl}
  327. % Register a shank electrode group (only one because this is a single shank probe)
  328. electrode_group = types.core.ElectrodeGroup( ...
  329. 'description', ['electrode group for probe' num2str(iEl)], ...
  330. 'location', input.electrodeLocation{iEl}{end}, ...
  331. 'device', types.untyped.SoftLink(device), ...
  332. 'position', table(input.electrodeCoordinates{iEl}(1,1), ...
  333. input.electrodeCoordinates{iEl}(1,2), ...
  334. input.electrodeCoordinates{iEl}(1,3), ...
  335. 'VariableNames',{'x','y','z'}) ...
  336. );
  337. nwb.general_extracellular_ephys.set(['probe' num2str(iEl)], electrode_group);
  338. group_object_view = types.untyped.ObjectView(electrode_group);
  339. % Populate the electrode table
  340. for iCh = 1:nCh{iEl}
  341. if iCh < 10
  342. channelID = str2double([input.sessionID num2str(iEl) '00' num2str(iCh)]);
  343. elseif iCh < 99
  344. channelID = str2double([input.sessionID num2str(iEl) '0' num2str(iCh)]);
  345. else
  346. channelID = str2double([input.sessionID num2str(iEl) num2str(iCh)]);
  347. end
  348. channel_label = ['probe' num2str(iEl) 'shank' num2str(iShank) 'elec' num2str(iCh)];
  349. tbl = [tbl; ...
  350. {channelID, iCh, input.electrodeCoordinates{iEl}(iCh, 1), input.electrodeCoordinates{iEl}(iCh, 2), input.electrodeCoordinates{iEl}(iCh, 3),...
  351. NaN, input.electrodeLocation{iEl}{iCh}, 'unknown', group_object_view, channel_label, input.electrodeLabel{iEl}}]; %#ok<*AGROW>
  352. end
  353. end
  354. end
  355. function [spikes, metadataTbl, derivedData] = getSpikes(animalDerivedDataFile, animalID, sessionID, electrodeTbl)
  356. % getSpikes(animalDerivedDataFile, animalID, sessionID, electrodeTbl)
  357. %
  358. % Function loads Neuronexus spiking data from a MAT file with a custom data
  359. % structure. Input:
  360. % animalDerivedDataFile - a string with derived data file name or already
  361. % loaded data.
  362. % animalID - an animal ID string.
  363. % sessionID - a session of interest ID string.
  364. % electrodeTbl - a Matlab table with electrode information generated by
  365. % the function createElectrodeTable.
  366. % Output: spikes - a 1-by-n cell array (n units) with unit spike times in
  367. % seconds.
  368. % metadataTbl - a Matlab table with rows corresponding to
  369. % individual clusters (units) and columns to:
  370. % cluster_id: a unique cluster ID formed by combining session
  371. % ID, probe reference number, and unit cluster ID.
  372. % local_cluster_id: a unit cluster ID. This is only unique
  373. % within the probe.
  374. % type - activity type: single unit (unit) or multi-unit (mua).
  375. % channel_index: recording channel with the highest unit peak
  376. % index relative to the tip of the probe.
  377. % channel_id: a corresponding unnique probe channel ID formed by
  378. % combining session ID, probe reference number, and
  379. % channel number relative to the tip of the probe.
  380. % local_channel_id: a corresponding channel index relative to the
  381. % tip of the probe. Channel indices are only
  382. % unique within a probe.
  383. % rel_horz_position: relative horizontal position in um.
  384. % rel_vert_position: probe tip-relative vertical position in um.
  385. % isi_violations: interspike interval violations, a cluster
  386. % quality measure.
  387. % isolation_distance: cluster isolation distance, a cluster
  388. % quality measure.
  389. % area: unit brain area location.
  390. % probe_id: probe label.
  391. % electrode_group: channel electrode group (e.g., shank 1). NWB
  392. % documentation on ElectrodeGroup datatype is
  393. % provided in the following links:
  394. % https://nwb-schema.readthedocs.io/en/latest/format.html#electrodegroup
  395. % https://nwb-schema.readthedocs.io/en/latest/format.html#sec-electrodegroup-src
  396. % derivedData - animal data loaded from the MAT derived data file.
  397. % Data series names with different brain areas
  398. if ~isstruct(animalDerivedDataFile)
  399. derivedData = load(animalDerivedDataFile);
  400. else
  401. derivedData = animalDerivedDataFile;
  402. end
  403. dataSeriesNames = {};
  404. for iSeries = 1:11
  405. dataSeriesNames{iSeries} = [animalID '_s' sessionID num2str(iSeries)];
  406. end
  407. dataSeriesNames{iSeries+1} = [animalID '_s' sessionID];
  408. % Series data
  409. seriesDerivedData = {};
  410. for iSeries = 1:numel(dataSeriesNames)
  411. if isfield(derivedData.dataStruct.seriesData, dataSeriesNames{iSeries})
  412. seriesDerivedData{iSeries} = derivedData.dataStruct.seriesData.(dataSeriesNames{iSeries});
  413. srData = seriesDerivedData{iSeries}.conf.samplingParams.srData;
  414. else
  415. seriesDerivedData{iSeries} = [];
  416. end
  417. end
  418. % Series unit data
  419. seriesDerivedUnitData = {};
  420. for iSeries = 1:numel(dataSeriesNames)
  421. if ~isempty(seriesDerivedData{iSeries})
  422. seriesDerivedUnitData{iSeries} = seriesDerivedData{iSeries}.shankData.shank1;
  423. else
  424. seriesDerivedUnitData{iSeries} = [];
  425. end
  426. end
  427. % Series population data
  428. seriesDerivedPopulationData = {};
  429. for iSeries = 1:numel(dataSeriesNames)
  430. if ~isempty(seriesDerivedData{iSeries})
  431. seriesDerivedPopulationData{iSeries} = seriesDerivedData{iSeries}.popData;
  432. else
  433. seriesDerivedPopulationData{iSeries} = [];
  434. end
  435. end
  436. % Spike array
  437. sparseSpikes = [];
  438. for iSeries = 1:numel(dataSeriesNames)
  439. if ~isempty(seriesDerivedPopulationData{iSeries})
  440. sparseSpikes = concatenateMat(sparseSpikes, seriesDerivedPopulationData{iSeries}.spkDB);
  441. end
  442. end
  443. % Spike times
  444. nRows = size(sparseSpikes,1);
  445. if nRows
  446. timeVector = (1:size(sparseSpikes,2))./srData;
  447. for iUnit = 1:nRows
  448. spikes{iUnit} = timeVector(logical(full(sparseSpikes(iUnit,:)))); %#ok<*SAGROW>
  449. end
  450. else
  451. spikes = [];
  452. end
  453. % Unit metadata: [local_unit_id type local_probe_channel horizontal_position vertical_position ...
  454. % isi_violations isolation_distance anterior_posterior_ccf_coordinate ...
  455. % dorsal_ventral_ccf_coordinate left_right_ccf_coordinate]
  456. metadata = [];
  457. if nRows
  458. for iSeries = 1:numel(dataSeriesNames)
  459. if ~isempty(seriesDerivedPopulationData{iSeries})
  460. metadata = concatenateMat(metadata, seriesDerivedPopulationData{iSeries}.muaMetadata);
  461. end
  462. end
  463. end
  464. % Unit metadata: [metadata area]
  465. if nRows
  466. areas = {};
  467. for iSeries = 1:numel(dataSeriesNames)
  468. if ~isempty(seriesDerivedPopulationData{iSeries})
  469. if iSeries == 1
  470. areas = [areas; repmat({'S1'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  471. elseif iSeries == 2
  472. areas = [areas; repmat({'VB'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  473. elseif iSeries == 3
  474. areas = [areas; repmat({'Po'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  475. elseif iSeries == 4
  476. areas = [areas; repmat({'LP'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  477. elseif iSeries == 5
  478. areas = [areas; repmat({'DG'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  479. elseif iSeries == 6
  480. areas = [areas; repmat({'CA1'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  481. elseif iSeries == 7
  482. areas = [areas; repmat({'RSC'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  483. elseif iSeries == 8
  484. areas = [areas; repmat({'VB'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  485. elseif iSeries == 9
  486. areas = [areas; repmat({'LP'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  487. elseif iSeries == 10
  488. areas = [areas; repmat({'LGN'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  489. elseif iSeries == 11
  490. areas = [areas; repmat({'CA3'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  491. elseif iSeries == 12
  492. areas = [areas; repmat({'VB'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  493. end
  494. end
  495. end
  496. metadata = [num2cell(metadata) areas];
  497. end
  498. % Unit metadata: correct unit type
  499. if nRows
  500. type = {};
  501. for iSeries = 1:numel(dataSeriesNames)
  502. if ~isempty(seriesDerivedPopulationData{iSeries})
  503. units = ismember(seriesDerivedPopulationData{iSeries}.spkDB_units, seriesDerivedUnitData{iSeries}.units);
  504. typeArea = repmat({'mua'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1);
  505. typeArea(units) = {'unit'};
  506. type = [type; typeArea];
  507. end
  508. end
  509. metadata(:,2) = type;
  510. end
  511. % Unit metadata: [metadata probe_id]
  512. if nRows
  513. probeLabel = {};
  514. for iSeries = 1:numel(dataSeriesNames)
  515. if ~isempty(seriesDerivedPopulationData{iSeries})
  516. if iSeries == 1
  517. probeLabel = [probeLabel; repmat({'probe1'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  518. elseif iSeries == 2
  519. probeLabel = [probeLabel; repmat({'probe2'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  520. elseif iSeries == 3
  521. probeLabel = [probeLabel; repmat({'probe2'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  522. elseif iSeries == 4
  523. probeLabel = [probeLabel; repmat({'probe2'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  524. elseif iSeries == 5
  525. probeLabel = [probeLabel; repmat({'probe2'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  526. elseif iSeries == 6
  527. probeLabel = [probeLabel; repmat({'probe2'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  528. elseif iSeries == 7
  529. probeLabel = [probeLabel; repmat({'probe2'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  530. elseif iSeries == 8
  531. probeLabel = [probeLabel; repmat({'probe1'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  532. elseif iSeries == 9
  533. probeLabel = [probeLabel; repmat({'probe1'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  534. elseif iSeries == 10
  535. probeLabel = [probeLabel; repmat({'probe1'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  536. elseif iSeries == 11
  537. probeLabel = [probeLabel; repmat({'probe1'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  538. elseif iSeries == 12
  539. probeLabel = [probeLabel; repmat({'probe1'}, size(seriesDerivedPopulationData{iSeries}.muaMetadata,1), 1)];
  540. end
  541. end
  542. end
  543. metadata = [metadata probeLabel];
  544. end
  545. % Unit metadata: [unit_id metadata]
  546. if nRows
  547. unitIDs = zeros(nRows,1);
  548. for iUnit = 1:nRows
  549. if strcmpi(metadata{iUnit, end}, 'probe1')
  550. unitID = [num2str(sessionID) '1'];
  551. else
  552. unitID = [num2str(sessionID) '2'];
  553. end
  554. if metadata{iUnit, 1} < 9
  555. unitID = [unitID '000' num2str(metadata{iUnit, 1})];
  556. elseif metadata{iUnit, 1} < 99
  557. unitID = [unitID '00' num2str(metadata{iUnit, 1})];
  558. elseif metadata{iUnit, 1} < 999
  559. unitID = [unitID '0' num2str(metadata{iUnit, 1})];
  560. else
  561. unitID = [unitID num2str(metadata{iUnit, 1})];
  562. end
  563. unitIDs(iUnit) = str2double(unitID);
  564. end
  565. metadata = [num2cell(unitIDs) metadata];
  566. end
  567. % Unit metadata: [metadata(:,1:3) probe_channel_index probe_channel_id metadata(:,4:end)]
  568. if nRows
  569. channelIndices = zeros(nRows,1);
  570. channelIDs = zeros(nRows,1);
  571. electrodeGroups = {};
  572. for iUnit = 1:nRows
  573. ind = table2array(electrodeTbl(:,2)) == cell2mat(metadata(iUnit,4)) &...
  574. contains(table2array(electrodeTbl(:,end)), metadata(iUnit,end));
  575. channelIndices(iUnit) = find(ind);
  576. channelIDs(iUnit) = table2array(electrodeTbl(ind,1));
  577. electrodeGroups = [electrodeGroups; table2array(electrodeTbl(ind,9))];
  578. end
  579. metadata = [metadata(:,1:3) num2cell(channelIndices) num2cell(channelIDs) metadata(:,4:end)];
  580. end
  581. % Unit metadata: [metadata electrode_group]
  582. if nRows
  583. metadataTbl = table(metadata(:,1), metadata(:,2), metadata(:,3), metadata(:,4), ...
  584. metadata(:,5), metadata(:,6), metadata(:,7), metadata(:,8), ...
  585. metadata(:,9), metadata(:,10), metadata(:,11), metadata(:,12), electrodeGroups, ...
  586. 'VariableNames', {'cluster_id', 'local_cluster_id', 'type',...
  587. 'channel_index', 'channel_id', 'local_channel_id',...
  588. 'rel_horz_pos', 'rel_vert_pos', 'isi_violations',...
  589. 'isolation_distance', 'area', 'probe_id', 'electrode_group'});
  590. else
  591. metadataTbl = [];
  592. end
  593. end
  594. function [reshapedWaveformsMat, reshapedWaveformsVecGrp, reshapedWaveformsVec, waveformsMean, nWaveformSamples] = reshapeWaveforms(waveforms, iEl, metadata, nCh)
  595. % [reshapedWaveformsMat, reshapedWaveformsVecGrp, reshapedWaveformsVec, waveformsMean] = reshapeWaveforms(waveforms, iEl, metadata)
  596. %
  597. % Function extracts relevant waveform information and reshapes the waveform
  598. % array which is 3-dimensional with the first dimension being the unit, the
  599. % second being the sample point, and the third one being the recording
  600. % channel.
  601. % Input: waveforms - a strucuture loaded from the waveforms MAT file.
  602. % Relevant fields are waveforms (described above),
  603. % maxWaveforms (same as waveforms but excluding all
  604. % channels except for the maximum amplitude one), and
  605. % cluIDs (unit cluster IDs corresponding to the
  606. % dimension one in waveforms and maxWaveforms).
  607. % iEl - probe reference number.
  608. % metadata - a Matlab unit table produced by the function
  609. % getSpikes.
  610. % nCh - number of recording channels with waveforms for the same
  611. % unit.
  612. % Output: reshapedWaveformsMat - a 2D array reshaping waveforms.waveforms
  613. % array by collapsing the third dimension
  614. % and stacking all waveforms vertically one
  615. % unit after another.
  616. % reshapedWaveformsVecGrp - a column cell array reshaping
  617. % waveforms.waveforms array and grouping
  618. % all waveforms from the same unit
  619. % together. Cell array entries correspond
  620. % to individual units. The missing MUAs
  621. % correspond to empty cell arrays.
  622. % reshapedWaveformsVec - a cell array with rows from
  623. % reshapedWaveformsMat. Missing MUAs are
  624. % also included as empty cells times the
  625. % number of recording channels.
  626. % waveformsMean - waveforms.waveforms converted into a cell column
  627. % array. MUAs are NaNs.
  628. % nWaveformSamples - a total number of data sample points forming a
  629. % single waveform.
  630. nWaveformSamples = size(waveforms.maxWaveforms,2);
  631. if isfield(waveforms, 'waveforms')
  632. reshapedWaveformsMat = zeros(size(waveforms.waveforms,1)*size(waveforms.waveforms,3),size(waveforms.waveforms,2));
  633. else
  634. reshapedWaveformsMat = [];
  635. end
  636. reshapedWaveformsVecGrp = {};
  637. reshapedWaveformsVec = {};
  638. waveformsMean = {};
  639. metadataInds = ismember(table2cell(metadata(:,12)), ['probe' num2str(iEl)]);
  640. metadata = metadata(metadataInds,:);
  641. for iUnit = 1:size(metadata,1)
  642. if isfield(waveforms, 'cluIDs')
  643. row = find(ismember(waveforms.cluIDs, cell2mat(table2cell(metadata(iUnit,2)))));
  644. else
  645. row = [];
  646. end
  647. if isfield(waveforms, 'cluIDs') && sum(ismember(waveforms.cluIDs, cell2mat(table2cell(metadata(iUnit,2)))))
  648. unitWaveformMat = squeeze(waveforms.waveforms(row,:,:))';
  649. reshapedWaveformsMat((row-1)*size(waveforms.waveforms,3)+1:row*size(waveforms.waveforms,3),:) = unitWaveformMat;
  650. reshapedWaveformsVecGrp = [reshapedWaveformsVecGrp; {unitWaveformMat}];
  651. for iWave = 1:size(unitWaveformMat,1)
  652. reshapedWaveformsVec = [reshapedWaveformsVec; {unitWaveformMat(iWave,:)}];
  653. end
  654. waveformsMean = [waveformsMean; {waveforms.maxWaveforms(row,:)}];
  655. else
  656. reshapedWaveformsVecGrp = [reshapedWaveformsVecGrp; {[]}];
  657. if isfield(waveforms, 'waveforms')
  658. for iWave = 1:size(waveforms.waveforms,3)
  659. reshapedWaveformsVec = [reshapedWaveformsVec; {[]}];
  660. end
  661. else
  662. for iWave = 1:nCh
  663. reshapedWaveformsVec = [reshapedWaveformsVec; {[]}];
  664. end
  665. end
  666. waveformsMean = [waveformsMean; {nan(1,size(waveforms.maxWaveforms,2))}];
  667. end
  668. end
  669. end
  670. function acceptableSamples = markQualitySamples(acceptablePeriod, videoFrameTimes)
  671. % acceptableSamples = markQualitySamples(acceptablePeriod, videoFrameTimes)
  672. %
  673. % Function marks acceptable behavioural samples given the sample times and
  674. % the range of acceptable time periods.
  675. % Input: acceptablePeriod - a vector or a cell array of vectors marking the
  676. % beginning and end of acceptable time periods.
  677. % videoFrameTimes - a vector with sample times.
  678. % Ouptut: acceptableSamples - a logical vector marking acceptable samples
  679. % by ones.
  680. if isempty(acceptablePeriod) || isempty(videoFrameTimes)
  681. acceptableSamples = [];
  682. else
  683. acceptableSamples = false(size(videoFrameTimes));
  684. if iscell(acceptablePeriod)
  685. for iPeriod = 1:numel(acceptablePeriod)
  686. acceptableSamples(videoFrameTimes >= acceptablePeriod{iPeriod}(1) & videoFrameTimes <= acceptablePeriod{iPeriod}(2)) = true;
  687. end
  688. else
  689. acceptableSamples(videoFrameTimes >= acceptablePeriod(1) & videoFrameTimes <= acceptablePeriod(2)) = true;
  690. end
  691. end
  692. end