ery_4a_prep_s1_write_events_tsv.m 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. %% ery_4a_prep_s1_write_events_tsv
  2. %
  3. % This script reads logfiles, extracts the onsets, durations, and ratings for
  4. % different conditions, and writes events.tsv files to the BIDS dir for
  5. % each subject
  6. % It also contains an option to write a single phenotype file with
  7. % trial-by-trial ratings for all subjects
  8. %
  9. % USAGE
  10. %
  11. % Script should be run from the root directory of the superdataset, e.g.
  12. % /data/proj_discoverie
  13. % The script is highly study-specific, as logfiles will vary with design,
  14. % stimulus presentation software used, etc
  15. % Hence, it is provided in LaBGAScore as an example and needs to be
  16. % downloaded and adapted to the code subdataset for your study/project
  17. % This example is from LaBGAS proj_erythritol_4a
  18. % (https://gin.g-node.org/labgas/proj_erythritol_4a)
  19. %
  20. %
  21. % DEPENDENCIES
  22. %
  23. % LaBGAScore Github repo on Matlab path, with subfolders
  24. % https://github.com/labgas/LaBGAScore
  25. %
  26. %
  27. % INPUTS
  28. %
  29. % Presentation .log files in sourcedata dir for each subject
  30. %
  31. %
  32. % OUTPUTS
  33. %
  34. % events.tsv files for each run in BIDS dir for each subject
  35. % ratings_online.tsv file (name set by pheno_name) in BIDS/phenotype dir (optional)
  36. %
  37. %__________________________________________________________________________
  38. %
  39. % author: Lukas Van Oudenhove
  40. % date: December, 2021
  41. %
  42. %__________________________________________________________________________
  43. % @(#)% LaBGAScore_prep_s1_write_events_tsv.m v1.2
  44. % last modified: 2022/04/21
  %% DEFINE DIRECTORIES, SUBJECTS, RUNS, CONDITIONS, AND IMPORT OPTIONS
  %--------------------------------------------------------------------------

  % Project-specific directory/subject definitions live in a separate script.
  % NOTE(review): the code further down reads sourcesubjs, sourcesubjdirs,
  % BIDSsubjs, BIDSsubjdirs, BIDSdir and derivdir without defining them, so
  % they are presumably created here - confirm against that script.
  ery_4a_prep_s0_define_directories; % lukasvo edited from original LaBGAScore script to enable standalone functioning of proj_ery_4a dataset

  % SUBJECT SELECTION AND PHENOTYPE FILE
  % subjs2write: leave empty to process all subjects, or list a selection,
  %              e.g. {'sub-01','sub-02'}
  % pheno_tsv:   true to also write one file with trial-by-trial ratings; only
  %              honoured when subjs2write is empty (loop over all subjects)
  subjs2write = {};
  pheno_tsv = true;
  pheno_name = 'ratings_online.tsv';

  % RUNS AND TASK
  % runnames and logfilenames are matched by position: logfilenames{i} is the
  % dir() pattern of the logfile written out under runnames{i}
  runnames = { ...
      'run-1', 'run-2', 'run-3', ...
      'run-4', 'run-5', 'run-6'};
  logfilenames = { ...
      '*_run1.log', '*_run2.log', '*_run3.log', ...
      '*_run4.log', '*_run5.log', '*_run6.log'};
  taskname = 'sweettaste_';

  % LABELS AS THEY APPEAR IN THE Code VARIABLE OF THE LOGFILE
  % sweet_labels is matched by position with events_interest below
  sweet_labels = { ... % sweet substance delivery
      'sucrose delivery'; ...
      'erythritol delivery'; ...
      'sucralose delivery'; ...
      'control delivery'};
  swallow_rinse_labels = { ... % swallowing cue presented after delivery
      'sucrose_swallowing'; ...
      'erythritol_swallowing'; ...
      'sucralose_swallowing'; ...
      'control_swallowing'};
  rating_labels = { ... % start of rating period
      'Sucrose', 'Erythritol', 'Sucralose', 'Control'};
  fixation_labels = { ... % fixation cross
      'fixation_cross', ...
      'Sucrose fixation cross', ...
      'Erythritol fixation cross', ...
      'Sucralose fixation cross', ...
      'Control fixation cross'};

  % EVENT NAMES WRITTEN TO EVENTS.TSV
  events_interest = {'sucrose', 'erythritol', 'sucralose', 'water'}; % events of interest
  events_nuisance = {'swallow_rinse', 'rating'}; % nuisance events

  % LOGFILE LAYOUT
  % varNames and varTypes describe the logfile columns position by position;
  % selectedVarNames holds the column indices that are actually imported
  varNames = { ...
      'Trial', 'Event Type', 'Code', 'Time', 'TTime', 'Uncertainty', ...
      'Duration', 'Uncertainty', 'ReqTime', 'ReqDur', 'Stim Type', 'Pair Index'};
  selectedVarNames = [1 2 3 4 5 7 9];
  varTypes = { ...
      'double', 'categorical', 'categorical', 'double', 'double', 'double', ...
      'double', 'double', 'double', 'char', 'char', 'double'};
  delimiter = '\t';
  dataStartLine = 5; % line on which actual data starts in logfile
  extraColRule = 'ignore';

  % IMPORT OPTIONS PASSED TO READTABLE FOR EVERY LOGFILE
  opts = delimitedTextImportOptions( ...
      'VariableNames', varNames, ...
      'SelectedVariableNames', selectedVarNames, ...
      'VariableTypes', varTypes, ...
      'Delimiter', delimiter, ...
      'DataLines', dataStartLine, ...
      'ExtraColumnsRule', extraColRule);
  %% LOOP OVER SUBJECTS TO READ LOGFILES, CREATE TABLE WITH ONSETS AND DURATIONS, AND SAVE AS EVENTS.TSV TO BIDSSUBJDIRS
  %-------------------------------------------------------------------------------------------------------------------
  % For every selected subject and every run:
  %   1. locate the single logfile matching logfilenames{run}
  %   2. import it, express onsets relative to the first scanner pulse
  %   3. label events, parse ratings, compute durations
  %   4. write <subject>_task-<taskname><run>_events.tsv to the subject's func dir
  % When all subjects are processed (subjs2write empty) and pheno_tsv is true,
  % the rated trials of all subjects are additionally collected into one
  % phenotype file.

  % DETERMINE WHICH SUBJECTS TO LOOP OVER
  if ~isempty(subjs2write)
      subjs_missing = subjs2write(~ismember(subjs2write,sourcesubjs));
      if ~isempty(subjs_missing)
          error('\nsubject %s present in subjs2write not present in sourcesubjs, please check before proceeding',strjoin(subjs_missing(:)',', '));
      end
      [~,ia] = intersect(sourcesubjs,subjs2write);
      subs2loop = ia(:)';
      write_pheno = false; % phenotype file is only written when looping over all subjects
  else
      if ~isequal(sourcesubjs, BIDSsubjs)
          only_source = setdiff(sourcesubjs,BIDSsubjs);
          only_BIDS = setdiff(BIDSsubjs,sourcesubjs);
          error('\nsourcesubjs and BIDSsubjs do not match (only in sourcesubjs: %s; only in BIDSsubjs: %s), please check before proceeding',strjoin(only_source(:)',', '),strjoin(only_BIDS(:)',', '));
      end
      subs2loop = 1:size(sourcesubjs,1);
      write_pheno = pheno_tsv;
  end

  % PREPARE PHENOTYPE OUTPUT
  if write_pheno
      pheno_file = table();
      pheno_dir = fullfile(BIDSdir,'phenotype');
      if ~isfolder(pheno_dir)
          mkdir(pheno_dir);
      end
  end

  for sub = subs2loop

      if write_pheno
          pheno_file_subj = table();
      end

      % DEFINE SUBJECT LEVEL DIRS
      subjsourcedir = sourcesubjdirs{sub};
      subjBIDSdir = fullfile(BIDSsubjdirs{sub},'func');

      % LOOP OVER RUNS
      for run = 1:size(logfilenames,2)

          % LOCATE LOGFILE
          % count the dir() matches directly, so that zero and multiple
          % matches are both detected before a path is built
          logfilepattern = fullfile(subjsourcedir,'logfiles',logfilenames{run});
          logfilelist = dir(logfilepattern);
          logfilelist = logfilelist(~[logfilelist.isdir]);
          if isempty(logfilelist)
              warning('\nlogfile missing for run %d in %s, please check before proceeding',run,logfilepattern);
              continue
          elseif numel(logfilelist) > 1
              error('\nmore than one logfile with run index %d for %s, please check before proceeding',run,sourcesubjs{sub})
          end
          logfilepath = fullfile(subjsourcedir,'logfiles',logfilelist.name);

          % IMPORT LOGFILE AND DEFINE TIME ZERO
          evt = readtable(logfilepath,opts);
          evt = evt(~isnan(evt.Trial),:);
          time_zero = evt.Time(evt.Trial == 0 & evt.EventType == 'Pulse'); % time for onsets and durations is counted from the first scanner pulse onwards
          if isempty(time_zero)
              error('\nno scanner pulse with trial index 0 found in %s, please check logfile',logfilepath);
          elseif size(time_zero,1) > 1
              error('\nambiguity about time zero in %s%s, please check logfile',sourcesubjs{sub},logfilenames{run});
          end
          evt.TimeZero = evt.Time - time_zero;
          evt.onset = evt.TimeZero ./ 10000; % convert to seconds
          evt(evt.EventType == 'Pulse',:) = [];

          % LABEL EVENTS
          % rows whose Code matches none of the label sets keep an empty label,
          % which becomes <undefined> after conversion to categorical
          codes = cellstr(evt.Code);
          trial_type = repmat({''},height(evt),1);
          is_swallow = ismember(codes,swallow_rinse_labels);
          is_rating = ismember(codes,rating_labels) & ~is_swallow;
          [is_sweet,loc_sweet] = ismember(codes,sweet_labels);
          is_sweet = is_sweet & ~is_swallow & ~is_rating;
          is_fixation = ismember(codes,fixation_labels) & ~is_swallow & ~is_rating & ~is_sweet;
          trial_type(is_swallow) = events_nuisance(1);
          trial_type(is_rating) = events_nuisance(2);
          trial_type(is_sweet) = events_interest(loc_sweet(is_sweet)); % sweet_labels and events_interest are matched by position
          trial_type(is_fixation) = {'fixation'};
          evt.onset(is_rating) = evt.onset(is_rating) + 4; % rating onsets are shifted by 4 (same units as onset); NOTE(review): reason for the offset is not visible in this script - confirm
          evt.trial_type = categorical(trial_type);

          % PARSE RATINGS
          % rows whose Code contains 'score' carry the rating at the end of
          % the string; all other rows get NaN
          rating = NaN(height(evt),1);
          for r = find(contains(codes,'score','IgnoreCase',true))'
              scorestring = codes{r};
              if strcmp(scorestring(1,end-3:end),'-100')
                  rating(r) = str2double(scorestring(1,end-3:end));
              elseif ~contains(scorestring(1,end-2:end),':')
                  rating(r) = str2double(strtrim(scorestring(1,end-2:end)));
              else
                  rating(r) = str2double(scorestring(1,end));
              end
          end
          evt.rating = rating;

          % ATTACH RATINGS TO EVENTS OF INTEREST
          % keep labelled events and score rows only; each event of interest
          % then takes the rating found three rows further down
          evt = evt((~isundefined(evt.trial_type) | ~isnan(evt.rating)),:);
          idx_interest = find(ismember(evt.trial_type,events_interest));
          if any(idx_interest + 3 > height(evt))
              error('\nfewer than 3 events follow the last event of interest in %s, so its rating cannot be looked up, please check logfile',logfilepath);
          end
          evt.rating(idx_interest) = evt.rating(idx_interest + 3);

          evt = removevars(evt,{'Trial','EventType','Code','Time','TTime','Duration','ReqTime','TimeZero'}); % get rid of junk variables from logfile we don't need
          evt = evt(~isundefined(evt.trial_type),:);

          % CALCULATE DURATIONS
          % duration = onset of the next remaining event minus own onset;
          % fixation rows get NaN and are dropped
          is_fix = (evt.trial_type == 'fixation');
          if height(evt) > 0 && ~is_fix(end)
              error('\nlast event in %s is not a fixation cross, so its duration cannot be calculated, please check logfile',logfilepath);
          end
          evt.duration = [evt.onset(2:end); NaN] - evt.onset;
          evt.duration(is_fix) = NaN;
          evt = evt(~isnan(evt.duration),:);
          evt = evt(evt.rating~=-100,:); % lukasvo76 added to original LaBGAScore script - trials with -100 ratings need to be removed since subjects were instructed to use this in case of failed solution delivery

          % WRITE EVENTS.TSV
          filename = fullfile(subjBIDSdir,[sourcesubjs{sub},'_task-',taskname,runnames{run},'_events.tsv']);
          writetable(evt,filename,'Filetype','text','Delimiter','\t');

          % COLLECT RATED TRIALS FOR PHENOTYPE FILE
          if write_pheno
              log2 = evt(~isnan(evt.rating),:);
              log2 = removevars(log2,{'onset','duration'});
              ntrials = height(log2);
              log2.participant_id = repmat(BIDSsubjs{sub},ntrials,1);
              log2.run_id = repmat(run,ntrials,1);
              log2.trial_id_run = (1:ntrials)'; % generates consecutive trial numbers within each run
              log2.trial_id_concat = height(pheno_file_subj) + (1:ntrials)'; % generates consecutive trial numbers over all conditions & runs
              log2.trial_id_cond_run = zeros(ntrials,1);
              log2.trial_id_cond_concat = zeros(ntrials,1);
              for o = 1:size(events_interest,2)
                  idx_run = (log2.trial_type == events_interest{o});
                  if height(pheno_file_subj) > 0
                      n_prev = sum(pheno_file_subj.trial_type == events_interest{o}); % trials of this condition in earlier runs
                  else
                      n_prev = 0;
                  end
                  log2.trial_id_cond_run(idx_run) = (1:sum(idx_run))'; % consecutive trial numbers per condition within run
                  log2.trial_id_cond_concat(idx_run) = n_prev + (1:sum(idx_run))'; % consecutive trial numbers per condition over runs
                  clear idx_run n_prev
              end
              pheno_file_subj = [pheno_file_subj;log2];
              clear log2 ntrials
          end

          clear logfilepattern logfilelist logfilepath evt codes trial_type rating time_zero filename

      end % for loop runs

      if write_pheno
          pheno_file = [pheno_file;pheno_file_subj];
          clear pheno_file_subj;
      end

  end % for loop subjects

  % WRITE PHENOTYPE FILE
  if write_pheno
      pheno_filename = fullfile(pheno_dir,pheno_name);
      writetable(pheno_file,pheno_filename,'Filetype','text','Delimiter','\t');
  end