pRF_CreateParallel4LISA_worker_cv.m 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  function pRF_CreateParallel4LISA_worker_cv(...
      parallel_fun, joblist, parallel_fun_dir, job_name)
  % pRF_CreateParallel4LISA_worker_cv  Write SLURM job scripts for a pRF fit.
  %
  % Creates one executable bash script per (session, slice-chunk) combination
  % listed in joblist, plus one main bash script that submits each of them
  % with sbatch (LISA cluster @ SurfSara).
  %
  % Files are written locally to <parent of pwd>/Jobs/JOBS_<job_name>/ and a
  % local log folder <pwd>/Logs/ is created. The current working directory
  % is NOT changed by this function.
  %
  % IMPORTANT: The function runs on Linux: all paths must be Linux style.
  %
  % NOTE(review): earlier documentation stated that this version requests
  % 600MB of free memory on the host; no memory request appears among the
  % #SBATCH lines written below - confirm whether that statement still holds.
  %
  % PARAMETERS
  %   parallel_fun:     name of the function to run (e.g. 'example_function').
  %                     A trailing '.m' or '()' is stripped.
  %
  %   joblist:          struct describing the jobs. Fields read here:
  %                       monkey      - char, used in file names and data paths
  %                       type        - char, sub-folder of $HOME/PRF/Data
  %                       sessions    - cell array; column 1 holds the session
  %                                     name, column 2 a value passed to the
  %                                     runner in square brackets (presumably
  %                                     the number of workers - confirm)
  %                       sessinc     - row indices into sessions to include
  %                       slicechunks - cell array of char, one job per entry
  %                       hrf, modeltype, resfld - char, passed to the runner
  %                       xvalmode    - passed to the runner via num2str
  %
  %   parallel_fun_dir: path to parallel_fun; passed as the last argument to
  %                     the runner script.
  %
  % OPTIONAL
  %   job_name:         string to identify the job; used in the batch folder
  %                     name and in the job file names. If missing or empty,
  %                     the name of parallel_fun together with the current
  %                     date and time is used.
  %
  % ERRORS
  %   Raises an error on Windows, when a folder or file cannot be created,
  %   or when the user declines to overwrite an existing script.

  %% basic input checks -----------------------------------------------------
  % fail early, before anything is created on disk
  if ispc
      error('Windows will not work due to path definitions. Run on Linux')
  end

  % strip a trailing '.m' or '()' from the function name
  if length(parallel_fun) >= 2 && ...
          any(strcmp(parallel_fun(end-1:end), {'.m', '()'}))
      parallel_fun = parallel_fun(1:end-2);
  end

  %% set parameters ---------------------------------------------------------
  % default name for jobs
  if ~exist('job_name', 'var') || isempty(job_name)
      job_name = [parallel_fun '_' datestr(now, 'yyyymmddTHHMMSS')];
  end

  % project_dir on LISA
  project_dir = '/home/pcklink/PRF'; % must be the ABSOLUTE path
  % log dir on LISA (passed to the runner script)
  log_file_dir = [project_dir '/Logs/'];
  % local log folder
  log_file_dir_local = [pwd '/Logs/'];
  % job files are written locally next to the current folder; computed from
  % the parent of pwd instead of cd-ing around, so the caller's working
  % directory is left alone and the folder need not be called 'prfCode'
  batch_dir = fullfile(fileparts(pwd), 'Jobs', ['JOBS_' job_name]);

  %% location of scripts ----------------------------------------------------
  % runner script on the compute node; must be an ABSOLUTE path
  if strcmp(job_name, 'FitPRF_cv_dhrf')
      runner_name = 'pRF_run_analyzePRF_LISA_avg_dhrf.sh';
  else
      runner_name = 'pRF_run_analyzePRF_LISA_avg.sh';
  end
  execute_matlab_process_sh = ['$TMPDIR/PRF/BashScripts/' runner_name];

  %% PROCESSING STARTS FROM HERE (no more parameters to check) ==============
  %% create batch & log folder ----------------------------------------------
  disp('Creating batch & log folders')
  [success, message] = mkdir(batch_dir);
  if ~success
      error(['Could not create directory for batch_dir: ' message])
  end
  [success, message] = mkdir(log_file_dir_local);
  if ~success
      error(['Could not create directory for log_file_dir_local: ' message])
  end

  %% Check if main batch file already exists --------------------------------
  % overwrite mode: anything but 'a' means "ask for every existing file"
  overwrite_file = 'ask';
  disp('Creating batch files')

  % main sh-file that submits all jobs
  filename_all = ['send_all_prf-fitting_jobs_' joblist.monkey '.sh'];
  fullfilename_all = [batch_dir '/' filename_all];
  overwrite_file = local_confirm_overwrite( ...
      fullfilename_all, filename_all, overwrite_file);

  %% Create the batch files -------------------------------------------------
  % The main batch file handles passing the single job batch files to the
  % cluster scheduler.
  disp(['Creating main batch file: ' fullfilename_all])
  fid_commit_all = local_open_for_writing(fullfilename_all);
  % close the file even if an error is raised further down
  close_main = onCleanup(@() fclose(fid_commit_all));

  % ensure that the right shell is used
  fprintf(fid_commit_all, '#!/bin/bash\n');
  % explain in the file itself that it submits the jobs
  fprintf(fid_commit_all, '#\n');
  fprintf(fid_commit_all, ['# This bash-script submits all jobs to the server, '...
      'instead of running them locally.\n']);
  fprintf(fid_commit_all, ['# If you want to submit only some jobs to the server,'...
      'simply add a "#" in front of \n' ...
      '#the ones you like to ommit and execute the script then.\n']);
  fprintf(fid_commit_all, '#\n');
  fprintf(fid_commit_all, '\nmkdir -p $HOME/PRF/Logs/slurm\n');
  fprintf(fid_commit_all, 'cd $HOME/PRF/Logs/slurm\n');
  fprintf(fid_commit_all, 'chmod +x $HOME/PRF/Code/Jobs/*\n\n');

  % create all single job batch files and add a call for each one to the
  % main batch file
  for job_ind = 1:length(joblist.sessinc)
      session_row = joblist.sessinc(job_ind);
      session = joblist.sessions{session_row, 1};

      for job_ind2 = 1:length(joblist.slicechunks)
          chunk = joblist.slicechunks{job_ind2};

          %% create batchfile for current job -------------------------------
          filename = sprintf('run_job_Ses-%s_%s_%s_%s.sh', ...
              session, num2str(job_ind2), job_name, joblist.monkey);
          fullfilename = [batch_dir '/' filename];
          disp(['Creating Batch file for Job ' num2str(job_ind) '_' ...
              num2str(job_ind2) ': ' fullfilename])
          overwrite_file = local_confirm_overwrite( ...
              fullfilename, filename, overwrite_file);

          fid_single = local_open_for_writing(fullfilename);
          close_single = onCleanup(@() fclose(fid_single));

          % ==== SLURM ====
          fprintf(fid_single, '#!/bin/bash\n');
          % SLURM definitions
          fprintf(fid_single, '#SBATCH -N 1 --ntasks-per-node=16\n');
          fprintf(fid_single, '#SBATCH -t 48:00:00\n');
          fprintf(fid_single, '#SBATCH --mail-type=END\n');
          fprintf(fid_single, '#SBATCH --mail-user=p.c.klink@gmail.com\n');
          fprintf(fid_single, '\n');
          fprintf(fid_single, 'source ~/.bash_profile\n');
          fprintf(fid_single, 'source ~/.bashrc\n');
          fprintf(fid_single, 'umask u+rwx,g+rwx\n\n');

          % information echoed into the job output
          fprintf(fid_single, 'echo job id $SLURM_JOBID\n');
          fprintf(fid_single, 'echo job name $SLURM_JOB_NAME\n');
          fprintf(fid_single, 'echo submitted by $SLURM_JOB_ACCOUNT\n');
          fprintf(fid_single, 'echo from $SLURM_SUBMIT_DIR\n');
          fprintf(fid_single, 'echo the allocated nodes are: $SLURM_JOB_NODELIST\n');

          % comment line identifying the job; names go through %s so that
          % '%' or '\' in them cannot corrupt the format string
          fprintf(fid_single, '\n%s\n', ...
              ['# INFO: ' job_name '_' session '_' chunk]);
          fprintf(fid_single, '\n');

          % stage data and code on the node-local scratch folder
          fprintf(fid_single, 'mkdir -p $TMPDIR/PRF\n');
          fprintf(fid_single, 'mkdir -p $TMPDIR/PRF/Logs/\n');
          fprintf(fid_single, '%s\n', ['cp -r $HOME/PRF/Data/' joblist.type ...
              '/' joblist.monkey '/' session '* $TMPDIR/PRF']);
          fprintf(fid_single, '%s\n', ['cp -r $HOME/PRF/Data/mask/' ...
              joblist.monkey '/* $TMPDIR/PRF']);
          fprintf(fid_single, '%s\n', ['cp -r $HOME/PRF/Data/refhdr/' ...
              joblist.monkey '* $TMPDIR/PRF']);
          fprintf(fid_single, 'cp -r $HOME/PRF/Code/* $TMPDIR/PRF\n');
          fprintf(fid_single, 'cd $TMPDIR/PRF\n\n');
          fprintf(fid_single, '%s\n\n', ['chmod +x ' execute_matlab_process_sh]);

          % runner call, spread over continuation lines:
          % runner fun Monkey Session Slices HRF [sessions{:,2}] ...
          %        modeltype xvalmode resfld log_dir fun_dir
          run_cmd = sprintf( ...
              '%s \\\n\t%s %s %s %s %s [%s] \\\n\t%s %s %s %s %s \\\n\t', ...
              execute_matlab_process_sh, parallel_fun, ...
              joblist.monkey, session, ...
              chunk, joblist.hrf, ...
              num2str(joblist.sessions{session_row, 2}), ...
              joblist.modeltype, ...
              num2str(joblist.xvalmode), ...
              joblist.resfld, ...
              log_file_dir, parallel_fun_dir);
          logline = ['$TMPDIR/PRF/Logs/Log_' joblist.monkey '_' ...
              session '_' ...
              chunk '_' ...
              joblist.hrf '_' joblist.modeltype ...
              '_xval' num2str(joblist.xvalmode) '.txt'];
          fprintf(fid_single, '%s %s %s\n', run_cmd, '|& tee', logline);
          % remember the runner's own exit status: after the pipe '$?' is
          % the status of tee, and after the cp below it is the status of cp
          fprintf(fid_single, '%s\n\n', 'exit_status=${PIPESTATUS[0]}');
          fprintf(fid_single, '%s\n', ['cp ' logline ' $HOME/PRF/Logs/']);

          % finally: pass exit status of the runner script to LISA
          fprintf(fid_single, '%s\n', 'exit $exit_status');
          clear close_single % closes fid_single

          % NOTE(review): the submit line points at $HOME/PRF/Code/Jobs/,
          % while the file is written locally to Jobs/JOBS_<job_name>/ -
          % confirm that the upload step flattens the folder accordingly.
          disp(['Adding ' fullfilename ' to original batch file.']);
          fullfilename2 = ['$HOME/PRF/Code/Jobs/' filename];
          submit_cmd = sprintf('%s %s', 'sbatch ', fullfilename2);
          fprintf(fid_commit_all, '%s\n', submit_cmd);
      end
      fprintf(fid_commit_all, '\n');
  end
  clear close_main % closes fid_commit_all

  % make the main script executable; the path is quoted so that folders
  % containing spaces work
  [status, output] = system(['chmod +x "' fullfilename_all '"']);
  if status ~= 0
      warning('pRF_CreateParallel4LISA:chmodFailed', ...
          'Could not make %s executable: %s', fullfilename_all, output);
  end
  end

  function overwrite_file = local_confirm_overwrite(fullpath, shortname, overwrite_file)
  % Delete an existing file at fullpath, asking the user first unless the
  % overwrite mode is already 'a' (all). Returns the updated overwrite mode
  % and raises an error when the user does not answer 'y' or 'a'.
  if ~exist(fullpath, 'file')
      return
  end
  if ~strcmpi(overwrite_file, 'a')
      disp(' ')
      disp(['File ' fullpath ' already exist.'])
      overwrite_file = input('Should it be overwritten? [y, n, a (all)]: ', 's');
      if ~any(strcmpi(overwrite_file, {'y', 'a'}))
          error(['File ' shortname ' already exists and should not be '...
              'overwritten. Solve problem and start again.'])
      end
  end
  delete(fullpath)
  end

  function fid = local_open_for_writing(fullpath)
  % Open fullpath for writing and raise a descriptive error on failure
  % instead of returning an invalid file identifier.
  [fid, msg] = fopen(fullpath, 'w');
  if fid == -1
      error('pRF_CreateParallel4LISA:fopenFailed', ...
          'Could not open %s for writing: %s', fullpath, msg);
  end
  end