matchfiles.m 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  function f = matchfiles(patterns,sorttype)

  % function f = matchfiles(patterns,sorttype)
  %
  % <patterns> is
  %   (1) a string that matches zero or more files or directories (wildcards '*' okay)
  %   (2) the empty matrix []
  %   (3) a cell vector of zero or more things like (1) or (2)
  % <sorttype> (optional) controls the ordering of the result:
  %   't' means sort by time (newest first)
  %   'tr' means sort by time (oldest first)
  %   default is [], which means to sort alphabetically using MATLAB's sort function.
  %   (note that MATLAB's sort function may sort differently than UNIX's ls function does!)
  %
  % return whatever fullfilematch (the subfunction in this file) returns for
  % <patterns>: paths to the matched files and/or directories, or [] when
  % <patterns> is empty. the matching, de-duplication and sorting are all done
  % by fullfilematch; this wrapper only normalizes <sorttype> and always
  % requests the default (case-sensitive) matching.
  %
  % NOTE(review): this wrapper itself does not issue a warning when a pattern
  % has no matches. also, the Windows / too-many-files limitations that were
  % documented for the earlier ls-based implementation may no longer apply,
  % since matching is now delegated to fullfilematch -- confirm before relying
  % on either.
  %
  % history:
  % 2017/01/31 - switch to using Keith Jamison's fullfilematch.m implementation
  % 2011/09/28 - if ls returns too many files, resort to alternative. also, the alternative mode now allows sorttype to be specified.
  % 2011/08/07 - allow empty matrix as an input
  % 2011/04/02 - now, works on Windows (in a limited way)
  % 2011/04/02 - oops, time-sorting behavior did not work. bad bug!!!
  % 2011/02/24 - escape spaces in patterns using \. this fixes buggy behavior.
  % 2011/01/21 - explicitly use MATLAB's sort function to ensure consistency across platforms.

  % normalize the optional input: a missing or empty <sorttype> becomes []
  if nargin < 2 || isempty(sorttype)
    sorttype = [];
  end

  % delegate the actual work ([] selects the default case sensitivity)
  f = fullfilematch(patterns,[],sorttype);
  42. %%%%%%%%%%%%%%%% CLONE OF fullfilematch.m from Keith Jamison
  function files = fullfilematch(filestrings,case_sensitive,sorttype)
  % function files = fullfilematch(filestrings,[case_sensitive=true],[sorttype=''])
  %
  % Find files with wildcard matching.
  %
  % Inputs:
  %   filestrings: string or cell array of strings with path(s) to search for
  %                - Paths can include ? or * wildcards anywhere in string
  %                - Empty entries of a cell array are skipped
  %   case_sensitive (optional): Use case sensitive search? default=true
  %                - May also be a string: 'ignorecase' means false, any
  %                  other string means true
  %   sorttype (optional): ''     = alphabetical (default)
  %                        't'    = newest->oldest
  %                        'tr'   = oldest->newest
  %                        'none' = keep the order produced by unique()
  %
  % Outputs:
  %   files: cell array of UNIQUE matching filenames
  %          ([] if filestrings is empty or contains only empty entries)
  %
  % Example:
  % > F=fullfilematch('~/somedir*/*.mat')
  % F =
  %   '~/somedir/run1.mat'
  %   '~/somedir/run2.mat'
  %   '~/somedir/run3.mat'
  %   '~/somedirA/run1.mat'
  %   '~/somedirB/run1.mat'

  % KJ Update 10/18/2016: Overhaul to allow wildcards in middle of path, and
  %   to add sorting options (for use with cvnlab code)
  % KJ Update 12/14/2016: Assume default directory='.' (pwd)

  if(nargin < 2 || ~exist('case_sensitive','var') || isempty(case_sensitive))
    case_sensitive = true;
  end
  if(nargin < 3 || ~exist('sorttype','var') || isempty(sorttype))
    sorttype = '';
  end

  if(ischar(case_sensitive))
    % any string other than 'ignorecase' selects case-sensitive matching
    case_sensitive = ~strcmpi(case_sensitive,'ignorecase');
  end

  if(~iscell(filestrings))
    filestrings = {filestrings};
  end

  % drop empty entries (e.g. [] or '' inside a cell vector) up front; they
  % cannot match anything and the path functions below reject non-text input
  filestrings = filestrings(~cellfun(@isempty,filestrings));

  if(isempty(filestrings))
    files = [];
    return;
  end

  %make sure we can handle '\' filesep for Windows
  if(isequal(filesep,'\'))
    fsep='[/\\]';
  else
    fsep=filesep;
  end

  %% handle wildcards in the middle of path
  % eg: expand {'/data/experiment*/*.mat'}
  %   -> {'/data/experiment1/*.mat'
  %       '/data/experiment2/*.mat'
  %       '/data/experiment3/*.mat'}
  filestrings0={};
  for f = 1:numel(filestrings)
    filestr = filestrings{f};

    if(isdir(filestr) || ~any(ismember(filestr,'*?')))
      %existing directory, or nothing to expand: pass through unchanged
      files_tmp = {filestr};
    else
      fparts=regexp(filestr,fsep,'split');
      if(numel(fparts)==1)
        %if no directory separators in input, pass directly to next
        %step (eg: input is '*' or '*.mat')
        files_tmp=fparts;
        filestrings0=[filestrings0; files_tmp(:)];
        continue;
      end

      %if first character is '/', keep a '/' at the beginning of the new
      %string
      if(~isempty(regexp(filestr(1),fsep))) %#ok<RGXP1>
        files_tmp={'/'};
      else
        files_tmp={''};
      end

      %loop through DIRECTORIES in path. whenever we encounter a
      % wildcard, call fullfilematch on the parent directory to find
      % matching subdirectories, possibly returning multiple new
      % directories for the next level of the path (this is OK since
      % both fullfilematch() and strcat() can accept strings or cell
      % arrays of strings)
      for p = 1:numel(fparts)-1
        if(isempty(fparts{p}))
          %empty part comes from a leading or doubled separator
          continue;
        end
        if(any(ismember(fparts{p},'*?')))
          files_tmp=fullfilematch(strcat(files_tmp,fparts{p}),case_sensitive);
        else
          files_tmp=strcat(files_tmp,fparts{p});
        end
        if(isempty(files_tmp))
          %nothing matched at this level, so nothing can match below it
          break;
        end
        files_tmp=strcat(files_tmp,'/');
      end

      if(~isempty(files_tmp))
        %prune final list to only include directories, then tack on the
        % filename part (which may include wildcards) to all, before
        % continuing on to the file-name wildcard search
        files_tmp=files_tmp(cellfun(@isdir,files_tmp));
        files_tmp=strcat(files_tmp,fparts{end});
      end
    end
    filestrings0=[filestrings0; files_tmp(:)];
  end

  % new filestrings is a cell array that may include many more entries than
  % the input if there were directory wildcards
  filestrings=filestrings0;

  %% main filename wildcard matching for each filestring
  % (only operates on the last path element., ie: the file name)
  % eg: expand {'/data/experiment/*.mat'}
  %   -> {'/data/experiment/run1.mat'
  %       '/data/experiment/run2.mat'}
  files = cell(0,1);
  filedates = cell(0,1);
  for f = 1:numel(filestrings)
    [files_tmp,filedates_tmp] = aux_fullfilematch(filestrings{f},case_sensitive);
    if(isempty(files_tmp))
      %the helper returns [] for "no match"; skip it so that the
      %accumulators always stay cell arrays
      continue;
    end
    if(~iscell(filedates_tmp))
      %the helper may hand back a bare numeric date (directory case);
      %store every date as one cell per file
      filedates_tmp = num2cell(filedates_tmp);
    end
    files=[files; files_tmp(:)];
    filedates=[filedates; filedates_tmp(:)];
  end

  % remove duplicate filenames
  [~,iu] = unique(files);
  files=files(iu);
  filedates=filedates(iu);

  % sort by filename or by date
  if strcmpi(sorttype,'t')
    [~,ii] = sort(cat(2,filedates{:}),2,'descend');
  elseif strcmpi(sorttype,'tr')
    [~,ii] = sort(cat(2,filedates{:}));
  elseif strcmpi(sorttype,'none')
    ii=1:numel(files);
  else
    [~,ii] = sort(cat(2,files));
  end

  files = files(ii);
  182. %% helper function that does the work to match individual filestrings
  183. % returns filenames and dates to allow date-sorting in main function
  function [files,filedates] = aux_fullfilematch(filestr,case_sensitive)
  % function [files,filedates] = aux_fullfilematch(filestr,case_sensitive)
  %
  % Match a single path whose LAST element may contain wildcards.
  %
  % Inputs:
  %   filestr: path string. '*' (any run of characters) and '?' (any single
  %            character) are honored in the final path element only.
  %            Characters . + $ | { } ( ) in that element are matched
  %            literally; square brackets are passed through to the regular
  %            expression unchanged.
  %   case_sensitive: true for case-sensitive name matching, false otherwise
  %
  % Outputs:
  %   files: column cell vector of matched paths ([] if nothing matched).
  %          If filestr is itself an existing directory, files is {filestr}.
  %   filedates: column cell vector of datenum values, one per entry of
  %          files ([] if nothing matched). For a directory this is the date
  %          of its '.' entry, or NaN if dir() lists no '.' entry.

  if(isdir(filestr))
    files = {filestr};
    filestruct=dir(filestr);
    %pretty sure '.' is always first, but just in case....
    i=find(strcmp({filestruct.name},'.'),1,'first');
    if(isempty(i))
      %no '.' entry in the listing, so the directory date is unknown
      filedates={NaN};
    else
      filedates={filestruct(i).datenum};
    end
    return;
  end

  [filedir,fname,fext] = fileparts(filestr);

  %translate the wildcard expression into an anchored regular expression.
  %literal regex metacharacters must be escaped BEFORE the wildcards are
  %expanded, otherwise the '.' in '*.mat' would match any character.
  fpattern = regexprep([fname fext],'([.+$|{}()])','\\$1');
  fpattern = strrep(fpattern,'*','.*');
  fpattern = strrep(fpattern,'?','.');
  fpattern = ['^' fpattern '$'];

  %with no directory part, search the current directory but report bare
  %file names (without a './' prefix)
  removeprefix='';
  if(isempty(filedir))
    filedir='.';
    removeprefix='./';
  end

  filestruct = dir(filedir);
  if(numel(filestruct) == 1 && filestruct(1).isdir)
    %dir() returned a single directory entry rather than a listing;
    %rebuild the directory path from that entry and list its contents
    [filedir2,~,~] = fileparts(filedir);
    if(~isempty(filedir2) && filedir2(end)~='/')
      filedir2=[filedir2 '/'];
    end
    filedir = strcat(filedir2,filestruct(1).name);
    if(~isdir(filedir))
      files=[];
      filedates=[];
      return;
    end
    filestruct = dir(filedir);
  end

  if(isempty(filestruct))
    files = [];
    filedates=[];
    return;
  end

  filenames = {filestruct.name};
  filedates = {filestruct.datenum};

  %discard entries made up only of dots ('.' and '..')
  notdots=~cellfun(@(x)(all(x=='.')),filenames);
  filenames = filenames(notdots);
  filedates = filedates(notdots);

  if(case_sensitive)
    fmatch=~cellfun(@isempty,regexp(filenames,fpattern));
  else
    fmatch=~cellfun(@isempty,regexpi(filenames,fpattern));
  end

  filenames = filenames(fmatch);
  filedates = filedates(fmatch);

  if(isempty(filenames))
    files = [];
    filedates=[];
    return;
  end

  if(filedir(end)~='/')
    filedir=[filedir '/'];
  end

  if(~isempty(removeprefix) && strcmp(removeprefix,filedir))
    %pattern had no directory part: return names relative to pwd
    files=filenames(:);
  else
    files=strcat(filedir,filenames);
    files=files(:);
  end
  filedates=filedates(:);