permutest.m 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. function [clusters, p_values, t_sums, permutation_distribution ] = permutest( trial_group_1, trial_group_2, dependent_samples, ...
  2. p_threshold, num_permutations, two_sided, num_clusters )
  3. % Permutation test for dependent or independent measures of 1-D or 2-D data.
  4. % Based on Maris & Oostenveld 2007 for 1-D and 2-D vectors. The test
  5. % statistic is T-Sum - the total of t-values within a cluster of contingent
  6. % above-threshold data points.
  7. % See: Maris, E., & Oostenveld, R. (2007). Nonparametric statistical
  8. % testing of EEG-and MEG-data. Journal of Neuroscience Methods, 164(1),
  9. % 177–190. https://doi.org/10.1016/j.jneumeth.2007.03.024
  10. %
  11. % Important notes:
  12. % * Make sure you understand whether you should be using a test of
  13. % dependent or independent samples (a within or between subjects test,
  14. % respectively). Also make sure you know if you want a one-sided or
  15. % two-sided test. e
  16. % * The number of permutation sets a minimal boundary for the resulting
  17. % p-value. This boundary is 1/(nP+1). If a very low p-value is needed, e.g.
  18. % to survive a multiple-comparisons correction, use a sufficiently high
  19. % number of permutations (this is also necessary in order to estimate the
  20. % p-value with sufficient accuracy unless there is a very small number of
  21. % trials).
  22. % * When runnning a two-sided test, there is no need to retroactively
  23. % correct the p-value, as this more lenient assumption is already reflected
  24. % in the way the null-hypothesis distribution is constructed.
  25. %
  26. % Syntax:
  27. % [clusters, p_values, t_sums, permutation_distribution ] = PERMUTEST(x,y)
  28. % runs a cluster-based permutation based for dependent samples, testing for
  29. % differences between x and y. x and y should be 2D or 3D matrices, where
  30. % the last dimension is the dimension across which the test variance is
  31. % defined (that is, trials or subjects). clusters is a cell array with each
  32. % cell holding the vector/matrix indexes corresponding to a specific
  33. % cluster (sorted by magnitude). p_values is the permutation p-value
  34. % corresponding to each cluster. t_sums is the sum of t-values of all data
  35. % points comprising each cluster. distribution is the T-Sum permutation
  36. % distribution (note that it will include many zero values; these
  37. % correspond to all the permutations where no above-threshold clusters were
  38. % found).
  39. % PERMUTEST(x,y,d), where d is set to true or false, determines whether the
  40. % test is for dependent samples. If set to false, it is assumed that x and
  41. % y are independent (non-paired) data sets. In this case, x and y can have
  42. % a different number of trials (though all other dimensions should be equal
  43. % in size). Default value is true.
  44. % PERMUTEST(x,y,d,p) sets p as the p-value threshold
  45. % below which a data point is part of a cluster. Lower values mean that the
  46. % test is sensitive to narrow, stronger effects; higher values make the
  47. % test sensitive to broad, weaker effects. The p-value is translated to a
  48. % t-value for the purpose of this test.
  49. % PERMUTEST(x,y,d,p,nP) sets nP as the number of permutations. The function
  50. % will check whether this number can be supported for the given number of
  51. % trials and test type.
  52. % PERMUTEST(x,y,d,p,nP,t), where t is set to true or false, determines
  53. % whther the test is a two-sided test. If set to true, negative differences
  54. % will be detected as well. Default value is false (one-sided test).
  55. % PERMUTEST(x,y,d,p,nP,t,nC) sets nC as the maximal number of significant
  56. % clusters to be detected. Default value is inf (all existing clusters will
  57. % be tested against the H0 distribution).
  58. %
  59. % Written by Edden M. Gerber, lab of Leon Y. Deouell, 2014
  60. % Send bug reports and requests to edden.gerber@gmail.com
  61. % INITIALIZE
  62. % Set optional arguments:
  63. if nargin < 7 || isempty(num_clusters)
  64. num_clusters = inf;
  65. end
  66. if nargin < 6 || isempty(two_sided)
  67. two_sided = false;
  68. end
  69. if nargin < 5 || isempty(num_permutations)
  70. num_permutations = 10^4;
  71. end
  72. if nargin < 4 || isempty(p_threshold)
  73. p_threshold = 0.05;
  74. end
  75. if nargin < 3 || isempty(dependent_samples)
  76. dependent_samples = true;
  77. end
  78. if nargin < 2
  79. error('Not enough input arguments');
  80. end
  81. % Check input dimensions:
  82. if ismatrix(trial_group_1)
  83. [num_data_points_1_x, num_trials_1] = size(trial_group_1);
  84. num_data_points_1_y = 1;
  85. elseif ndims(trial_group_1) == 3
  86. [num_data_points_1_x, num_data_points_1_y, num_trials_1] = size(trial_group_1);
  87. else
  88. error('Input variable "trial_group_1" needs to be 2D or 3D');
  89. end
  90. if ismatrix(trial_group_2)
  91. [num_data_points_2_x, num_trials_2] = size(trial_group_2);
  92. num_data_points_2_y = 1;
  93. elseif ndims(trial_group_2) == 3
  94. [num_data_points_2_x, num_data_points_2_y, num_trials_2] = size(trial_group_2);
  95. else
  96. error('Input variable "trial_group_2" needs to be 2D or 3D');
  97. end
  98. if dependent_samples
  99. num_trials = num_trials_1;
  100. if ~isequal(num_data_points_1_x,num_data_points_2_x) || ~isequal(num_data_points_1_y,num_data_points_2_y) ...
  101. || ~isequal(num_trials_1, num_trials_2)
  102. error('Size of all dimensions should be identical for two dependent samples');
  103. end
  104. else
  105. if ~isequal(num_data_points_1_x,num_data_points_2_x) || ~isequal(num_data_points_1_y,num_data_points_2_y)
  106. error('Size of all dimensions but the last one (corresponding to the number of trials) should be identical for two independent samples');
  107. end
  108. end
  109. num_data_points_x = num_data_points_1_x;
  110. num_data_points_y = num_data_points_1_y;
  111. % Check that number of requested permutations is possible:
  112. if dependent_samples
  113. % In each permutation, each paired sample could be flipped, meaning
  114. % that there are 2^num_trials possible permutation
  115. max_num_permutations = 2^num_trials;
  116. if num_permutations > max_num_permutations
  117. warning('With %d paired trials, only %d permutations are possible. Using this value instead of %d.',...
  118. num_trials, max_num_permutations, num_permutations);
  119. num_permutations = max_num_permutations;
  120. end
  121. else
  122. % For independent samples, the number of possible permutations is
  123. % nchoosek(num_trials_1+num_trials_2, num_trials_1) - since it is
  124. % assumed that the same trial group sizes are maintained across all
  125. % permutations.
  126. warning('off','MATLAB:nchoosek:LargeCoefficient'); % no need to alarm anyone with this warning
  127. max_num_permutations = nchoosek(num_trials_1+num_trials_2, num_trials_1);
  128. warning('on','MATLAB:nchoosek:LargeCoefficient')
  129. if num_permutations > max_num_permutations
  130. warning('With %d trials in group1 and %d trials in group2, only %d permutations are possible. Using this value instead of %d',...
  131. num_trials_1, num_trials_2, max_num_permutations, num_permutations);
  132. num_permutations = max_num_permutations;
  133. end
  134. end
  135. % Initialize output variables
  136. clusters = cell(1);
  137. p_values = ones(1);
  138. % Compute t-value threshold from p-value threshold
  139. if dependent_samples
  140. tThreshold = abs(tinv(p_threshold, num_trials-1));
  141. else
  142. tThreshold = abs(tinv(p_threshold, num_trials_1+num_trials_2-1));
  143. end
  144. % PRODUCE PERMUTATION VECTORS
  145. if num_permutations < max_num_permutations / 1000
  146. % there are at least 1000 times more possible permutations than the
  147. % requested number, so draw permutation patterns randomly.
  148. if dependent_samples
  149. % Dependent samples: randomly generate vectors of 1's and -1's. In
  150. % each permutation, each pair's difference will be multipled by
  151. % either 1 or -1.
  152. permutation_vectors = round(rand(num_trials,num_permutations)) * 2 - 1;
  153. else
  154. % Independent samples: randomly generate vectors of 1's and 2's. In
  155. % each pemutation, this vector will dictate to which group each
  156. % trial belongs (the number of trials from each group is identical
  157. % to that of the original samples).
  158. permutation_vectors = ones(num_trials_1+num_trials_2, num_permutations);
  159. for p = 1:num_permutations
  160. idx = randperm(num_trials_1+num_trials_2, num_trials_2);
  161. permutation_vectors(idx, p) = 2;
  162. end
  163. end
  164. else
  165. % The number of requested permutations is close to the maximum which
  166. % can be produced by the number of trials, so permutation sequences are
  167. % not randomly drawn but generated explicitly without repetition.
  168. if dependent_samples
  169. % Dependent samples: randomly generate vectors of 1's and -1's. In
  170. % each permutation, each pair's difference will be multipled by
  171. % either 1 or -1.
  172. % Initialize variable:
  173. permutation_vectors = NaN(num_trials,num_permutations);
  174. % generate a non-repeating list of permutations by taking a list of
  175. % non-repating numbers (up to nPermutations), and translating them
  176. % into binary (0's and 1's):
  177. rndB = dec2bin(randperm(2^num_trials,num_permutations)-1);
  178. % if the leading bit of all the numbers is 0, it will be truncated, so
  179. % we need to fill it in:
  180. nBits = size(rndB,2);
  181. if nBits < num_trials
  182. rndB(:,(num_trials-nBits+1):num_trials) = rndB;
  183. rndB(:,1:num_trials-nBits) = '0';
  184. end
  185. % translate the bits into -1 and 1:
  186. for ii=1:numel(permutation_vectors)
  187. permutation_vectors(ii) = str2double(rndB(ii)) * 2 - 1;
  188. end
  189. else
  190. % Independent samples: randomly generate vectors of 1's and 2's. In
  191. % each pemutation, this vector will dictate to which group each
  192. % trial belongs (the number of trials from each group is identical
  193. % to that of the original samples).
  194. % Initialize variable:
  195. permutation_vectors = ones(num_trials_1+num_trials_2,num_permutations);
  196. % Generate all possible combinations of num_trials_2 out of
  197. % (num_trials_1+num_trials_2) entries
  198. idx_matrix = nchoosek(1:(num_trials_1+num_trials_2),num_trials_2);
  199. % Randomly draw num_permutations out of them
  200. idx_matrix = idx_matrix(randperm(size(idx_matrix,1),num_permutations),:);
  201. % Generate the vectors of 1's and 2's using the above
  202. for p=1:num_permutations
  203. permutation_vectors(idx_matrix(p,:),p) = 2;
  204. end
  205. end
  206. end
  207. % RUN PRIMARY PERMUTATION
  208. t_value_vector = zeros(num_data_points_x,num_data_points_y);
  209. if dependent_samples
  210. % Dependent samples: one-sample t-test of the difference between the
  211. % two samples (compared to zero)
  212. if num_data_points_y == 1
  213. % one-dimensional trial data
  214. t_value_vector = simpleTTest(trial_group_1'-trial_group_2',0);
  215. else
  216. % two-dimensional trial data
  217. for ii = 1:num_data_points_y
  218. t_value_vector(:,ii) = simpleTTest(squeeze(trial_group_1(:,ii,:)-trial_group_2(:,ii,:))',0);
  219. end
  220. end
  221. else
  222. % Independent samples: two-sample t-test of the difference between the
  223. % 2 sample means
  224. if num_data_points_y == 1
  225. % one-dimensional trial data
  226. t_value_vector = simpleTTest2(trial_group_1',trial_group_2');
  227. else
  228. % two-dimensional trial data
  229. for ii = 1:num_data_points_y
  230. t_value_vector(:,ii) = simpleTTest2(squeeze(trial_group_1(:,ii,:))',squeeze(trial_group_2(:,ii,:))');
  231. end
  232. end
  233. end
  234. % Find the above-threshold clusters:
  235. CC = bwconncomp(t_value_vector > tThreshold,4);
  236. cMapPrimary = zeros(size(t_value_vector));
  237. tSumPrimary = zeros(CC.NumObjects,1);
  238. for i=1:CC.NumObjects
  239. cMapPrimary(CC.PixelIdxList{i}) = i;
  240. tSumPrimary(i) = sum(t_value_vector(CC.PixelIdxList{i}));
  241. end
  242. if two_sided % Also look for negative clusters
  243. n = CC.NumObjects;
  244. CC = bwconncomp(t_value_vector < -tThreshold,4);
  245. for i=1:CC.NumObjects
  246. cMapPrimary(CC.PixelIdxList{i}) = n+i;
  247. tSumPrimary(n+i) = sum(t_value_vector(CC.PixelIdxList{i}));
  248. end
  249. end
  250. % Sort clusters:
  251. [~,tSumIdx] = sort(abs(tSumPrimary),'descend');
  252. tSumPrimary = tSumPrimary(tSumIdx);
  253. % RUN PERMUTATIONS
  254. % We'll need this for creating shuffled trials for independent samples:
  255. if ~dependent_samples
  256. all_trials = cat(ndims(trial_group_1), trial_group_1, trial_group_2);
  257. end
  258. permutation_distribution = zeros(num_permutations,1);
  259. for p = 1:num_permutations
  260. if dependent_samples
  261. % Dependent samples: use a one-sample t-test of the difference
  262. % between the two samples (compared to zero)
  263. if num_data_points_y == 1
  264. % one-dimensional trial data
  265. D = bsxfun(@times,trial_group_1'-trial_group_2',permutation_vectors(:,p));
  266. t_value_vector = simpleTTest(D,0);
  267. else
  268. % two-dimensional trial data
  269. for ii = 1:num_data_points_y
  270. D = squeeze(trial_group_1(:,ii,:)-trial_group_2(:,ii,:))';
  271. D = bsxfun(@times,D,permutation_vectors(:,p));
  272. t_value_vector(:,ii) = simpleTTest(D,0);
  273. end
  274. end
  275. else
  276. % Independent samples: use a two-sample t-test of the difference
  277. % between the 2 sample means
  278. if num_data_points_y == 1
  279. % one-dimensional trial data
  280. p_trial_group_1 = all_trials(:,permutation_vectors(:,p)==1);
  281. p_trial_group_2 = all_trials(:,permutation_vectors(:,p)==2);
  282. t_value_vector = simpleTTest2(p_trial_group_1',p_trial_group_2');
  283. else
  284. % two-dimensional trial data
  285. p_trial_group_1 = all_trials(:,:,permutation_vectors(:,p)==1);
  286. p_trial_group_2 = all_trials(:,:,permutation_vectors(:,p)==2);
  287. for ii = 1:num_data_points_y
  288. t_value_vector(:,ii) = simpleTTest2(squeeze(p_trial_group_1(:,ii,:))',squeeze(p_trial_group_2(:,ii,:))');
  289. end
  290. end
  291. end
  292. % Find clusters:
  293. CC = bwconncomp(t_value_vector > tThreshold,4);
  294. tSum = zeros(CC.NumObjects,1);
  295. for i=1:CC.NumObjects
  296. tSum(i) = sum(t_value_vector(CC.PixelIdxList{i}));
  297. end
  298. if two_sided % Also look for negative clusters
  299. n = CC.NumObjects;
  300. CC = bwconncomp(t_value_vector < -tThreshold,4);
  301. for i=1:CC.NumObjects
  302. tSum(n+i) = sum(t_value_vector(CC.PixelIdxList{i}));
  303. end
  304. end
  305. if isempty(tSum)
  306. permutation_distribution(p) = 0;
  307. else
  308. [~,idx] = max(abs(tSum));
  309. permutation_distribution(p) = tSum(idx);
  310. end
  311. end
  312. %% DETERIMNE SIGNIFICANCE
  313. for clustIdx = 1:min(num_clusters,length(tSumPrimary))
  314. if two_sided
  315. ii = sum(abs(permutation_distribution) >= abs(tSumPrimary(clustIdx)));
  316. else
  317. ii = sum(permutation_distribution >= tSumPrimary(clustIdx));
  318. end
  319. clusters{clustIdx} = find(cMapPrimary == tSumIdx(clustIdx));
  320. p_values(clustIdx) = (ii+1) / (num_permutations+1);
  321. end
  322. % return regular arrays if only one cluster is requested
  323. t_sums = tSumPrimary;
  324. if num_clusters == 1
  325. clusters = clusters{1};
  326. permutation_distribution = permutation_distribution{1};
  327. end
  328. end
  329. function [t, df] = simpleTTest(x,m)
  330. %TTEST Hypothesis test: Compares the sample average to a constant.
  331. % [STATS] = TTEST(X,M) performs a T-test to determine
  332. % if a sample from a normal distribution (in X) could have mean M.
  333. % Modified from ttest function in statistical toolbox of Matlab
  334. % The modification is that it returns only t value and df.
  335. % The reason is that calculating the critical value that
  336. % passes the threshold via the tinv function takes ages in the original
  337. % function and therefore it slows down functions with many
  338. % iterations.
  339. % Written by Leon Deouell.
  340. %
  341. % Modified by Edden Gerber 19.11.2013: Added support for x being a matrix, where columns are
  342. % observations and rows are variables (output is a vector).
  343. if nargin < 1,
  344. error('Requires at least one input argument.');
  345. end
  346. if nargin < 2
  347. m = 0;
  348. end
  349. samplesize = size(x,1);
  350. xmean = sum(x)/samplesize; % works faster then mean
  351. % compute std (based on the std function, but without unnecessary stages
  352. % which make that function general, but slow (especially using repmat)
  353. xc = bsxfun(@minus,x,xmean); % Remove mean
  354. xstd = sqrt(sum(conj(xc).*xc,1)/(samplesize-1));
  355. ser = xstd ./ sqrt(samplesize);
  356. tval = (xmean - m) ./ ser;
  357. % stats = struct('tstat', tval, 'df', samplesize-1);
  358. t = tval;
  359. df = samplesize-1;
  360. end
  361. function [t, df] = simpleTTest2(x1,x2)
  362. % A 2-sample t-test which computes only the t-value (and degrees of
  363. % freedom), skipping the very time-consuming p-value computation. This
  364. % function is good for permutation tests which need to compute t-values a
  365. % large number of times as fast as possible.
  366. % If using input matrices, different variables should be along the rows
  367. % (each column is a single variable).
  368. %
  369. % Written by Edden Gerber, March 2016.
  370. if nargin < 2,
  371. error('Requires at least two input argument.');
  372. end
  373. n1 = size(x1,1);
  374. n2 = size(x2,1);
  375. xmean1 = sum(x1)/n1; % works faster then mean
  376. xmean2 = sum(x2)/n2; % works faster then mean
  377. % compute std (based on the std function, but without unnecessary stages
  378. % which make that function general, but slow (especially using repmat)
  379. xc = bsxfun(@minus,x1,xmean1); % Remove mean
  380. xstd1 = sqrt(sum(conj(xc).*xc,1)/(n1-1));
  381. xc = bsxfun(@minus,x2,xmean2); % Remove mean
  382. xstd2 = sqrt(sum(conj(xc).*xc,1)/(n2-1));
  383. sx1x2 = sqrt(xstd1.^2/n1 + xstd2.^2/n2);
  384. t = (xmean1 - xmean2) ./ sx1x2;
  385. df = n1+n2-2;
  386. end