% fdr_bh() - Executes the Benjamini & Hochberg (1995) and the Benjamini &
%            Yekutieli (2001) procedure for controlling the false discovery
%            rate (FDR) of a family of hypothesis tests. FDR is the expected
%            proportion of rejected hypotheses that are mistakenly rejected
%            (i.e., the null hypothesis is actually true for those tests).
%            FDR is a somewhat less conservative/more powerful method for
%            correcting for multiple comparisons than procedures like Bonferroni
%            correction that provide strong control of the family-wise
%            error rate (i.e., the probability that one or more null
%            hypotheses are mistakenly rejected).
%
%            This function also returns the false coverage-statement rate
%            (FCR)-adjusted selected confidence interval coverage (i.e.,
%            the coverage needed to construct multiple comparison corrected
%            confidence intervals that correspond to the FDR-adjusted p-values).
%
%
% Usage:
%  >> [h, crit_p, adj_ci_cvrg, adj_p]=fdr_bh(pvals,q,method,report);
%
% Required Input:
%   pvals  - A vector or matrix (two dimensions or more) containing the
%            p-value of each individual test in a family of tests.
%
% Optional Inputs:
%   q      - The desired false discovery rate. {default: 0.05}
%   method - ['pdep' or 'dep'] If 'pdep', the original Benjamini & Hochberg
%            FDR procedure is used, which is guaranteed to be accurate if
%            the individual tests are independent or positively dependent
%            (e.g., Gaussian variables that are positively correlated or
%            independent). If 'dep', the FDR procedure described in
%            Benjamini & Yekutieli (2001) is used, which is guaranteed to be
%            accurate for any test dependency structure (e.g., Gaussian
%            variables with any covariance matrix). 'dep' is always
%            appropriate to use but is less powerful than 'pdep'.
%            See the example calls below. {default: 'pdep'}
%   report - ['yes' or 'no'] If 'yes', a brief summary of FDR results is
%            output to the MATLAB command line. {default: 'no'}
%
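%   Example method calls (a minimal sketch; "some_pvals" is assumed to be a
%   vector of p-values you have already computed):
%    >> h_pdep=fdr_bh(some_pvals,.05,'pdep','no'); %independent/positively dependent tests
%    >> h_dep =fdr_bh(some_pvals,.05,'dep','no');  %valid for any dependency structure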
%
% Outputs:
%   h       - A binary vector or matrix of the same size as the input "pvals."
%             If the ith element of h is 1, then the test that produced the
%             ith p-value in pvals is significant (i.e., the null hypothesis
%             of the test is rejected).
%   crit_p  - All uncorrected p-values less than or equal to crit_p are
%             significant (i.e., their null hypotheses are rejected). If
%             no p-values are significant, crit_p=0.
%   adj_ci_cvrg - The FCR-adjusted BH- or BY-selected
%             confidence interval coverage. For any p-values that
%             are significant after FDR adjustment, this gives you the
%             proportion of coverage (e.g., 0.99) you should use when generating
%             confidence intervals for those parameters. In other words,
%             this allows you to correct your confidence intervals for
%             multiple comparisons. You can NOT obtain confidence intervals
%             for non-significant p-values. The adjusted confidence intervals
%             guarantee that the expected FCR is less than or equal to q
%             if using the appropriate FDR control algorithm for the
%             dependency structure of your data (Benjamini & Yekutieli, 2005).
%             FCR (i.e., false coverage-statement rate) is the proportion
%             of confidence intervals you construct that miss the true value
%             of the parameter. adj_ci_cvrg=NaN if no p-values are
%             significant after adjustment.
%   adj_p   - All adjusted p-values less than or equal to q are significant
%             (i.e., their null hypotheses are rejected). Note, adjusted
%             p-values can be greater than 1. See the snippet below for a
%             quick way to inspect h and adj_p.
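%
%   For illustration, a minimal sketch of inspecting these outputs (assuming
%   fdr_bh has already been called as in the Usage line above):
%    >> n_sig=sum(h(:));         %number of tests rejected at FDR level q
%    >> sig_ids=find(adj_p<=q);  %indices of the significant tests
%                                %(h==1 and adj_p<=q flag the same tests)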
%
%
% References:
%   Benjamini, Y. & Hochberg, Y. (1995) Controlling the false discovery
%     rate: A practical and powerful approach to multiple testing. Journal
%     of the Royal Statistical Society, Series B (Methodological). 57(1),
%     289-300.
%
%   Benjamini, Y. & Yekutieli, D. (2001) The control of the false discovery
%     rate in multiple testing under dependency. The Annals of Statistics.
%     29(4), 1165-1188.
%
%   Benjamini, Y., & Yekutieli, D. (2005). False discovery rate-adjusted
%     multiple confidence intervals for selected parameters. Journal of the
%     American Statistical Association, 100(469), 71-81. doi:10.1198/016214504000001907
%
%
% Example:
%   nullVars=randn(12,15);
%   [~, p_null]=ttest(nullVars); %15 tests where the null hypothesis
%                                %is true
%   effectVars=randn(12,5)+1;
%   [~, p_effect]=ttest(effectVars); %5 tests where the null
%                                    %hypothesis is false
%   [h, crit_p, adj_ci_cvrg, adj_p]=fdr_bh([p_null p_effect],.05,'pdep','yes');
%   data=[nullVars effectVars];
%   fcr_adj_cis=NaN*zeros(2,20); %initialize confidence interval bounds to NaN
%   if ~isnan(adj_ci_cvrg),
%      sigIds=find(h);
%      fcr_adj_cis(:,sigIds)=tCIs(data(:,sigIds),adj_ci_cvrg); %tCIs.m is available on the
%                                                              %MathWorks File Exchange
%   end
%
%
% For a review of false discovery rate control and other contemporary
% techniques for correcting for multiple comparisons see:
%
%   Groppe, D.M., Urbach, T.P., & Kutas, M. (2011) Mass univariate analysis
%     of event-related brain potentials/fields I: A critical tutorial review.
%     Psychophysiology, 48(12) pp. 1711-1725, DOI: 10.1111/j.1469-8986.2011.01273.x
%     http://www.cogsci.ucsd.edu/~dgroppe/PUBLICATIONS/mass_uni_preprint1.pdf
%
%
% For a review of FCR-adjusted confidence intervals (CIs) and other techniques
% for adjusting CIs for multiple comparisons see:
%
%   Groppe, D.M. (in press) Combating the scientific decline effect with
%     confidence (intervals). Psychophysiology.
%     http://biorxiv.org/content/biorxiv/early/2015/12/10/034074.full.pdf
%
%
% Author:
%   David M. Groppe
%   Kutaslab
%   Dept. of Cognitive Science
%   University of California, San Diego
%   March 24, 2010
%
%%%%%%%%%%%%%%%% REVISION LOG %%%%%%%%%%%%%%%%%
%
% 5/7/2010-  Added FDR adjusted p-values
% 5/14/2013- D.H.J. Poot, Erasmus MC, improved run-time complexity
% 10/2015-   Now returns FCR adjusted confidence intervals
function [h, crit_p, adj_ci_cvrg, adj_p]=fdr_bh(pvals,q,method,report)

if nargin<1,
    error('You need to provide a vector or matrix of p-values.');
else
    if ~isempty(find(pvals<0,1)),
        error('Some p-values are less than 0.');
    elseif ~isempty(find(pvals>1,1)),
        error('Some p-values are greater than 1.');
    end
end

if nargin<2,
    q=.05;
end

if nargin<3,
    method='pdep';
end

if nargin<4,
    report='no';
end

s=size(pvals);
if (length(s)>2) || s(1)>1,
    [p_sorted, sort_ids]=sort(reshape(pvals,1,prod(s)));
else
    %p-values are already a row vector
    [p_sorted, sort_ids]=sort(pvals);
end
[dummy, unsort_ids]=sort(sort_ids); %indexes to return p_sorted to pvals order
m=length(p_sorted); %number of tests

if strcmpi(method,'pdep'),
    %BH procedure for independence or positive dependence
    thresh=(1:m)*q/m;
    wtd_p=m*p_sorted./(1:m);
elseif strcmpi(method,'dep')
    %BH procedure for any dependency structure
    denom=m*sum(1./(1:m));
    thresh=(1:m)*q/denom;
    wtd_p=denom*p_sorted./(1:m);
    %Note, this can produce adjusted p-values greater than 1!
else
    error('Argument ''method'' needs to be ''pdep'' or ''dep''.');
end
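%In both branches thresh(i)=i*q/denominator (denominator=m for 'pdep' and
%m*sum(1./(1:m)) for 'dep') is the step-up cutoff for the ith smallest p-value,
%and wtd_p(i)=denominator*p_sorted(i)/i is that p-value's adjusted value before
%monotonicity is enforced; the BH/BY rule rejects the k smallest p-values,
%where k is the largest i with p_sorted(i)<=thresh(i).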
if nargout>3,
    %compute adjusted p-values; This can be a bit computationally intensive
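    %The loop below fills adj_p (indexed in sorted-p order) with the running
    %minimum from the right of wtd_p, i.e., adj_p(i)=min(wtd_p(i:m)): the k-th
    %smallest value of wtd_p is written into every not-yet-filled position up
    %to and including its own position.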
    adj_p=zeros(1,m)*NaN;
    [wtd_p_sorted, wtd_p_sindex] = sort( wtd_p );
    nextfill = 1;
    for k = 1 : m
        if wtd_p_sindex(k)>=nextfill
            adj_p(nextfill:wtd_p_sindex(k)) = wtd_p_sorted(k);
            nextfill = wtd_p_sindex(k)+1;
            if nextfill>m
                break;
            end;
        end;
    end;
    adj_p=reshape(adj_p(unsort_ids),s);
end
rej=p_sorted<=thresh;
max_id=find(rej,1,'last'); %index of the largest sorted p-value at or below its step-up threshold
if isempty(max_id),
    crit_p=0;
    h=pvals*0;
    adj_ci_cvrg=NaN;
else
    crit_p=p_sorted(max_id);
    h=pvals<=crit_p;
    adj_ci_cvrg=1-thresh(max_id);
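    %FCR-adjusted coverage for the max_id selected parameters
    %(Benjamini & Yekutieli, 2005): 1-thresh(max_id), i.e., 1-max_id*q/m for
    %'pdep' and the analogous quantity with the larger 'dep' denominator.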
end

if strcmpi(report,'yes'),
    n_sig=sum(p_sorted<=crit_p);
    if n_sig==1,
        fprintf('Out of %d tests, %d is significant using a false discovery rate of %f.\n',m,n_sig,q);
    else
        fprintf('Out of %d tests, %d are significant using a false discovery rate of %f.\n',m,n_sig,q);
    end
    if strcmpi(method,'pdep'),
        fprintf('FDR/FCR procedure used is guaranteed valid for independent or positively dependent tests.\n');
    else
        fprintf('FDR/FCR procedure used is guaranteed valid for independent or arbitrarily dependent tests.\n');
    end
end