Stat_ANOVA2_Mixed.m 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. function [SSQs, DFs, MSQs, Fs, Ps]=Stat_ANOVA2_Mixed(X,suppress_output)
  2. % simple function for mixed (between- and within-subjects) ANOVA
  3. %
  4. % Based loosely on BWAOV2 (http://www.mathworks.com/matlabcentral/fileexchange/5579-bwaov2) by Antonio Trujillo-Ortiz
  5. % (insofar as I used that function to figure out the basic equations, as it is apparently very hard to find documentation
  6. % on mixed-model ANOVAs on the Internet). However, the code is all original.
  7. %
  8. % The major advantage of this function over the existing BWAOV2 is that it corrects a bug that occurs when the groups
  9. % have unequal numbers of subjects, as pointed out in the Matlab Central File Exchange by Jaewon Hwang. The code is also,
  10. % at least in my opinion, much cleaner.
  11. %
  12. % At present this function only supports mixed models with a single between-subjects factor and a single within-subjects
  13. % (repeated measures) factor, each with as many levels as you like. I would be happy to add more bells and whistles in
  14. % future editions, such as the ability to define multiple factors, apply Mauchly's test and add in non-sphericity
  15. % corrections when necessary, etc. I'm a better programmer than I am a statistician, though, so if anyone out there would
  16. % like to lend a hand with the math (e.g., feed me the equations for these features), I'd be more than happy to implement
  17. % them. Email matthew DOT r DOT johnson AT aya DOT yale DOT edu
  18. %
  19. % Also feel free to modify this file for your own purposes and upload your changes to the Matlab Central File Exchange if
  20. % you like.
  21. %
  22. % I have checked this function against the example data in David M. Lane's HyperStat online textbook, which is the same
  23. % data that breaks BWAOV2 (http://davidmlane.com/hyperstat/questions/Chapter_14.html, question 6). I have also checked it
  24. % against SPSS and gotten identical results. However, I haven't tested every possible case so bugs may remain. Use at
  25. % your own risk. If you find bugs and let me know, I'm happy to try to fix them.
  26. %
  27. % ===============
  28. % USAGE
  29. % ===============
  30. %
  31. % Inputs:
  32. %
  33. % X: design matrix with four columns (future versions may allow different input configurations)
  34. % - first column (i.e., X(:,1)) : all dependent variable values
  35. % - second column (i.e., X(:,2)) : between-subjects factor (e.g., subject group) level codes (ranging from 1:L where
  36. % L is the # of levels for the between-subjects factor)
  37. % - third column (i.e., X(:,3)) : within-subjects factor (e.g., condition/task) level codes (ranging from 1:L where
  38. % L is the # of levels for the within-subjects factor)
  39. % - fourth column (i.e., X(:,4)) : subject codes (ranging from 1:N where N is the total number of subjects)
  40. %
  41. % suppress_output: defaults to 0 (meaning it displays the ANOVA table as output). If you don't want to display the table,
  42. % just pass in a non-zero value
  43. %
  44. % Outputs:
  45. %
  46. % SSQs, DFs, MSQs, Fs, Ps : Sum of squares, degrees of freedom, mean squares, F-values, P-values. All the same values
  47. % that are shown in the table if you choose to display it. All will be cell matrices. Values within will be in the same
  48. % order that they are shown in the output table.
  49. %
  50. % Enjoy! -MJ
  51. % Example by JSW
  52. % dHP_Pre=[PopulationAvgFr.Pre_dHP_PYR_1]';
  53. % dHP_Main=[PopulationAvgFr.Main_dHP_PYR_1]';
  54. % iHP_Pre=[PopulationAvgFr.Pre_iHP_PYR_1]';
  55. % iHP_Main=[PopulationAvgFr.Main_iHP_PYR_1]';
  56. %
  57. % Y = [dHP_Pre; dHP_Main; iHP_Pre; iHP_Main];
  58. % BS = [GetGroupingVar(dHP_Pre,1); GetGroupingVar(dHP_Main,1); GetGroupingVar(iHP_Pre,2); GetGroupingVar(iHP_Main,2)];
  59. % WS = [GetGroupingVar(dHP_Pre,1); GetGroupingVar(dHP_Main,2); GetGroupingVar(iHP_Pre,1); GetGroupingVar(iHP_Main,2)];
  60. % S = [GetGroupingVar(dHP_Pre); GetGroupingVar(dHP_Main); GetGroupingVar(iHP_Pre)+length(dHP_Pre); GetGroupingVar(iHP_Main)+length(dHP_Pre)];
  61. % X = [Y BS WS S];
  62. % [SSQs, DFs, MSQs, Fs, Ps] = Stat_ANOVA2_Mixed(X);
  63. if nargin < 1,
  64. error('No input');
  65. end;
  66. if nargin < 2 || isempty(suppress_output)
  67. suppress_output=0;
  68. end
  69. all_dvs=X(:,1);
  70. all_bs_labels=X(:,2);
  71. all_ws_labels=X(:,3);
  72. all_subj_labels=X(:,4);
  73. bs_levels=sort(unique(all_bs_labels));
  74. ws_levels=sort(unique(all_ws_labels));
  75. subj_levels=sort(unique(all_subj_labels));
  76. n_bs_levels=length(bs_levels);
  77. n_ws_levels=length(ws_levels);
  78. n_subjects=length(subj_levels);
  79. if any(bs_levels(:)~=(1:n_bs_levels)') || any(ws_levels(:)~=(1:n_ws_levels)') || any(subj_levels(:)~=(1:n_subjects)')
  80. error('Levels of factors (including subject labels) must be numbered 1:L (where L is the # of levels for that factor');
  81. end
  82. for i=1:n_bs_levels
  83. for j=1:n_ws_levels
  84. this_cell_inds=find(all_bs_labels==i & all_ws_labels==j);
  85. if isempty(this_cell_inds)
  86. error('At least one empty cell found');
  87. end
  88. n_subs_per_cell(i,j)=length(this_cell_inds); %#ok<AGROW>
  89. cell_totals(i,j)=sum(all_dvs(this_cell_inds)); %#ok<AGROW>
  90. end
  91. if any(n_subs_per_cell(i,:)~=n_subs_per_cell(i,1))
  92. error('At least one subject missing at least one repeated measure (or is possibly entered more than once)');
  93. %technically this is not a failsafe check, as it could be that subject ! has only conditions A & B,
  94. % whereas subject 2 has only condition C, which still gives equal values for all the conditions.
  95. % We'll double-check for that circumstance below
  96. end
  97. end
  98. for k=1:n_subjects
  99. this_subj_inds=find(all_subj_labels==k);
  100. if length(this_subj_inds)~=n_ws_levels
  101. %our second check for this issue
  102. error('At least one subject missing at least one repeated measure (or is possibly entered more than once)');
  103. end
  104. subj_totals(k)=sum(all_dvs(this_subj_inds)); %#ok<AGROW>
  105. end
  106. correction_term = sum(all_dvs)^2 / length(all_dvs);
  107. SStot = sum(all_dvs.^2) - correction_term;
  108. %don't really need this for calculations, but can uncomment if we want to print
  109. % DFtot = length(all_dvs) - 1; %total degrees of freedom
  110. %subject "factor" (i.e. differences in subject means)
  111. SSsub = sum(subj_totals .^ 2)/n_ws_levels - correction_term;
  112. %between-subjects factor
  113. SStmp=[];
  114. for i=1:n_bs_levels
  115. SStmp(i)=(sum(cell_totals(i,:))^2) / sum(n_subs_per_cell(i,:)); %#ok<AGROW>
  116. end
  117. SSbs = sum(SStmp) - correction_term;
  118. DFbs = n_bs_levels - 1;
  119. MSbs = SSbs / DFbs;
  120. %error terms for between-subjects factor
  121. ERRbs = SSsub - SSbs;
  122. DFERRbs = n_subjects - n_bs_levels;
  123. MSERRbs = ERRbs / DFERRbs;
  124. %correction with harmonic mean of cell sizes if cell sizes are not all equal
  125. n_subs_hm=harmmean(n_subs_per_cell(:));
  126. cell_totals_hm = (cell_totals ./ n_subs_per_cell) * n_subs_hm;
  127. correction_term_hm = sum(cell_totals_hm(:))^2 / (n_subs_hm * n_bs_levels * n_ws_levels);
  128. n_subs_per_cell_hm = ones(n_bs_levels,n_ws_levels) * n_subs_hm;
  129. %within-subjects factor
  130. SStmp=[];
  131. for j=1:n_ws_levels
  132. SStmp(j)=(sum(cell_totals_hm(:,j))^2) ./ sum(n_subs_per_cell_hm(:,j)); %#ok<AGROW>
  133. end
  134. SSws = sum(SStmp) - correction_term_hm;
  135. DFws = n_ws_levels - 1;
  136. MSws = SSws / DFws;
  137. %uncorrected version of within-subjects factor for calculating interaction
  138. SStmp=[];
  139. for j=1:n_ws_levels
  140. SStmp(j)=(sum(cell_totals(:,j))^2) ./ sum(n_subs_per_cell(:,j)); %#ok<AGROW>
  141. end
  142. SSws_unc = sum(SStmp) - correction_term;
  143. %interaction of between-subjects and within-subjects factor
  144. SStmp = sum((cell_totals(:) .^ 2) ./ n_subs_per_cell(:));
  145. SSint = SStmp - SSbs - SSws_unc - correction_term;
  146. DFint = DFbs * DFws;
  147. MSint = SSint / DFint;
  148. %error terms (for both within-subjects factor and interaction)
  149. ERRws = SStot - SSbs - ERRbs - SSws_unc - SSint;
  150. DFERRws = DFERRbs * DFws;
  151. MSERRws = ERRws / DFERRws;
  152. %F-values
  153. Fbs = MSbs / MSERRbs;
  154. Fws = MSws / MSERRws;
  155. Fint = MSint / MSERRws;
  156. %P-values
  157. Pbs = 1-fcdf(Fbs, DFbs, DFERRbs);
  158. Pws = 1-fcdf(Fws, DFws, DFERRws);
  159. Pint = 1-fcdf(Fint,DFint,DFERRws);
  160. if ~suppress_output
  161. disp(' ');
  162. disp(' ');
  163. fprintf(1,' Source SSq df MSq F p \n');
  164. fprintf(1,'-------------------------------------------------------------------------------------\n');
  165. fprintf(1,' Between-subjects factor %9.3f %9i %9.3f %9.3f %9.7f\n',SSbs,DFbs,MSbs,Fbs,Pbs);
  166. fprintf(1,' Between-subjects error %9.3f %9i %9.3f\n',ERRbs,DFERRbs,MSERRbs);
  167. fprintf(1,' Within-subjects factor %9.3f %9i %9.3f %9.3f %9.7f\n',SSws,DFws,MSws,Fws,Pws);
  168. fprintf(1,' Within x between int. %9.3f %9i %9.3f %9.3f %9.7f\n',SSint,DFint,MSint,Fint,Pint);
  169. fprintf(1,' Within-subjects error %9.3f %9i %9.3f\n',ERRws,DFERRws,MSERRws);
  170. disp(' ');
  171. disp(' ');
  172. end
  173. SSQs = { SSbs; ERRbs; SSws; SSint; ERRws };
  174. DFs = { DFbs; DFERRbs; DFws; DFint; DFERRws };
  175. MSQs = { MSbs; MSERRbs; MSws; MSint; MSERRws };
  176. Fs = { Fbs; []; Fws; Fint; [] };
  177. Ps = { Pbs; []; Pws; Pint; [] };