clear all
clc
close all

%% loading the processed data files (takes a while, it's big)
load('data/2fcasts_members_questions_extracted_data.mat');
team2start = 11;

%% removing the values for which there is no Brier (because of minimum
%% requirements from the previous script, like a minimum number of answers)
for team = 1:size(forecast_time2close_all,1)
    for question = 1:491
        N = numel(group_briers_all{team,question});
        if numel(group_briers_all{team,question}) == 0
            group_forecasts_all{team,question} = [];
            group_members_all{team,question} = [];
            group_timestamps_all{team,question} = [];
            forecast_time2close_all{team,question} = [];
        end
    end
end

%% creating the variables for the data table
for t = team2start:90
    team4table{1,t} = repmat(t, numel(cat(2,group_briers_all{t,:})), 1);
    question4table = [];
    for q = 1:size(questions2analyze_all{t},1)
        question4table = vertcat(question4table, ...
            repmat(questions2analyze_all{t}(q), numel(group_briers_all{t,q}), 1));
    end
    question4table_all{1,t} = question4table;
    member4table{1,t} = cat(1,group_members_all{t,:});
    fcast4table{1,t} = cat(1,group_forecasts_all{t,:});
    brier4table{1,t} = cat(2,group_briers_all{t,:})';
    timestamp4table{1,t} = cat(1,group_timestamps_all{t,:});
    time2close4table{1,t} = cat(2,forecast_time2close_all{t,:})';
end

%% creating the data table
team = cat(1,team4table{1,:});
question_code = cat(1,question4table_all{1,:});
question_n = nan(numel(question_code),1);
unique_questions = unique(question_code);
for i = 1:numel(unique_questions)
    question_n(ismember(question_code, unique_questions{i})) = i;
end
member = cat(1,member4table{1,:});
fcast = cat(1,fcast4table{1,:});
brier = cat(1,brier4table{1,:});
timestamp = cat(1,timestamp4table{1,:});
time2close = cat(1,time2close4table{1,:});
Data = table(team,member,question_code,question_n,fcast,brier,timestamp,time2close);
Data(Data.team < team2start,:) = [];   % keep only the analyzed teams (reconstructed; this line was garbled in the source)

%% Wilcoxon rank-sum test: each member's Briers vs the rest of their team
%%% NOTE: most of this section was garbled in the source. The loop below is an
%%% assumed reconstruction built from the surviving elseif branches and the
%%% variables used later (membersperteam_n, team_avg_brier, N_comparisons,
%%% Wilcoxon_team_vs_members_*); the exact original code may have differed.
for t = 1:90
    membersperteam_n(t) = numel(unique(cat(1,group_members_all{t,:})));
end
for t = team2start:90
    team_members = unique(Data.member(Data.team == t));
    team_avg_brier(t) = mean(Data.brier(Data.team == t));
    N_comparisons = numel(team_members);
    clear Wilcoxon WilcoxonPerformance
    for m = 1:numel(team_members)
        member_index = ismember(Data.member, team_members(m)) & Data.team == t;
        member_briers = Data.brier(member_index);
        rest_of_team_briers = Data.brier(~member_index & Data.team == t);
        Wilcoxon(m) = ranksum(member_briers, rest_of_team_briers);
        if Wilcoxon(m) < 0.05/N_comparisons && mean(member_briers) < team_avg_brier(t)
            WilcoxonPerformance(m) = -1; %%% if member is better than team
        elseif Wilcoxon(m) < 0.05/N_comparisons && mean(member_briers) > team_avg_brier(t)
            WilcoxonPerformance(m) = 1; %%% if member is worse than team
        elseif Wilcoxon(m) > 0.05/N_comparisons
            WilcoxonPerformance(m) = 0; %%% if member is the same as team
        end
    end
    Wilcoxon_team_vs_members_pvalues{t} = Wilcoxon;
    Wilcoxon_team_vs_members_performance{t} = WilcoxonPerformance;
end

%% members per team figure
figure
subplot 121
plot(1:10,membersperteam_n(1:10),'s-')
ylabel('# of members')
xlabel('team')
subplot 122
plot(11:90,membersperteam_n(11:90),'s-')
xlabel('team')

%% members per team figure (log scale)
figure; set(gcf,'color','w')
semilogy(1:90,membersperteam_n,'.-','linewidth',2,'markersize',15)
hold on
plot([1 90],[70 70],'linewidth',2)
xlim([0 91])
set(gca,'XTick',[10:10:90])
xlim([0.5 90.5])
set(gca,'XTickLabel',{[10:10:90]})
xlabel('Team')
ylabel('# members')
set(gca,'fontsize',20)

%% figure: proportion of team members performing differently from the team mean
for i = team2start:90
    t_perf = Wilcoxon_team_vs_members_performance{i};
    t_better(i) = sum(t_perf == -1) / numel(t_perf);
    t_worse(i) = sum(t_perf == 1) / numel(t_perf);
    % t_better(i) = sum(t_perf == -1);
    % t_worse(i) = sum(t_perf == 1);
    t_diff(i) = t_better(i) + t_worse(i);
end
figure; set(gcf,'color','w');
plot(1:90,t_better,1:90,t_worse)
legend('Better than team average','Worse than team average','location','northeast')
xlim([0 91])
set(gca,'XTick',[10:10:90])
xlim([0.5 90.5])
set(gca,'XTickLabel',{[10:10:90]})
xlabel('Team')
ylabel('Proportion of team members')
set(gca,'FontSize',20)
sum(t_diff==0)   % number of teams with no members differing from the team

%% counting members better / worse / equal relative to their team
concatenated_wilcoxonperfs = cat(2,Wilcoxon_team_vs_members_performance{1,:});
disp(['Check that the N for the pvalue is = ' num2str(numel(cat(2,Wilcoxon_team_vs_members_performance{1,:})))])
Nbetter = sum(concatenated_wilcoxonperfs == -1);
Nworse = sum(concatenated_wilcoxonperfs == 1);
Nequal = sum(concatenated_wilcoxonperfs == 0);
disp(['N members better than their team = ' num2str(Nbetter)])
disp(['N members worse than their team = ' num2str(Nworse)])
disp(['N members equal to their team = ' num2str(Nequal)])

%% creating the new variables to analyze:
%% confidence and absolute performance
confidence4table = nan(size(Data,1),1);
abs_performance4table = nan(size(Data,1),1);
confidence4table = abs(Data.fcast - 0.5);
abs_performance4table(Data.brier < 0.5) = 1;    % correct
abs_performance4table(Data.brier > 0.5) = 0;    % incorrect
abs_performance4table(Data.brier == 0.5) = -1;  % uninformative (forecast of 0.5)
Data.confidence = confidence4table;
Data.abs_performance = abs_performance4table;

%% calculating question outcomes for later: 1 if the question's outcome was 1,
%% 0 if it was 0 (matching the probability forecast); -1 for 0.5 forecasts,
%% which carry no outcome information
outcome_vector = nan(size(Data,1),1);
outcome_vector(Data.fcast==0.5) = -1;
outcome_vector(Data.abs_performance==1 & Data.fcast<0.5) = 0;
outcome_vector(Data.abs_performance==0 & Data.fcast<0.5) = 1;
outcome_vector(Data.abs_performance==0 & Data.fcast>0.5) = 0;
outcome_vector(Data.abs_performance==1 & Data.fcast>0.5) = 1;
Data.q_outcome = outcome_vector;

%% descriptive histograms. Sanity check: should show -1/0/1 for performance
%% and values between 0 and 0.5 for confidence
figure; set(gcf,'color','w');
subplot 121
histogram(Data.abs_performance,'normalization','cdf')
xlabel('Forecast absolute performance')
ylabel('Cumulative Probability')
subplot 122
histogram(Data.confidence,100,'normalization','pdf')
xlabel('Forecast Confidence')

%% windowing confidence and absolute performance
tw_start = 110:-10:30;
tw_end = 80:-10:0;
for tw = 1:numel(tw_start)
    clear time_index
    time_index = Data.time2close < tw_start(tw) & Data.time2close > tw_end(tw);
    abs_perf_window(tw) = mean(Data.abs_performance(time_index & Data.fcast ~= 0.5));
    confidence_mean_window(tw) = mean(Data.confidence(time_index));
    confidence_se_window(tw) = std(Data.confidence(time_index)) / sqrt(numel(Data.confidence(time_index)));
    brier_mean_window(tw) = mean(Data.brier(time_index));
    brier_se_window(tw) = std(Data.brier(time_index)) / sqrt(numel(Data.brier(time_index)));
end

%% figure of absolute performance and confidence across time windows
figure; set(gcf,'color','w');
subplot 211
plot(1:9,abs_perf_window)
set(gca,'XTick',[1:9])
xlim([0.5 9.5])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Proportion of correct absolute forecasts')
subplot 212
errorbar(1:9,confidence_mean_window,confidence_se_window)
ylim([0.28 0.4])
xlim([0.5 9.5])
set(gca,'XTick',[1:9])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Confidence Score')
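%% helper used below: BrierScoreCalc (not defined in this file)
% The thresholds above (Brier < 0.5 = correct, exactly 0.5 = uninformative)
% match the two-alternative Brier score, where a forecast of p = 0.5 scores
% exactly 0.5 regardless of the outcome. A minimal runnable sketch of what
% BrierScoreCalc is assumed to compute (the repository's helper may differ);
% BrierScoreCalc_sketch is a hypothetical stand-in name:
BrierScoreCalc_sketch = @(p,outcome) (p-outcome).^2 + ((1-p)-(1-outcome)).^2;  % = 2*(p-outcome).^2
% e.g. BrierScoreCalc_sketch(0.5,1) returns 0.5, BrierScoreCalc_sketch(1,1)
% returns 0 (perfect), and BrierScoreCalc_sketch(0,1) returns 2 (maximally wrong).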
%% calculating the two performances per team per window
min_answers = 5;
for tw = 1:numel(tw_start)
    time_index = Data.time2close < tw_start(tw) & Data.time2close > tw_end(tw);
    Performance1_window_raw = nan(90,382);
    Performance2_window_raw = nan(90,382);
    Consensus_fcast_window_raw = nan(90,382);
    for t = team2start:90
        for q = 1:max(Data.question_n)
            clear values valuesP1
            values = Data.brier(Data.team == t & Data.question_n == q & time_index);
            valuesP1 = Data.fcast(Data.team == t & Data.question_n == q & time_index);
            if numel(values) >= min_answers
                Performance2_window_raw(t,q) = nanmean(values);  % average of the individual Briers
                outcome4briercalc = max(unique(Data.q_outcome(Data.question_n==q)));
                Performance1_window_raw(t,q) = BrierScoreCalc(nanmean(valuesP1),outcome4briercalc);  % Brier of the average (consensus) forecast
                Consensus_fcast_window_raw(t,q) = nanmean(valuesP1);
            end
        end
    end
    %%% performances by team, to plot one line per team instead of the grand average
    Perf1_groupmean_tw(:,tw) = nanmean(Performance1_window_raw,2);
    Perf2_groupmean_tw(:,tw) = nanmean(Performance2_window_raw,2);
    Perf1_questionmean_tw(:,tw) = nanmean(Performance1_window_raw,1);
    Perf2_questionmean_tw(:,tw) = nanmean(Performance2_window_raw,1);
    Consensus_groupmean_tw(:,tw) = nanmean(Consensus_fcast_window_raw,2);
    Consensus_questionmean_tw(:,tw) = nanmean(Consensus_fcast_window_raw,1);
    %%% grand averages and standard errors
    Performance1_window_mean(tw) = nanmean(Performance1_window_raw(:));
    Performance1_window_se(tw) = nanstd(Performance1_window_raw(:)) / sqrt(sum(~isnan(Performance1_window_raw(:))));
    Performance2_window_mean(tw) = nanmean(Performance2_window_raw(:));
    Performance2_window_se(tw) = nanstd(Performance2_window_raw(:)) / sqrt(sum(~isnan(Performance2_window_raw(:))));
    clear Performance_diff Performance_sum MI
    Performance_diff = Performance2_window_raw - Performance1_window_raw;
    Performance_sum = Performance2_window_raw + Performance1_window_raw;
    Perf2_KW{tw} = Performance2_window_raw(:);
    Perf1_KW{tw} = Performance1_window_raw(:);
    MI = Performance_diff ./ Performance_sum;
    MI_mean(tw) = nanmean(MI(:));
    MI_se(tw) = nanstd(MI(:)) / sqrt(sum(sum(~isnan(MI))));
end
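%% note on the modulation index computed above
% MI = (Perf2 - Perf1) / (Perf2 + Perf1), where Perf2 is the average of the
% individual Briers and Perf1 is the Brier of the consensus forecast, so
% MI > 0 whenever aggregating the forecasts first scores better (lower Brier)
% than averaging the individual scores. A quick sign-convention check with
% hypothetical numbers:
MI_toy = (0.40 - 0.25) / (0.40 + 0.25);   % consensus better -> MI_toy > 0 (about 0.23)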
%% one more level of aggregation (2nd-order consensus)
for q = 1:size(Consensus_questionmean_tw,1)
    for tw = 1:9
        if isnan(Consensus_questionmean_tw(q,tw))
            Consensus_Brier_tw(q,tw) = nan;
        else
            Consensus_Brier_tw(q,tw) = BrierScoreCalc(Consensus_questionmean_tw(q,tw), ...
                max(unique(Data.q_outcome(Data.question_n==q))));
        end
        Nvalues_for_se(tw) = sum(~isnan(Consensus_questionmean_tw(:,tw)));
    end
end
Consensus_mean_2aggregation_tw = nanmean(Consensus_Brier_tw,1);
Consensus_se_2aggregation_tw = nanstd(Consensus_Brier_tw,1) ./ sqrt(Nvalues_for_se);

%% statistical testing
for window = 1:9
    mat_testing = nan(90*382,3);
    mat_testing(:,1) = Perf2_KW{window};   %% blue: average of individual Briers
    mat_testing(:,2) = Perf1_KW{window};   %% red: Brier of the consensus forecast
    index_numel = numel(Consensus_Brier_tw(:,window));
    mat_testing(1:index_numel,3) = Consensus_Brier_tw(:,window);   %% yellow: 2nd-order consensus
    % pKW(window) = kruskalwallis(mat_testing);
    [pKW(window),tbl{window},stats{window}] = kruskalwallis(mat_testing,[],'off');
end

%%% multiple comparisons: rows code the pairs being compared (columns 10:11)
multiple_comparisons = nan(3,11);
multiple_comparisons(1,10:11) = [1 2];
multiple_comparisons(2,10:11) = [2 3];
multiple_comparisons(3,10:11) = [1 3];
%%%% Bonferroni-correcting the KW across the 9 windows (multiplying by 9), then
%%%% pairwise rank-sum tests corrected for the 3 pairwise comparisons
for window = 1:9
    if pKW(window)*9 < 0.05
        multiple_comparisons(1,window) = 3*ranksum(Perf2_KW{window},Perf1_KW{window});
        multiple_comparisons(2,window) = 3*ranksum(Consensus_Brier_tw(:,window),Perf1_KW{window});
        multiple_comparisons(3,window) = 3*ranksum(Perf2_KW{window},Consensus_Brier_tw(:,window));
    end
    multiple_comparisons_dunn{window} = multcompare(stats{window},'CType','dunn-sidak');
end

%% GLM preparation
%%%% preparing the data: dropping NaNs and building one time-bin vector per curve
clear b_curve r_curve y_curve
b_curve = Perf2_KW;
r_curve = Perf1_KW;
for i = 1:9
    indb = ~isnan(Perf2_KW{i});
    b_curve{i} = Perf2_KW{i}(indb);
    indr = ~isnan(Perf1_KW{i});
    r_curve{i} = Perf1_KW{i}(indr);
    indy = ~isnan(Consensus_Brier_tw(:,i));
    y_curve{i} = Consensus_Brier_tw(indy,i);
    clear indr indb indy
    b_timebins{i} = i*ones(numel(b_curve{i}),1);
    r_timebins{i} = i*ones(numel(r_curve{i}),1);
    y_timebins{i} = i*ones(numel(y_curve{i}),1);
end
%%%% now concatenating: one long data vector plus columns for condition and time bin
b_cat = vertcat(b_curve{:});
r_cat = vertcat(r_curve{:});
y_cat = vertcat(y_curve{:});
briers4glm = vertcat(b_cat,r_cat,y_cat);
condition4glm(1:numel(b_cat),1) = 1;
condition4glm((1+numel(b_cat)):(numel(b_cat)+numel(r_cat)),1) = 2;
condition4glm((1+numel(b_cat)+numel(r_cat)):(numel(b_cat)+numel(r_cat)+numel(y_cat)),1) = 3;
timebin4glm = vertcat(b_timebins{:},r_timebins{:},y_timebins{:});

%% running the GLM
Accuracy = briers4glm + eps;   % Brier scores (+eps keeps the inverse-gaussian link away from exact zeros)
Timebin = timebin4glm;         % time bin (difficulty proxy)
Condition = condition4glm;     % the three aggregation conditions
figure; set(gcf,'color','w')
subplot 121
boxplot(Accuracy,Condition)
subplot 122
boxplot(Accuracy,Timebin)
IntAccTime = Accuracy .* Timebin;
IntCondTime = Condition .* Timebin;
%%% make a table for these arrays
MyTable = table(Timebin,Accuracy,Condition,IntAccTime,IntCondTime);
%%% then run the model
glme = fitglme(MyTable,...
    'Accuracy ~ 1 + Condition + Timebin',...
    'Distribution','inverse gaussian','FitMethod','MPL',...
    'DummyVarCoding','effects');
%%% and create the table for the results
Res_ACC = [glme.Coefficients.Estimate(2:end),glme.Coefficients.SE(2:end),...
    glme.Coefficients.Lower(2:end),glme.Coefficients.Upper(2:end),...
    glme.Coefficients.tStat(2:end),glme.Coefficients.pValue(2:end)];

%% medians, means and bootstrap CIs of the medians per time window
nBS = 10000;
quantileBS = 0.95;
for i = 1:9
    medians2plot(1,i) = median(b_curve{i});
    medians2plot(2,i) = median(r_curve{i});
    medians2plot(3,i) = median(y_curve{i});
    means2plot(1,i) = mean(b_curve{i});
    means2plot(2,i) = mean(r_curve{i});
    means2plot(3,i) = mean(y_curve{i});
    CIb(:,i) = median_ci(b_curve{i},nBS,quantileBS);
    CIr(:,i) = median_ci(r_curve{i},nBS,quantileBS);
    CIy(:,i) = median_ci(y_curve{i},nBS,quantileBS);
end

%% plotting performance across time
figure; set(gcf,'color','w')
errorbar(-97:10:-17,medians2plot(1,:),abs(CIb(1,:)-CIb(2,:)),abs(CIb(3,:)-CIb(2,:)),'linewidth',1.5)
hold on
errorbar(-95:10:-15,medians2plot(2,:),abs(CIr(1,:)-CIr(2,:)),abs(CIr(3,:)-CIr(2,:)),'linewidth',1.5)
errorbar(-93:10:-13,medians2plot(3,:),abs(CIy(1,:)-CIy(2,:)),abs(CIy(3,:)-CIy(2,:)),'linewidth',1.5)
xlim([-100 -10])
set(gca,'XTick',[-95:10:-15])
xlabel('Interval of days before question closure')
ylabel('Brier Score')
legend({'Average of individual Briers';'Team Consensus 1st order';'Team Consensus 2nd order'},'location','northwest')
set(gca,'FontSize',15)
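%% helper used above: median_ci (not defined in this file)
% median_ci presumably returns a bootstrap confidence interval around the
% median as a 3-element column, [lower; central value; upper], which is how
% CIb/CIr/CIy are indexed in the errorbar calls. A minimal runnable sketch
% under that assumption (median_ci_sketch is a hypothetical stand-in; the
% real implementation may differ):
median_ci_sketch = @(x,nBS,ci) quantile( ...
    median(x(randi(numel(x),numel(x),nBS)),1), ...   % nBS bootstrap medians of resampled x
    [(1-ci)/2, 0.5, (1+ci)/2])';                     % lower bound, central value, upper bound
% usage: ci = median_ci_sketch(b_curve{1},nBS,quantileBS);  % 3x1: [lower; central; upper]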
%% comparison of specific time windows within a curve
ranksum(b_curve{1},b_curve{5})
ranksum(r_curve{1},r_curve{5})
ranksum(y_curve{1},y_curve{5})

%% calculating confidence and absolute performance per time window, per aggregation-level curve
for tw = 1:9
    %%% proportion of absolutely correct forecasts (Brier < 0.5)
    b_prop_correct(tw) = 100 * sum(b_curve{tw}<0.5) / numel(b_curve{tw});
    r_prop_correct(tw) = 100 * sum(r_curve{tw}<0.5) / numel(r_curve{tw});
    y_prop_correct(tw) = 100 * sum(y_curve{tw}<0.5) / numel(y_curve{tw});
    %%% confidence, as % of the maximum distance from an uninformative 0.5 forecast
    clear b_conf_temp r_conf_temp y_conf_temp
    for i = 1:numel(b_curve{tw})
        b_conf_temp(i) = 100 * abs(Inverse_BrierScoreCalc(b_curve{tw}(i),1) - 0.5) / 0.5;
    end
    b_confidence{tw} = b_conf_temp;
    b_confidence_mean(tw) = mean(b_confidence{tw});
    b_confidence_SE(tw) = std(b_confidence{tw}) / sqrt(numel(b_confidence{tw}));
    for i = 1:numel(r_curve{tw})
        r_conf_temp(i) = 100 * abs(Inverse_BrierScoreCalc(r_curve{tw}(i),1) - 0.5) / 0.5;
    end
    r_confidence{tw} = r_conf_temp;
    r_confidence_mean(tw) = mean(r_confidence{tw});
    r_confidence_SE(tw) = std(r_confidence{tw}) / sqrt(numel(r_confidence{tw}));
    for i = 1:numel(y_curve{tw})
        y_conf_temp(i) = 100 * abs(Inverse_BrierScoreCalc(y_curve{tw}(i),1) - 0.5) / 0.5;
    end
    y_confidence{tw} = y_conf_temp;
    y_confidence_mean(tw) = mean(y_confidence{tw});
    y_confidence_SE(tw) = std(y_confidence{tw}) / sqrt(numel(y_confidence{tw}));
end

%% confidence and % correct across time windows
figure; set(gcf,'color','w')
subplot 211
% plot(-95:10:-15,b_confidence_mean)
% hold on
% plot(-95:10:-15,r_confidence_mean)
% plot(-95:10:-15,y_confidence_mean)
errorbar(-97:10:-17,b_confidence_mean,b_confidence_SE,'linewidth',1.3)
hold on
errorbar(-95:10:-15,r_confidence_mean,r_confidence_SE,'linewidth',1.3)
errorbar(-93:10:-13,y_confidence_mean,y_confidence_SE,'linewidth',1.3)
xlim([-100 -10])
set(gca,'XTick',[-95:10:-15])
% xlabel('Interval of days before question closure')
ylabel('% of max confidence')
% legend({'Average of individual Briers';'Team Consensus 1st order';'Team Consensus 2nd order'},'location','northwest')
set(gca,'FontSize',10)
subplot 212
plot(-97:10:-17,b_prop_correct,'linewidth',1.3)
hold on
plot(-95:10:-15,r_prop_correct,'linewidth',1.3)
plot(-93:10:-13,y_prop_correct,'linewidth',1.3)
xlim([-100 -10])
set(gca,'XTick',[-95:10:-15])
xlabel('Interval of days before question closure')
ylabel('% of Correct Forecasts')
legend({'Average of individual Briers';'Team Consensus 1st order';'Team Consensus 2nd order'},'location','northwest')
set(gca,'FontSize',10)
ylim([70 100])

%% FIGURE OF MEAN (instead of median) PERFORMANCES IN TIME WINDOWS
figure; set(gcf,'color','w');
errorbar(1:9,Performance2_window_mean,Performance2_window_se,'linewidth',3)
hold on
errorbar(1:9,Performance1_window_mean,Performance1_window_se,'linewidth',3)
errorbar(1:9,Consensus_mean_2aggregation_tw,Consensus_se_2aggregation_tw,'linewidth',3)
xlim([0.5 9.5])
ylim([0.05 0.4])
set(gca,'XTick',[1:9])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Brier Score')
legend({'Average of individual Briers';'Team Consensus 1st order';'Team Consensus 2nd order'},'location','southwest')
set(gca,'FontSize',15)
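%% helper used above: Inverse_BrierScoreCalc (not defined in this file)
% The confidence computation above presumably inverts the two-alternative
% Brier score: given a Brier b and the outcome o, it recovers the forecast
% probability p via |p - o| = sqrt(b/2). A minimal runnable sketch under that
% assumption (Inverse_BrierScoreCalc_sketch is a hypothetical stand-in name,
% not necessarily the repository's implementation):
Inverse_BrierScoreCalc_sketch = @(b,outcome) outcome + (1-2*outcome).*sqrt(b/2);
% e.g. Inverse_BrierScoreCalc_sketch(0.5,1) returns 0.5 (an uninformative
% forecast) and Inverse_BrierScoreCalc_sketch(0,1) returns 1 (a perfect one).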
%% sanity check: same curves as above, but one line per team (averaging across
%% questions), then one line per question (averaging across teams)
figure; set(gcf,'color','w');
for g = 11:90
    plot(1:9,Perf1_groupmean_tw(g,:),'b')
    hold on
    plot(1:9,Perf2_groupmean_tw(g,:),'r')
end
set(gca,'XTick',[1:9])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Brier Score')
legend({'Brier of average forecast';'Average of individual Briers'})
title('one line per team')
set(gca,'FontSize',15)

figure; set(gcf,'color','w');
for g = 1:382
    plot(1:9,Perf1_questionmean_tw(g,:),'b')
    hold on
    plot(1:9,Perf2_questionmean_tw(g,:),'r')
end
set(gca,'XTick',[1:9])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Brier Score')
legend({'Brier of average forecast';'Average of individual Briers'})
title('one line per question')
set(gca,'FontSize',15)

%% plot modulation index
figure; set(gcf,'color','w');
errorbar(1:9,MI_mean,MI_se,'linewidth',3)
xlim([0.5 9.5])
set(gca,'XTick',[1:9])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Modulation Index')
% legend({'Brier of average forecast';'Average of individual Briers'})
set(gca,'FontSize',15)

%% quick check: modulation index for the last computed window
%% (Performance*_window_raw still hold the values from the final loop iteration)
Performance_diff = Performance2_window_raw - Performance1_window_raw;
Performance_sum = Performance2_window_raw + Performance1_window_raw;
MI = Performance_diff ./ Performance_sum;
nanmean(MI(:))