clear all
clc
close all

%% loading the processed data files (takes a while, it's big)
load('data/2fcasts_members_questions_extracted_data.mat');
team2start = 11;

%% removing the values for which there is no Brier (because of minimum
%% requirements from the previous script, like a minimum number of answers)
for team = 1:size(forecast_time2close_all,1)
    for question = 1:491
        N = numel(group_briers_all{team,question});
        if numel(group_briers_all{team,question}) == 0
            group_forecasts_all{team,question} = [];
            group_members_all{team,question} = [];
            group_timestamps_all{team,question} = [];
            forecast_time2close_all{team,question} = [];
        end
    end
end

%% creating the variables for the data table
for t = team2start:90
    team4table{1,t} = repmat(t, numel(cat(2,group_briers_all{t,:})), 1);
    question4table = [];
    for q = 1:size(questions2analyze_all{t},1)
        question4table = vertcat(question4table, ...
            repmat(questions2analyze_all{t}(q), numel(group_briers_all{t,q}), 1));
    end
    question4table_all{1,t} = question4table;
    member4table{1,t} = cat(1,group_members_all{t,:});
    fcast4table{1,t} = cat(1,group_forecasts_all{t,:});
    brier4table{1,t} = cat(2,group_briers_all{t,:})';
    timestamp4table{1,t} = cat(1,group_timestamps_all{t,:});
    time2close4table{1,t} = cat(2,forecast_time2close_all{t,:})';
end

%% creating the data table
team = cat(1,team4table{1,:});
question_code = cat(1,question4table_all{1,:});
question_n = nan(numel(question_code),1);
unique_questions = unique(question_code);
for i = 1:numel(unique_questions)
    question_n(ismember(question_code, unique_questions{i})) = i;
end
member = cat(1,member4table{1,:});
fcast = cat(1,fcast4table{1,:});
brier = cat(1,brier4table{1,:});
timestamp = cat(1,timestamp4table{1,:});
time2close = cat(1,time2close4table{1,:});
Data = table(team,member,question_code,question_n,fcast,brier,timestamp,time2close);
Data(Data.team < team2start,:) = [];   % keep only the analyzed teams (reconstructed; this line was garbled in the source)

%% Wilcoxon rank-sum test: each member's Briers vs the rest of their team
%%% NOTE: most of this section was garbled in the source. The loop below is an
%%% assumed reconstruction built from the surviving elseif branches and the
%%% variables used later (membersperteam_n, team_avg_brier, N_comparisons,
%%% Wilcoxon_team_vs_members_*); the exact original code may have differed.
for t = 1:90
    membersperteam_n(t) = numel(unique(cat(1,group_members_all{t,:})));
end
for t = team2start:90
    team_members = unique(Data.member(Data.team == t));
    team_avg_brier(t) = mean(Data.brier(Data.team == t));
    N_comparisons = numel(team_members);
    clear Wilcoxon WilcoxonPerformance
    for m = 1:numel(team_members)
        member_index = ismember(Data.member, team_members(m)) & Data.team == t;
        member_briers = Data.brier(member_index);
        rest_of_team_briers = Data.brier(~member_index & Data.team == t);
        Wilcoxon(m) = ranksum(member_briers, rest_of_team_briers);
        if Wilcoxon(m) < 0.05/N_comparisons && mean(member_briers) < team_avg_brier(t)
            WilcoxonPerformance(m) = -1; %%% if member is better than team
        elseif Wilcoxon(m) < 0.05/N_comparisons && mean(member_briers) > team_avg_brier(t)
            WilcoxonPerformance(m) = 1; %%% if member is worse than team
        elseif Wilcoxon(m) > 0.05/N_comparisons
            WilcoxonPerformance(m) = 0; %%% if member is the same as team
        end
    end
    Wilcoxon_team_vs_members_pvalues{t} = Wilcoxon;
    Wilcoxon_team_vs_members_performance{t} = WilcoxonPerformance;
end

%% members per team figure
figure
subplot 121
plot(1:10,membersperteam_n(1:10),'s-')
ylabel('# of members')
xlabel('team')
subplot 122
plot(11:90,membersperteam_n(11:90),'s-')
xlabel('team')

%% members per team figure (log scale)
figure; set(gcf,'color','w')
semilogy(1:90,membersperteam_n,'.-','linewidth',2,'markersize',15)
hold on
plot([1 90],[70 70],'linewidth',2)
xlim([0 91])
set(gca,'XTick',[10:10:90])
xlim([0.5 90.5])
set(gca,'XTickLabel',{[10:10:90]})
xlabel('Team')
ylabel('# members')
set(gca,'fontsize',20)

%% figure: proportion of team members performing differently from the team mean
for i = team2start:90
    t_perf = Wilcoxon_team_vs_members_performance{i};
    t_better(i) = sum(t_perf == -1) / numel(t_perf);
    t_worse(i) = sum(t_perf == 1) / numel(t_perf);
    % t_better(i) = sum(t_perf == -1);
    % t_worse(i) = sum(t_perf == 1);
    t_diff(i) = t_better(i) + t_worse(i);
end
figure; set(gcf,'color','w');
plot(1:90,t_better,1:90,t_worse)
legend('Better than team average','Worse than team average','location','northeast')
xlim([0 91])
set(gca,'XTick',[10:10:90])
xlim([0.5 90.5])
set(gca,'XTickLabel',{[10:10:90]})
xlabel('Team')
ylabel('Proportion of team members')
set(gca,'FontSize',20)
sum(t_diff==0)   % number of teams with no members differing from the team

%% counting members better / worse / equal relative to their team
concatenated_wilcoxonperfs = cat(2,Wilcoxon_team_vs_members_performance{1,:});
disp(['Check that the N for the pvalue is = ' num2str(numel(cat(2,Wilcoxon_team_vs_members_performance{1,:})))])
Nbetter = sum(concatenated_wilcoxonperfs == -1);
Nworse = sum(concatenated_wilcoxonperfs == 1);
Nequal = sum(concatenated_wilcoxonperfs == 0);
disp(['N members better than their team = ' num2str(Nbetter)])
disp(['N members worse than their team = ' num2str(Nworse)])
disp(['N members equal to their team = ' num2str(Nequal)])

%% creating the new variables to analyze:
%% confidence and absolute performance
confidence4table = nan(size(Data,1),1);
abs_performance4table = nan(size(Data,1),1);
confidence4table = abs(Data.fcast - 0.5);
abs_performance4table(Data.brier < 0.5) = 1;    % correct
abs_performance4table(Data.brier > 0.5) = 0;    % incorrect
abs_performance4table(Data.brier == 0.5) = -1;  % uninformative (forecast of 0.5)
Data.confidence = confidence4table;
Data.abs_performance = abs_performance4table;

%% calculating question outcomes for later: 1 if the question's outcome was 1,
%% 0 if it was 0 (matching the probability forecast); -1 for 0.5 forecasts,
%% which carry no outcome information
outcome_vector = nan(size(Data,1),1);
outcome_vector(Data.fcast==0.5) = -1;
outcome_vector(Data.abs_performance==1 & Data.fcast<0.5) = 0;
outcome_vector(Data.abs_performance==0 & Data.fcast<0.5) = 1;
outcome_vector(Data.abs_performance==0 & Data.fcast>0.5) = 0;
outcome_vector(Data.abs_performance==1 & Data.fcast>0.5) = 1;
Data.q_outcome = outcome_vector;

%% descriptive histograms. Sanity check: should show -1/0/1 for performance
%% and values between 0 and 0.5 for confidence
figure; set(gcf,'color','w');
subplot 121
histogram(Data.abs_performance,'normalization','cdf')
xlabel('Forecast absolute performance')
ylabel('Cumulative Probability')
subplot 122
histogram(Data.confidence,100,'normalization','pdf')
xlabel('Forecast Confidence')

%% windowing confidence and absolute performance
tw_start = 110:-10:30;
tw_end = 80:-10:0;
for tw = 1:numel(tw_start)
    clear time_index
    time_index = Data.time2close < tw_start(tw) & Data.time2close > tw_end(tw);
    abs_perf_window(tw) = mean(Data.abs_performance(time_index & Data.fcast ~= 0.5));
    confidence_mean_window(tw) = mean(Data.confidence(time_index));
    confidence_se_window(tw) = std(Data.confidence(time_index)) / sqrt(numel(Data.confidence(time_index)));
    brier_mean_window(tw) = mean(Data.brier(time_index));
    brier_se_window(tw) = std(Data.brier(time_index)) / sqrt(numel(Data.brier(time_index)));
end

%% figure of absolute performance and confidence across time windows
figure; set(gcf,'color','w');
subplot 211
plot(1:9,abs_perf_window)
set(gca,'XTick',[1:9])
xlim([0.5 9.5])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Proportion of correct absolute forecasts')
subplot 212
errorbar(1:9,confidence_mean_window,confidence_se_window)
ylim([0.28 0.4])
xlim([0.5 9.5])
set(gca,'XTick',[1:9])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Confidence Score')
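%% helper used below: BrierScoreCalc (not defined in this file)
% The thresholds above (Brier < 0.5 = correct, exactly 0.5 = uninformative)
% match the two-alternative Brier score, where a forecast of p = 0.5 scores
% exactly 0.5 regardless of the outcome. A minimal runnable sketch of what
% BrierScoreCalc is assumed to compute (the repository's helper may differ);
% BrierScoreCalc_sketch is a hypothetical stand-in name:
BrierScoreCalc_sketch = @(p,outcome) (p-outcome).^2 + ((1-p)-(1-outcome)).^2;  % = 2*(p-outcome).^2
% e.g. BrierScoreCalc_sketch(0.5,1) returns 0.5, BrierScoreCalc_sketch(1,1)
% returns 0 (perfect), and BrierScoreCalc_sketch(0,1) returns 2 (maximally wrong).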
%% calculating the two performances per team per window
min_answers = 5;
for tw = 1:numel(tw_start)
    time_index = Data.time2close < tw_start(tw) & Data.time2close > tw_end(tw);
    Performance1_window_raw = nan(90,382);
    Performance2_window_raw = nan(90,382);
    Consensus_fcast_window_raw = nan(90,382);
    for t = team2start:90
        for q = 1:max(Data.question_n)
            clear values valuesP1
            values = Data.brier(Data.team == t & Data.question_n == q & time_index);
            valuesP1 = Data.fcast(Data.team == t & Data.question_n == q & time_index);
            if numel(values) >= min_answers
                Performance2_window_raw(t,q) = nanmean(values);  % average of the individual Briers
                outcome4briercalc = max(unique(Data.q_outcome(Data.question_n==q)));
                Performance1_window_raw(t,q) = BrierScoreCalc(nanmean(valuesP1),outcome4briercalc);  % Brier of the average (consensus) forecast
                Consensus_fcast_window_raw(t,q) = nanmean(valuesP1);
            end
        end
    end
    %%% performances by team, to plot one line per team instead of the grand average
    Perf1_groupmean_tw(:,tw) = nanmean(Performance1_window_raw,2);
    Perf2_groupmean_tw(:,tw) = nanmean(Performance2_window_raw,2);
    Perf1_questionmean_tw(:,tw) = nanmean(Performance1_window_raw,1);
    Perf2_questionmean_tw(:,tw) = nanmean(Performance2_window_raw,1);
    Consensus_groupmean_tw(:,tw) = nanmean(Consensus_fcast_window_raw,2);
    Consensus_questionmean_tw(:,tw) = nanmean(Consensus_fcast_window_raw,1);
    %%% grand averages and standard errors
    Performance1_window_mean(tw) = nanmean(Performance1_window_raw(:));
    Performance1_window_se(tw) = nanstd(Performance1_window_raw(:)) / sqrt(sum(~isnan(Performance1_window_raw(:))));
    Performance2_window_mean(tw) = nanmean(Performance2_window_raw(:));
    Performance2_window_se(tw) = nanstd(Performance2_window_raw(:)) / sqrt(sum(~isnan(Performance2_window_raw(:))));
    clear Performance_diff Performance_sum MI
    Performance_diff = Performance2_window_raw - Performance1_window_raw;
    Performance_sum = Performance2_window_raw + Performance1_window_raw;
    Perf2_KW{tw} = Performance2_window_raw(:);
    Perf1_KW{tw} = Performance1_window_raw(:);
    MI = Performance_diff ./ Performance_sum;
    MI_mean(tw) = nanmean(MI(:));
    MI_se(tw) = nanstd(MI(:)) / sqrt(sum(sum(~isnan(MI))));
end
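%% note on the modulation index computed above
% MI = (Perf2 - Perf1) / (Perf2 + Perf1), where Perf2 is the average of the
% individual Briers and Perf1 is the Brier of the consensus forecast, so
% MI > 0 whenever aggregating the forecasts first scores better (lower Brier)
% than averaging the individual scores. A quick sign-convention check with
% hypothetical numbers:
MI_toy = (0.40 - 0.25) / (0.40 + 0.25);   % consensus better -> MI_toy > 0 (about 0.23)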
%% one more level of aggregation (2nd-order consensus)
for q = 1:size(Consensus_questionmean_tw,1)
    for tw = 1:9
        if isnan(Consensus_questionmean_tw(q,tw))
            Consensus_Brier_tw(q,tw) = nan;
        else
            Consensus_Brier_tw(q,tw) = BrierScoreCalc(Consensus_questionmean_tw(q,tw), ...
                max(unique(Data.q_outcome(Data.question_n==q))));
        end
        Nvalues_for_se(tw) = sum(~isnan(Consensus_questionmean_tw(:,tw)));
    end
end
Consensus_mean_2aggregation_tw = nanmean(Consensus_Brier_tw,1);
Consensus_se_2aggregation_tw = nanstd(Consensus_Brier_tw,1) ./ sqrt(Nvalues_for_se);

%% statistical testing
for window = 1:9
    mat_testing = nan(90*382,3);
    mat_testing(:,1) = Perf2_KW{window};   %% blue: average of individual Briers
    mat_testing(:,2) = Perf1_KW{window};   %% red: Brier of the consensus forecast
    index_numel = numel(Consensus_Brier_tw(:,window));
    mat_testing(1:index_numel,3) = Consensus_Brier_tw(:,window);   %% yellow: 2nd-order consensus
    % pKW(window) = kruskalwallis(mat_testing);
    [pKW(window),tbl{window},stats{window}] = kruskalwallis(mat_testing,[],'off');
end

%%% multiple comparisons: rows code the pairs being compared (columns 10:11)
multiple_comparisons = nan(3,11);
multiple_comparisons(1,10:11) = [1 2];
multiple_comparisons(2,10:11) = [2 3];
multiple_comparisons(3,10:11) = [1 3];
%%%% Bonferroni-correcting the KW across the 9 windows (multiplying by 9), then
%%%% pairwise rank-sum tests corrected for the 3 pairwise comparisons
for window = 1:9
    if pKW(window)*9 < 0.05
        multiple_comparisons(1,window) = 3*ranksum(Perf2_KW{window},Perf1_KW{window});
        multiple_comparisons(2,window) = 3*ranksum(Consensus_Brier_tw(:,window),Perf1_KW{window});
        multiple_comparisons(3,window) = 3*ranksum(Perf2_KW{window},Consensus_Brier_tw(:,window));
    end
    multiple_comparisons_dunn{window} = multcompare(stats{window},'CType','dunn-sidak');
end

%% GLM preparation
%%%% preparing the data: dropping NaNs and building one time-bin vector per curve
clear b_curve r_curve y_curve
b_curve = Perf2_KW;
r_curve = Perf1_KW;
for i = 1:9
    indb = ~isnan(Perf2_KW{i});
    b_curve{i} = Perf2_KW{i}(indb);
    indr = ~isnan(Perf1_KW{i});
    r_curve{i} = Perf1_KW{i}(indr);
    indy = ~isnan(Consensus_Brier_tw(:,i));
    y_curve{i} = Consensus_Brier_tw(indy,i);
    clear indr indb indy
    b_timebins{i} = i*ones(numel(b_curve{i}),1);
    r_timebins{i} = i*ones(numel(r_curve{i}),1);
    y_timebins{i} = i*ones(numel(y_curve{i}),1);
end
%%%% now concatenating: one long data vector plus columns for condition and time bin
b_cat = vertcat(b_curve{:});
r_cat = vertcat(r_curve{:});
y_cat = vertcat(y_curve{:});
briers4glm = vertcat(b_cat,r_cat,y_cat);
condition4glm(1:numel(b_cat),1) = 1;
condition4glm((1+numel(b_cat)):(numel(b_cat)+numel(r_cat)),1) = 2;
condition4glm((1+numel(b_cat)+numel(r_cat)):(numel(b_cat)+numel(r_cat)+numel(y_cat)),1) = 3;
timebin4glm = vertcat(b_timebins{:},r_timebins{:},y_timebins{:});

%% running the GLM
Accuracy = briers4glm + eps;   % Brier scores (+eps keeps the inverse-gaussian link away from exact zeros)
Timebin = timebin4glm;         % time bin (difficulty proxy)
Condition = condition4glm;     % the three aggregation conditions
figure; set(gcf,'color','w')
subplot 121
boxplot(Accuracy,Condition)
subplot 122
boxplot(Accuracy,Timebin)
IntAccTime = Accuracy .* Timebin;
IntCondTime = Condition .* Timebin;
%%% make a table for these arrays
MyTable = table(Timebin,Accuracy,Condition,IntAccTime,IntCondTime);
%%% then run the model
glme = fitglme(MyTable,...
    'Accuracy ~ 1 + Condition + Timebin',...
    'Distribution','inverse gaussian','FitMethod','MPL',...
    'DummyVarCoding','effects');
%%% and create the table for the results
Res_ACC = [glme.Coefficients.Estimate(2:end),glme.Coefficients.SE(2:end),...
    glme.Coefficients.Lower(2:end),glme.Coefficients.Upper(2:end),...
    glme.Coefficients.tStat(2:end),glme.Coefficients.pValue(2:end)];

%% medians, means and bootstrap CIs of the medians per time window
nBS = 10000;
quantileBS = 0.95;
for i = 1:9
    medians2plot(1,i) = median(b_curve{i});
    medians2plot(2,i) = median(r_curve{i});
    medians2plot(3,i) = median(y_curve{i});
    means2plot(1,i) = mean(b_curve{i});
    means2plot(2,i) = mean(r_curve{i});
    means2plot(3,i) = mean(y_curve{i});
    CIb(:,i) = median_ci(b_curve{i},nBS,quantileBS);
    CIr(:,i) = median_ci(r_curve{i},nBS,quantileBS);
    CIy(:,i) = median_ci(y_curve{i},nBS,quantileBS);
end

%% plotting performance across time
figure; set(gcf,'color','w')
errorbar(-97:10:-17,medians2plot(1,:),abs(CIb(1,:)-CIb(2,:)),abs(CIb(3,:)-CIb(2,:)),'linewidth',1.5)
hold on
errorbar(-95:10:-15,medians2plot(2,:),abs(CIr(1,:)-CIr(2,:)),abs(CIr(3,:)-CIr(2,:)),'linewidth',1.5)
errorbar(-93:10:-13,medians2plot(3,:),abs(CIy(1,:)-CIy(2,:)),abs(CIy(3,:)-CIy(2,:)),'linewidth',1.5)
xlim([-100 -10])
set(gca,'XTick',[-95:10:-15])
xlabel('Interval of days before question closure')
ylabel('Brier Score')
legend({'Average of individual Briers';'Team Consensus 1st order';'Team Consensus 2nd order'},'location','northwest')
set(gca,'FontSize',15)
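%% helper used above: median_ci (not defined in this file)
% median_ci presumably returns a bootstrap confidence interval around the
% median as a 3-element column, [lower; central value; upper], which is how
% CIb/CIr/CIy are indexed in the errorbar calls. A minimal runnable sketch
% under that assumption (median_ci_sketch is a hypothetical stand-in; the
% real implementation may differ):
median_ci_sketch = @(x,nBS,ci) quantile( ...
    median(x(randi(numel(x),numel(x),nBS)),1), ...   % nBS bootstrap medians of resampled x
    [(1-ci)/2, 0.5, (1+ci)/2])';                     % lower bound, central value, upper bound
% usage: ci = median_ci_sketch(b_curve{1},nBS,quantileBS);  % 3x1: [lower; central; upper]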
%% comparison of specific time windows within a curve
ranksum(b_curve{1},b_curve{5})
ranksum(r_curve{1},r_curve{5})
ranksum(y_curve{1},y_curve{5})

%% calculating confidence and absolute performance per time window, per aggregation-level curve
for tw = 1:9
    %%% proportion of absolutely correct forecasts (Brier < 0.5)
    b_prop_correct(tw) = 100 * sum(b_curve{tw}<0.5) / numel(b_curve{tw});
    r_prop_correct(tw) = 100 * sum(r_curve{tw}<0.5) / numel(r_curve{tw});
    y_prop_correct(tw) = 100 * sum(y_curve{tw}<0.5) / numel(y_curve{tw});
    %%% confidence, as % of the maximum distance from an uninformative 0.5 forecast
    clear b_conf_temp r_conf_temp y_conf_temp
    for i = 1:numel(b_curve{tw})
        b_conf_temp(i) = 100 * abs(Inverse_BrierScoreCalc(b_curve{tw}(i),1) - 0.5) / 0.5;
    end
    b_confidence{tw} = b_conf_temp;
    b_confidence_mean(tw) = mean(b_confidence{tw});
    b_confidence_SE(tw) = std(b_confidence{tw}) / sqrt(numel(b_confidence{tw}));
    for i = 1:numel(r_curve{tw})
        r_conf_temp(i) = 100 * abs(Inverse_BrierScoreCalc(r_curve{tw}(i),1) - 0.5) / 0.5;
    end
    r_confidence{tw} = r_conf_temp;
    r_confidence_mean(tw) = mean(r_confidence{tw});
    r_confidence_SE(tw) = std(r_confidence{tw}) / sqrt(numel(r_confidence{tw}));
    for i = 1:numel(y_curve{tw})
        y_conf_temp(i) = 100 * abs(Inverse_BrierScoreCalc(y_curve{tw}(i),1) - 0.5) / 0.5;
    end
    y_confidence{tw} = y_conf_temp;
    y_confidence_mean(tw) = mean(y_confidence{tw});
    y_confidence_SE(tw) = std(y_confidence{tw}) / sqrt(numel(y_confidence{tw}));
end

%% confidence and % correct across time windows
figure; set(gcf,'color','w')
subplot 211
% plot(-95:10:-15,b_confidence_mean)
% hold on
% plot(-95:10:-15,r_confidence_mean)
% plot(-95:10:-15,y_confidence_mean)
errorbar(-97:10:-17,b_confidence_mean,b_confidence_SE,'linewidth',1.3)
hold on
errorbar(-95:10:-15,r_confidence_mean,r_confidence_SE,'linewidth',1.3)
errorbar(-93:10:-13,y_confidence_mean,y_confidence_SE,'linewidth',1.3)
xlim([-100 -10])
set(gca,'XTick',[-95:10:-15])
% xlabel('Interval of days before question closure')
ylabel('% of max confidence')
% legend({'Average of individual Briers';'Team Consensus 1st order';'Team Consensus 2nd order'},'location','northwest')
set(gca,'FontSize',10)
subplot 212
plot(-97:10:-17,b_prop_correct,'linewidth',1.3)
hold on
plot(-95:10:-15,r_prop_correct,'linewidth',1.3)
plot(-93:10:-13,y_prop_correct,'linewidth',1.3)
xlim([-100 -10])
set(gca,'XTick',[-95:10:-15])
xlabel('Interval of days before question closure')
ylabel('% of Correct Forecasts')
legend({'Average of individual Briers';'Team Consensus 1st order';'Team Consensus 2nd order'},'location','northwest')
set(gca,'FontSize',10)
ylim([70 100])

%% FIGURE OF MEAN (instead of median) PERFORMANCES IN TIME WINDOWS
figure; set(gcf,'color','w');
errorbar(1:9,Performance2_window_mean,Performance2_window_se,'linewidth',3)
hold on
errorbar(1:9,Performance1_window_mean,Performance1_window_se,'linewidth',3)
errorbar(1:9,Consensus_mean_2aggregation_tw,Consensus_se_2aggregation_tw,'linewidth',3)
xlim([0.5 9.5])
ylim([0.05 0.4])
set(gca,'XTick',[1:9])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Brier Score')
legend({'Average of individual Briers';'Team Consensus 1st order';'Team Consensus 2nd order'},'location','southwest')
set(gca,'FontSize',15)
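%% helper used above: Inverse_BrierScoreCalc (not defined in this file)
% The confidence computation above presumably inverts the two-alternative
% Brier score: given a Brier b and the outcome o, it recovers the forecast
% probability p via |p - o| = sqrt(b/2). A minimal runnable sketch under that
% assumption (Inverse_BrierScoreCalc_sketch is a hypothetical stand-in name,
% not necessarily the repository's implementation):
Inverse_BrierScoreCalc_sketch = @(b,outcome) outcome + (1-2*outcome).*sqrt(b/2);
% e.g. Inverse_BrierScoreCalc_sketch(0.5,1) returns 0.5 (an uninformative
% forecast) and Inverse_BrierScoreCalc_sketch(0,1) returns 1 (a perfect one).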
%% sanity check: same curves as above, but one line per team (averaging across
%% questions), then one line per question (averaging across teams)
figure; set(gcf,'color','w');
for g = 11:90
    plot(1:9,Perf1_groupmean_tw(g,:),'b')
    hold on
    plot(1:9,Perf2_groupmean_tw(g,:),'r')
end
set(gca,'XTick',[1:9])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Brier Score')
legend({'Brier of average forecast';'Average of individual Briers'})
title('one line per team')
set(gca,'FontSize',15)

figure; set(gcf,'color','w');
for g = 1:382
    plot(1:9,Perf1_questionmean_tw(g,:),'b')
    hold on
    plot(1:9,Perf2_questionmean_tw(g,:),'r')
end
set(gca,'XTick',[1:9])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Brier Score')
legend({'Brier of average forecast';'Average of individual Briers'})
title('one line per question')
set(gca,'FontSize',15)

%% plot modulation index
figure; set(gcf,'color','w');
errorbar(1:9,MI_mean,MI_se,'linewidth',3)
xlim([0.5 9.5])
set(gca,'XTick',[1:9])
set(gca,'XTickLabel',{'110-80','100-70','90-60','80-50','70-40','60-30','50-20','40-10','30-0'})
xlabel('Interval of days before question closure')
ylabel('Modulation Index')
% legend({'Brier of average forecast';'Average of individual Briers'})
set(gca,'FontSize',15)

%% quick check: modulation index for the last computed window
%% (Performance*_window_raw still hold the values from the final loop iteration)
Performance_diff = Performance2_window_raw - Performance1_window_raw;
Performance_sum = Performance2_window_raw + Performance1_window_raw;
MI = Performance_diff ./ Performance_sum;
nanmean(MI(:))