123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
%% processing data for forecasting teams of GJP
% Compares two ways of scoring a team on a question: the Brier score of the
% team's average forecast versus the average of the Brier scores of the
% individual forecasts.
close all
clear all
clc
tic
%% loading the processed data file (takes a while, it is big)
% The workspace was just cleared, so Forecasts_all and ifps (used below and
% in the rest of the script) must come from this .mat file.
% fullfile assembles the path with the separator of the current platform,
% so the script also runs outside Windows (where a literal '\' is not a
% path separator). On Windows the resulting path is unchanged.
load(fullfile('data','Processed_data','processedGJP.mat'))
toc
%% removing unnecessary data
Forecasts_all(isnan(Forecasts_all.team),:) = []; % removing all entries of users who did not belong to a team
Forecasts_all.user_id = str2double(Forecasts_all.user_id); % converting the user ids from strings to numbers so they are easier to handle
%% team Brier score per question, computed in two different ways
% (1) the average of the Brier scores of the team's individual forecasts, and
% (2) the Brier score of the team's average probability forecast.

% ---- analysis settings ----
min_answers_req = 5;               % minimum number of answers a team must have on a question for it to be analyzed
fc_type2analyze = 1;               % forecast entry type to keep; it is compared against Forecasts_all.fcast_type in the team loop (an entry is either an original forecast or an update)
answer_time_window = [-1000 1000]; % [min max] number of days before question closure; only answers inside this window are used for the Brier scores

% ---- result matrices, filled inside the team loop ----
% one column per team; the rows of a column are filled from the top with the
% scores of the questions analyzed for that team, the remainder stays NaN
pop_brier_average = nan(1000,100);
pop_average_brier = nan(1000,100);
%% team loop: score every question each team answered
% For each team and each question that team answered:
%   * collect the team's forecasts of type fc_type2analyze (probability,
%     member id, timestamp),
%   * compute how many days before the question's closing date each
%     forecast was made,
%   * if enough forecasts exist and enough of them fall strictly inside
%     answer_time_window, compute
%       brier_averages(q)   - Brier score of the mean in-window forecast
%       group_briers_avg(q) - mean Brier score of the in-window forecasts
% Questions that are not scored keep NaN in the per-team vectors.
for team2analyze = 1:90 % looping along teams

    is_team = Forecasts_all.team == team2analyze; % rows belonging to this team (reused below)
    questions2analyze = unique(Forecasts_all.ifp_id(is_team)); % list of questions this particular team answered
    questions2analyze_all{team2analyze} = questions2analyze;
    team2analyze % no semicolon on purpose: prints the team number as progress output
    unique(Forecasts_all.ctt(is_team)) % team treatments (printed)

    % per-question results of this team, NaN = question not scored
    fcast_avg = nan(1,numel(questions2analyze));
    brier_averages = nan(1,numel(questions2analyze));
    group_briers_avg = nan(1,numel(questions2analyze));

    for q = 1:numel(questions2analyze) % looping questions within a team
        question_id = questions2analyze{q}; % question ID to analyze in this iteration
        is_question = strcmp(question_id,ifps.ifp_id); % row(s) of this question in ifps

        % Real world outcome of this question, encoded for BrierScoreCalc.
        % Only the probabilities declared for option 'a' are used, so
        % outcome 'a' maps to 1 and outcome 'b' maps to 0.
        % outcome is reset to NaN for every question: previously a question
        % whose outcome was neither 'a' nor 'b' (or was missing) silently
        % reused the outcome of the previous question. Such questions are
        % now left unscored instead.
        outcome = nan;
        outcome_letter = ifps.outcome(is_question);
        if outcome_letter=='b'
            outcome = 0;
        elseif outcome_letter=='a'
            outcome = 1;
        end

        % forecasts of this team on this question, of the requested type;
        % the three vectors below share the same selection and are aligned
        is_selected = strcmp(question_id,Forecasts_all.ifp_id) & is_team & Forecasts_all.fcast_type==fc_type2analyze;
        q_forecasts = Forecasts_all.value(is_selected); % probabilities assigned by the members
        q_members = Forecasts_all.user_id(is_selected);
        q_timestamps = Forecasts_all.timestamp(is_selected);
        group_forecasts_all{team2analyze,q} = q_forecasts;
        group_members_all{team2analyze,q} = q_members;
        group_timestamps_all{team2analyze,q} = q_timestamps;

        %%%%%%% calculating time relative to question closure of each forecast
        % Reset BEFORE use (it used to be reset only after each question, so
        % it was undefined when the very first question had no usable
        % timestamps). It stays a scalar NaN when there are no forecasts or
        % the closing date is 'NA', exactly as before.
        time_interval_in_days = nan;
        t_close = ifps.date_closed(is_question);
        if ~isempty(q_timestamps)
            if ~strcmp(t_close,'NA')
                t22 = datevec(datenum(t_close)); % closing date, parsed once per question
                time_interval_in_days = nan(1,numel(q_timestamps));
                for ts = 1:numel(q_timestamps)
                    t11 = datevec(datenum(q_timestamps(ts)));
                    time_interval_in_days(ts) = etime(t22,t11)/(24*60*60); % seconds -> days
                end
            end
        end
        forecast_time2close_all{team2analyze,q} = time_interval_in_days;

        % forecasts made strictly inside the allowed time window
        index_times = time_interval_in_days > answer_time_window(1) & time_interval_in_days < answer_time_window(2);
        forecast_time2close{team2analyze,q} = time_interval_in_days(index_times);

        % we only analyze questions with a known outcome for which at least
        % a minimum amount of forecasts were done, in total and inside the
        % time window (because not all team members respond to all questions)
        if numel(q_forecasts)>=min_answers_req && nnz(index_times)>=min_answers_req && ~isnan(outcome)
            % index_times is aligned element-by-element with q_forecasts.
            % The previous mask was rebuilt from the already filtered times,
            % so it was an all-true vector of length k (the in-window
            % count): it selected the FIRST k forecasts for the average and
            % removed nothing from the individual Brier scores.
            forecasts_in_window = q_forecasts(index_times);

            fcast_avg(q) = mean(forecasts_in_window); % average of the probabilities assigned by the team for this question
            brier_averages(q) = BrierScoreCalc(fcast_avg(q),outcome); % Brier score of the average probability

            % Brier score of every individual in-window forecast
            q_briers = nan(1,numel(forecasts_in_window));
            for br = 1:numel(forecasts_in_window)
                q_briers(br) = BrierScoreCalc(forecasts_in_window(br),outcome);
            end

            group_briers_all{team2analyze,q} = q_briers; % Brier scores of each individual forecast for this team and question
            group_briers_avg(q) = mean(q_briers); % average of the individual Brier scores for this question
        end
    end

    %%%%%% summing up all teams results
    % one shared mask keeps the two scores of a question on the same row
    is_scored = ~isnan(group_briers_avg) & ~isnan(brier_averages);
    Nquestions2save = sum(is_scored);
    pop_average_brier(1:Nquestions2save,team2analyze) = group_briers_avg(is_scored);
    pop_brier_average(1:Nquestions2save,team2analyze) = brier_averages(is_scored);

    % NOTE: this is a paired signed-rank test over the results of ALL teams
    % processed so far (cumulative), not of the current team alone.
    pop_pvalues(team2analyze) = signrank(pop_brier_average(:),pop_average_brier(:));

    questions_analyzed{team2analyze} = find(is_scored); % positions (within questions2analyze) of the scored questions

end
%% flattening the per-team results into long vectors, to export and analyze later
% q_analyzed: positions of the analyzed questions, concatenated team after team
q_analyzed = horzcat(questions_analyzed{:}).';

% team number of every cell of the 1000-by-100 result matrices, in the same
% column-major order in which those matrices are flattened below
team_matrix = ones(1000,1) * (1:100);
teams_answered = reshape(team_matrix,[],1);

%%%% organizing the population data: column 1 is the Brier score of the
%%%% average forecast, column 2 is the average of the individual Brier scores
pop_data = [pop_brier_average(:), pop_average_brier(:)];

% drop the cells that were never filled (NaN in the first column), applying
% the same mask to the team labels so both stay aligned
is_unfilled = isnan(pop_data(:,1));
teams_answered = teams_answered(~is_unfilled);
pop_data = pop_data(~is_unfilled,:);
% save(['pop_data_' num2str(answer_time_window (1)) '-' num2str(answer_time_window (2)) '_days_teams_ALL.mat'],...
% 'pop_data','q_analyzed','teams_answered')
%% saving the processed data to analyze later
save('2fcasts_members_questions_extracted_data.mat','group_forecasts_all','group_members_all','questions2analyze_all','group_briers_all','group_timestamps_all', 'forecast_time2close_all')
|