% GJP_processing_for_paper2.m  (listed as 7.2 KB in the original file view)
  %% Processing data for forecasting teams of GJP
  % Compares two ways of scoring a team on a question: the Brier score of the
  % team's average forecast versus the average of the individual Brier scores.
  close all
  clear   % variables only; 'clear all' would also flush functions and breakpoints
  clc
  tic
  %% Loading the processed data file (takes a while, it's big)
  % fullfile uses the platform's path separator, so the script also runs
  % outside Windows. The rest of the script uses the variables Forecasts_all
  % and ifps, which are expected to come from this file.
  load(fullfile('data','Processed_data','processedGJP.mat'))
  toc
  %% Removing unnecessary data
  % Drop every forecast whose team is NaN (users who did not belong to a team).
  Forecasts_all(isnan(Forecasts_all.team),:) = [];
  % Convert user IDs from text to numbers to make later handling easier.
  Forecasts_all.user_id = str2double(Forecasts_all.user_id);
  %% Settings and containers for the team Brier scores per question
  % Two scores are computed per team and question: the average of the Brier
  % scores of each participant of the team, and the Brier score of the
  % average probability forecast of the team.
  % Population matrices filled by the team loop: one column per team, rows
  % filled from the top with that team's analyzed questions, NaN = empty slot.
  pop_average_brier = nan(1000,100);
  pop_brier_average = nan(1000,100);
  % [min max] days before question closure (exclusive bounds) for which
  % answers are allowed when calculating Brier scores.
  answer_time_window = [-1000 1000];
  % Forecast entry type to analyze. The team loop keeps only rows whose
  % fcast_type is EQUAL to this value. Per the original note, the type
  % distinguishes an original forecast from an update.
  fc_type2analyze = 1;
  % Minimum number of answers required within a team and question for the
  % question to be analyzed.
  min_answers_req = 5;
  %% Per-team, per-question Brier scores
  % For every team and every question that team answered, collects the
  % forecasts of the selected type, measures how many days before question
  % closure each one was made, and, when enough forecasts fall inside
  % answer_time_window, computes:
  %   brier_averages(q)   - Brier score of the team's mean forecast
  %   group_briers_avg(q) - mean of the individual forecasts' Brier scores
  % BrierScoreCalc is a project function that is not visible in this script;
  % it is called as BrierScoreCalc(probability, outcome) with outcome 1 for
  % 'a' and 0 for 'b'.
  for team2analyze = 1:90 %%% looping along teams
      team_rows = Forecasts_all.team==team2analyze;
      questions2analyze = unique(Forecasts_all.ifp_id(team_rows)); %% list of questions this particular team answered.
      questions2analyze_all{team2analyze} = questions2analyze;
      % The next two statements deliberately have no semicolon: they print
      % the team number and its treatment(s) as a progress trace.
      team2analyze
      unique(Forecasts_all.ctt(team_rows)) %% team treatments
      %%% per-question results for this team; NaN marks "not analyzed"
      fcast_avg = nan(1,numel(questions2analyze));
      brier_averages = nan(1,numel(questions2analyze));
      group_briers_avg = nan(1,numel(questions2analyze));
      for q = 1:numel(questions2analyze) %%% looping questions within a team
          question_id = questions2analyze{q}; % question ID to analyze in this iteration.
          ifp_row = strcmp(question_id,ifps.ifp_id); % row of this question in the question table
          outcome_letter = ifps.outcome(ifp_row); %% real world outcome of this question
          % Only the probability assigned to option 'a' is kept in the data,
          % so 'a' maps to 1 and 'b' to 0. Any other outcome leaves NaN and the
          % question is skipped below, instead of silently reusing the
          % outcome of the previously processed question.
          outcome = nan;
          if outcome_letter=='b'
              outcome = 0;
          elseif outcome_letter=='a'
              outcome = 1;
          end
          % Rows of this team, this question and the selected forecast type.
          % Computed once and shared by the three extractions below so they
          % stay aligned element by element.
          fc_rows = strcmp(question_id,Forecasts_all.ifp_id) & team_rows & Forecasts_all.fcast_type==fc_type2analyze;
          q_forecasts = Forecasts_all.value(fc_rows); %%% probabilities assigned for this question by each member.
          q_members = Forecasts_all.user_id(fc_rows);
          q_timestamps = Forecasts_all.timestamp(fc_rows);
          group_forecasts_all{team2analyze,q} = q_forecasts;
          group_members_all{team2analyze,q} = q_members;
          group_timestamps_all{team2analyze,q} = q_timestamps;
          %%%%%%% calculating time relative to question closure of each forecast
          % Positive values mean the forecast was made before closure. The
          % vector is (re)initialised for every question so that it always
          % has one entry per forecast; entries stay NaN when the closing
          % date is 'NA'.
          n_fc = numel(q_timestamps);
          time_interval_in_days = nan(1,n_fc);
          date_closed = ifps.date_closed(ifp_row);
          if ~strcmp(date_closed,'NA')
              close_vec = datevec(datenum(date_closed)); % parsed once per question
              for ts = 1:n_fc
                  fc_vec = datevec(datenum(q_timestamps(ts)));
                  time_interval_in_days(ts) = etime(close_vec,fc_vec)/(24*60*60);
              end
          end
          forecast_time2close_all{team2analyze,q} = time_interval_in_days;
          % Mask aligned with q_forecasts: true for forecasts strictly inside
          % the window (NaN intervals compare false and are excluded).
          index_times = time_interval_in_days > answer_time_window(1) & time_interval_in_days < answer_time_window(2);
          forecast_time2close{team2analyze,q} = time_interval_in_days(index_times);
          % We only analyze questions with a known binary outcome for which at
          % least a minimum amount of in-window forecasts were made (because
          % not all team members respond to all questions).
          if isnan(outcome) || sum(index_times) < min_answers_req
              continue
          end
          % Both scores are computed on exactly the same in-window forecasts.
          window_forecasts = q_forecasts(index_times);
          fcast_avg(q) = mean(window_forecasts); %%% average of the probabilities assigned by the team for this question
          brier_averages(q) = BrierScoreCalc(fcast_avg(q),outcome); %%% Brier score of the average probability
          %%%% Brier score of each individual in-window forecast
          q_briers = nan(1,numel(window_forecasts));
          for br = 1:numel(window_forecasts)
              q_briers(br) = BrierScoreCalc(window_forecasts(br),outcome);
          end
          group_briers_all{team2analyze,q} = q_briers; %%% individual Brier scores for this team and question
          group_briers_avg(q) = mean(q_briers); %%% average of the individual Brier scores for this question
      end
      %%%%%% summing up all teams results
      % One shared mask keeps the two population columns paired row by row
      % and matches the indices stored in questions_analyzed.
      analyzed = ~isnan(group_briers_avg) & ~isnan(brier_averages);
      Nquestions2save = sum(analyzed);
      pop_average_brier(1:Nquestions2save,team2analyze) = group_briers_avg(analyzed);
      pop_brier_average(1:Nquestions2save,team2analyze) = brier_averages(analyzed);
      % NOTE(review): this test runs on the whole population matrices as
      % filled so far, so pop_pvalues(t) is cumulative over teams 1..t rather
      % than a per-team p-value - confirm this is intended.
      p = signrank(pop_brier_average(:),pop_average_brier(:));
      pop_pvalues(team2analyze) = p;
      questions_analyzed{team2analyze} = find(analyzed); % positions within questions2analyze
  end
  %%%% Creating the question and team vectors to also export and analyze later
  % q_analyzed concatenates, team after team, the per-team indices stored in
  % questions_analyzed (positions within each team's question list).
  q_analyzed = [questions_analyzed{:}]';
  % Team label for every slot of the population matrices. The size is taken
  % from the matrix itself instead of repeating the preallocation constants,
  % so the labels stay aligned even if the matrices were resized.
  [n_slots,n_team_cols] = size(pop_brier_average);
  team_matrix = repmat(1:n_team_cols,n_slots,1);
  teams_answered = team_matrix(:);
  %%%% Organizing the population data
  % Column 1: Brier score of the team's average forecast.
  % Column 2: average of the individual Brier scores.
  pop_data = [pop_brier_average(:),pop_average_brier(:)];
  % Slots that were never filled are NaN in column 1; drop them from both
  % the data and the matching team labels using the same mask.
  unused_slots = isnan(pop_data(:,1));
  teams_answered(unused_slots) = [];
  pop_data(unused_slots,:) = [];
  % save(['pop_data_' num2str(answer_time_window (1)) '-' num2str(answer_time_window (2)) '_days_teams_ALL.mat'],...
  % 'pop_data','q_analyzed','teams_answered')
  %% Saving the processed data to analyze later
  save('2fcasts_members_questions_extracted_data.mat','group_forecasts_all','group_members_all','questions2analyze_all','group_briers_all','group_timestamps_all', 'forecast_time2close_all')