remove_outliers.m 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  function [goodtrials, sample_wise_goodtrials] = remove_outliers(data,zthres)
  % REMOVE_OUTLIERS  Flag outlying trials in a TIME x TRIALS data matrix.
  %
  % input:
  %   - data   (time x trials) numeric matrix; NaN samples are ignored.
  %   - zthres trial-wise z-score threshold (optional, defaults to 3).
  %
  % output:
  %   - goodtrials             1 x nTrials vector of 1/0 (1 = keep), based
  %                            on a trial-wise removal of outliers.
  %   - sample_wise_goodtrials 1 x nTrials vector of 1/0; a trial is kept
  %                            only if it is good trial-wise AND contains
  %                            no individual outlying sample.
  %
  % Rob Teeuwen 20190815

  % The header always advertised 3 as the default; actually implement it.
  if nargin < 2 || isempty(zthres)
      zthres = 3;
  end

  % Thresholds that were previously hard-coded in the body.
  sample_zthres  = 10;   % |z| above which a single sample is an outlier
  masking_zthres = 20;   % |z| above which an outlier may mask other outliers

  ntrials = size(data,2);

  % --- trial based outlier removal -------------------------------------
  % Average across samples, giving 1 value per trial, z-score these values
  % and reject trials whose |z| exceeds zthres. The dimension is given
  % explicitly so a single-sample (1 x trials) input is still treated as
  % time x trials instead of being averaged across trials.
  trial_means = mean(data,1,'omitnan');
  z = local_abs_z(trial_means);
  badtrials = z > zthres;

  goodtrials = ones(1,ntrials);
  goodtrials(badtrials) = 0;

  % --- sample based outlier detection ----------------------------------
  % z-score every sample against the grand mean/std of the whole matrix
  % and flag trials containing any sample with |z| > sample_zthres. The
  % absolute value is used so negative excursions are caught as well,
  % consistent with the trial-wise test above.
  all_samples = data(:);
  grand_mean  = mean(all_samples,'omitnan');
  grand_std   = std(all_samples,0,'omitnan');
  sample_z    = abs(data - grand_mean) ./ grand_std;
  sample_bad  = any(sample_z > sample_zthres,1);

  % --- repeated pass for masked outliers -------------------------------
  % If there are any extreme z-scores (> masking_zthres) they inflate the
  % standard deviation and can hide other outliers, so repeat the
  % trial-wise test with the rejected trials excluded. Rejected trials are
  % treated as missing values (NaN) rather than deleted, so that trial
  % indices keep matching the columns of 'data'.
  data_adj = data;
  while max(z) > masking_zthres
      data_adj(:,badtrials) = NaN;
      z = local_abs_z(mean(data_adj,1,'omitnan'));
      badtrials = z > zthres;
      % remove bad trials from goodtrials array
      goodtrials(badtrials) = 0;
  end

  % Build the sample-wise result only now, so that trials rejected during
  % the repeated pass above are excluded from it too.
  sample_wise_goodtrials = goodtrials;
  sample_wise_goodtrials(sample_bad) = 0;
  end

  function z = local_abs_z(values)
  % Absolute z-score of each element of a row vector, ignoring NaNs.
  % A zero standard deviation yields NaN (0/0), which never exceeds a
  % threshold, so nothing is flagged in that case.
  z = abs((values - mean(values,2,'omitnan')) ./ std(values,0,2,'omitnan'));
  end