calccorrelation.m 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. function f = calccorrelation(x,y,dim,wantmeansubtract,wantgainsensitive)
  2. % function f = calccorrelation(x,y,dim,wantmeansubtract,wantgainsensitive)
  3. %
  4. % <x>,<y> are matrices with the same dimensions
  5. % <dim> (optional) is the dimension of interest.
  6. % default to 2 if <x> is a (horizontal) vector and to 1 if not.
  7. % special case is 0 which means to calculate globally.
  8. % <wantmeansubtract> (optional) is whether to subtract mean first. default: 1.
  9. % <wantgainsensitive> (optional) is whether to make the metric sensitive
  10. % to gain. default: 0.
  11. %
  12. % calculate Pearson's correlation coefficient between <x> and <y> for
  13. % each case oriented along <dim>. this is achieved by taking
  14. % the two vectors implicated in each case, subtracting each vector's mean,
  15. % normalizing each vector to have unit length, and then taking their dot product.
  16. %
  17. % a special case is when <wantmeansubtract>==0, in which case we omit the mean-
  18. % subtraction. this technically isn't Pearson's correlation any more.
  19. %
  20. % another special case is <wantgainsensitive>, in which case we take the two
  21. % vectors implicated in each case, subtract each vector's mean (if
  22. % <wantmeansubtract>), normalize the two vectors by the same constant
  23. % such that the larger of the two vectors has length 1, and then take the dot-
  24. % product. note that this metric still ranges from -1 to 1 and that any gain
  25. % mismatch can only hurt you (i.e. push you towards a correlation of 0).
  26. % (in retrospect, we now observe that it is probably better to use calccod.m in order
  27. % to be sensitive to gain, rather than to use <wantgainsensitive> in this function.)
  28. %
  29. % if there is no variance in one (or more) of the inputs, the
  30. % result is returned as NaN.
  31. %
  32. % NaNs are handled gracefully (a NaN causes that data point to be ignored).
  33. %
  34. % if there are no valid data points (i.e. all data points are
  35. % ignored because of NaNs), we return NaN for that case.
  36. %
  37. % we don't use caution with respect to cases involving low variance (see unitlength.m).
  38. %
  39. % be careful of the presumption that the mean and scale of <x> and <y> can be discounted.
  40. % if you do not want to perform this discounting, use calccod.m instead!
  41. %
  42. % note some weird cases:
  43. % calccorrelation([],[]) is []
  44. %
  45. % example:
  46. % calccorrelation(randn(1,100),randn(1,100))
  47. % input
  48. if ~exist('dim','var') || isempty(dim)
  49. if isvector(x) && size(x,1)==1
  50. dim = 2;
  51. else
  52. dim = 1;
  53. end
  54. end
  55. if ~exist('wantmeansubtract','var') || isempty(wantmeansubtract)
  56. wantmeansubtract = 1;
  57. end
  58. if ~exist('wantgainsensitive','var') || isempty(wantgainsensitive)
  59. wantgainsensitive = 0;
  60. end
  61. % handle weird case up front
  62. if isempty(x)
  63. f = [];
  64. return;
  65. end
  66. % handle 0 case
  67. if dim==0
  68. x = x(:);
  69. y = y(:);
  70. dim = 1;
  71. end
  72. % propagate NaNs (i.e. ignore invalid data points)
  73. x(isnan(y)) = NaN;
  74. y(isnan(x)) = NaN;
  75. % subtract mean
  76. if wantmeansubtract
  77. x = zeromean(x,dim);
  78. y = zeromean(y,dim);
  79. end
  80. % normalize x and y to be unit length
  81. [xnorm,xlen] = unitlength(x,dim,[],0);
  82. [ynorm,ylen] = unitlength(y,dim,[],0);
  83. % if gain sensitive, then do something tricky:
  84. if wantgainsensitive
  85. % case where x is the bigger vector
  86. temp = xnorm .* bsxfun(@(x,y) zerodiv(x,y,NaN,0),y,xlen);
  87. bad = all(isnan(temp),dim);
  88. f1 = nansum(temp,dim);
  89. f1(bad) = NaN;
  90. % case where y is the bigger vector
  91. temp = bsxfun(@(x,y) zerodiv(x,y,NaN,0),x,ylen) .* ynorm;
  92. bad = all(isnan(temp),dim);
  93. f2 = nansum(temp,dim);
  94. f2(bad) = NaN;
  95. % figure out which one to use
  96. f = f1; % this is when x is bigger
  97. f(xlen < ylen) = f2(xlen < ylen); % this is when y is bigger
  98. % if not, just take the dot product
  99. else
  100. temp = xnorm .* ynorm;
  101. % at this point, we want to sum along dim. however, if a case has all NaNs, we need the output to be NaN.
  102. bad = all(isnan(temp),dim); % record bad cases
  103. f = nansum(temp,dim); % do the summation
  104. f(bad) = NaN; % explicitly set bad cases to NaN
  105. end