calccod.m 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. function f = calccod(x,y,dim,wantgain,wantmeansub)
  2. % function f = calccod(x,y,dim,wantgain,wantmeansub)
  3. %
  4. % <x>,<y> are matrices with the same dimensions
  5. % <dim> (optional) is the dimension of interest.
  6. % default to 2 if <x> is a (horizontal) vector and to 1 if not.
  7. % special case is 0 which means to calculate globally.
  8. % <wantgain> (optional) is
  9. % 0 means normal
  10. % 1 means allow a gain to be applied to each case of <x>
  11. % to minimize the squared error with respect to <y>.
  12. % in this case, there cannot be any NaNs in <x> or <y>.
  13. % 2 is like 1 except that gains are restricted to be non-negative.
  14. % so, if the gain that minimizes the squared error is negative,
  15. % we simply set the gain to be applied to be 0.
  16. % default: 0.
  17. % <wantmeansub> (optional) is
  18. % 0 means do not subtract any mean. this makes it such that
  19. % the variance quantification is relative to 0.
  20. % 1 means subtract the mean of each case of <y> from both
  21. % <x> and <y> before performing the calculation. this makes
  22. % it such that the variance quantification
  23. % is relative to the mean of each case of <y>.
  24. % note that <wantgain> occurs before <wantmeansub>.
  25. % default: 1.
  26. %
  27. % calculate the coefficient of determination (R^2) indicating
  28. % the percent variance in <y> that is explained by <x>. this is achieved
  29. % by calculating 100*(1 - sum((y-x).^2) / sum(y.^2)). note that
  30. % by default, we subtract the mean of each case of <y> from both <x>
  31. % and <y> before proceeding with the calculation.
  32. %
  33. % the quantity is at most 100 but can be 0 or negative (unbounded).
  34. % note that this metric is sensitive to DC and scale and is not symmetric
  35. % (i.e. if you swap <x> and <y>, you may obtain different results).
  36. % it is therefore fundamentally different than Pearson's correlation
  37. % coefficient (see calccorrelation.m).
  38. %
  39. % NaNs are handled gracefully (a NaN causes that data point to be ignored).
  40. %
  41. % if there are no valid data points (i.e. all data points are
  42. % ignored because of NaNs), we return NaN for that case.
  43. %
  44. % note some weird cases:
  45. % calccod([],[]) is []
  46. %
  47. % history:
  48. % 2013/08/18 - fix pernicious case where <x> is all zeros and <wantgain> is 1 or 2.
  49. % 2010/11/28 - add <wantgain>==2 case
  50. % 2010/11/23 - changed the output range to percentages. thus, the range is (-Inf,100].
  51. % also, we removed the <wantr> input since it was dumb.
  52. %
  53. % example:
  54. % x = randn(1,100);
  55. % calccod(x,x+0.1*randn(1,100))
  56. % input
  57. if ~exist('dim','var') || isempty(dim)
  58. if isvector(x) && size(x,1)==1
  59. dim = 2;
  60. else
  61. dim = 1;
  62. end
  63. end
  64. if ~exist('wantgain','var') || isempty(wantgain)
  65. wantgain = 0;
  66. end
  67. if ~exist('wantmeansub','var') || isempty(wantmeansub)
  68. wantmeansub = 1;
  69. end
  70. % handle weird case up front
  71. if isempty(x)
  72. f = [];
  73. return;
  74. end
  75. % handle 0 case
  76. if dim==0
  77. x = x(:);
  78. y = y(:);
  79. dim = 1;
  80. end
  81. % handle gain
  82. if wantgain
  83. % to get the residuals, we want to do something like y-x*inv(x'*x)*x'*y
  84. temp = 1./dot(x,x,dim) .* dot(x,y,dim);
  85. if wantgain==2
  86. temp(temp < 0) = 0; % if the gain was going to be negative, rectify it to 0.
  87. end
  88. x = bsxfun(@times,x,temp);
  89. end
  90. % propagate NaNs (i.e. ignore invalid data points)
  91. x(isnan(y)) = NaN;
  92. y(isnan(x)) = NaN;
  93. % handle mean subtraction
  94. if wantmeansub
  95. mn = nanmean(y,dim);
  96. y = bsxfun(@minus,y,mn);
  97. x = bsxfun(@minus,x,mn);
  98. end
  99. % finally, compute it
  100. f = 100*(1 - zerodiv(nansum((y-x).^2,dim),nansum(y.^2,dim),NaN,0));
  101. % JUNK:
  102. %
  103. % % <wantr> (optional) is whether to apply signedarraypower(f,0.5)
  104. % % at the very end, giving something like a correlation coefficient (r).
  105. % % default: 1.
  106. %
  107. % if ~exist('wantr','var') || isempty(wantr)
  108. % wantr = 1;
  109. % end
  110. %
  111. % if wantr
  112. % f = signedarraypower(f,0.5);
  113. % end