corrstats.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. """
  2. Functions for calculating the statistical significant differences between two dependent or independent correlation
  3. coefficients.
  4. The Fisher and Steiger method is adopted from the R package http://personality-project.org/r/html/paired.r.html
  5. and is described in detail in the book 'Statistical Methods for Psychology'
  6. The Zou method is adopted from http://seriousstats.wordpress.com/2012/02/05/comparing-correlations/
  7. Credit goes to the authors of above mentioned packages!
  8. Author: Philipp Singer (www.philippsinger.info)
  9. """
  10. from __future__ import division
  11. __author__ = 'psinger'
  12. import numpy as np
  13. from scipy.stats import t, norm
  14. from math import atanh, pow
  15. from numpy import tanh
  16. def rz_ci(r, n, conf_level = 0.95):
  17. zr_se = pow(1/(n - 3), .5)
  18. moe = norm.ppf(1 - (1 - conf_level)/float(2)) * zr_se
  19. zu = atanh(r) + moe
  20. zl = atanh(r) - moe
  21. return tanh((zl, zu))
  22. def rho_rxy_rxz(rxy, rxz, ryz):
  23. num = (ryz-1/2.*rxy*rxz)*(1-pow(rxy,2)-pow(rxz,2)-pow(ryz,2))+pow(ryz,3)
  24. den = (1 - pow(rxy,2)) * (1 - pow(rxz,2))
  25. return num/float(den)
  26. def dependent_corr(xy, xz, yz, n, twotailed=True, conf_level=0.95, method='steiger'):
  27. """
  28. Calculates the statistic significance between two dependent correlation coefficients
  29. @param xy: correlation coefficient between x and y
  30. @param xz: correlation coefficient between x and z
  31. @param yz: correlation coefficient between y and z
  32. @param n: number of elements in x, y and z
  33. @param twotailed: whether to calculate a one or two tailed test, only works for 'steiger' method
  34. @param conf_level: confidence level, only works for 'zou' method
  35. @param method: defines the method uses, 'steiger' or 'zou'
  36. @return: t and p-val
  37. """
  38. if method == 'steiger':
  39. d = xy - xz
  40. determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
  41. av = (xy + xz)/2
  42. cube = (1 - yz) * (1 - yz) * (1 - yz)
  43. t2 = d * np.sqrt((n - 1) * (1 + yz)/(((2 * (n - 1)/(n - 3)) * determin + av * av * cube)))
  44. p = 1 - t.cdf(abs(t2), n - 3)
  45. if twotailed:
  46. p *= 2
  47. return t2, p
  48. elif method == 'zou':
  49. L1 = rz_ci(xy, n, conf_level=conf_level)[0]
  50. U1 = rz_ci(xy, n, conf_level=conf_level)[1]
  51. L2 = rz_ci(xz, n, conf_level=conf_level)[0]
  52. U2 = rz_ci(xz, n, conf_level=conf_level)[1]
  53. rho_r12_r13 = rho_rxy_rxz(xy, xz, yz)
  54. lower = xy - xz - pow((pow((xy - L1), 2) + pow((U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5)
  55. upper = xy - xz + pow((pow((U1 - xy), 2) + pow((xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5)
  56. return lower, upper
  57. else:
  58. raise Exception('Wrong method!')
  59. def independent_corr(xy, ab, n, n2 = None, twotailed=True, conf_level=0.95, method='fisher'):
  60. """
  61. Calculates the statistic significance between two independent correlation coefficients
  62. @param xy: correlation coefficient between x and y
  63. @param xz: correlation coefficient between a and b
  64. @param n: number of elements in xy
  65. @param n2: number of elements in ab (if distinct from n)
  66. @param twotailed: whether to calculate a one or two tailed test, only works for 'fisher' method
  67. @param conf_level: confidence level, only works for 'zou' method
  68. @param method: defines the method uses, 'fisher' or 'zou'
  69. @return: z and p-val
  70. """
  71. if method == 'fisher':
  72. xy_z = 0.5 * np.log((1 + xy)/(1 - xy))
  73. ab_z = 0.5 * np.log((1 + ab)/(1 - ab))
  74. if n2 is None:
  75. n2 = n
  76. se_diff_r = np.sqrt(1/(n - 3) + 1/(n2 - 3))
  77. diff = xy_z - ab_z
  78. z = abs(diff / se_diff_r)
  79. p = (1 - norm.cdf(z))
  80. if twotailed:
  81. p *= 2
  82. return z, p
  83. elif method == 'zou':
  84. L1 = rz_ci(xy, n, conf_level=conf_level)[0]
  85. U1 = rz_ci(xy, n, conf_level=conf_level)[1]
  86. L2 = rz_ci(ab, n2, conf_level=conf_level)[0]
  87. U2 = rz_ci(ab, n2, conf_level=conf_level)[1]
  88. lower = xy - ab - pow((pow((xy - L1), 2) + pow((U2 - ab), 2)), 0.5)
  89. upper = xy - ab + pow((pow((U1 - xy), 2) + pow((ab - L2), 2)), 0.5)
  90. return lower, upper
  91. else:
  92. raise Exception('Wrong method!')
  93. print(dependent_corr(.40, .50, .10, 103, method='steiger'))
  94. print(independent_corr(0.5 , 0.6, 103, 103, method='fisher'))
  95. #print dependent_corr(.396, .179, .088, 200, method='zou')
  96. #print independent_corr(.560, .588, 100, 353, method='zou')