123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165 |
- """
- Downloaded from: https://gist.github.com/alimuldal/fbb19b73fa25423f02e8
- """
- import numpy as np
- from scipy import stats
- from itertools import combinations
- from statsmodels.stats.multitest import multipletests
- from statsmodels.stats.libqsturng import psturng
- import warnings
- from collections import namedtuple
- def kw_dunn(groups, pairs=None, alpha=0.05, method='bonf'):
- return KWDunn.compute(groups=groups,
- pairs=pairs,
- alpha=alpha,
- method=method)
- def chisqprob(chisq, df):
- """a helper function to deal with recent SciPy versions."""
- return stats.chi2.sf(chisq, df)
- Dunn = namedtuple('Dunn',
- ["Z", "p_corrected", "alpha", "reject"]
- )
- class KWDunn(namedtuple('_KWDunn',
- [
- "H",
- "p_omnibus",
- "pairwise",
- ]
- )):
- @classmethod
- def compute(cls, groups, pairs=None, alpha=0.05, method='bonf'):
- H, p_omni, Z_pair, p_pair, rej = kw_dunn_raw(groups, to_compare=pairs,
- alpha=alpha, method=method)
- pairwise = {}
- if (pairs is None) or (len(pairs) == 0):
- pass
- else:
- for pair, Z, p, r in zip(pairs, Z_pair, p_pair, rej):
- pairwise[pair] = Dunn(Z, p, alpha, r)
- return cls(H, p_omni, pairwise)
- def kw_dunn_raw(groups, to_compare=None, alpha=0.05, method='bonf'):
- """
- Kruskal-Wallis 1-way ANOVA with Dunn's multiple comparison test
- Arguments:
- ---------------
- groups: sequence
- arrays corresponding to k mutually independent samples from
- continuous populations
- to_compare: sequence
- tuples specifying the indices of pairs of groups to compare, e.g.
- [(0, 1), (0, 2)] would compare group 0 with 1 & 2. by default, all
- possible pairwise comparisons between groups are performed.
- alpha: float
- family-wise error rate used for correcting for multiple comparisons
- (see statsmodels.stats.multitest.multipletests for details)
- method: string
- method used to adjust p-values to account for multiple corrections (see
- statsmodels.stats.multitest.multipletests for options)
- Returns:
- ---------------
- H: float
- Kruskal-Wallis H-statistic
- p_omnibus: float
- p-value corresponding to the global null hypothesis that the medians of
- the groups are all equal
- Z_pairs: float array
- Z-scores computed for the absolute difference in mean ranks for each
- pairwise comparison
- p_corrected: float array
- corrected p-values for each pairwise comparison, corresponding to the
- null hypothesis that the pair of groups has equal medians. note that
- these are only meaningful if the global null hypothesis is rejected.
- reject: bool array
- True for pairs where the null hypothesis can be rejected for the given
- alpha
- Reference:
- ---------------
- Gibbons, J. D., & Chakraborti, S. (2011). Nonparametric Statistical
- Inference (5th ed., pp. 353-357). Boca Raton, FL: Chapman & Hall.
- """
- # omnibus test (K-W ANOVA)
- # -------------------------------------------------------------------------
- groups = [np.array(gg) for gg in groups]
- k = len(groups)
- n = np.array([len(gg) for gg in groups])
- if np.any(n < 5):
- warnings.warn("Sample sizes < 5 are not recommended (K-W test assumes "
- "a chi square distribution)")
- allgroups = np.concatenate(groups)
- N = len(allgroups)
- ranked = stats.rankdata(allgroups)
- # correction factor for ties
- T = stats.tiecorrect(ranked)
- if T == 0:
- raise ValueError('All numbers are identical in kruskal')
- # sum of ranks for each group
- j = np.insert(np.cumsum(n), 0, 0)
- R = np.empty(k, dtype=np.float)
- for ii in range(k):
- R[ii] = ranked[j[ii]:j[ii + 1]].sum()
- # the Kruskal-Wallis H-statistic
- H = (12. / (N * (N + 1.))) * ((R ** 2.) / n).sum() - 3 * (N + 1)
- # apply correction factor for ties
- H /= T
- df_omnibus = k - 1
- p_omnibus = chisqprob(H, df_omnibus)
- # multiple comparisons
- # -------------------------------------------------------------------------
- # by default we compare every possible pair of groups
- if to_compare is None:
- to_compare = tuple(combinations(range(k), 2))
- ncomp = len(to_compare)
- Z_pairs = np.empty(ncomp, dtype=np.float)
- p_uncorrected = np.empty(ncomp, dtype=np.float)
- Rmean = R / n
- for pp, (ii, jj) in enumerate(to_compare):
- # standardized score
- Zij = (np.abs(Rmean[ii] - Rmean[jj]) /
- np.sqrt((1. / 12.) * N * (N + 1) * (1. / n[ii] + 1. / n[jj])))
- Z_pairs[pp] = Zij
- # corresponding p-values obtained from upper quantiles of the standard
- # normal distribution
- p_uncorrected = stats.norm.sf(Z_pairs) * 2.
- # correction for multiple comparisons
- reject, p_corrected, alphac_sidak, alphac_bonf = multipletests(
- p_uncorrected, method=method
- )
- return H, p_omnibus, Z_pairs, p_corrected, reject
|