occupancyBasedMeasure.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. import numpy as np
  2. from collections import Counter
  3. from pyemd import emd
  4. def calcOccupancyDistribution(swcList, voxelSize):
  5. """
  6. Returns the distribution of the sum of voxel occupancies across swcs in swcList.
  7. Voxel occupancy of a voxel is 1 if an swc has an node in the voxel, otherwise 0.
  8. :param swcList: list of valid SWC files on the file system
  9. :param voxelSize: float, voxel size to discretize space.
  10. :return: dict with voxel occupancy and its normalized frequency as key-value pairs
  11. """
  12. voxels = []
  13. for swc in swcList:
  14. aPts = np.loadtxt(swc)[:, 2:5]
  15. aVox = np.array(aPts / voxelSize, np.int32)
  16. aVoxSet = set(map(tuple, aVox))
  17. voxels.extend(list(aVoxSet))
  18. voxelCounter = Counter(voxels)
  19. counts = voxelCounter.values()
  20. bins = np.arange(1, len(swcList) + 2) - 0.5
  21. hist, bins = np.histogram(counts, bins)
  22. histWeighted = hist * (bins[:-1] + 0.5)
  23. histNormed = histWeighted / float(sum(histWeighted))
  24. return dict(zip(np.arange(1, len(swcList) + 1), histNormed))
  25. def occupancyEMD(swcList, voxelSize):
  26. """
  27. Calculate the EMD between the occupancy distributions (see calcOccupancyDistribution above) of
  28. swcList and that of perfect overlap (1 at len(swcList) and zero elsewhere)
  29. :param swcList: list of valid SWC files on the file system
  30. :param voxelSize: float, voxel size for discretizing space.
  31. :return: float, emd value
  32. """
  33. occupancyDistributionDict = calcOccupancyDistribution(swcList, voxelSize)
  34. bins = np.arange(1, len(swcList) + 1)
  35. occupancyDistribution = [occupancyDistributionDict[x] for x in bins]
  36. perfectOverlapDist = np.zeros(bins.shape)
  37. perfectOverlapDist[-1] = 1
  38. binsRow = bins.reshape((1, bins.shape[0]))
  39. distMetric = np.dot(np.ones((bins.shape[0], 1)), binsRow) - binsRow.T
  40. emd_val = emd(np.asarray(occupancyDistribution, dtype=np.float64),
  41. np.asarray(perfectOverlapDist, dtype=np.float64),
  42. np.asarray(distMetric, dtype=np.float64))
  43. return emd_val