dba.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. '''
  2. /*******************************************************************************
  3. * Copyright (C) 2018 Francois Petitjean
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, version 3 of the License.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. ******************************************************************************/
  17. '''
  18. from __future__ import division
  19. import numpy as np
  20. import matplotlib.pyplot as plt
  21. from functools import reduce
  22. __author__ ="Francois Petitjean"
  23. def performDBA(series, n_iterations=10):
  24. n_series = len(series)
  25. max_length = reduce(max, map(len, series))
  26. cost_mat = np.zeros((max_length, max_length))
  27. delta_mat = np.zeros((max_length, max_length))
  28. path_mat = np.zeros((max_length, max_length), dtype=np.int8)
  29. medoid_ind = approximate_medoid_index(series,cost_mat,delta_mat)
  30. center = series[medoid_ind]
  31. for i in range(0,n_iterations):
  32. center = DBA_update(center, series, cost_mat, path_mat, delta_mat)
  33. return center
  34. def approximate_medoid_index(series,cost_mat,delta_mat):
  35. if len(series)<=50:
  36. indices = range(0,len(series))
  37. else:
  38. indices = np.random.choice(range(0,len(series)),50,replace=False)
  39. medoid_ind = -1
  40. best_ss = 1e20
  41. for index_candidate in indices:
  42. candidate = series[index_candidate]
  43. ss = sum_of_squares(candidate,series,cost_mat,delta_mat)
  44. if(medoid_ind==-1 or ss<best_ss):
  45. best_ss = ss
  46. medoid_ind = index_candidate
  47. return medoid_ind
  48. def sum_of_squares(s,series,cost_mat,delta_mat):
  49. return sum(map(lambda t:squared_DTW(s,t,cost_mat,delta_mat),series))
  50. def DTW(s,t,cost_mat,delta_mat):
  51. return np.sqrt(squared_DTW(s,t,cost_mat,delta_mat))
  52. def squared_DTW(s,t,cost_mat,delta_mat):
  53. s_len = len(s)
  54. t_len = len(t)
  55. length = len(s)
  56. fill_delta_mat_dtw(s, t, delta_mat)
  57. cost_mat[0, 0] = delta_mat[0, 0]
  58. for i in range(1, s_len):
  59. cost_mat[i, 0] = cost_mat[i-1, 0]+delta_mat[i, 0]
  60. for j in range(1, t_len):
  61. cost_mat[0, j] = cost_mat[0, j-1]+delta_mat[0, j]
  62. for i in range(1, s_len):
  63. for j in range(1, t_len):
  64. diag,left,top =cost_mat[i-1, j-1], cost_mat[i, j-1], cost_mat[i-1, j]
  65. if(diag <=left):
  66. if(diag<=top):
  67. res = diag
  68. else:
  69. res = top
  70. else:
  71. if(left<=top):
  72. res = left
  73. else:
  74. res = top
  75. cost_mat[i, j] = res+delta_mat[i, j]
  76. return cost_mat[s_len-1,t_len-1]
  77. def fill_delta_mat_dtw(center, s, delta_mat):
  78. slim = delta_mat[:len(center),:len(s)]
  79. np.subtract.outer(center, s,out=slim)
  80. np.square(slim, out=slim)
  81. def DBA_update(center, series, cost_mat, path_mat, delta_mat):
  82. options_argmin = [(-1, -1), (0, -1), (-1, 0)]
  83. updated_center = np.zeros(center.shape)
  84. n_elements = np.array(np.zeros(center.shape), dtype=int)
  85. center_length = len(center)
  86. for s in series:
  87. s_len = len(s)
  88. fill_delta_mat_dtw(center, s, delta_mat)
  89. cost_mat[0, 0] = delta_mat[0, 0]
  90. path_mat[0, 0] = -1
  91. for i in range(1, center_length):
  92. cost_mat[i, 0] = cost_mat[i-1, 0]+delta_mat[i, 0]
  93. path_mat[i, 0] = 2
  94. for j in range(1, s_len):
  95. cost_mat[0, j] = cost_mat[0, j-1]+delta_mat[0, j]
  96. path_mat[0, j] = 1
  97. for i in range(1, center_length):
  98. for j in range(1, s_len):
  99. diag,left,top =cost_mat[i-1, j-1], cost_mat[i, j-1], cost_mat[i-1, j]
  100. if(diag <=left):
  101. if(diag<=top):
  102. res = diag
  103. path_mat[i,j] = 0
  104. else:
  105. res = top
  106. path_mat[i,j] = 2
  107. else:
  108. if(left<=top):
  109. res = left
  110. path_mat[i,j] = 1
  111. else:
  112. res = top
  113. path_mat[i,j] = 2
  114. cost_mat[i, j] = res+delta_mat[i, j]
  115. i = center_length-1
  116. j = s_len-1
  117. while(path_mat[i, j] != -1):
  118. updated_center[i] += s[j]
  119. n_elements[i] += 1
  120. move = options_argmin[path_mat[i, j]]
  121. i += move[0]
  122. j += move[1]
  123. assert(i == 0 and j == 0)
  124. updated_center[i] += s[j]
  125. n_elements[i] += 1
  126. return np.divide(updated_center, n_elements)
  127. # def main():
  128. #generating synthetic data
  129. n_series = 20
  130. length = 200
  131. series = list()
  132. padding_length=30
  133. indices = range(0, length-padding_length)
  134. main_profile_gen = np.array([np.sin(2*np.pi*j/len(indices)) for j in indices])
  135. randomizer = lambda j:np.random.normal(j,0.02)
  136. randomizer_fun = np.vectorize(randomizer)
  137. for i in range(0,n_series):
  138. n_pad_left = np.random.randint(0,padding_length)
  139. #adding zero at the start or at the end to shif the profile
  140. series_i = np.pad(main_profile_gen,(n_pad_left,padding_length-n_pad_left),mode='constant',constant_values=0)
  141. #chop some of the end to prove it can work with multiple lengths
  142. l = np.random.randint(length-20,length+1)
  143. series_i = series_i[:l]
  144. #randomize a bit
  145. series_i = randomizer_fun(series_i)
  146. series.append(series_i)
  147. series = np.array(series)
  148. #plotting the synthetic data
  149. for s in series:
  150. plt.plot(range(0,len(s)), s)
  151. plt.draw()
  152. #calculating average series with DBA
  153. # average_series = performDBA(series)
  154. xxx
  155. #plotting the average series
  156. plt.figure()
  157. plt.plot(range(0,len(average_series)), average_series)
  158. plt.show()
  159. # if __name__== "__main__":
  160. # main()
  161. rows = np.argwhere(np.max(aa, axis=1) >35)
  162. psth1 = np.delete(psth_tot[0, :, :, 20], rows, axis=0)
  163. series1 = list()
  164. for ii in range(psth1.shape[0]):
  165. series1.append(psth1[ii])
  166. series1 = np.array(series1)
  167. dba = performDBA(series1)
  168. plt.figure()
  169. plt.clf()
  170. plt.plot(psth1.T, 'C0', alpha=0.5)
  171. plt.plot(dba, 'k')
  172. plt.draw()