
cleaning.py (4.8 KB)

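"""Clean and segment the rat position-tracking data from the VR cliff experiment.

Loads the merged position and session tables, trims each session to the interval
between arena entry and jump-off, drops sessions that leave the tracking range,
smooths the trajectories, and extracts the fall window and jump point per session.
"""
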
import os
from os import path
from glob import glob

import numpy as np
import pandas as pd
import h5py
import datashader as ds
import datashader.transfer_functions as tf
# from plots import plot_occupancy_projections

HDF_FORMAT = 'fixed'


def strip_sessions(pos):
    """Remove the beginning and end of each session's tracking data, using its position XYZ info."""

    # Get only the data after entry into the arena for each session.
    def get_first_index(pos):
        indices = np.where(
            (pos.Y < .2)  # Rat is low.
            & (pos.Z.abs() < .2)  # Rat is inside the arena.
        )
        return indices[0][0] if len(indices[0]) > 0 else 0

    def get_last_index(pos):
        indices = np.where(
            (.20 < np.abs(pos.X))  # Rat is off the board.
            & (np.abs(pos.X) < .45)  # Rat is still close to the board, though.
            & (pos.Y > .06)  # Rat is above the floor, not sitting on it.
            & (pos.Y < .20)  # Rat is below the board.
            & (np.sign(pos.X) == np.sign(pos.X.diff()))  # Rat is moving away from the board.
            & (np.abs(pos.X.diff() / pos.Time.diff()) > .08)  # Rat is moving away from the board at speed.
            & ((pos.Y.diff() / pos.Time.diff()) < -.20)  # Rat is descending quickly.
        )
        return indices[0][0] if len(indices[0]) > 0 else 0

    # Skip 300 frames past arena entry; keep 28 frames past the detected jump-off.
    get_window = lambda df: df.iloc[(get_first_index(df) + 300):(get_last_index(df) + 28)]
    pos = pos.groupby('Session_id').apply(get_window)
    pos.reset_index('Session_id', inplace=True, drop=True)
    return pos


def filter_sessions(pos):
    """Remove sessions in which the rat ever left the valid X/Y tracking range."""
    pos = pos.groupby('Session_id').filter(lambda df: not (np.abs(df.X) > .38).any())
    pos = pos.groupby('Session_id').filter(lambda df: not (np.abs(df.Y) > .49).any())
    return pos.copy()


def add_relative_time(pos):
    """Add a TimeNormed column: each session's time, normalized to [0, 1]."""
    pos = pos.copy()
    normalize = lambda t: (t - t.min()) / (t.max() - t.min())
    pos['TimeNormed'] = pos.groupby('Session_id').Time.apply(normalize)
    return pos


def smooth_trajectories(df, **rolling_kwargs):
    """Smooth each position dimension with a rolling window, and add velocity and acceleration columns."""
    pos_s = df.copy()
    g = pos_s.groupby('Session_id')
    for dim in 'XYZ':
        pos_s[dim] = g[dim].rolling(**rolling_kwargs).mean().values
        pos_s['Vel' + dim] = g.apply(lambda df: df[dim].diff() / df.Time.diff()).values
        pos_s['Acc' + dim] = g.apply(lambda df: df['Vel' + dim].diff() / df.Time.diff()).values
    return pos_s


def get_jump_trajectory(df, search_size=100, win_size=40):
    """Extract the win_size frames leading up to each session's minimum VelY, searched over the last search_size frames."""

    def get_end_of_fall(df):
        end = np.where(df.VelY == np.min(df.VelY.iloc[-search_size:]))[0][0]
        return df.iloc[(end - win_size):end]

    falls = df.copy().groupby('Session_id').apply(get_end_of_fall)
    falls.reset_index('Session_id', drop=True, inplace=True)
    return falls


def get_jump_point(df, session_filter='VelY < -.3'):
    """Get the last frame of falling: the frame with the minimum VelY."""
    jumps = df.groupby('Session_id').apply(lambda df: df[df.VelY == df.VelY.min()])
    jumps.reset_index('Session_id', inplace=True, drop=True)
    jumps = jumps.query(session_filter)
    return jumps.copy()


if __name__ == '__main__':

    # Load data, building the merged file first if it doesn't exist yet.
    fname = '../data/VRCliff_AllData.h5'
    if not path.exists(fname):
        os.system("python merge_sessions.py")
    pos = pd.read_hdf(fname, '/Position').dropna()
    pos.reset_index(inplace=True, drop=True)  # Because the original file put the frame as the index.
    sessions = pd.read_hdf(fname, '/Session')
    sessions.set_index('id', inplace=True)

    # Correct for the different board heights across sessions.
    sessions['BoardCorrection'] = sessions.BoardHeight.apply(lambda x: -.01 if x > .15 else 0)
    boardcorr = pd.merge(pos[['Session_id']], sessions[['BoardCorrection']],
                         left_on='Session_id', right_index=True)
    pos.Y.update(pos.Y + boardcorr.BoardCorrection)

    # Process, clean, and filter the data.
    pos = strip_sessions(pos)
    pos = filter_sessions(pos)
    pos = add_relative_time(pos)
    pos_s = smooth_trajectories(pos, window=15, center=True, min_periods=9, win_type='hamming')
    falls = get_jump_trajectory(pos_s, search_size=100, win_size=40)
    jumps = get_jump_point(falls, session_filter='VelY < -.3')

    # # Save cleaned data to a new file.
    # savename = '../data/VRCliff_AllData_cleaned.h5'
    # pos.to_hdf(savename, '/Position', format='table')
    # sessions.to_hdf(savename, '/Session', format='table')

    # ###########
    # Correct for the Left Cliff condition (only in the RealCliff=Left condition).
    # dfsub = sessions.query('CliffType == "Real" & CliffSide == "L"')
    # swap_lr = lambda x: {'L': 'R', 'R': 'L', np.nan: np.nan}[x]
    # sessions.loc[dfsub.index, 'JumpSide'] = dfsub.JumpSide.apply(swap_lr)
    # TODO: Move the Left Cliff correction to earlier in the process.

    # Evaluate whether the jump was on the correct side or not.
    # sessions['JumpRight'] = ((sessions.JumpSide == 'R') - 0.5) * 2
    # sessions['Correct'] = sessions.JumpSide != sessions.CliffSide
    # #########

    # falls.to_hdf(fname, '/Falls', format=HDF_FORMAT)
    # jumps.to_hdf(fname, '/Jumps', format=HDF_FORMAT)
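
    # A minimal read-back sketch (an assumption: it presumes the commented-out
    # to_hdf saves above have been run, with the same '/Falls' and '/Jumps' keys):
    #   falls = pd.read_hdf(fname, '/Falls')
    #   jumps = pd.read_hdf(fname, '/Jumps')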