create_rawdata.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. import itertools
  2. import pathlib
  3. import pandas as pd
  4. import numpy as np
  5. import scipy
  6. import spikeinterface.full as si
  7. import json
  8. import csv
  9. from scipy.io import loadmat
  10. import neo
  11. from bep032tools.generator.BEP032Generator import BEP032Data
  12. from bep032tools.generator.utils import save_json, save_tsv
  13. # b = BEP032Data()
  14. # b.generate_metadata_file_channels()
  class BIDSGenerator(BEP032Data):
      """Write BIDS metadata files for one subject/session of a Neuropixel recording.

      Extends ``BEP032Data`` with the ``generate_metadata_file_*`` methods.
      Channel information is read from the recording through ``neo``; probe
      and contact information is read from the single ``.mat`` file located in
      the grandparent directory of ``source_data_folder``.

      Attributes:
          probe_name: Value of the ``name`` variable of the ``.mat`` file, or
              ``None`` when the file has no such variable.
          probe_sources: ``pandas.DataFrame`` with one column per remaining
              variable of the ``.mat`` file.
      """

      def __init__(self, *args, **kwargs):
          """Forward all arguments to ``BEP032Data`` and load the probe data.

          NOTE(review): ``self.custom_metadata_sources`` is expected to be set
          by ``BEP032Data.__init__`` and to contain a ``pathlib`` path under
          the key ``'source_data_folder'`` — confirm against the base class.
          """
          super().__init__(*args, **kwargs)
          self.probe_name, self.probe_sources = self.load_probe_source_data()

      def generate_metadata_file_channels(self, output):
          """Build the channels table from the recording header and save it.

          Args:
              output: Destination handed unchanged to ``save_tsv``.

          Raises:
              ValueError: If a channel name contains no number, or if the
                  number of a channel is not listed in the ``chanMap0ind``
                  column of the probe data.
          """
          recording_folder = self.custom_metadata_sources['source_data_folder']
          neo_reader = neo.get_io(recording_folder)
          neo_channels = neo_reader.header['signal_channels']

          df = pd.DataFrame.from_records(neo_channels)
          df.rename(columns={'name': 'channel_id', 'id':'channel_name', 'sampling_rate':'sampling_frequency', 'units':'unit'}, inplace=True)
          df['type'] = 'EXT'

          # Ensure all channels have a corresponding contact. The first run of
          # digits in the channel name is compared against the 0-indexed
          # channel map; expand=False yields a Series instead of a DataFrame.
          contact_numbers = df['channel_id'].str.extract(r'(\d+)', expand=False)
          unnumbered = contact_numbers.isna()
          if unnumbered.any():
              raise ValueError(
                  'Channel names without a contact number: '
                  f"{df.loc[unnumbered, 'channel_id'].tolist()}")
          has_contact = np.isin(contact_numbers.astype(int),
                                self.probe_sources['chanMap0ind'])
          if not has_contact.all():
              raise ValueError(
                  'Channels without a matching probe contact: '
                  f"{df.loc[~has_contact, 'channel_id'].tolist()}")

          df.set_index('channel_id', inplace=True)
          save_tsv(df, output)

      def generate_metadata_file_probes(self, output):
          """Save a one-row probes table for the single probe of the experiment.

          Args:
              output: Destination handed unchanged to ``save_tsv``.
          """
          # Only a single neuropixel probe is used in this experiment.
          # NOTE(review): when the .mat file has no 'name' variable,
          # probe_name is None and the id becomes 'probe-None' — confirm
          # whether that is acceptable.
          df = pd.DataFrame(columns=['probe_id', 'type'], data=[[f'probe-{self.probe_name}', 'Neuropixel']])
          df.set_index('probe_id', inplace=True)
          save_tsv(df, output)

      def generate_metadata_file_contacts(self, output):
          """Save the probe data as the contacts table, with renamed columns.

          Args:
              output: Destination handed unchanged to ``save_tsv``.
          """
          # rename() returns a new frame, so self.probe_sources stays untouched.
          df = self.probe_sources.rename(columns={'chanMap0ind':'contact_id','shankInd':'shank_id','chanMap':'1-indexed-contact_id', 'xcoords':'x','ycoords':'y'})
          df.set_index('contact_id', inplace=True)
          save_tsv(df, output)

      def generate_metadata_file_dataset_description(self, output):
          """Save the dataset description as JSON.

          Args:
              output: Destination handed unchanged to ``save_json``.
          """
          # NOTE(review): the author list looks like placeholder data, and
          # BIDS names the corresponding fields "Name", "BIDSVersion" and
          # "Authors" — confirm the intended content before publishing.
          mdict = {'author': ['Alice A', ' Bob B']}
          save_json(mdict, output)

      def generate_metadata_file_participants(self, output):
          """Save a one-row participants table for this subject.

          Args:
              output: Destination handed unchanged to ``save_tsv``.
          """
          # NOTE(review): BIDS calls this column 'participant_id' — confirm
          # that 'subject_id' is intended.
          df = pd.DataFrame(columns=['subject_id'], data=['sub-' + self.sub_id])
          df.set_index('subject_id', inplace=True)
          save_tsv(df, output)

      def generate_metadata_file_sessions(self, output):
          """Save a one-row sessions table for this session.

          Args:
              output: Destination handed unchanged to ``save_tsv``.
          """
          df = pd.DataFrame(columns=['session_id'], data=['ses-' + self.ses_id])
          df.set_index('session_id', inplace=True)
          save_tsv(df, output)

      def generate_metadata_file_ephys(self, output):
          """Save the ephys sidecar as JSON (power line frequency only).

          Args:
              output: Destination handed unchanged to ``save_json``.
          """
          mdict = {'PowerLineFrequency':60}
          save_json(mdict, output)

      def load_probe_source_data(self):
          """Load the probe description from the ``.mat`` file of the dataset.

          The file is searched in the grandparent directory of
          ``source_data_folder``.

          Returns:
              A ``(probe_name, df)`` tuple. ``probe_name`` is the first element
              of the ``name`` variable, or ``None`` when it is absent. ``df``
              has one column per other variable, each flattened to 1-D;
              loader bookkeeping entries (keys starting with ``__``) are
              skipped.

          Raises:
              ValueError: If the directory does not contain exactly one
                  ``.mat`` file.
          """
          sources_folder = self.custom_metadata_sources['source_data_folder'].parents[1]

          mat_files = list(sources_folder.glob('*.mat'))
          if len(mat_files) != 1:
              raise ValueError(
                  f'Expected exactly one .mat file in {sources_folder}, '
                  f'found {len(mat_files)}: {[f.name for f in mat_files]}')
          mat = scipy.io.loadmat(mat_files[0])

          df = pd.DataFrame({
              key: values.flatten()
              for key, values in mat.items()
              if not (key.startswith('__') or key == 'name')
          })
          probe_name = mat['name'][0] if 'name' in mat else None
          return probe_name, df
  if __name__ == '__main__':
      # Convert every sub-*/ses-* folder found under ../sourcedata into the
      # corresponding BIDS structure and metadata files under ../rawdata.
      rawdata_root = pathlib.Path('../rawdata')

      for subject_dir in pathlib.Path('.').glob('../sourcedata/sub-*'):
          # Keep only what follows the last 'sub-' in the folder name.
          subject_label = subject_dir.name.rpartition('sub-')[2]

          for session_dir in subject_dir.glob('ses-*'):
              session_label = session_dir.name.rpartition('ses-')[2]

              generator = BIDSGenerator(
                  subject_label,
                  session_label,
                  custom_metadata_source={'source_data_folder': session_dir},
              )
              generator.basedir = rawdata_root
              generator.register_data_sources(session_dir)
              generator.generate_directory_structure()
              # Organizing the data files themselves is currently disabled:
              # generator.organize_data_files(mode='link', autoconvert='nwb')
              generator.generate_all_metadata_files()
  88. # contacts_file = get_contacts_files(get_probes_files('neuropixPhase3A_kilosortChanMap.mat'))
  89. # channel_file = get_channel_file('/Users/killianrochet/Downloads/bep032-spikesorting-2/Cazette/dataset/sub-i/ses-123456/ephys/sub-i_ses-123456_task-r2g_run-001_ephys.nix',contacts_file)
  90. # print(contacts_file)
  91. # print(channel_file)