reposit_bep032.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. #!/usr/bin/env python3
  2. import itertools
  3. import pathlib
  4. import pandas as pd
  5. import numpy as np
  6. import scipy
  7. import spikeinterface.full as si
  8. import json
  9. import csv
  10. from scipy.io import loadmat
  11. import neo
  12. from bep032tools.generator.BEP032Generator import BEP032Data
  13. from bep032tools.generator.utils import save_json, save_tsv
  14. # b = BEP032Data()
  15. # b.generate_metadata_file_channels()
  16. class BIDSGenerator(BEP032Data):
  17. def __init__(self, *args, **kwargs):
  18. super().__init__(*args, **kwargs)
  19. self.probe_name, self.probe_sources = self.load_probe_source_data()
  20. def generate_metadata_file_channels(self, output):
  21. recording_folder = self.custom_metadata_sources['source_data_folder']
  22. neo_reader = neo.get_io(recording_folder)
  23. # recording = si.NixRecordingExtractor(recording_folder, streamid)
  24. neo_streams = neo_reader.header['signal_streams']
  25. neo_channels = neo_reader.header['signal_channels']
  26. df = pd.DataFrame.from_records(neo_channels)
  27. df.rename(columns={'name': 'channel_id', 'id':'channel_name', 'sampling_rate':'sampling_frequency', 'units':'unit'}, inplace=True)
  28. df['type'] = 'EXT'
  29. # ensure all channels have a corresponding contact
  30. channel_contacts = df['channel_id'].str.extract(r'(\d+)').astype(int)
  31. assert all(np.isin(channel_contacts, self.probe_sources['chanMap0ind']))
  32. df.set_index('channel_id', inplace=True)
  33. save_tsv(df, output)
  34. def generate_metadata_file_probes(self, output): # get_probes_files(path):
  35. # only a single neuropixel probe used in this experiment
  36. df = pd.DataFrame(columns=['probe_id', 'type'], data=[[f'probe-{self.probe_name}', 'Neuropixel']])
  37. df.set_index('probe_id', inplace=True)
  38. save_tsv(df, output)
  39. def generate_metadata_file_contacts(self, output): # get_contacts_files(probes_file):
  40. df = self.probe_sources.copy()
  41. df.rename(columns={'chanMap0ind':'contact_id','shankInd':'shank_id','chanMap':'1-indexed-contact_id', 'xcoords':'x','ycoords':'y'}, inplace=True)
  42. df.set_index('contact_id', inplace=True)
  43. save_tsv(df, output)
  44. def generate_metadata_file_dataset_description(self, output):
  45. mdict = {'author': ['Alice A', ' Bob B']}
  46. # Using a JSON string
  47. save_json(mdict, output)
  48. def generate_metadata_file_participants(self, output):
  49. df = pd.DataFrame(columns=['subject_id'], data=['sub-' + self.sub_id])
  50. df.set_index('subject_id', inplace=True)
  51. save_tsv(df, output)
  52. def generate_metadata_file_sessions(self, output):
  53. df = pd.DataFrame(columns=['session_id'], data=['ses-' + self.ses_id])
  54. df.set_index('session_id', inplace=True)
  55. save_tsv(df, output)
  56. def generate_metadata_file_ephys(self, output):
  57. mdict = {'PowerLineFrequency':60}
  58. save_json(mdict, output)
  59. def load_probe_source_data(self):
  60. sources_folder = self.custom_metadata_sources['source_data_folder'].parents[1]
  61. # Import .mat dataset
  62. mat_files = list(sources_folder.glob('*.mat'))
  63. #assert len(mat_files) == 1
  64. mat_file = mat_files[0]
  65. mat = scipy.io.loadmat(mat_file)
  66. df = pd.DataFrame()
  67. for key, values in mat.items():
  68. if key.startswith('__') or key == 'name':
  69. continue
  70. else:
  71. df[key] = values.flatten()
  72. if 'name' in mat:
  73. probe_name = mat['name'][0]
  74. else:
  75. probe_name = None
  76. return probe_name, df
  77. if __name__ == '__main__':
  78. code_path = pathlib.Path(__file__).parent
  79. for sub_path in code_path.glob('../../sourcedata/sub-*'):
  80. print(f'Processing `{sub_path}`')
  81. sub_id = sub_path.name.split('sub-')[-1]
  82. for ses_path in sub_path.glob('ses-*'):
  83. ses_id = ses_path.name.split('ses-')[-1]
  84. gen = BIDSGenerator(sub_id, ses_id,
  85. custom_metadata_source={'source_data_folder': ses_path})
  86. gen.basedir = code_path.parent
  87. gen.register_data_sources(ses_path)
  88. gen.generate_directory_structure()
  89. gen.organize_data_files(mode='link', autoconvert='nwb')
  90. gen.generate_all_metadata_files()
  91. # contacts_file = get_contacts_files(get_probes_files('neuropixPhase3A_kilosortChanMap.mat'))
  92. # channel_file = get_channel_file('/Users/killianrochet/Downloads/bep032-spikesorting-2/Cazette/dataset/sub-i/ses-123456/ephys/sub-i_ses-123456_task-r2g_run-001_ephys.nix',contacts_file)
  93. # print(contacts_file)
  94. # print(channel_file)