# create_measurement_list_bente.py
'''
Creates .lst file for single wavelength TIFF files (pre-calculated RATIO) in the format used in Trondheim

Author: Giovanni, Dec 2021, based on template in VIEW folder by Ajay

Expected data structure:
    In the folder "01_DATA", each animal has a folder, e.g. "190815_h2_El",
    and located in that folder are all .tif files for that animal
    There is a sister folder "02_LISTS"

Output:
    In the folder "02_LISTS":
    There will be a file Animal.lst.xlsx, e.g. "190815_h2_El.lst.xlsx"
    That file contains one line for each measurement.

What to do next?
    In this file, change values that are global
    or insert a function that can extract odor name or concentration name from somewhere
    In the Animal.lst.xlsx file, correct/complete entries (e.g. odor names, odor concentrations)
    Make sure stimulus timing information is correct

Why do I need a .lst.xlsx file?
    Load measurements in pyVIEW using this .lst file, so that stimulus information is correct
    For off-line analysis, information is taken from this file.

Good to know:
    Information, where possible, is taken from the OME header of the incoming tif file.
    If that information is wrong, incomplete, or otherwise unusable, modify the code in:
    importers.py:P1DualWavelengthTIFSingleFileImporter:parse_metadata
'''
import logging
import pathlib as pl
from collections import OrderedDict

import pandas as pd

from view.python_core.flags import FlagsManager
from view.python_core.measurement_list import MeasurementList
from view.python_core.measurement_list.importers import get_importer_class

logging.basicConfig(level=logging.INFO)
  33. # ------------------- Some parameters about experimental setup, data structure and output file type --------------------
  34. # 3 for single wavelength Till Photonics Measurements
  35. # 4 for two wavelength Till Photonics Measurements
  36. # 20 for Zeiss Confocal Measurements
  37. LE_loadExp = 33 #for Bente, 33 or 35 both work!
  38. # Mother of all Folders of your dataset
  39. # On Windows, if you copy paths from the file explorer, make sure the string below is always of the form r"......"
  40. STG_MotherOfAllFolders = r"/Users/galizia/Nextcloud/VTK_2021/Bente_Test_2021"
  41. # path of the "Data" folder in VIEW organization containing the data
  42. # On Windows, if you copy paths from the file explorer, make sure the string below is always of the form r"......"
  43. STG_Datapath = r""
  44. # path of the "Lists" folder in VIEW organization containing the list files
  45. # On Windows, if you copy paths from the file explorer, make sure the string below is always of the form r"......"
  46. STG_OdorInfoPath = r""
  47. # Choose measurement list output extension among ".lst", ".lst.xlsx", ".settings.xlsx"
  48. # VIEW does not support writing .xls list files anymore (nonetheless, it can read them and revise/update them to .xlsx)
  49. measurement_output_extension = ".lst.xlsx"
  50. # ------------------- A dictionary containing default values for metadata.----------------------------------------------
  51. # ------------------- Only metadata included in this dictionary will be written ----------------------------------------
  52. # ----Note that columns of the output measeurement list files will have the same order as below.------------------------
  53. default_values = OrderedDict()
  54. default_values['Measu'] = 0 # unique identifier for each line, corresponds to item in TILL photonics log file
  55. default_values['Label'] = "none"
  56. default_values['Odour'] = 'odor?' # stimulus name, maybe extracted from label in the function "custom_func" below
  57. default_values['OConc'] = 0 # odor concentration, maybe extracted from label in the function "custom_func" below
  58. default_values['Analyze'] = -1 # whether to analyze in VIEWoff. Default 1
  59. default_values['Cycle'] = 0 # how many ms per frame
  60. default_values['DBB1'] = 'none' # file name of raw data
  61. default_values['UTC'] = 0 # recording time, extracted from file
  62. default_values['PxSzX'] = '0.0' # um per pixel, 1.5625 for 50x air objective, measured by Hanna Schnell July 2017 on Till vision system, with a binning of 8
  63. default_values['PxSzY'] = '0.0' # um per pixel, 1.5625 for 50x air objective, measured by Hanna Schnell July 2017 on Till vision system, with a binning of 8
  64. default_values['Lambda'] = 0 # wavelength of stimulus. In TILL, from .log file, In Zeiss LSM, from .lsm file
  65. # These will be automatically filed for LE_loadExp=4
  66. default_values['dbb2'] = 'none' # file name of raw data in dual wavelength recordings (FURA)
  67. # To include more columns, uncomment entries below and specify a default value.
  68. # #
  69. # block for first stimulus
  70. # default_values['StimON'] = -1 # stimulus onset, unit: frames, count starts at frame 1.
  71. # default_values['StimOFF'] = -1 # stimulus offset, unit: frames, count starts at frame 1.
  72. # default_values['StimLen'] = 0 # stimulus onset in ms from beginning - alternative to StimON
  73. # default_values['StimONms'] = -1 # stimulus length in ms - alternative to StimOFF
  74. # #
  75. # block for second stimulus
  76. # default_values['Stim2ON'] = 0 # stimulus onset, unit: frames, count starts at frame 1.
  77. # default_values['Stim2OFF'] = 0 # stimulus offset, unit: frames, count starts at frame 1.
  78. # default_values['Stim2Len'] = 0 # stimulus onset in ms from beginning - alternative to StimON
  79. # default_values['Stim2ONms'] = -1 # stimulus length in ms - alternative to StimOFF
  80. # #
  81. # default_values['Age'] = -1
  82. # default_values['Sex'] = 'o'
  83. # default_values['Side'] = 'none'
  84. # default_values['Comment'] = 'none'
  85. # #
  86. # default_values['MTime'] = 0
  87. # default_values['Control'] = 0
  88. # default_values['Pharma'] = 'none'
  89. # default_values['PhTime'] = 0
  90. # default_values['PhConc'] = 0
  91. # default_values['ShiftX'] = 0
  92. # default_values['ShiftY'] = 0
  93. # default_values['StimISI'] = 0
  94. # default_values['setting'] = 'none'
  95. # default_values['dbb3'] = 'none'
  96. # default_values['PosZ'] = 0
  97. # default_values['Countl'] = 0
  98. # default_values['slvFlip'] = 0
# ----------------------------------------------------------------------------------------------------------------------
# ----------------- A function used to modify list entries after automatic parsing of metadata -------------------------
# ----------------- This function indicates what needs to be done for a row --------------------------------------------
# ----------------- The same is internally applied to all rows of the measurement list----------------------------------
  103. def get_odorinfo_from_label(label):
  104. # format for file name (label) is:
  105. # odor_concentration_anything_else.tif
  106. # separating element is underscore
  107. # is the information for a concentration present? Detect "-"
  108. parts = label.split("_")
  109. if len(parts) > 1:
  110. odor = parts[0]
  111. concentration = parts[1]
  112. # in the case the name is odor_conc.tif:
  113. if concentration[-4:] == '.tif':
  114. concentration = concentration[:-4]
  115. else:
  116. odor = 'odor?'
  117. concentration = 'conc?'
  118. return [odor, concentration]
  119. def custom_func(list_row: pd.Series, animal_tag: str) -> pd.Series:
  120. # Examples:
  121. # list_row["StimON"] = 25
  122. # list_row["Odour"] = get_odor_from_label(list_row["Label"])
  123. # if list_row["Measu"]
  124. # get Odor from another file based on the value of <animal_tag> and list_row["Label"]
  125. list_row["StimONms"] = '3000'
  126. list_row["StimLen"] = '2000'
  127. list_row["Comment"] = 'create_measurement_list_ratio'
  128. list_row["Line"] = 'ham'
  129. #extract odor and concentration from name
  130. (list_row["Odour"],list_row["OConc"]) = get_odorinfo_from_label(list_row["Label"])
  131. try:
  132. float(list_row["OConc"])
  133. except: #Odour concentration is not a number, set to fictive 0
  134. list_row["OConc"] = '0.0'
  135. if list_row["Label"][-4:] == '.tif':
  136. list_row["Label"] = list_row["Label"][:-4]
  137. return list_row
# ----------------------------------------------------------------------------------------------------------------------
# ------------------ A function defining the criteria for excluding measurements ---------------------------------------
# ------------------ Currently applicable only for tillvision setups ---------------------------------------------------
  141. def measurement_filter(s):
  142. # exclude blocks that have in the name "Snapshot" or "Delta"
  143. # or that do not have any "_"
  144. name = s["Label"]
  145. label_not_okay = name.count('Snapshot') > 0 or name.count('Delta') > 0 or name.count('_') < 1
  146. label_okay = not label_not_okay
  147. # exclude blocks with less than two frames or no calibration
  148. atleast_two_frames = False
  149. if type(s["Timing_ms"]) is str:
  150. if len(s["Timing_ms"].split(' ')) >= 2 and s["Timing_ms"] != "(No calibration available)":
  151. atleast_two_frames = True
  152. return label_okay and atleast_two_frames
  153. # ______________________________________________________________________________________________________________________
  154. # ------------------ names of columns that will be overwritten by old values -------------------------------------------
  155. # ------ these will only be used if a measurement list file with the same name as current output file exists -----------
  156. overwrite_old_values = ["Line", "PxSzX", "PxSzY", "Age", "Sex", "Prefer",
  157. "Comment", "Analyze", "Odour", "OConc"]
  158. # ______________________________________________________________________________________________________________________
  159. if __name__ == "__main__":
  160. # initialize a FlagsManager object with values specified above
  161. flags = FlagsManager()
  162. flags.update_flags({"STG_MotherOfAllFolders": STG_MotherOfAllFolders,
  163. "STG_OdorInfoPath": STG_OdorInfoPath,
  164. "STG_Datapath": STG_Datapath})
  165. # initialize importer
  166. importer_class = get_importer_class(LE_loadExp)
  167. importer = importer_class(default_values)
  168. # open a dialog for choosing raw data files
  169. # this returns a dictionary where keys are animal tags (STG_ReportTag) and
  170. # values are lists of associated raw data files
  171. animal_tag_raw_data_mapping = importer.ask_for_files(default_dir=flags["STG_Datapath"])
  172. # make sure some files were chosen
  173. assert len(animal_tag_raw_data_mapping) > 0, IOError("No files were chosen!")
  174. for animal_tag, raw_data_files in animal_tag_raw_data_mapping.items():
  175. # automatically parse metadata
  176. metadata_df = importer.import_metadata(raw_data_files=raw_data_files,
  177. measurement_filter=measurement_filter)
  178. # inform user if no usable measurements were found
  179. if metadata_df.shape[0] == 0:
  180. logging.info(f"No usable measurements we found among the files "
  181. f"chosen for the animal {animal_tag}. Not creating a list file")
  182. else:
  183. # create a new Measurement list object from parsed metadata
  184. measurement_list = MeasurementList.create_from_df(LE_loadExp=LE_loadExp,
  185. df=metadata_df)
  186. # apply custom modifications
  187. measurement_list.update_from_custom_func(custom_func=custom_func, animal_tag=animal_tag)
  188. # set anaylze to 0 if raw data files don't exist
  189. flags.update_flags({"STG_ReportTag": animal_tag})
  190. measurement_list.sanitize(flags=flags,
  191. data_file_extensions=importer.movie_data_extensions)
  192. # sort by time as in column "UTC"
  193. #sorted_df = df.sort_values(by=['Column_name'], ascending=True)
  194. # does not work if the list file already existed.
  195. measurement_list.measurement_list_df = measurement_list.measurement_list_df.sort_values(by=['UTC'], ascending=True)
  196. # construct the name of the output file
  197. #AskAjay - what I am writing seems crude to me (Giovanni Dec 21)
  198. #Ajay: out_file = f"{flags.get_lst_file_stem()}{measurement_output_extension}"
  199. singlefilein = pl.Path(raw_data_files[0])
  200. #singlefilein could be:
  201. #'/Users/galizia/Nextcloud/VTK_2021/Bente_Test_2021/01_DATA/190815_h2_El/B_1.tif'
  202. #output should be:
  203. #'/Users/galizia/Nextcloud/VTK_2021/Bente_Test_2021/02_ANALYSIS/190815_h2_El.lst.xlsx'
  204. out_file = pl.Path(singlefilein.parent.parent.parent)
  205. out_file = pl.Path.joinpath(out_file, '02_LISTS' , singlefilein.parts[-2])
  206. out_file = f"{out_file}{measurement_output_extension}"
  207. # write measurement file to list
  208. measurement_list.write_to_list_file(lst_fle=out_file, columns2write=default_values.keys(),
  209. overwrite_old_values=overwrite_old_values)