importers.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. import pathlib as pl
  2. from tillvisionio.vws import VWSDataManager
  3. import pandas as pd
  4. import tifffile
  5. from view.python_core.misc import excel_datetime
  6. import typing
  7. import easygui
  8. import logging
  9. import pprint
  10. from abc import ABC, abstractmethod
  11. import xml.etree.ElementTree as ET
  12. import datetime
  13. def calculate_dt_from_timing_ms(timing_ms: str) -> float:
  14. times = timing_ms.strip()
  15. times = [float(x) for x in times.split(' ')]
  16. # calculate frame rate as time of (last frame - first frame) / (frames-1)
  17. dt = (times[-1] - times[0]) / (len(times) - 1)
  18. return dt
  19. # a function injecting code into the automatic parsing of metadata from vws.log file
  20. def additional_cols_func(s):
  21. # time & analyze
  22. try:
  23. dt = calculate_dt_from_timing_ms(s["Timing_ms"])
  24. analyze = 1 # since there are at least two frames, and thus a time, I suppose it is worth analyzing
  25. except Exception as e:
  26. dt = -1
  27. analyze = 0
  28. return {"dt": dt, "Analyze": analyze}
  29. class BaseImporter(ABC):
  30. def __init__(self, default_values: typing.Mapping):
  31. super().__init__()
  32. self.default_values = default_values
  33. self.associated_extensions = None
  34. self.associate_file_type = None
  35. self.LE_loadExp = None
  36. self.movie_data_extensions = None
  37. def get_default_row(self):
  38. return pd.Series(self.default_values)
  39. def import_metadata(self, raw_data_files, measurement_filter):
  40. combined_df = pd.DataFrame()
  41. for fle_ind, fle in enumerate(raw_data_files):
  42. logging.getLogger("VIEW").info(f"Parsing metadata from {fle}")
  43. df = self.parse_metadata(fle, fle_ind, measurement_filter)
  44. combined_df = combined_df.append(df, ignore_index=True)
  45. return combined_df
  46. def get_filetype_info_string(self):
  47. return [f"*{x}" for x in self.associated_extensions] + [self.associate_file_type]
  48. def ask_for_files(self, default_dir, multiple: bool = True) -> dict:
  49. default_dir_str = str(pl.Path(default_dir) / "*")
  50. files_chosen = easygui.fileopenbox(
  51. title=f"Choose one or more files for LE_loadExp={self.LE_loadExp}",
  52. filetypes=self.get_filetype_info_string(),
  53. multiple=multiple,
  54. default=default_dir_str)
  55. if files_chosen is None:
  56. raise IOError("User Abort while choosing files.")
  57. else:
  58. assert files_chosen[0].startswith(str(default_dir)), \
  59. f"The data selected in not in the expected data directory of the current tree:\n" \
  60. f"{default_dir}. Please copy your data there and try again!"
  61. animal_tag_raw_data_mapping = self.get_animal_tag_raw_data_mapping(files_chosen)
  62. logging.getLogger("VIEW").info(
  63. f"Working on the following animal tags and their corresponding files:\n"
  64. f"{pprint.pformat(animal_tag_raw_data_mapping)}")
  65. return animal_tag_raw_data_mapping
  66. @abstractmethod
  67. def parse_metadata(self, fle: str, fle_ind: int,
  68. measurement_filter: typing.Callable[[pd.Series], bool]) -> pd.DataFrame:
  69. """
  70. Reads and returns the metadata from a metadata file
  71. :param str fle: path of a metadata file
  72. :param int fle_ind: integer representing the row order of the measurement associated with <fle>,
  73. if it is part of a series
  74. :param Callable measurement_filter: only used for Till Vision setups. See tillvisionio.VWSDataManager.get_all_metadata
  75. :rtype: pd.DataFrame
  76. :return: the columns of the DataFrame returned must be a subset of the metadata columns defined in `view/flags_and_metadata_definitions/metadata_definition.csv`
  77. """
  78. pass
  79. @abstractmethod
  80. def get_animal_tag_raw_data_mapping(self, files_chosen: list) -> dict:
  81. """
  82. Parses the animal tag from raw data file names (<file_chosen>). Revises the raw data file names if necessary.
  83. Returns a one-element dictionary with the animal tag as key and list of (revised) raw data files as value.
  84. :param list files_chosen: list of raw data file names
  85. :rtype: dict
  86. """
  87. pass
  88. @abstractmethod
  89. def get_path_relative_to_data_dir(self, fle):
  90. """
  91. Creates a string representing the path of the raw data file <fle> relative to the data directory represented
  92. by the flag "STG_Datapath" (Eg.: "01_DATA")
  93. :param fle: path of the raw data file as parsed from the metadata file
  94. :rtype: str
  95. """
  96. pass
  97. class TillImporter(BaseImporter, ABC):
  98. def __init__(self, default_values: typing.Mapping):
  99. super().__init__(default_values)
  100. self.associate_file_type = "VWS Log Files"
  101. self.associated_extensions = [".vws.log"]
  102. self.movie_data_extensions = [".pst", ".ps"]
  103. def get_animal_tag_raw_data_mapping(self, files_chosen: list) -> dict:
  104. if len(files_chosen) == 0:
  105. return {}
  106. else:
  107. dict2return = {}
  108. for fle in files_chosen:
  109. fle_path = pl.Path(fle)
  110. dict2return[fle_path.name.split(".")[0]] = [fle]
  111. return dict2return
  112. def get_path_relative_to_data_dir(self, fle):
  113. for extension in self.movie_data_extensions:
  114. if fle.endswith(extension):
  115. fle_path = pl.PureWindowsPath(fle)
  116. possible_dbb1 = str(pl.Path(fle_path.parts[-2]) / fle_path.stem)
  117. return 1, str(possible_dbb1)
  118. else:
  119. return 0, "wrong extension"
  120. def convert_vws_names_to_lst_names(self, vws_measurement_series, default_row):
  121. """
  122. Convert values from vws.log nomenclaure to internal measurement list nomenclature
  123. :param vws_measurement_series: pandas.Series
  124. :param default_row: pandas.Series with default values
  125. :return: pandas.series
  126. """
  127. logging.getLogger("VIEW").info(f'Parsing measurement with label {vws_measurement_series["Label"]}')
  128. lst_line = default_row.copy()
  129. lst_line['Measu'] = vws_measurement_series['index'] + 1
  130. lst_line['Label'] = vws_measurement_series['Label']
  131. expected_data_file = vws_measurement_series["Location"]
  132. if expected_data_file[-2:] == 'ps':
  133. # there is one version of the macro in tillVision that "eats" the last t of the file name
  134. logging.getLogger("VIEW").warning('adding a t to the .ps file name to make it .pst')
  135. expected_data_file += 't'
  136. analyze, dbb1_relative = self.get_path_relative_to_data_dir(expected_data_file)
  137. if analyze == 0:
  138. logging.getLogger("VIEW").warning(
  139. f"Data file {expected_data_file} not found! Setting analyze=0 for this measurement")
  140. lst_line['DBB1'] = dbb1_relative
  141. lst_line["Analyze"] = analyze * int(lst_line.get("Analyze", 1))
  142. lst_line['Cycle'] = vws_measurement_series["dt"]
  143. lst_line['Lambda'] = vws_measurement_series['MonochromatorWL_nm']
  144. lst_line['UTC'] = vws_measurement_series['UTCTime']
  145. return pd.DataFrame(lst_line).T
  146. def get_mtime(self, utc, first_utc):
  147. time_since_first_utc = pd.to_timedelta(utc - first_utc, unit="s")
  148. return str(time_since_first_utc).split(" days ")[1]
  149. class TillImporterOneWavelength(TillImporter):
  150. def __init__(self, default_values: typing.Mapping):
  151. super().__init__(default_values)
  152. self.LE_loadExp = 3
  153. # for till data, metadata is contained in vws.log file
  154. def parse_metadata(self, fle: str, fle_ind: int,
  155. measurement_filter: typing.Callable[[pd.Series], bool]) -> pd.DataFrame:
  156. vws_manager = VWSDataManager(fle)
  157. measurements = vws_manager.get_all_metadata(filter=measurement_filter,
  158. additional_cols_func=additional_cols_func)
  159. first_utc = vws_manager.get_earliest_utc()
  160. this_lst_frame = pd.DataFrame()
  161. if len(measurements) == 0:
  162. logging.getLogger("VIEW").warning(
  163. f"In {fle}: No usable measurements found for given 'measurement_filter' function")
  164. for measurement_index, measurement_row in measurements.iterrows():
  165. lst_line = self.convert_vws_names_to_lst_names(vws_measurement_series=measurement_row,
  166. default_row=self.get_default_row(),
  167. )
  168. lst_line["MTime"] = self.get_mtime(utc=lst_line["UTC"][0], first_utc=first_utc)
  169. this_lst_frame = this_lst_frame.append(lst_line, ignore_index=True)
  170. return this_lst_frame
  171. class TillImporterTwoWavelength(TillImporter):
  172. def __init__(self, default_values: typing.Mapping):
  173. super().__init__(default_values)
  174. self.LE_loadExp = 4
  175. def parse_metadata(self, fle: str, fle_ind: int,
  176. measurement_filter: typing.Callable[[pd.Series], bool]) -> pd.DataFrame:
  177. vws_manager = VWSDataManager(fle)
  178. measurements_wl340_df, measurements_wl380_df \
  179. = vws_manager.get_metadata_two_wavelengths(wavelengths=(340, 380), filter=measurement_filter,
  180. additional_cols_func=additional_cols_func)
  181. first_utc = vws_manager.get_earliest_utc()
  182. this_lst_frame = pd.DataFrame()
  183. for (ind1, measurement_wl340), (ind2, measurement_wl380) in zip(measurements_wl340_df.iterrows(),
  184. measurements_wl380_df.iterrows()):
  185. lst_line_wl340 = self.convert_vws_names_to_lst_names(measurement_wl340, self.get_default_row())
  186. lst_line_wl380 = self.convert_vws_names_to_lst_names(measurement_wl380, self.get_default_row())
  187. lst_line_wl340["dbb2"] = lst_line_wl380["DBB1"]
  188. lst_line_wl340["MTime"] = self.get_mtime(utc=lst_line_wl340["UTC"][0], first_utc=first_utc)
  189. lst_line_wl380["Analyze"] = 0
  190. lst_line_wl380["MTime"] = self.get_mtime(utc=lst_line_wl380["UTC"][0], first_utc=first_utc)
  191. this_lst_frame = this_lst_frame.append(lst_line_wl340, ignore_index=True)
  192. this_lst_frame = this_lst_frame.append(lst_line_wl380, ignore_index=True)
  193. return this_lst_frame
  194. class LSMImporter(BaseImporter):
  195. def __init__(self, default_values: typing.Mapping):
  196. super().__init__(default_values)
  197. self.associate_file_type = "Zeiss LSM files" # short text describing raw data files
  198. self.associated_extensions = [".lsm"] # possible extensions of files containing metadata
  199. self.movie_data_extensions = [".lsm"] # possible extension of file containing data (calcium imaging movies)
  200. self.LE_loadExp = 20 # associated value of the flag LE_loadExp
  201. def get_path_relative_to_data_dir(self, fle):
  202. for movie_data_extension in self.movie_data_extensions:
  203. if fle.endswith(movie_data_extension):
  204. fle_path = pl.PureWindowsPath(fle)
  205. return 1, str(pl.Path(fle_path.parts[-3]) / fle_path.parts[-2] / fle_path.stem)
  206. else:
  207. return 0, -1
  208. def convert_lsm_metadata_to_lst_row(self, measu, fle, lsm_metadata, default_row):
  209. """
  210. Convert values from lsm_metadata to .lst nomenclature
  211. :param lsm_metadata: dict, like the one returned by tifffile.TiffFile.lsm_metadata
  212. :param default_row: pandas.Series, with default values
  213. :return: pandas.Series
  214. """
  215. lst_line = default_row.copy()
  216. lst_line["Label"] = lsm_metadata["ScanInformation"]["Name"]
  217. # converting from seconds to milliseconds
  218. lst_line["Cycle"] = lsm_metadata["TimeIntervall"] * 1000
  219. lst_line["Lambda"] = lsm_metadata["ScanInformation"]["Tracks"][0]["IlluminationChannels"][0]["Wavelength"]
  220. lst_line['UTC'] = excel_datetime(lsm_metadata["ScanInformation"]["Sample0time"]).timestamp()
  221. # convert from meters to micrometers
  222. lst_line["PxSzX"] = lsm_metadata["VoxelSizeX"] / 1e-6
  223. lst_line["PxSzY"] = lsm_metadata["VoxelSizeY"] / 1e-6
  224. analyze, dbb1_relative = self.get_path_relative_to_data_dir(fle)
  225. lst_line["DBB1"] = dbb1_relative
  226. lst_line["Analyze"] = analyze
  227. lst_line["Measu"] = measu
  228. return pd.DataFrame(lst_line).T
  229. # for till data, a single raw data file is a .lsm file
  230. def parse_metadata(self, fle: str, fle_ind: int,
  231. measurement_filter: typing.Callable[[pd.Series], bool] = True) -> pd.DataFrame:
  232. lsm_metadata = tifffile.TiffFile(fle).lsm_metadata
  233. lst_row = self.convert_lsm_metadata_to_lst_row(measu=fle_ind + 1,
  234. fle=fle,
  235. lsm_metadata=lsm_metadata,
  236. default_row=self.get_default_row())
  237. return lst_row
  238. class P1DualWavelengthTIFSingleFileImporter(BaseImporter):
  239. #added Dec2021, to import single tiff file with dual wavelength as used in Trondheim
  240. # init in view uses read_single_file_fura_tif(filename)
  241. def __init__(self, default_values: typing.Mapping):
  242. super().__init__(default_values)
  243. self.associate_file_type = "Dual Wavelength Tif files" # short text describing raw data files
  244. self.associated_extensions = [".tif"] # possible extensions of files containing metadata
  245. self.movie_data_extensions = [".tif"] # possible extension of file containing data (calcium imaging movies)
  246. self.LE_loadExp = 35 # associated value of the flag LE_loadExp
  247. def get_path_relative_to_data_dir(self, fle):
  248. for movie_data_extension in self.movie_data_extensions:
  249. if fle.endswith(movie_data_extension):
  250. fle_path = pl.PureWindowsPath(fle)
  251. return 1, str(pl.Path(fle_path.parts[-3]) / fle_path.parts[-2] / fle_path.stem)
  252. else:
  253. return 0, -1
  254. def convert_metadata_to_lst_row(self, measu, fle, meta_info, default_row):
  255. """
  256. Convert values from meta_info to .lst nomenclature
  257. :param meta_info['PsSzX']: dict, like the one returned by tifffile.TiffFile.lsm_metadata
  258. :param default_row: pandas.Series, with default values
  259. :return: pandas.Series
  260. """
  261. lst_line = default_row.copy()
  262. lst_line["Label"] = meta_info['Label']
  263. # converting from seconds to milliseconds
  264. lst_line["Cycle"] = meta_info['GDMfreq']
  265. lst_line["Lambda"] = "340/380" # meta_info['PsSzX']
  266. lst_line['UTC'] = meta_info['UTCTime']
  267. # convert from meters to micrometers
  268. lst_line["PxSzX"] = meta_info['PsSzX']
  269. lst_line["PxSzY"] = meta_info['PsSzY']
  270. analyze, dbb1_relative = self.get_path_relative_to_data_dir(fle)
  271. lst_line["DBB1"] = meta_info['dbb']
  272. lst_line["dbb2"] = meta_info['dbb']
  273. lst_line["Analyze"] = analyze
  274. lst_line["Measu"] = measu
  275. #additional info
  276. lst_line["ExposureTime_ms"] = meta_info['ExposureTime_ms']
  277. lst_line["AcquisitionDate"] = meta_info['AcquisitionDate']
  278. lst_line["Binning"] = meta_info['Binning']
  279. lst_line["StartTime"] = meta_info['StartTime']
  280. return pd.DataFrame(lst_line).T
  281. # for till data, a single raw data file
  282. def parse_metadata(self, fle: str, fle_ind: int,
  283. measurement_filter: typing.Callable[[pd.Series], bool] = True) -> pd.DataFrame:
  284. # load metadata
  285. tif_file=pl.Path(fle)
  286. with tifffile.TiffFile(tif_file) as tif:
  287. metadata = tif.imagej_metadata
  288. # imagej_metadata does not work any more or never worked on stack - read metadata from first frame
  289. if metadata is None:
  290. metadata = tif.pages[0].description
  291. # extract XML tree from metadata into root
  292. root = ET.fromstring(metadata)
  293. # define namespace for OME data
  294. # this uses xTree OME syntax
  295. # https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element
  296. ns = {
  297. "d": "http://www.openmicroscopy.org/Schemas/OME/2013-06"
  298. }
  299. # now get all infos that we put into settings file
  300. meta_info = root.find("./d:Image/d:Pixels", ns).attrib
  301. # result is a dictionary, for example:
  302. # {'ID': 'Pixels:1-0',
  303. # 'DimensionOrder': 'XYTZC',
  304. # 'Type': 'uint16',
  305. # 'SizeX': '1392',
  306. # 'SizeY': '1040',
  307. # 'SizeZ': '1',
  308. # 'SizeC': '1',
  309. # 'SizeT': '160',
  310. # 'PhysicalSizeX': '6.45',
  311. # 'PhysicalSizeY': '6.45',
  312. # 'PhysicalSizeZ': '1000',
  313. # 'SignificantBits': '14'}
  314. # acquisition date as string, e.g. '2021-09-19T16:49:28'
  315. AcquisitionDate = root.find("./d:Image/d:AcquisitionDate", ns).text
  316. meta_info.update({'AcquisitionDate':AcquisitionDate})
  317. # binning info, e.g. '1x1'
  318. Binning = root.find("./d:Image/d:Pixels/d:Channel/d:DetectorSettings", ns).attrib["Binning"]
  319. meta_info.update({'Binning':Binning})
  320. # frame interval. Since this is dual wavelength, take distance of frame3 and frame5
  321. # relative time of secoond image (first image looks unsafe - often it is blanck. Therefore use frames 2 and 3)
  322. time_frame1 = root.find("./d:Image/d:Pixels/d:Plane[3]", ns).attrib["DeltaT"]
  323. # relative time of third image
  324. time_frame2 = root.find("./d:Image/d:Pixels/d:Plane[5]", ns).attrib["DeltaT"]
  325. GDMfreq = (float(time_frame2) - float(time_frame1))
  326. GDMfreq = round(GDMfreq*1000) # unit is ms, rounded
  327. meta_info.update({'GDMfreq':str(GDMfreq)})
  328. # this format is for two-wavelength recording,
  329. # so I take exposure time for frame 3 and 4
  330. # just in case the very first one would be strange
  331. ExposureTime_ms = float(root.find("./d:Image/d:Pixels/d:Plane[3]", ns).attrib["ExposureTime"])
  332. ExposureTime_ms_340 = int(1000*ExposureTime_ms) # value in Andor is in seconds
  333. ExposureTime_ms = float(root.find("./d:Image/d:Pixels/d:Plane[4]", ns).attrib["ExposureTime"])
  334. ExposureTime_ms_380 = int(1000*ExposureTime_ms) # value in Andor is in seconds
  335. ExposureTimeStr = str(ExposureTime_ms_340)+'/'+str(ExposureTime_ms_380)
  336. meta_info.update({'ExposureTime_ms':ExposureTimeStr})
  337. # columns in .settings that need to be filled here:
  338. # get the tif file, including the last directory
  339. this_filename = tif_file.parts
  340. dbb = this_filename[-2] +'/'+ this_filename[-1]
  341. meta_info.update({'dbb':dbb})
  342. meta_info.update({'Label':this_filename[-1]})
  343. # PxSzX
  344. # replace the Andor name "PhysicalSizeX' with the Galizia name PsSzX
  345. meta_info['PsSzX'] = meta_info.pop('PhysicalSizeX')
  346. meta_info['PsSzY'] = meta_info.pop('PhysicalSizeY')
  347. # PxSzY, e.g. 1.5625
  348. # When was this measurement taken?
  349. # first get the time when the measurement was started
  350. measurementtime = datetime.datetime.fromisoformat(AcquisitionDate)
  351. # now add the time of the first frame, since measurement start time ie equal for all measurements in one loop
  352. measurementtime_delta = datetime.timedelta(seconds=float(time_frame1))
  353. measurementtime = measurementtime + measurementtime_delta
  354. # StartTime, e.g. 10:05:04
  355. StartTime = measurementtime.strftime('%H:%M:%S')
  356. meta_info.update({'StartTime':StartTime})
  357. # UTC, e.g. 1623229504.482
  358. UTC = measurementtime.timestamp()
  359. meta_info.update({'UTCTime':UTC})
  360. ##example for meta_info now:
  361. # {'ID': 'Pixels:1-0',
  362. # 'DimensionOrder': 'XYCTZ',
  363. # 'Type': 'uint16',
  364. # 'SizeX': '336',
  365. # 'SizeY': '256',
  366. # 'SizeZ': '1',
  367. # 'SizeC': '2',
  368. # 'SizeT': '100',
  369. # 'PhysicalSizeZ': '1000',
  370. # 'SignificantBits': '16',
  371. # 'AcquisitionDate': '2019-08-14T14:44:29',
  372. # 'Binning': '4x4',
  373. # 'GDMfreq': '34',
  374. # 'ExposureTime_ms': '13',
  375. # 'dbb': '190815_h2_El/A_3.tif',
  376. # 'Label': 'A_3.tif',
  377. # 'PsSzX': '1.3',
  378. # 'PsSzY': '1.3',
  379. # 'StartTime': '14:44:29',
  380. # 'UTCTime': 1565786669.06601}
  381. lst_row = self.convert_metadata_to_lst_row(measu=fle_ind + 1,
  382. fle=fle,
  383. meta_info=meta_info,
  384. default_row=self.get_default_row())
  385. return lst_row
  386. def get_animal_tag_raw_data_mapping(self, files_chosen: list) -> dict:
  387. if len(files_chosen) == 0:
  388. return {}
  389. else:
  390. parents = [pl.Path(fle).parent for fle in files_chosen]
  391. assert all(x == parents[0] for x in parents), f"Tif files specified for constructing measurement " \
  392. f"list file do no belong to the same directory: " \
  393. f"{files_chosen}"
  394. return {parents[0].parent.name: files_chosen}
  395. def get_importer_class(LE_loadExp):
  396. if LE_loadExp == 3:
  397. return TillImporterOneWavelength
  398. elif LE_loadExp == 4:
  399. return TillImporterTwoWavelength
  400. elif LE_loadExp == 20:
  401. return LSMImporter
  402. elif LE_loadExp == 35:
  403. return P1DualWavelengthTIFSingleFileImporter
  404. else:
  405. raise NotImplementedError
  406. def get_setup_extension(LE_loadExp):
  407. """
  408. returns the file extension of raw data file of the setup specified by <LE_loadExp>
  409. :param int LE_loadExp: value of the flag of the same name
  410. :rtype: list
  411. """
  412. importer_class = get_importer_class(LE_loadExp)
  413. return importer_class({}).movie_data_extension