io.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. import pandas as pd
  2. import tifffile
  3. import numpy as np
  4. import yaml
  5. import pathlib as pl
  6. import datetime as dt
  7. import xml.etree.ElementTree as ET
  8. import logging
  9. import os
  10. import datetime
  11. from readlif.reader import LifFile
  12. class LIFReaderGio(LifFile):
  13. def __init__(self, lif_file: str):
  14. super().__init__(lif_file)
  15. def load_all_metadata(self):
  16. """
  17. Load all metadata in the initialized LIF file
  18. only including sets with more than one frame (i.e. exclude snapshots)
  19. :return: pd.DataFrame with each row containing metadata of one measurement
  20. """
  21. root = ET.fromstring(self.xml_header) # this is the full metadata as XML
  22. all_metadata_df = pd.DataFrame()
  23. # iterate all measurements
  24. for fle_ind, measurement in enumerate(self.get_iter_image()):
  25. this_measurement = self.get_image(fle_ind)
  26. lif_metadata = pd.Series()
  27. lif_metadata["Label"] = this_measurement.name
  28. # lif_metadata["Measu"] = fle_ind
  29. if this_measurement.dims.t > 1:
  30. #information about time between frames if more than one frame
  31. # converting from seconds to milliseconds
  32. cycle = float(
  33. this_measurement.info["settings"]["FrameTime"]) # milliseconds per frame, Leica gives microseconds
  34. lif_metadata["Cycle"] = 1000 * cycle
  35. lif_metadata['SampFreq'] = this_measurement.info["scale"][3] # frames per second?
  36. else:
  37. lif_metadata["Cycle"] = -1
  38. lif_metadata['SampFreq'] = -1 # frames per second?
  39. lif_metadata["Lambda"] = 0 # TODO
  40. # convert from meters to micrometers
  41. lif_metadata["PxSzX"] = this_measurement.info["scale"][0]
  42. lif_metadata["PxSzY"] = this_measurement.info["scale"][1] # y-size
  43. lif_metadata['FrameSizeX'] = this_measurement.dims.x # pixel number in x
  44. lif_metadata['FrameSizeY'] = this_measurement.dims.y # pixel number in y
  45. lif_metadata['NumFrames'] = this_measurement.dims.t # pixel number in t
  46. lif_metadata['Comment'] = "Leica .lif file"
  47. # extract measurement time - which is only in the XML of the full LIF file, and not in this_measurement
  48. # see /pyview/view/python_core/measurement_list/importers.py
  49. # i.e. if changes are necessary here, do them also there
  50. # time stamps are not correct - I do not know why yet (15.6.2022)
  51. # that is: there are less time stamps in the XML file than measurements in the .lif file
  52. # therefore, I cannot attribute the right time to each measurements
  53. print('Now using UTC from first frame in measu: ', fle_ind)
  54. # timestamp of first frame in measurement measu!
  55. time = root.findall(".//TimeStampList")[fle_ind].text[:15]
  56. timeStamp = int(time, 16)
  57. # windows uses 1. Januar<y 1601 as reference
  58. # https://gist.github.com/Mostafa-Hamdy-Elgiar/9714475f1b3bc224ea063af81566d873
  59. EPOCH_AS_FILETIME = 116444736000000000 # January 1, 1970 as MS file time
  60. HUNDREDS_OF_NANOSECONDS = 10000000
  61. measurementtime = datetime.datetime.utcfromtimestamp((timeStamp - EPOCH_AS_FILETIME) / HUNDREDS_OF_NANOSECONDS)
  62. print('Lif-File time in importers.py: ', measurementtime) # for debugging
  63. # UTC, e.g. 1623229504.482
  64. UTC = measurementtime.timestamp()
  65. # meta_info.update({'UTCTime':UTC})
  66. lif_metadata['UTC'] = UTC
  67. # MTime is the time passed with respect to the very first measurement in this animal
  68. time = root.findall(".//TimeStampList")[0].text[:15]
  69. timeStamp = int(time, 16)
  70. measurementtime_first = datetime.datetime.utcfromtimestamp(
  71. (timeStamp - EPOCH_AS_FILETIME) / HUNDREDS_OF_NANOSECONDS)
  72. MTime = measurementtime - measurementtime_first
  73. # format this timedelta
  74. minutes, seconds = divmod(MTime.seconds + MTime.days * 86400, 60)
  75. hours, minutes = divmod(minutes, 60)
  76. lif_metadata['MTime'] = '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)
  77. all_metadata_df = all_metadata_df.append(pd.DataFrame(lif_metadata).T, ignore_index=True)
  78. return all_metadata_df
  79. def load_data(self, measu):
  80. this_measurement = self.get_image(measu)
  81. dims = this_measurement.dims
  82. # dimensions are x, y, z, t, m. We are interested in x, y, t
  83. img_data = np.zeros((dims.x, dims.y, dims.t), dtype=np.float)
  84. frame_list = [i for i in this_measurement.get_iter_t(c=0, z=0)]
  85. for count, frame in enumerate(frame_list):
  86. img_data[:, :, count] = np.asarray(frame)
  87. return img_data
  88. # end LIFReaderGio
  89. def read_lif(lif_file, measu):
  90. """
  91. Read a measurement from a Leica lif file into numpy array.
  92. implemented May 2022, tested with data from Marco Paoli, Toulouse
  93. :param str lif_file: path of lif file
  94. :param int measu: which measurement in the lif file to load
  95. :return: numpy.ndarray in XYT format
  96. """
  97. lif_reader_wrapper = LIFReaderGio(lif_file)
  98. return lif_reader_wrapper.load_data(measu)
  99. # end read_lif
  100. def read_tif_2Dor3D(tif_file, flip_y=True, return_3D=False, load_data=True):
  101. """
  102. Read a TIF file into numpy array. TIF file axes are assumed to be TYX or YX. Also works for OME Tiff files,
  103. e.g. Live Acquisition, or FIJI
  104. :param str tif_file: path of tif file
  105. :param bool flip_y: whether to flip Y axis
  106. :param bool return_3D: whether to convert 2D to 3D if required
  107. :param bool load_data: if True loads data and returns else first return value is None
  108. :return: data, metadata
  109. data: numpy.ndarray in XY or XYT format
  110. metadata: dictionary if present, else None
  111. # if tif_file is str, convert it to path
  112. Read a TIF file into numpy array. TIF file axes are assumed to be TYX or YX. Also works for OME Tiff files,
  113. e.g. Live Acquisition, or FIJI
  114. :param str tif_file: path of tif file
  115. :param bool flip_y: whether to flip Y axis
  116. :param bool return_3D: whether to convert 2D to 3D if required
  117. :param bool load_data: if True loads data and returns else first return value is None
  118. :return: data, metadata
  119. data: numpy.ndarray in XY or XYT format
  120. metadata: dictionary if present, else None
  121. """
  122. # if tif_file is str, convert it to path
  123. # a476b63c975103c2cd6357311bbcd521129766f9
  124. if type(tif_file) == str:
  125. tif_file = pl.Path(tif_file)
  126. # load metadata
  127. # tif_file=animal_list[10]
  128. with tifffile.TiffFile(tif_file) as tif:
  129. metadata = tif.imagej_metadata
  130. # imagej_metadata does not work any more or never worked on stack - read metadata from first frame
  131. if metadata is None:
  132. metadata = tif.pages[0].description
  133. # extract XML tree from metadata into root
  134. try:
  135. root = ET.fromstring(metadata)
  136. metadata_present = True
  137. # define namespace for OME data
  138. # this uses xTree OME syntax
  139. # https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element
  140. ns = {
  141. "d": "http://www.openmicroscopy.org/Schemas/OME/2013-06"
  142. }
  143. # now get all infos that we put into settings file
  144. meta_info = root.find("./d:Image/d:Pixels", ns).attrib
  145. # result is a dictionary, for example:
  146. # {'ID': 'Pixels:1-0',
  147. # 'DimensionOrder': 'XYTZC',
  148. # 'Type': 'uint16',
  149. # 'SizeX': '1392',
  150. # 'SizeY': '1040',
  151. # 'SizeZ': '1',
  152. # 'SizeC': '1',
  153. # 'SizeT': '160',
  154. # 'PhysicalSizeX': '6.45',
  155. # 'PhysicalSizeY': '6.45',
  156. # 'PhysicalSizeZ': '1000',
  157. # 'SignificantBits': '14'}
  158. # acquisition date as string, e.g. '2021-09-19T16:49:28'
  159. AcquisitionDate = root.find("./d:Image/d:AcquisitionDate", ns).text
  160. meta_info.update({'AcquisitionDate':AcquisitionDate})
  161. # binning info, e.g. '1x1'
  162. Binning = root.find("./d:Image/d:Pixels/d:Channel/d:DetectorSettings", ns).attrib["Binning"]
  163. meta_info.update({'Binning':Binning})
  164. # frame interval
  165. # relative time of secoond image (first image looks unsafe - often it is blanck. Therefore use frames 2 and 3)
  166. time_frame1 = root.find("./d:Image/d:Pixels/d:Plane[2]", ns).attrib["DeltaT"]
  167. # relative time of third image
  168. time_frame2 = root.find("./d:Image/d:Pixels/d:Plane[3]", ns).attrib["DeltaT"]
  169. GDMfreq = (float(time_frame2) - float(time_frame1))
  170. GDMfreq = int(GDMfreq*1000 + 0.5) # unit is ms, rounded
  171. meta_info.update({'GDMfreq':str(GDMfreq)})
  172. # exposure time for frame 2 - expecting that to be uniform
  173. ExposureTime_ms = float(root.find("./d:Image/d:Pixels/d:Plane[2]", ns).attrib["ExposureTime"])
  174. ExposureTime_ms = int(1000*ExposureTime_ms) # value in Andor is in seconds
  175. meta_info.update({'ExposureTime_ms':str(ExposureTime_ms)})
  176. # columns in .settings that need to be filled here:
  177. # get the tif file, including the last directory
  178. this_filename = tif_file.parts
  179. dbb = this_filename[-2] +'/'+ this_filename[-1]
  180. meta_info.update({'dbb':dbb})
  181. meta_info.update({'Label':this_filename[-1]})
  182. # PxSzX
  183. # replace the Andor name "PhysicalSizeX' with the Galizia name PsSzX
  184. meta_info['PsSzX'] = meta_info.pop('PhysicalSizeX')
  185. meta_info['PsSzY'] = meta_info.pop('PhysicalSizeY')
  186. # PxSzY, e.g. 1.5625
  187. # When was this measurement taken?
  188. # first get the time when the measurement was started
  189. measurementtime = dt.datetime.fromisoformat(AcquisitionDate)
  190. # now add the time of the first frame, since measurement start time ie equal for all measurements in one loop
  191. measurementtime_delta = dt.timedelta(seconds=float(time_frame1))
  192. measurementtime = measurementtime + measurementtime_delta
  193. # StartTime, e.g. 10:05:04
  194. StartTime = measurementtime.strftime('%H:%M:%S')
  195. meta_info.update({'StartTime':StartTime})
  196. # UTC, e.g. 1623229504.482
  197. UTC = measurementtime.timestamp()
  198. meta_info.update({'UTCTime':UTC})
  199. except:
  200. metadata_present = False
  201. meta_info = None
  202. # load data
  203. if load_data:
  204. with tifffile.TiffFile(tif_file) as tif:
  205. imagej_hyperstack = tif.asarray()
  206. if len(imagej_hyperstack.shape) == 3: # 3D data in TYX format
  207. if flip_y:
  208. imagej_hyperstack = np.flip(imagej_hyperstack, axis=1)
  209. imagej_hyperstack = imagej_hyperstack.swapaxes(0, 2) # return in XYT format
  210. # read 2D tif data
  211. elif len(imagej_hyperstack.shape) == 2: # 2D data in YX format
  212. if flip_y:
  213. imagej_hyperstack = np.flip(imagej_hyperstack, axis=0)
  214. imagej_hyperstack = imagej_hyperstack.swapaxes(0, 1) # YX to XY format
  215. if return_3D:
  216. imagej_hyperstack = np.stack([imagej_hyperstack], axis=2)
  217. else: # i.e., if load_data is false
  218. imagej_hyperstack = None
  219. return imagej_hyperstack, meta_info
  220. # end read_ometif_metadict
  221. def read_single_file_fura_tif(tif_file):
  222. """
  223. Read FURA data from <tif_file>. Assume input file has the format TWYX, where W is wavelength and
  224. this dimension has size 2.
  225. :param str tif_file: absolute path of the file on file system
  226. :rtype: data_340, data_380
  227. data_340: 340nm data as an numpy.ndarray, format XYT
  228. data_380: 380nm data as an numpy.ndarray, format XYT
  229. """
  230. data_in = tifffile.imread(tif_file)
  231. data_in = np.flip(data_in, axis=1) # format TWYX
  232. # split data, each will have format TYX
  233. data_340 = data_in[:, 1, :, :]
  234. data_380 = data_in[:, 0, :, :]
  235. return data_340.swapaxes(0, 2), data_380.swapaxes(0, 2) # return in format XYT
  236. def write_tif_2Dor3D(array_xy_or_xyt, tif_file, dtype=None, scale_data=False, labels=None):
  237. """
  238. Write a 2D or a 3D numpy array to a TIFF file with data type format <dtype>. If <dtype> is None, data is written
  239. in its own data type. Else, the function will try to safely cast data in <array_xy_or_xyt> to <dtype>.
  240. If it is not possible and <scale_data> is True, data is scaled to fit the dynamic range of <dtype> and
  241. written to disc. Else an error is raised
  242. :param numpy.ndarray array_xy_or_xyt: array to be written
  243. :param str tif_file: name of file to which data is to be written
  244. :param dtype: data type format to use. Must be a valid numerical numpy dtype
  245. (https://numpy.org/doc/stable/reference/arrays.scalars.html)
  246. :param str|Sequence labels: a str or 1 member sequence when <array_xy_or_xyt> is 2D, else a sequence
  247. with the same size as the last (3rd) dimension of <array_xy_or_xyt>
  248. """
  249. if dtype is None:
  250. array_cast = array_xy_or_xyt
  251. else:
  252. if issubclass(dtype, np.integer):
  253. info = np.iinfo(dtype)
  254. elif issubclass(dtype, np.flexible):
  255. info = np.finfo(dtype)
  256. else:
  257. raise ValueError(
  258. "Invalid dtype. Please specify a valid numerical numpy dtype "
  259. "(https://numpy.org/doc/stable/reference/arrays.scalars.html)")
  260. if np.can_cast(array_xy_or_xyt, dtype):
  261. array_cast = array_xy_or_xyt.astype(dtype)
  262. elif scale_data:
  263. array_min, array_max = array_xy_or_xyt.min(), array_xy_or_xyt.max()
  264. array_xy_or_xyt_0_1 = (array_xy_or_xyt - array_min) / (array_max - array_min)
  265. array_scaled = info.min + array_xy_or_xyt_0_1 * (info.max - info.min)
  266. array_cast = array_scaled.astype(dtype)
  267. else:
  268. raise ValueError(
  269. f"The values in the specified array could not be safely cast into the specified dtype ({dtype})."
  270. f"If you want the values in the specified array to be scaled into the dynamic range of {dtype}, "
  271. f"set the argument <scale_data> to True")
  272. # flip Y axis
  273. array_cast = np.flip(array_cast, axis=1)
  274. if type(labels) is str:
  275. labels = [labels]
  276. if len(array_cast.shape) == 2:
  277. array_to_write = array_cast.swapaxes(0, 1) # from XY to YX
  278. if labels is not None:
  279. assert len(labels) == 1, \
  280. f"Expected one label to write along with a one page TIF. Got ({len(labels)})"
  281. elif len(array_cast.shape) == 3:
  282. array_to_write = array_cast.swapaxes(0, 2) # from XYT to TYX
  283. if labels is not None:
  284. assert len(labels) == array_cast.shape[2], \
  285. f"Expected {array_cast.shape[2]} labels two write along with array with shape {array_cast.shape}. " \
  286. f"Got {len(labels)}"
  287. else:
  288. raise ValueError("This function can only write 2D or 3D arrays")
  289. kwargs = {"description": None}
  290. if labels is not None:
  291. kwargs["description"] = "Labels=" + ";;".join(labels)
  292. tifffile.imwrite(tif_file, data=array_to_write, **kwargs)
  293. def read_check_yml_file(yml_filename, expected_type=None):
  294. """
  295. Reads flags from <filename>, applies some checks and returns them
  296. :param yml_filename: str, path of a .yml file
  297. :param expected_type: any, if specified, as assertion error is raised if contents
  298. of the yml file is not of the specfied type
  299. :return: any, depending of contents of the yml file
  300. """
  301. with open(yml_filename, 'r') as fle:
  302. yml_contents = yaml.load(fle, yaml.SafeLoader)
  303. if expected_type is not None:
  304. assert type(yml_contents) is expected_type, f"YML file {yml_filename} was expected to contain " \
  305. f"{expected_type} data," \
  306. f"found, {type(yml_contents)} instead"
  307. return yml_contents
  308. def write_yml(yml_filename, to_write):
  309. with open(yml_filename, 'w') as fle:
  310. yaml.dump(to_write, fle, Dumper=yaml.SafeDumper)
  311. def read_lsm(path):
  312. """ takes a path to a lsm file, reads the file with the tifffile lib and
  313. returns a np array
  314. """
  315. data_cut = tifffile.imread(path)
  316. data_cut_rot = np.swapaxes(data_cut, 0, 2)
  317. data_cut_rot_flip = np.flip(data_cut_rot, axis=1)
  318. return data_cut_rot_flip
  319. def load_pst(filename):
  320. """
  321. read tillvision based .pst files as uint16.
  322. """
  323. # filename can have an extension (e.g. .pst), or not
  324. # reading stack size from inf
  325. # inf_path = os.path.splitext(filename)[0] + '.inf'
  326. # this does not work for /data/030725bR.pst\\dbb10F, remove extension by hand,
  327. # assuming it is exactly 3 elements
  328. if filename.endswith(".pst") or filename.endswith(".ps"):
  329. filepath = pl.Path(filename)
  330. else:
  331. filepath = pl.Path(f"{filename}.pst")
  332. if not filepath.is_file():
  333. filepath = filepath.with_suffix(".ps")
  334. assert filepath.is_file(), \
  335. f"Could not find either of the following raw data files:\n{filename}.pst\n{filename}.ps"
  336. meta = {}
  337. with open(filepath.with_suffix(".inf"), 'r') as fh:
  338. # fh.next()
  339. for line in fh.readlines():
  340. try:
  341. k, v = line.strip().split('=')
  342. meta[k] = v
  343. except:
  344. pass
  345. # reading stack from pst
  346. shape = np.int32((meta['Width'], meta['Height'], meta['Frames']))
  347. expected_units = np.prod(shape)
  348. assert filepath.stat().st_size >= 2 * expected_units, \
  349. f"Expected at least {2 * expected_units} bytes in {filepath}. Found {filepath.stat().st_size}"
  350. raw = np.fromfile(filepath, dtype='int16', count=expected_units)
  351. data = np.reshape(raw, shape, order='F')
  352. # was swapping x, y axes; commented out to retain original order
  353. # data = data.swapaxes(0,1)
  354. # data is upside down as compared to what we see in TillVision
  355. data = np.flip(data, axis=1)
  356. data = data.astype('uint16')
  357. return data