__init__.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
'''
(Explanation as of Sept. 4th, 2019. Code by Ajay.)
The p1 structure contains all parameters of an experiment that are
part of this particular measurement,
e.g. image size, frame rate, stimulus times, etc.
This information comes from the imaging equipment (e.g. frame rate in the Till .log file),
or from other equipment (e.g. odorant name in the PAL bar-code reader),
or from the user (e.g. stimulus time or the like).
All this information has been collected prior to analysis in a file,
where each line/row is one measurement.
These are the .lst OR .settings files (.xls, or .csv, or tab-delimited...).
There is one .lst file for every animal.
For historical reasons, the same variable may have a different name in .settings,
.lst or p1.
The full list of allowed variables and their names is in metadata_definition.csv,
e.g. ... Code/git_repos/VIEW/view/flags_and_metadata_definitions/metadata_definition.csv
The current format of p1 is implemented as a class in python_core/measurement_list/__init__.py:
MeasurementList.
In particular, this is used to extract p1 values for a particular measurement.
stimuli.py contains the tools to adjust p1 values related to stimulus timing.
'''
  22. import re
  23. from view.python_core.get_internal_files import get_metadata_definition
  24. from view.python_core.paths import get_existing_raw_data_filename, convert_to_path_for_current_os
  25. from view.python_core.p1_class.metadata_related import parse_p1_metadata_from_measurement_list_row
  26. from ..p1_class import get_p1
  27. import pandas as pd
  28. import numpy as np
  29. import pathlib as pl
  30. from .io import get_ext_based_values
  31. from .importers import get_importer_class
  32. from view.python_core.old_file_handler import get_old_file_handler
  33. import typing
  34. import logging
  35. import copy
  36. import pprint
  37. class MeasurementList(object):
  38. def __init__(self, LE_loadExp):
  39. self._metadata_def_df = get_metadata_definition()
  40. self.LE_loadExp = LE_loadExp
  41. self.measurement_list_df = None
  42. self.last_measurement_list_fle = None
  43. self.animal_name = None
  44. def get_column_mapping(self, from_col=None, to_col=None):
  45. """
  46. Return a dictionary with column names of <from_col> as keys and corresponding column names of <to_col> as values.
  47. If one of them is set to None, the internal column names are used. Both cannot be None.
  48. :param from_col: str or None
  49. :param to_col: str or None
  50. :return: dict
  51. """
  52. assert not (from_col is None and to_col is None), "One of 'from_col' and 'to_col' needs to be specified!"
  53. if from_col is None:
  54. from_col = "LST Name"
  55. if to_col is None:
  56. to_col = "LST Name"
  57. metadata_reset = self._metadata_def_df.reset_index()
  58. if from_col == to_col == "LST Name":
  59. return {k: k for k in metadata_reset["LST Name"]}
  60. else:
  61. temp_df = metadata_reset.set_index(from_col)
  62. return dict(temp_df[to_col])
  63. @classmethod
  64. def create_from_lst_file(cls, lst_fle: str, LE_loadExp: int):
  65. """
  66. Creates an empty MeasurementList object, reads values from a list file and and initializes the MeasurementList
  67. object with values read
  68. :param lst_fle: str, containing the path of a measurement list file
  69. :param LE_loadExp: int, the LE_loadExp flag of VIEW
  70. :return: MeasurementList object
  71. """
  72. measurement_list = cls(LE_loadExp)
  73. io_class, relevant_column, ext = get_ext_based_values(lst_fle)
  74. measurement_list.last_measurement_list_fle = lst_fle
  75. measurement_list.animal_name = pl.Path(lst_fle).name[:-len(ext)]
  76. in_df = io_class.read(lst_fle)
  77. column_name_mapping = measurement_list.get_column_mapping(from_col=relevant_column, to_col=None)
  78. in_df_lst_names = in_df.rename(columns=column_name_mapping)
  79. # define a new column order where columns known to VIEW are moved to the beginning
  80. new_column_order = \
  81. [column_name_mapping[x] for x in in_df.columns if x in column_name_mapping] + \
  82. [x for x in in_df.columns if x not in column_name_mapping]
  83. # reorder columns to resemble the row order in the internal metadata definition file
  84. # columns not internal definition will be at the end
  85. in_df_reordered = in_df_lst_names[new_column_order]
  86. measurement_list.measurement_list_df = in_df_reordered
  87. measurement_list.revise_dbbs_for_current_OS()
  88. measurement_list.add_missing_defaults()
  89. measurement_list.convert_to_numeric()
  90. measurement_list.check_minimum_requirements()
  91. return measurement_list
  92. @classmethod
  93. def create_from_df(cls, LE_loadExp, df):
  94. measurement_list = cls(LE_loadExp)
  95. measurement_list.measurement_list_df = df
  96. measurement_list.convert_to_numeric()
  97. measurement_list.check_minimum_requirements()
  98. return measurement_list
  99. def write_to_list_file(self, lst_fle: str, columns2write: typing.Union[None, list] = None,
  100. overwrite_old_values=()):
  101. """
  102. Write to file with column names based on file extension
  103. :param lst_fle: str, path of the file to be written
  104. :param columns2write: list or None, the columns 2 write. The written file will have the same column order as
  105. <columns2write>. If None, all columns are used and written in no particular order.
  106. :param overwrite_old_values: iterable of strings or None, names of columns of the list file to be written whose
  107. values are to be overwritten from a file of the same name, if it exists. If None, file is not backed up
  108. :return: None
  109. """
  110. io_class, relevant_column, _ = get_ext_based_values(lst_fle)
  111. old_file_handler = get_old_file_handler(lst_fle)
  112. old_file_handler.backup()
  113. column_name_mapping = self.get_column_mapping(to_col=relevant_column)
  114. df_to_write = self.measurement_list_df.rename(columns=column_name_mapping)
  115. # rewrite values from old df
  116. df_with_old_values = old_file_handler.write_old_values(df_to_write, overwrite_old_values,
  117. measu_col_name=column_name_mapping["Measu"],
  118. label_col_name=column_name_mapping["Label"])
  119. # use all columns if <columns2write> is None
  120. if columns2write is None:
  121. columns2write = df_to_write.columns.values
  122. # add those columns to <columns2write> that exist additionally in <df_with_old_values>
  123. columns2write = list(columns2write)
  124. for column2overwrite in df_with_old_values.columns.values:
  125. if column2overwrite not in columns2write:
  126. columns2write.append(column2overwrite)
  127. # reorder / limits columns of <df_with_old_values> based on <columns2write>
  128. df_with_old_values = df_with_old_values.reindex(columns=columns2write)
  129. io_class.write(df=df_with_old_values, fle=lst_fle)
  130. def write_to_lst_file_cross_format(self, output_lst_file: str, backup_current_file: bool=True):
  131. """
  132. Writes the current measurement list to another format, based on the extension of <output_lst_file>.
  133. :param str output_lst_file: path where output will be written
  134. :param bool backup_current_file: if True, and this MeasurementList object was loaded from a file, that file
  135. will be backed up
  136. """
  137. io_class_output, = get_ext_based_values(output_lst_file)
  138. if backup_current_file:
  139. old_file_handler = get_old_file_handler(self.last_measurement_list_fle)
  140. old_file_handler.backup()
  141. io_class_output.write(df=self.measurement_list_df, fle=output_lst_file)
  142. def get_minimum_columns_required(self):
  143. return [k for k, v in self._metadata_def_df["Requirement for List load"].items()
  144. if v in ["all", str(self.LE_loadExp)]]
  145. def check_minimum_requirements(self):
  146. minimum_columns_required = self.get_minimum_columns_required()
  147. missing_columns = set(minimum_columns_required) - set(self.measurement_list_df.columns)
  148. assert missing_columns == set(), f"These columns are required but were not found in the list file: " \
  149. f"{missing_columns}"
  150. def add_missing_defaults(self):
  151. for col_name, default_value in self._metadata_def_df["Default values"].items():
  152. if col_name not in self.measurement_list_df.columns:
  153. self.measurement_list_df[col_name] = default_value
  154. def convert_to_numeric(self):
  155. self.measurement_list_df = \
  156. self.measurement_list_df.applymap(lambda x: pd.to_numeric(x, errors="ignore"))
  157. def revise_dbbs_for_current_OS(self):
  158. for row_ind, row in self.measurement_list_df.iterrows():
  159. for k, v in self.get_metadata_by_type(measurement_row=row, tpye="paths").items():
  160. self.measurement_list_df.loc[row_ind, k] = convert_to_path_for_current_os(v)
  161. def get_df_from_file(self, fle):
  162. pass
  163. def get_row_by_measu(self, measu):
  164. return self.get_row_by_column_value(column_name="Measu", column_value=measu)
  165. def get_row_by_label(self, label):
  166. return self.get_row_by_column_value(column_name="Label", column_value=label)
  167. def get_row_index_by_column_value(self, column_name, column_value):
  168. rows_mask = self.measurement_list_df[column_name].apply(lambda x: x == column_value)
  169. assert not sum(rows_mask) > 1, f"More than one rows found in {self.last_measurement_list_fle} with " \
  170. f"{column_name}={column_value}"
  171. assert not sum(rows_mask) == 0, f"No rows with {column_name}={column_value} " \
  172. f"found in {self.last_measurement_list_fle}"
  173. row_index = np.where(rows_mask.values)[0][0]
  174. return row_index
  175. def get_row_by_column_value(self, column_name, column_value):
  176. row_index = self.get_row_index_by_column_value(column_name, column_value)
  177. return self.get_row_by_index(row_index)
  178. def get_row_by_index(self, index):
  179. assert index in range(self.measurement_list_df.shape[0]), \
  180. f"Index={index} out of range for {self.last_measurement_list_fle} " \
  181. f"containing {self.measurement_list_df.shape[0]} rows"
  182. return self.measurement_list_df.iloc[index, :]
  183. def get_metadata_by_type(self, measurement_row, tpye):
  184. metadata_subset = self._metadata_def_df["Type"].apply(lambda x: x == tpye)
  185. return {ind: measurement_row[ind] for ind, val in metadata_subset.items() if val and ind in measurement_row}
  186. def get_p1_metadata_by_index(self, index):
  187. selected_row = self.get_row_by_index(index)
  188. return parse_p1_metadata_from_measurement_list_row(selected_row)
  189. def get_p1_metadata_by_measu(self, measu):
  190. selected_row = self.get_row_by_measu(measu)
  191. return parse_p1_metadata_from_measurement_list_row(selected_row)
  192. def get_p1_metadata_by_label(self, label):
  193. selected_row = self.get_row_by_label(label)
  194. return parse_p1_metadata_from_measurement_list_row(selected_row)
  195. def get_measus(self, analyze_values_accepted=None):
  196. """
  197. Returns those measus that have a value in the column 'Analyze' from among those in <analyze_values_accepted>.
  198. If <analyze_values_accepted> is None, then all measus for the animal are returned
  199. :param analyze_values_accepted: iterable
  200. :return: list of int
  201. """
  202. if analyze_values_accepted is None:
  203. return self.measurement_list_df["Measu"].values.tolist()
  204. else:
  205. analyse_col = self.measurement_list_df["Analyze"].values
  206. row_filter = [x in analyze_values_accepted for x in analyse_col]
  207. return self.measurement_list_df.loc[row_filter, "Measu"].values.tolist()
  208. def sub_select_based_on_analyze(self, analyze_values_accepted=None):
  209. """
  210. Returns a measurement list object with only those rows that have 'Analyze' values from among those in
  211. <analyze_values_accepted>. If <analyze_values_accepted> is None, returns a copy of self
  212. :param analyze_values_accepted: iterable
  213. :return: MeasurementList object
  214. """
  215. if analyze_values_accepted is None:
  216. return copy.deepcopy(self)
  217. else:
  218. analyse_col = self.measurement_list_df["Analyze"].values
  219. row_filter = [x in analyze_values_accepted for x in analyse_col]
  220. list2return = MeasurementList(self.LE_loadExp)
  221. list2return.measurement_list_df = self.measurement_list_df.loc[row_filter, :].copy()
  222. list2return.last_measurement_list_fle = self.last_measurement_list_fle
  223. return list2return
  224. def get_last_measurement_list_path(self):
  225. return pl.Path(self.last_measurement_list_fle)
  226. def get_STG_ReportTag(self):
  227. current_fle_path = self.get_last_measurement_list_path()
  228. if "." in current_fle_path.name:
  229. return current_fle_path.name.split(".")[0]
  230. else:
  231. return current_fle_path.name
  232. def get_STG_OdorInfoPath(self):
  233. current_fle_path = self.get_last_measurement_list_path()
  234. return current_fle_path.parent
  235. def update_metadata_of_measurement(self, measu: int, metadata2update: dict):
  236. """
  237. In the row for the measurement <measu>, replaces values of those cells whose names are keys
  238. in <meatadata2update> with corresponding dictionary values
  239. :param measu: int
  240. :param metadata2update: dict, whose keys are strings
  241. """
  242. measu_row_ind = self.get_row_index_by_column_value(column_name="Measu", column_value=measu)
  243. measu_index_value = self.measurement_list_df.index.values[measu_row_ind]
  244. for column_name, column_value in metadata2update.items():
  245. if column_name in self.measurement_list_df.columns:
  246. self.measurement_list_df.loc[measu_index_value, column_name] = column_value
  247. def get_value(self, measu, column):
  248. """
  249. Returns the value in the column <column> for the row with measu <measu>
  250. :param measu: int
  251. :param column: str
  252. """
  253. measu_row_ind = self.get_row_index_by_column_value(column_name="Measu", column_value=measu)
  254. measu_index_value = self.measurement_list_df.index.values[measu_row_ind]
  255. return self.measurement_list_df.loc[measu_index_value, column]
  256. def update_from_custom_func(self, custom_func, **kwargs):
  257. """
  258. update measurement list using custom function. This function is expected to take on argument: one row of the
  259. measurement list as a pandas.Series object and return a pandas.Series object. Ideally the index of the returned
  260. pandas.Series object must be a superset of the index of the input pandas.Series. In that case, the net effect
  261. would be to modify some columns and add other to the measurement list, as defined in <custom_func>.
  262. :param custom_func: a function, taking one pandas.Series object and returning a pandas.Series object
  263. :param kwargs: dictionary, whose key-value pairs will be passed onto 'custom_func' as arguments
  264. :return: None
  265. """
  266. self.measurement_list_df = self.measurement_list_df.apply(custom_func, axis=1,
  267. **kwargs)
  268. def check_set_analyze(self, loading_criterion_pass, index):
  269. """
  270. If <loading_criterion_pass> is false, sets unconditionally "Analyze" for the row at <index> to 0.
  271. If <loading_criterion_pass> is true, sets "Analyze" for the the row at <index> to 1 only if the column
  272. "Analyze" does not exist or has been set to a negative value
  273. :param bool loading_criterion_pass: a criterion
  274. :param int index: row index
  275. """
  276. analyze_column_exists = "Analyze" in self.measurement_list_df.columns.values
  277. if loading_criterion_pass and not analyze_column_exists:
  278. self.measurement_list_df.loc[index, "Analyze"] = 1
  279. elif loading_criterion_pass and analyze_column_exists:
  280. current_analyze = self.measurement_list_df.loc[index, "Analyze"]
  281. if current_analyze < 0 or pd.isnull(current_analyze):
  282. self.measurement_list_df.loc[index, "Analyze"] = 1
  283. else:
  284. pass # do nothing
  285. elif not loading_criterion_pass:
  286. self.measurement_list_df.loc[index, "Analyze"] = 0
  287. else:
  288. pass # should not come here as all possible cases are covered above
  289. def sanitize(self, data_file_extensions, STG_Datapath=None, flags=None, make_paths_absolute=False):
  290. """
  291. For each, set analyze to zero if indicated data files or their possible alternatives don't exist
  292. :param flags: instance of view.python_core.flags.FlagsManager
  293. :param STG_Datapath: str, either flags are specified or this argument, retained for backward compatibility
  294. :param data_file_extensions: list of str, extensions of the data file expected.
  295. :param make_paths_absolute: bool, if True, all data file paths found on file system wil be expanded to their
  296. absolute paths
  297. """
  298. which_data_cols = ["DBB1"]
  299. minimum_requirements = self.get_minimum_columns_required()
  300. extra_cols = [x for x in ("dbb2", "dbb3") if x in minimum_requirements]
  301. for index, row in self.measurement_list_df.iterrows():
  302. this_which_data_cols = which_data_cols.copy()
  303. for col in extra_cols:
  304. if col in self.measurement_list_df.columns:
  305. if self.measurement_list_df.loc[index, col] != self._metadata_def_df.loc[col, "Default values"]:
  306. this_which_data_cols.append(col)
  307. existences = []
  308. warnings = {}
  309. for data_col in this_which_data_cols:
  310. if STG_Datapath is None and flags is not None:
  311. try:
  312. dbb = row[data_col]
  313. absolute_data_path = get_existing_raw_data_filename(flags=flags, dbb=dbb,
  314. extensions=data_file_extensions,
  315. )
  316. if make_paths_absolute:
  317. self.measurement_list_df.loc[index, data_col] = absolute_data_path
  318. existences.append(True)
  319. except FileNotFoundError as fnfe:
  320. logging.getLogger("VIEW").warning(str(fnfe))
  321. existences.append(False)
  322. elif flags is None and STG_Datapath is not None:
  323. expected_data_file = pl.Path(STG_Datapath) / f"{row[data_col]}{data_file_extensions}"
  324. existence = expected_data_file.is_file()
  325. if existence:
  326. if make_paths_absolute:
  327. self.measurement_list_df.loc[index, data_col] = expected_data_file
  328. else:
  329. warnings[data_col] = f"Expected file not found: {expected_data_file}"
  330. existences.append(existence)
  331. else:
  332. raise ValueError("This function has invalid arguments. Exactly one among STG_Datapath and"
  333. "flags need to be set and the other not set or set to None")
  334. self.check_set_analyze(loading_criterion_pass=all(existences), index=index)
  335. if not all(existences):
  336. logging.getLogger("VIEW").warning(
  337. f"Some expected data files were not found for the measurement with "
  338. f"measu={self.measurement_list_df.loc[index, 'Measu']} and "
  339. f"label={self.measurement_list_df.loc[index, 'Label']}. 'Analyze' for this row "
  340. f"has been set to 0. I looked for:\n"
  341. f"{pprint.pformat(warnings)}")
  342. def sanitize_based_on_loading(self, flags):
  343. """
  344. Try to load measurement of each row, if unsuccessful, set Analyze=0
  345. :param FlagsManager flags:
  346. """
  347. for index, row in self.measurement_list_df.iterrows():
  348. measu = row["Measu"]
  349. try:
  350. self.load_data(flags=flags, measu=measu)
  351. self.check_set_analyze(loading_criterion_pass=True, index=index)
  352. except (IOError, FileNotFoundError, AssertionError, ValueError):
  353. self.measurement_list_df.loc[index, "Analyze"] = 0
  354. def append(self, measurement_list, label_suffix: str = None):
  355. """
  356. Creates a new measurement label by appending <measurement_list> to self. Measus are updated to avoid
  357. duplication. If <label_suffix> is not None, it is appended to each entry in the column "Label"
  358. of <measurement_list> before appending it to self.
  359. :param measurement_list: MeasurementList object, to be appended
  360. :param label_suffix: str, see function description
  361. :return: MeasurementList Object
  362. """
  363. if self.LE_loadExp == measurement_list.LE_loadExp:
  364. new_ml = MeasurementList(self.LE_loadExp)
  365. else:
  366. new_ml = MeasurementList(None)
  367. max_measu_current_ml = self.measurement_list_df["Measu"].max()
  368. incoming_ml = measurement_list.measurement_list_df
  369. incoming_ml.loc[:, "Measu"] = np.arange(incoming_ml.shape[0]) + max_measu_current_ml + 1
  370. incoming_ml.loc[:, "Label"] = [f"{x}{label_suffix}" for x in incoming_ml["Label"].values]
  371. new_ml.measurement_list_df = self.measurement_list_df.append(incoming_ml, ignore_index=True)
  372. return new_ml
  373. def load_data(self, flags, measu):
  374. p1_metadata, extra_metadata = self.get_p1_metadata_by_measu(measu)
  375. p1 = get_p1(p1_metadata=p1_metadata, flags=flags, extra_metadata=extra_metadata)
  376. return flags.get_measurement_label(measurement_row=self.get_row_by_measu(measu)), p1
  377. def get_animal_name_from_list_file(list_file):
  378. io_class, relevant_column, ext = get_ext_based_values(list_file)
  379. return pl.Path(list_file).stem.rstrip(ext)