io.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. import logging
  2. import pandas as pd
  3. import pathlib as pl
  4. class XLSIO(object):
  5. def __init__(self):
  6. super().__init__()
  7. @classmethod
  8. def revise_file_name(cls, fle):
  9. """
  10. Irrespective of whether the extension of fle is XLS or XLSX, tries to find an existing file with the same file
  11. prefix and one of the two suffixes. Raises an FileNotFoundError when neither an XLS nor an XLSX file is found.
  12. :param str|pl.Path fle:
  13. :return: pl.Path
  14. """
  15. possible_suffixes = [".xls", ".xlsx"]
  16. fle_path = pl.Path(fle)
  17. if not fle_path.is_file():
  18. possible_suffixes.remove(fle_path.suffix)
  19. possible_file = fle_path.with_suffix(possible_suffixes[0])
  20. if possible_file.is_file():
  21. fle = possible_file
  22. else:
  23. raise FileNotFoundError(f"Could not find {fle} or {possible_file}!")
  24. return pl.Path(fle)
  25. @classmethod
  26. def read(cls, fle, **kwargs):
  27. fle = cls.revise_file_name(fle)
  28. df = pd.read_excel(fle, **kwargs).reset_index()
  29. # the first column may be the index called "index". In that case remove it.
  30. if "index" in df.columns:
  31. del df["index"]
  32. return df
  33. @classmethod
  34. def write(cls, df: pd.DataFrame, fle, **kwargs):
  35. fle_path = pl.Path(fle)
  36. assert fle_path.suffix == ".xlsx", \
  37. "VIEW does not support writing measurement lists in XLS format. Please try again writing to XLSX format"
  38. df.to_excel(fle, **kwargs)
  39. class LSTIO(object):
  40. def __init__(self):
  41. super().__init__()
  42. @classmethod
  43. def read(cls, fle):
  44. # 'utf-8' codec, the default, cannot read the umlaute ä etc
  45. df = pd.read_csv(fle, sep="\t", encoding='latin-1', skipinitialspace=True)
  46. # and set all column names to lower case
  47. df.columns = [x.rstrip() for x in df.columns]
  48. return df
  49. @classmethod
  50. def write(cls, df: pd.DataFrame, fle):
  51. df.to_csv(fle, sep="\t")
  52. def get_format_specific_defs():
  53. """
  54. Returns a pandas DataFrame with the columns "IOclass", "relevant column" and "extension" which
  55. contain information about the IO interfaces of measurement list files.
  56. :return:
  57. """
  58. # the order of definition here is very important. It sets the hierarchy when looking for list files.
  59. df = pd.DataFrame(columns=["IOclass", "relevant_column", "extension"])
  60. df.loc["XLS LST format", :] = [XLSIO, "LST Name", ".lst.xls"]
  61. df.loc["XLSX LST format", :] = [XLSIO, "LST Name", ".lst.xlsx"]
  62. df.loc["Legacy Text LST format"] = [LSTIO, "LST Name", ".lst"]
  63. df.loc["XLS FID Settings format"] = [XLSIO, "Settings Name", ".settings.xls"]
  64. df.loc["XLSX FID Settings format"] = [XLSIO, "Settings Name", ".settings.xlsx"]
  65. return df
  66. def get_ext_based_values(lst_fle: str):
  67. io_defs = get_format_specific_defs()
  68. matches = []
  69. to_return = []
  70. for format_name, (io_class, relevant_column, ext) in io_defs.iterrows():
  71. if lst_fle.endswith(ext):
  72. matches.append(True)
  73. to_return.append((io_class, relevant_column, ext))
  74. else:
  75. matches.append(False)
  76. to_return.append(())
  77. if any(matches):
  78. return to_return[matches.index(True)]
  79. else:
  80. raise NotImplementedError(
  81. f"The specified measurement list ({lst_fle}) does not have a supported suffix."
  82. f"The supported suffixes are .lst, .lst.xls, .lst.xlsx, .settings.xls and .settings.xlsx. Sorry!")