doi
/
pyview
forked from ag_galizia/pyview


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
							# -*- coding: utf-8 -*-
"""
Program to read Till vision .log files
and write .settings.csv files

the program works like this (not all implemented yet):
- set flag settings that are default values
- read .log files and parse
- extract information that is in there

then, run Set_loca_Values_XXX, depending on reference_settings flag, e.g. 
    - get FURA measurement partners
    - set stimulus times
    - etc (all the personal information)
    - extract information from names

Since default flag settings are peculiar to every user,
and extracting information from names is too,
this program should be modified by everybody. 

Latest modification: 25.7.19 (Inga files)
Output is .lst.xls, into folder /Lists

"""

import scipy as sp
import datetime
import pandas as pd
import os
import tkinter as tk
from tkinter.filedialog import askopenfilenames
from tillvisionio.vws import  VWSDataManager
import pathlib as pl


flag_OneDirectoryUp = True #if True, settings file is saved one directory up
flag_intoListDirectory = True
flag_writeMovies    = False # not used any more
flag_outextension    = '.lst.xls'
dbb_withDir = True  # extract dbbXX.pst together with last directory, e.g. '190301_locust_ip16.pst/dbb6C.pst'


### set settings here!
reference_settings = 'Temp' # 'Or42b_GC6' 'Or22a_GC6'
## for Jibin: set values in function Set_Jibin_Values(lst_frame)


class OldFileHandlerBlank(object):

    def __init__(self):
        super().__init__()
        pass

    def backup(self):

        pass

    def write_old_values(self, df, columns):

        return df


class OldFileHandler(OldFileHandlerBlank):

    def __init__(self, lst_file):

        super().__init__()

        self.lst_fle = lst_file
        self.old_lst_df = pd.read_excel(self.lst_fle).reset_index()
        if "index" in self.old_lst_df.columns:
            del self.old_lst_df["index"]
        self.backup_path = None

    def backup(self):

        lst_fle_path = pl.Path(self.lst_fle)
        self.backup_path = lst_fle_path.with_suffix(f".{datetime.datetime.now().strftime('%Y%m%d')}.xls")
        self.old_lst_df.to_excel(str(self.backup_path))
        print("Output file existed! Old file saved as ")
        print(str(self.backup_path))

    def write_old_values(self, df, columns: list):

        if "Measu" not in columns:
            columns.append("Measu")

        mask = self.old_lst_df["Label"].apply(lambda x: x in df["Label"].values)

        old_df_subset = self.old_lst_df.loc[mask, columns].set_index("Measu")

        df_temp = df.set_index("Measu")

        combined_df = df_temp.combine(old_df_subset, func=lambda s1, s2: s2, overwrite=False)

        return combined_df.reset_index()


def get_old_file_handler(lst_file):

    if os.path.exists(lst_file):
        return OldFileHandler(lst_file)
    else:
        return OldFileHandlerBlank()


def Set_local_Values_Sercan(lst_frame):
    lst_frame['StimON']   = 20
    lst_frame['StimLen']  = 1000 
    lst_frame['Stim2ON']  = 60
    lst_frame['Stim2Len'] = 1000 
    lst_frame['Comment'] = 'log2lst_Sercan_CalciumGreen'

    lst_frame['Line']      = 'locust'
    lst_frame['Age']       = '-1'
    lst_frame['Sex']       = 'o'

    #!!odor and concentration extracted from label!!
    return lst_frame


def get_default_line():
    ################################################
    ###   default values    ########################
    ################################################

    # if there is a function such as Set_Jibin_Values, it will overwrite some of these
    default_comment = 'log2lst'
    default_line = 'locust'
    default_PxSzX = '1.5625'  # um per pixel, for 50x air objective, measured by Hanna Schnell July 2017, with a binning of 8
    default_PxSzY = '1.5625'
    default_age = '-1'
    default_sex = 'o'
    default_odor = 'unknown'
    default_NOConc = '0'
    default_setting = 'setting'
    default_prefer = '1'  # which area from KNIME to plot
    default_slvFlip = '0'

    ################################################

    # copy the default ones, some will be overwritten later

    lst_line = pd.Series()

    lst_line['Odour'] = default_odor
    lst_line['OConc'] = default_NOConc
    lst_line['setting'] = default_setting
    lst_line['Comment'] = default_comment
    lst_line['Line'] = default_line
    lst_line['PxSzX'] = default_PxSzX
    lst_line['PxSzY'] = default_PxSzY
    lst_line['Age'] = default_age
    lst_line['Sex'] = default_sex
    lst_line['Prefer'] = default_prefer
    lst_line["slvFlip"] = default_slvFlip

    return lst_line


def parse_label(label):

    
    if reference_settings == "Temp":
        info = {}
        info["Odour"] = 'not set'
        info["OConc"] = 'not set'
    else:
        parts = label.split("_")
        info = {}
        info["Odour"] = parts[1]
        info["OConc"] = int(parts[2])

    return info


def convert_vws_names_to_lst_names(vws_measurement_series, default_line):
    """
    Convert values from vws.log nomenclaure to .lst nomenclature
    :param vws_measurement_series: pandas.Series
    :return: pandas.series
    """
    lst_line = default_line.copy()
    lst_line['Animal']    = vws_measurement_series["Label"]
    lst_line['Measu']     = vws_measurement_series['index'] + 1
    lst_line['Label']     = vws_measurement_series['Label']
    lst_line['DBB1']      = vws_measurement_series["Location"]
    lst_line['Cycle']     = vws_measurement_series["dt"]
    lst_line['Lambda']    = vws_measurement_series['MonochromatorWL_nm']
    lst_line['UTC']       = vws_measurement_series['UTCTime']
    lst_line['StartTime'] = vws_measurement_series['StartTime']
    lst_line["Analyze"] = vws_measurement_series["Analyze"]

    #all others, just to not have empty columns
    lst_line['dbb2']    = 'none'
    lst_line['dbb3']    = 'none'
    lst_line['MTime']    = 0
    lst_line['Countl']    = 0
    lst_line['Control']    = 0
    lst_line['StimISI']    = 0
    lst_line['PhConc']    = 0
    lst_line['PhTime']    = 0
    lst_line['Pharma']    = "no_pharma"
    lst_line['PosZ']    = 0
    lst_line['ShiftX']    = 0
    lst_line['ShiftY']    = 0
    lst_line['Stim2OFF']    = -1
    lst_line['Stim2ON']    = -1
    lst_line['StimOFF']    = -1
    lst_line['StimON']    = -1

    label_info = parse_label(vws_measurement_series["Label"])
    lst_line["Odour"] = label_info["Odour"]
    lst_line["OConc"] = label_info["OConc"]

    return lst_line

#####################################
## DEFINE REFERENCE ODOR AND TIME!!!!
#####################################


def log2settings(in_logFile, out_trunc, animal):
    """ converts a till photonics .vws.log into a lst. 
    Function is not split into
    read and write because that is never needed 
    Many columns may not be needed, but are still defined for compatibility fear
    """

    # # some constants and declarations
    # block_starts = []
    # block_ends   = []
    # block_names  = []
    # flag_oldvalues = False
    # in the future: replace with searching <end of info>
    lst_labels = ["Measu","Label","Odour","DBB1", "Cycle","MTime","OConc","Control",
                  "StimON","StimOFF","Pharma","PhTime","PhConc","Comment","ShiftX","ShiftY","StimISI",
                  "setting","dbb2","dbb3","PxSzX","PxSzY","PosZ","Lambda","Countl", "slvFlip", "Stim2ON","Stim2OFF",
                  "Age", "Analyze","UTC", "Animal"]
    # last_time = 0
    # if a settings file already exists, make a backup

  
#     # read log and parse
#     with open(in_logFile, 'r') as fh:
#         lines = [line.strip() for line in fh.readlines()]
#     for i,line in enumerate(lines):
#         match = re.search('^\[(.*)\]',line) # looks for lines with []
#         if match:
#             block_starts.append(i)
#             block_names.append(match.group(1))
#         match = re.search('end of info',line) # looks for lines with end of info
#         if match:
#             block_ends.append(i)
#
# # make a list of those blocks that follow the naming conventions
#     valid_blocks = []
#     # remove blocks that have in the name "Snapshot" or "Delta"
#     # or that do not have any "_"
#     # 'Fluo340nm_00' would pass
#     for i,name in enumerate(block_names):
#         if name.count('Snapshot') > 0 \
#             or name.count('Delta') > 0 \
#             or name.count('_') < 1:
#             pass
#         else:
#             valid_blocks.append([i,block_starts[i],name,block_ends[i]])
# # make a list for what is written in each block, with label. The log file has lines such as Date: 04/04/18
#             # and this will move into e.g. {'Date ': ' 04/04/18'}
#     Measurements = []
#     for i,block_info in enumerate(valid_blocks):
#         Measurements.append({'index':block_info[0],'label':block_info[2]})
#         block = lines[block_info[1]+1:block_info[3]] # all the lines that belong to this block
#         for line in block:
#             line_split = line.split(':')
#             if len(line_split) == 2:
#                 key,value = line_split
#             if len(line_split) > 2:
#                 key = line_split[0]
#                 value = ':'.join(line_split[1:]).strip()
#             Measurements[i][key] = value
#
#     lst_frame  = pd.DataFrame(columns=lst_labels)
#     for Measurement in Measurements:  # Measurement = Measurements[0]
#         lst_line  = pd.DataFrame(columns=lst_labels) #one line for ease of writing
#
#         # analyze label given in TILL for odor and concentration - not used in Jibin
# #        label_split = Measurement['label'].split('_')
# #        Odour = default_odor
# #        NOConc = default_NOConc
# #        setting = default_setting
# #        feedback = True
# #        if len(label_split) == 4: ## e.g. GC_06_ETBE_-2
# #            tmp, setting, Odour, NOConc = label_split
# #            setting = tmp + '_' + setting #reassemble
# #            feedback = False
# #        if len(label_split) == 3: ## e.g. GC06_ETBE_-2
# #            setting,Odour,NOConc = label_split
# #            feedback = False
# #        if len(label_split) == 2: ## eg. ETBE_-4
# #            Odour,NOConc = label_split
# #            feedback = False
# #        if feedback:
# #            print()
# #            print("names in TILL should be of the type xxx_odor_conc, e.g. GC00_ETBE_-2")
# #            print("or ETBE_-2 or GC_00_ETBE_-6")
# #            print()
#
#         # time & analyze
#         try:
#             times = Measurement['timing [ms]'].strip()
#             times = sp.array(times.split(' '),dtype='float64')
#             # calculate frame rate as time of (last frame - first frame) / (frames-1)
#             dt = str((times[-1]-times[0])/(len(times)-1))
#             analyze = '1' # since there are at least two frames, and thus a time, I suppose it is worth analyzing
#         except:
#             dt = '-1'
#             analyze = '0'
#
#         # location, check if pst, else -1
#         ext = os.path.splitext(Measurement['Location'])[1]
#         if ext != '.pst':
#             Location = '-1'
#         else:
#             #tanimal = os.path.splitext(os.path.splitext(os.path.basename(fname))[0])[0] # tanimal
#             dbb = os.path.splitext(Measurement['Location'].split('\\')[-1])[0] # dbb wo pst
#             # Location = '\\'.join([tanimal + '.pst',dbb])  #use this line to add the animal's .pst directory
#             Location = dbb #for \\Jibin\\JJ_01_C_F_180404a.pst\\dbbBA.pst, return 'dbbBA'
# #    "MTime","Control","StimON","StimOFF","Pharma","PhTime","PhConc","ShiftX","ShiftY","StimISI","dbb2","dbb3","PosZ","Countl","slvFlip","Stim2ON","Stim2OFF",]

    def measurement_filter(s):

        # remove blocks that have in the name "Snapshot" or "Delta"
        # or that do not have any "_"
        name = s["Label"]
        label_not_okay = name.count('Snapshot') > 0 or name.count('Delta') > 0 or name.count('_') < 1
        label_okay = not label_not_okay

        atleast_two_frames = False
        if type(s["Timing_ms"]) is str:
            if len(s["Timing_ms"].split(' ')) >= 2:
                atleast_two_frames = True

        return label_okay and atleast_two_frames

    def additional_cols(s):

        # time & analyze
        try:
            times = s['Timing_ms'].strip()
            times = sp.array(times.split(' '), dtype='float64')
            # calculate frame rate as time of (last frame - first frame) / (frames-1)
            dt = str((times[-1]-times[0])/(len(times)-1))
            analyze = '1' # since there are at least two frames, and thus a time, I suppose it is worth analyzing
        except Exception as e:
            dt = '-1'
            analyze = '0'

        # location, check if pst, else -1
        ext = os.path.splitext(s['Location'])[1]
        if ext != '.pst':
            Location = '-1'
        else:
            # for "....\\Jibin\\JJ_01_C_F_180404a.pst\\dbbBA.pst", return 'JJ_01_C_F_180404a.pst\\dbbBA.pst'
            dbb = pl.PureWindowsPath(s["Location"])
            if dbb_withDir:
                Location = str(pl.Path(dbb.parts[-2]) / dbb.stem)

                out_trunc_path = pl.Path(out_trunc)
                expected_path = out_trunc_path.parent.parent / "Data" / pl.Path(dbb.parts[-2]) / dbb.name
                if os.path.isfile(str(expected_path)):
                    print(f"File found at expected location: {str(expected_path)}")
                else:
                    print(f"File NOT found at expected location: {str(expected_path)}")

            else:
                Location = dbb.stem

        return {"dt": dt, "Analyze": analyze, "Location": Location}

    vws_manager = VWSDataManager(in_logFile)
    measurements = vws_manager.get_all_metadata(filter=measurement_filter, additional_cols_func=additional_cols)

    this_lst_frame = pd.DataFrame()

    for measurement_index, measurement_row in measurements.iterrows():

        lst_line = convert_vws_names_to_lst_names(vws_measurement_series=measurement_row,
                                                  default_line=get_default_line())
        this_lst_frame = this_lst_frame.append(lst_line, ignore_index=True)


    labels_not_initialzed = set(lst_labels) - set(this_lst_frame)
    assert labels_not_initialzed == set(), f"Some required columns were not initialized:\n{labels_not_initialzed}"

    #     # if .settings existed before, maybe some entries were already given
    #     # this assumes that the rows are the same!
    #     # better: find row where old_lst_df.label == label
    #     if flag_oldvalues:
    #         this_lst_df = old_lst_df.loc[old_lst_df.Label == lst_line['label'],:] #till uses "label", I use "Label"
    #         lst_line.loc[index,'Line']    = this_lst_df.iloc[0]["Line"]
    #         # alternative, in one code line: line = old_lst_df.loc[old_lst_df.Label == Label].iloc[0]['line']
    #         lst_line.loc[index,'PxSzX']   = this_lst_df.iloc[0]["PxSzX"]
    #         lst_line.loc[index,'PxSzY']   = this_lst_df.iloc[0]["PxSzY"]
    #         lst_line.loc[index,'Age']     = this_lst_df.iloc[0]["Age"]
    #         lst_line.loc[index,'Sex']     = this_lst_df.iloc[0]["Sex"]
    #         lst_line.loc[index,'Prefer']  = this_lst_df.iloc[0]["Prefer"]
    #         lst_line.loc[index,'Comment'] = this_lst_df.iloc[0]["Comment"]
    #         lst_line.loc[index,'Analyze'] = this_lst_df.iloc[0]["Analyze"]
    #         #get old entry, for age, sex, line, comment.PxSzY, PxSzX, odorshift
    #
    #     # now collect all this into a dataframe, and move to next
    #     lst_frame = lst_frame.append(lst_line)
    #


    return this_lst_frame
# end function log2settings
#now lst_frame contains all information
        

#######################################################################
# MAIN starts here 
#######################################################################
# Choose raw files
root = tk.Tk()
root.withdraw() # so that windows closes after file chosen 
root.attributes('-topmost', True)
filenames = askopenfilenames(
                parent=root,
                title='Select one or more Till .log files',
                filetypes=[('settings files', '*.log'), ('all files', '*')]
                ) # ask user to choose file
# i = 0 #for debugging       
for i in range(len(filenames)):
    in_logFile = filenames[i]
    if flag_OneDirectoryUp:
        #slowly. e.g. c:/me/myself/I/animaldir/animal.vws.log
        lst_trunc = os.path.splitext(os.path.splitext(in_logFile)[0])[0] #remove two extensions
        fn_tmp    = os.path.split(lst_trunc)[1] # name of the file (without extensions)
        dir_oneup = os.path.split(os.path.split(lst_trunc)[0])[0] #path to parent directory
        if flag_intoListDirectory:
            out_trunc = os.path.join(dir_oneup,'Lists', fn_tmp) #add filename to "Lists" folder in path
        else:
            out_trunc = os.path.join(dir_oneup, fn_tmp) #add filename             
        #result: 'c:/me/myself/I/animal'   extension, e.g. .lst.xls to be added
    else:
        out_trunc = os.path.splitext(os.path.splitext(in_logFile)[0])[0] # + '.settings' # removes both .vws and .log

    animal = os.path.basename(out_trunc)

    old_file_handler = get_old_file_handler(f"{out_trunc}{flag_outextension}")
    old_file_handler.backup()

    # read values from log file, into dataframe
    lst_frame = log2settings(in_logFile, out_trunc, animal)
    #'''''
    #Outside the measurements loop. Set values according to a list, depends on reference_settings
    #
    # modify this and calculate additional things, depending on reference_settings
    if reference_settings == 'Inga':
        lst_frame = Set_local_Values_Sercan(lst_frame)
#        lst_frame = CalcFuraFilms(lst_frame, out_trunc) # calculate and write a FURA film for each 340 in lst_frame

    lst_frame = old_file_handler.write_old_values(lst_frame, ["Line", "PxSzX", "PxSzY", "Age", "Sex", "Prefer",
                                                                   "Comment", "Analyze", "Odour", "OConc"])
    
    # write lst_frame
    # write dataframe version to .xls file
    print('writing to ',out_trunc,' and extension', flag_outextension)
    if flag_outextension == '.lst.xls':
        lst_frame.to_excel(out_trunc+'.lst.xls')  
    if flag_outextension == '.lst':
        lst_frame.to_csv(out_trunc+'.lst',sep='\t')
    
    # think about creating response overview image, and single-area glodatamix - copy from log2settings_valid
    # write_martin_glodatamix(os.path.splitext(lst_fname)[0], lst_trunc, mask)