# ------------------------------------------------------------------ #
# merge the single threaded hdf5 files (many repetitions) into one
# ------------------------------------------------------------------ #

import os
import sys
import fcntl
import h5py
import numpy as np
import matplotlib.pyplot as plt

# import mrestimator as mre  # v0.1.6b4
from itertools import product
from time import gmtime, strftime
from tqdm import tqdm

# set directory to the location of this script file to use relative paths.
# abspath() guards against dirname(__file__) == "" (which makes chdir raise)
# when the script is launched from its own directory.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

path_base = "../dat"
numfiles = 17000
numreps = 100
targettau = np.logspace(1, 3, 5)
targetlength = np.logspace(1, 4, 25)  # in multiples of tau
targetlength = np.insert(targetlength, 0, [1, 2, 3, 4, 5, 6, 7, 8, 9])

# tracks which of the numbered source files were merged into some target file
files_processed = np.full((numfiles), False, dtype=bool)


def h5_load(filename, dsetname, raise_ex=False):
    """Load dataset `dsetname` from the hdf5 file `filename`.

    Returns the dataset content (array, or scalar for 0-d datasets).
    On failure returns None, unless `raise_ex` is True, in which case the
    original exception is re-raised.
    """
    try:
        # context manager guarantees the handle is closed even when the
        # dataset lookup fails (the original leaked the handle on error)
        with h5py.File(filename, "r") as file:
            try:
                res = file[dsetname][:]
            # maybe it is a scalar
            except ValueError:
                res = file[dsetname][()]
        return res
    except Exception as e:
        # print(f"failed to load {dsetname} from {filename}")
        if raise_ex:
            raise e
        return None


for method in ["sm", "ts"]:
    for tau_tar in tqdm(targettau):
        # keep autocorrelation coefficients r_k up to k = 20 * tau
        k_max = int(20 * tau_tar)

        # NOTE: "{tau_tar:.2}" formats to two significant digits
        # (e.g. 10.0 -> "1e+01"); kept unchanged so existing merged
        # files are still found and reused.
        with h5py.File(
            path_base + f"/merged_{method}_tau_{tau_tar:.2}.hdf5", "a"
        ) as f_tar:
            # metadata; create each dataset only if missing so the script can
            # be re-run on a partially merged file (the original's bare
            # try/except skipped the remaining datasets once one existed)
            # this seems broken due to integer division
            # f_tar.create_dataset("absolute_length", data=absolute_length)
            if "relative_length" not in f_tar:
                f_tar.create_dataset("relative_length", data=targetlength)
            if "target_tau" not in f_tar:
                f_tar.create_dataset("target_tau", data=tau_tar)
            if "method" not in f_tar:
                f_tar.create_dataset("method", data=f"{method}")

            if "data" in f_tar:
                # reusing existing dset 'data'
                dset = f_tar["data"]
            else:
                # rows padded with NaN so short trials are distinguishable
                dset = f_tar.create_dataset(
                    "data",
                    (len(targetlength), numreps, k_max),
                    dtype="f",
                    fillvalue=np.nan,
                )

            description = (
                "3-dimensional array where indices are assigned as follows:\n"
                "0 = length_index, check other arrays to retrieve absolute or "
                "relative length from the entry matching this index\n"
                "1 = repetition\n"
                "2 = rk_values, padded with NaN if trial was too short"
            )
            dset.attrs["description"] = description

            for i in range(1, numfiles + 1):
                file_name = f"{path_base}/{i}.hdf5"
                try:
                    tau = h5_load(file_name, "meta/tau", raise_ex=True)
                except Exception:
                    # print(f"Unable to open {file_name}")
                    continue
                # only merge files belonging to the current target tau.
                # presumably tau values are written exactly as taken from
                # targettau, so exact float comparison is safe here —
                # TODO confirm against the producer script
                if tau != tau_tar:
                    continue

                num_steps = h5_load(file_name, "meta/numsteps")
                # map this trial's length to a row of the merged array;
                # skip (instead of crashing with IndexError) if the length
                # is not one of the target lengths
                matches = np.where(targetlength.astype("i") == int(num_steps))[0]
                if len(matches) == 0:
                    continue
                src_length_index = matches[0]
                src_rep_index = int(h5_load(file_name, "meta/repetition"))
                src_rks = h5_load(file_name, f"rks/{method}")
                # print(src_rks)

                # write dset, keep in mind that rks might be shorter than
                # 20 tau due to super short trial lengths.
                dset[src_length_index, src_rep_index, 0 : len(src_rks)] = src_rks
                files_processed[i - 1] = True

if files_processed.all():
    print("All files processed")
else:
    print("Missing:")
    print(np.where(~files_processed))