123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- # ------------------------------------------------------------------ #
- # merge the single threaded hdf5 files (many repetitions) into one
- # ------------------------------------------------------------------ #
- import os
- import sys
- import fcntl
- import h5py
- import numpy as np
- import matplotlib.pyplot as plt
- # import mrestimator as mre # v0.1.6b4
- from itertools import product
- from time import gmtime, strftime
- from tqdm import tqdm
# Set the working directory to the location of this script so that the
# relative paths below resolve correctly. abspath() is required: when the
# script is launched as `python script.py` from its own directory,
# os.path.dirname(__file__) is "" and os.chdir("") raises FileNotFoundError.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# location of the single-run hdf5 files and of the merged output files
path_base = "../dat"
numfiles = 17000  # input files are named 1.hdf5 .. 17000.hdf5
numreps = 100     # repetitions per (tau, length) combination
# target autocorrelation times, log-spaced from 10 to 1000
targettau = np.logspace(1, 3, 5)
# trial lengths in multiples of tau, with the very short lengths 1..9 prepended
targetlength = np.logspace(1, 4, 25)
targetlength = np.insert(targetlength, 0, [1, 2, 3, 4, 5, 6, 7, 8, 9])
# bookkeeping: files_processed[i-1] is set once file i has been merged
files_processed = np.full((numfiles), False, dtype=bool)
def h5_load(filename, dsetname, raise_ex=False):
    """Load dataset `dsetname` from the hdf5 file `filename`.

    Returns the dataset as an array, or as a scalar when the dataset is
    zero-dimensional. On any failure (missing file, missing dataset, ...)
    returns None, unless `raise_ex` is True in which case the exception
    is re-raised to the caller.
    """
    try:
        # The context manager guarantees the file handle is closed even when
        # reading the dataset raises. (The original called file.close() only
        # on the success path and leaked the handle e.g. on a KeyError for a
        # missing dataset name.)
        with h5py.File(filename, "r") as file:
            try:
                return file[dsetname][:]
            except ValueError:
                # zero-dimensional dataset: slicing fails, read the scalar
                return file[dsetname][()]
    except Exception as e:
        if raise_ex:
            raise e
        return None  # explicit: best-effort load failed
# Merge every single-run file into one target file per (method, target tau).
for method in ["sm", "ts"]:
    for tau_tar in tqdm(targettau):
        # keep 20*tau autocorrelation coefficients per trial
        k_max = int(20 * tau_tar)
        f_tar = h5py.File(path_base + f"/merged_{method}_tau_{tau_tar:.2}.hdf5", "a")
        try:
            # Metadata datasets. Creating them raises when the merged file
            # already exists from a previous (partial) run — in that case we
            # simply append to it. Catch only the creation error, not a bare
            # `except:` that would also swallow KeyboardInterrupt/SystemExit.
            try:
                # this seems broken due to integer division
                # f_tar.create_dataset("absolute_length", data=absolute_length)
                f_tar.create_dataset("relative_length", data=targetlength)
                f_tar.create_dataset("target_tau", data=tau_tar)
                f_tar.create_dataset("method", data=f"{method}")
            except (ValueError, RuntimeError):
                pass  # datasets already present, reuse them
            try:
                # reuse the existing 'data' dataset when appending
                dset = f_tar["data"]
            except KeyError:
                # first run for this target file: allocate the full cube,
                # NaN-filled so short trials leave a NaN-padded tail
                dset = f_tar.create_dataset(
                    "data",
                    (len(targetlength), numreps, k_max),
                    dtype="f",
                    fillvalue=np.nan,
                )
                # NOTE: attribute text reproduced byte-for-byte (incl. typo)
                # so re-runs do not rewrite existing files' metadata.
                description = "3-dimensional array where indices are assigend as follows:\n"
                description += "0 = length_index, check other arrays to retrieve absolute or relative length from the entry matching this index\n"
                description += "1 = repetition\n"
                description += "2 = rk_values, padded with NaN if trial was too short"
                dset.attrs["description"] = description

            for i in range(1, numfiles + 1):
                file_name = f"{path_base}/{i}.hdf5"
                try:
                    tau = h5_load(file_name, "meta/tau", raise_ex=True)
                except Exception:
                    # file missing or unreadable; it will show up in the
                    # "Missing" report at the end
                    continue
                if tau != tau_tar:
                    continue
                num_steps = h5_load(file_name, "meta/numsteps")
                # Guard the lookup: the original's [0][0] raised IndexError
                # when num_steps matched no target length.
                matches = np.where(targetlength.astype("i") == int(num_steps))[0]
                if len(matches) == 0:
                    # unexpected trial length; leave the file marked
                    # unprocessed so it is reported below
                    continue
                src_length_index = matches[0]
                src_rep_index = int(h5_load(file_name, "meta/repetition"))
                src_rks = h5_load(file_name, f"rks/{method}")
                # rks may be shorter than 20*tau for very short trials;
                # the remainder of the row stays NaN (the fillvalue)
                dset[src_length_index, src_rep_index, 0 : len(src_rks)] = src_rks
                files_processed[i - 1] = True
        finally:
            # close the target file even if the merge loop above raised
            f_tar.close()

if files_processed.all():
    print("All files processed")
else:
    print("Missing:")
    print(np.where(files_processed == False))
|