# ------------------------------------------------------------------ #
# merge the single threaded hdf5 files (many repetitions) into one
# ------------------------------------------------------------------ #

import os
import sys
import fcntl
import h5py
import numpy as np
import matplotlib.pyplot as plt

# import mrestimator as mre  # v0.1.6b4
from itertools import product
from time import gmtime, strftime
from tqdm import tqdm

# set directory to the location of this script file to use relative paths.
# abspath() guards against dirname(__file__) == "" (which makes chdir raise)
# when the script is launched from its own directory.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

path_base = "../dat"
numfiles = 17000
numreps = 100
targettau = np.logspace(1, 3, 5)
targetlength = np.logspace(1, 4, 25)  # in multiples of tau
targetlength = np.insert(targetlength, 0, [1, 2, 3, 4, 5, 6, 7, 8, 9])

# tracks which of the numbered source files were merged into some target file
files_processed = np.full((numfiles), False, dtype=bool)


def h5_load(filename, dsetname, raise_ex=False):
    """Load dataset `dsetname` from the hdf5 file `filename`.

    Returns the dataset content (array, or scalar for 0-d datasets).
    On failure returns None, unless `raise_ex` is True, in which case the
    original exception is re-raised.
    """
    try:
        # context manager guarantees the handle is closed even when the
        # dataset lookup fails (the original leaked the handle on error)
        with h5py.File(filename, "r") as file:
            try:
                res = file[dsetname][:]
            # maybe it is a scalar
            except ValueError:
                res = file[dsetname][()]
        return res
    except Exception as e:
        # print(f"failed to load {dsetname} from {filename}")
        if raise_ex:
            raise e
        return None


for method in ["sm", "ts"]:
    for tau_tar in tqdm(targettau):
        # keep autocorrelation coefficients r_k up to k = 20 * tau
        k_max = int(20 * tau_tar)

        # NOTE: "{tau_tar:.2}" formats to two significant digits
        # (e.g. 10.0 -> "1e+01"); kept unchanged so existing merged
        # files are still found and reused.
        with h5py.File(
            path_base + f"/merged_{method}_tau_{tau_tar:.2}.hdf5", "a"
        ) as f_tar:
            # metadata; create each dataset only if missing so the script can
            # be re-run on a partially merged file (the original's bare
            # try/except skipped the remaining datasets once one existed)
            # this seems broken due to integer division
            # f_tar.create_dataset("absolute_length", data=absolute_length)
            if "relative_length" not in f_tar:
                f_tar.create_dataset("relative_length", data=targetlength)
            if "target_tau" not in f_tar:
                f_tar.create_dataset("target_tau", data=tau_tar)
            if "method" not in f_tar:
                f_tar.create_dataset("method", data=f"{method}")

            if "data" in f_tar:
                # reusing existing dset 'data'
                dset = f_tar["data"]
            else:
                # rows padded with NaN so short trials are distinguishable
                dset = f_tar.create_dataset(
                    "data",
                    (len(targetlength), numreps, k_max),
                    dtype="f",
                    fillvalue=np.nan,
                )

            description = (
                "3-dimensional array where indices are assigned as follows:\n"
                "0 = length_index, check other arrays to retrieve absolute or "
                "relative length from the entry matching this index\n"
                "1 = repetition\n"
                "2 = rk_values, padded with NaN if trial was too short"
            )
            dset.attrs["description"] = description

            for i in range(1, numfiles + 1):
                file_name = f"{path_base}/{i}.hdf5"
                try:
                    tau = h5_load(file_name, "meta/tau", raise_ex=True)
                except Exception:
                    # print(f"Unable to open {file_name}")
                    continue
                # only merge files belonging to the current target tau.
                # presumably tau values are written exactly as taken from
                # targettau, so exact float comparison is safe here —
                # TODO confirm against the producer script
                if tau != tau_tar:
                    continue

                num_steps = h5_load(file_name, "meta/numsteps")
                # map this trial's length to a row of the merged array;
                # skip (instead of crashing with IndexError) if the length
                # is not one of the target lengths
                matches = np.where(targetlength.astype("i") == int(num_steps))[0]
                if len(matches) == 0:
                    continue
                src_length_index = matches[0]
                src_rep_index = int(h5_load(file_name, "meta/repetition"))
                src_rks = h5_load(file_name, f"rks/{method}")
                # print(src_rks)

                # write dset, keep in mind that rks might be shorter than
                # 20 tau due to super short trial lengths.
                dset[src_length_index, src_rep_index, 0 : len(src_rks)] = src_rks
                files_processed[i - 1] = True

if files_processed.all():
    print("All files processed")
else:
    print("Missing:")
    print(np.where(~files_processed))