# ------------------------------------------------------------------------------ #
# @Author:        F. Paul Spitzner
# @Email:         paul.spitzner@ds.mpg.de
# @Created:       2020-01-20 17:26:55
# @Last Modified: 2020-07-06 12:13:44
# ------------------------------------------------------------------------------ #
# load the merged hdf5 file that contains correlation coefficients
# for different tau and time series length. the fitting is done here.
# print an error for datasets that seem incorrect
# ------------------------------------------------------------------------------ #

import os
import sys
import fcntl
import h5py
import glob
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe
import mrestimator as mre  # v0.1.6b4
from itertools import product
from time import gmtime, strftime
from tqdm import tqdm
from humanize import naturalsize
import pickle

import utility as ut

# ------------------------------------------------------------------------------ #
# helper functions
# ------------------------------------------------------------------------------ #


def load_and_fit(h5pattern):
    """
    Load data from the hdf5 files matching a pattern,
    e.g. h5pattern = "./dat/merged_sm*.hdf5"

    Returns a dict with the fit results and original data.
    Fit results layout: tau_index, ts_length_index
    """
    print(f"Processing {h5pattern}")

    # get a file list
    h5files = glob.glob(os.path.expanduser(h5pattern))

    # number of target tau values matches file list
    path_dict = dict()
    for h5file in h5files:
        tau = ut.h5_load(h5file, "target_tau")
        path_dict[tau] = h5file

    # key = tau, then numpy arrays because the number of coefficients changes.
    # each array's layout: relative_ts_length * repetition * rk
    data_dict = dict()

    # sorted order by tau, just for good measure
    for tau in sorted(path_dict.keys()):
        h5file = path_dict[tau]
        data = ut.h5_load(h5file, "data")
        num_lens, num_reps, num_steps = data.shape
        data_dict[tau] = data
        len_vals = ut.h5_load(h5file, "relative_length")

    tau_vals = np.array(sorted(path_dict.keys()))
    num_taus = len(tau_vals)

    # check for dodgy data
    for tau in data_dict.keys():
        # we know that entries will be NaN when the ts length < tau / 2,
        # so do not look at the first few values of relative length
        min_len = int(len(len_vals) / 2)
        fishy = np.where(np.isnan(data_dict[tau][min_len:, :, :]))
        if len(fishy[0]) > 0:
            print(f"tau = {tau:.2f}, fishy repetitions:\n{np.unique(fishy[1])}")

    shape = (num_taus, num_lens, num_reps)
    fitres_e = np.ones(shape) * np.nan
    fitres_eo = np.ones(shape) * np.nan
    fitres_e_shift = np.ones(shape) * np.nan
    fitres_eo_shift = np.ones(shape) * np.nan

    # do the fits
    print("Fitting ...")
    for tdx, tau in enumerate(tqdm(tau_vals, leave=False)):
        for ldx, lens in enumerate(tqdm(len_vals, leave=False)):
            for rdx in range(num_reps):
                rk = data_dict[tau][ldx][rdx]
                steps = np.arange(1, len(rk) + 1)

                # avoid the NaNs
                valid = np.where(np.isfinite(rk))[0]
                steps = steps[valid]
                rk = rk[valid]

                try:
                    tmp = mre.fit(rk, steps=steps, fitfunc="e").tau
                except Exception:
                    tmp = np.nan
                fitres_e[tdx, ldx, rdx] = tmp

                try:
                    tmp = mre.fit(rk, steps=steps, fitfunc="eo").tau
                except Exception:
                    tmp = np.nan
                fitres_eo[tdx, ldx, rdx] = tmp

                # analytically motivated bias, not shown in the main figure
                tlen = int(lens * tau)
                bias = (1 / tlen) * (1 + np.exp(-1 / tau)) / (1 - np.exp(-1 / tau))
                rk = rk + bias

                try:
                    tmp = mre.fit(rk, steps=steps, fitfunc="e").tau
                except Exception:
                    tmp = np.nan
                fitres_e_shift[tdx, ldx, rdx] = tmp

                try:
                    tmp = mre.fit(rk, steps=steps, fitfunc="eo").tau
                except Exception:
                    tmp = np.nan
                fitres_eo_shift[tdx, ldx, rdx] = tmp
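    # collapse the repetition axis: NaN-aware median, mean, and standard deviation
    # for every (tau, relative length) combination, so that failed fits (NaN) are skipped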
    res = dict()
    res["tau_vals"] = tau_vals
    res["len_vals"] = len_vals
    res["raw_data"] = data_dict
    res["medi_e"] = np.nanmedian(fitres_e, axis=2)
    res["mean_e"] = np.nanmean(fitres_e, axis=2)
    res["stdd_e"] = np.nanstd(fitres_e, axis=2)
    res["medi_eo"] = np.nanmedian(fitres_eo, axis=2)
    res["mean_eo"] = np.nanmean(fitres_eo, axis=2)
    res["stdd_eo"] = np.nanstd(fitres_eo, axis=2)
    res["medi_e_shift"] = np.nanmedian(fitres_e_shift, axis=2)
    res["mean_e_shift"] = np.nanmean(fitres_e_shift, axis=2)
    res["stdd_e_shift"] = np.nanstd(fitres_e_shift, axis=2)
    res["medi_eo_shift"] = np.nanmedian(fitres_eo_shift, axis=2)
    res["mean_eo_shift"] = np.nanmean(fitres_eo_shift, axis=2)
    res["stdd_eo_shift"] = np.nanstd(fitres_eo_shift, axis=2)

    # keep all the fits so we can debug later
    res["fits_e"] = fitres_e
    res["fits_eo"] = fitres_eo
    res["fits_e_shift"] = fitres_e_shift
    res["fits_eo_shift"] = fitres_eo_shift

    return res


# Marriott and Pope (1954), Eq. 407
def rk_analytic_407(k_max, m, numsteps):
    k_range = np.arange(1, int(k_max + 1))
    geom = np.zeros(len(k_range))
    for idx, k in enumerate(k_range):
        geom[idx] = np.sum(np.power(m, k_range[:k] - 1))
    res = np.power(m, k_range) - 1.0 / numsteps * (
        (1 + m) * geom - 2 * k_range * np.power(m, k_range) / numsteps
    )
    return res


# Marriott and Pope (1954), Eq. 408
def rk_analytic_408(k_max, m, numsteps):
    k_range = np.arange(1, int(k_max + 1))
    return np.power(m, k_range) - 2 * k_range * np.power(m, k_range) / numsteps


# plot the coefficients, rk vs k
def plot_coefficients(ax, data, rel_len_idx=0, **kwargs):
    # data = data_dict[tau]
    # fig, ax = plt.subplots()
    # print(f"relative length: {len_vals[rel_len_idx]} x tau")
    steps = np.arange(1, data.shape[2] + 1)
    for i in range(data.shape[1]):
        ax.plot(steps, data[rel_len_idx, i, :], **kwargs)
    ax.set_xlim(1, steps[-1])
    ax.set_xlabel(r"Steps $k$")
    ax.set_ylabel(r"Coefficients $r_k$")


# ------------------------------------------------------------------------------ #
# main script
# ------------------------------------------------------------------------------ #

mre.ut._logstreamhandler.setLevel("ERROR")
mre.ut._logfilehandler.setLevel("ERROR")

# set directory to the location of this script file to use relative paths
os.chdir(os.path.dirname(__file__))

load_pickled = False
if not load_pickled:
    # load, analyze, save as pickle
    sm = load_and_fit("../dat/merged_sm*.hdf5")
    ts = load_and_fit("../dat/merged_ts*.hdf5")
    with open("../dat/sm_fits.pickle", "wb") as handle:
        pickle.dump(sm, handle)
    with open("../dat/ts_fits.pickle", "wb") as handle:
        pickle.dump(ts, handle)
else:
    # load from pickle
    with open("../dat/ts_fits.pickle", "rb") as handle:
        ts = pickle.load(handle)
    with open("../dat/sm_fits.pickle", "rb") as handle:
        sm = pickle.load(handle)

fig, ax = plt.subplots(figsize=(3.5, 2.55), dpi=300)
# ax.clear()

cdx = 0  # color index
for tdx in [0, 2, 4]:
    tau = ts["tau_vals"][tdx]

    # ts default
    ax.plot(
        ts["len_vals"],
        ts["medi_eo"][tdx] / tau,
        label=f"tau={tau}",
        color=f"C{cdx}",
        ls="-",
        alpha=1.0,
    )

    # sm default
    ax.plot(
        sm["len_vals"],
        sm["medi_eo"][tdx] / tau,
        label=None,
        color=f"C{cdx}",
        ls="--",
        alpha=1.0,
    )

    # errors
    if tdx <= 4:
        ut.plot_stdd(
            ax,
            ts["len_vals"],
            ts["medi_eo"][tdx] / tau,
            ts["stdd_eo"][tdx] / tau,
            mode="bar",
            color=f"C{cdx}",
        )
        ut.plot_stdd(
            ax,
            sm["len_vals"],
            sm["medi_eo"][tdx] / tau,
            sm["stdd_eo"][tdx] / tau,
            mode="bar",
            eb_ls="--",
            color=f"C{cdx}",
        )
        print(sm["medi_eo"][tdx] / tau)
        print(sm["stdd_eo"][tdx] / tau)

    cdx += 1

# first-order approximation based on Marriott and Pope (1954), Eq. 407
targetlength = ts["len_vals"]  # in multiples of tau
ax.plot(targetlength, 1 / (1 + (4 / 1) * (1 / targetlength)), color="black", label=r"analytic")
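# A quick cross-check (sketch, not part of the original analysis): fitting the
# Marriott-Pope expectation from Eq. 407 directly with the same "eo" fitfunc
# should show a downward bias comparable to the analytic curve above.
# Uncomment to run; the tau and relative-length values below are arbitrary examples.
# for _tau in [10.0, 100.0]:
#     for _rel_len in [2, 5, 10, 50]:
#         _numsteps = int(_rel_len * _tau)
#         _k_max = int(min(20 * _tau, 0.5 * _numsteps))
#         _rk = rk_analytic_407(k_max=_k_max, m=np.exp(-1.0 / _tau), numsteps=_numsteps)
#         _fit = mre.fit(_rk, steps=np.arange(1, _k_max + 1), fitfunc="eo")
#         print(f"tau={_tau:6.1f}, T/tau={_rel_len:3d}: tau_hat/tau = {_fit.tau / _tau:.2f}")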
# ------------------------------------------------------------------------------ #
# main figure styling
# ------------------------------------------------------------------------------ #

ax.legend(loc=4)
ax.set_title(r"$A=1000$, no subsampling", fontname="Arial")
ax.set_yscale("log")
ax.set_xscale("log")
ax.set_xlabel(f"Relative trial length $\\,T/\\tau$", fontname="Arial")
ax.set_ylabel(f"Estimated timescale $\\,\\hat{{\\tau}}/\\tau$", fontname="Arial")
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["left"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ut.arrowed_spines(ax, ["bottom", "left"])
fig.tight_layout()
ax.set_xlim((1, None))
ax.set_ylim((0.1, 1.5))
fig.savefig("../fig/triallength.pdf", transparent=True)

# ------------------------------------------------------------------------------ #
# the TS shifting issue
# ------------------------------------------------------------------------------ #

tau = 100.0
rel_lens = [10, 2, 1]

fig, ax = plt.subplots(figsize=(3.5, 2.55), dpi=300)
ax.set_title("SM")
ax.set_rasterization_zorder(-1)  # we don't want 100 vector lines
for idx, i in enumerate(rel_lens):
    plot_coefficients(
        ax, sm["raw_data"][tau], rel_lens[idx], alpha=0.1, lw=0.5, color=f"C{idx}", zorder=-2
    )
    # y = rk_analytic_407(k_max=k_max, m=np.exp(-1.0 / tau), numsteps=rel_lens[idx] * tau)
    # ax.plot(x, y, color=f"C{idx}", ls="-")

x = np.arange(1, 2001)
y = np.exp(-x / tau)
ax.plot(x, y, color="black", zorder=1)

ax.set_xlim((1, None))
ax.set_xticks([1, 500, 1000, 1500, 2000])
ax.set_ylim((-0.325, 1.0))
fig.tight_layout()
fig.savefig("../fig/rk_sm.pdf", transparent=True)

fig, ax = plt.subplots(figsize=(3.5, 2.55), dpi=300)
ax.set_title("TS")
ax.set_rasterization_zorder(-1)  # we don't want 100 vector lines
for idx, i in enumerate(rel_lens):
    plot_coefficients(
        ax, ts["raw_data"][tau], rel_lens[idx], alpha=0.1, lw=0.5, color=f"C{idx}", zorder=-2
    )

    # Marriott and Pope solutions
    numsteps = int(ts["len_vals"][i] * tau)
    # 20*tau was the parameter of the simulation,
    # 0.5 * numsteps is the safety check implemented in the mrestimator toolbox
    k_max = int(np.clip(a=20 * tau, a_min=None, a_max=0.5 * numsteps))
    x = np.arange(1, k_max + 1)
    outline = [pe.Stroke(linewidth=3, foreground="white"), pe.Normal()]
    y = rk_analytic_407(k_max=k_max, m=np.exp(-1.0 / tau), numsteps=numsteps)
    ax.plot(
        x, y, color=f"C{idx}", ls="-", path_effects=outline, zorder=2,
        label=f"T/tau={ts['len_vals'][i]}",
    )

    # rk from Eq. 408 have the same bending as our sim data, but at different numsteps.
    # likely due to the 'known mean' not being zero.
    # numsteps = int(numsteps / 4)
    # y = rk_analytic_408(k_max=20*tau, m=np.exp(-1.0 / tau), numsteps=numsteps)
    # ax.plot(x, y[0:len(x)], color=f"C{idx}", ls="-", alpha=1)

x = np.arange(1, 2001)
y = np.exp(-x / tau)
ax.plot(x, y, color="black", zorder=2)

ax.legend(loc=1)
ax.set_xlim((1, None))
ax.set_ylim((-0.325, 1.0))
ax.set_xticks([1, 500, 1000, 1500, 2000])
fig.tight_layout()
fig.savefig("../fig/rk_ts.pdf", transparent=True)
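
# ------------------------------------------------------------------------------ #
# note: the pickled fit results written above can be reloaded in a separate
# session for further inspection, e.g. (minimal sketch, paths relative to the
# directory of this script):
#
#   import pickle
#   with open("../dat/ts_fits.pickle", "rb") as handle:
#       ts = pickle.load(handle)
#   print(ts["medi_eo"].shape)  # (num_taus, num_lens), median fitted tau
# ------------------------------------------------------------------------------ #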