123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- # ------------------------------------------------------------------ #
- # checking the dependence of estimated tau vs given tau as a
- # function of trial length.
- #
- # now using higher activity a=1000 without subsampling
- #
- # also saving the rk to hdf5. want to merge into one large file later
- # ------------------------------------------------------------------ #
- import os
- import numpy as np
- import h5py
- from itertools import product
- import sys
- # temp = sys.path[0]
- # sys.path[0]='/scratch/nst/projects/paul_toolbox/mrestimator/'
- # sys.path.append(temp)
- # v0.1.6b2
- import mrestimator as mre
- import matplotlib.pyplot as plt
- from time import gmtime, strftime
- import fcntl
- # writing to the same file from different threads
# ----------------------------- parameters ----------------------------- #
numtrials = 50
numboot = 0
numreps = 100
# target autocorrelation times of the branching process
targettau = np.logspace(1, 3, 5)
# trial lengths in multiples of tau; prepend the short lengths 1..9
targetlength = np.logspace(1, 4, 25)
targetlength = np.insert(targetlength, 0, [1, 2, 3, 4, 5, 6, 7, 8, 9])
# let's skip the complex fit here
fitfuncs = ["e", "eo"]
methods = ["ts", "sm"]
ranks = np.arange(numreps)

# set directory to the location of this script file to use relative paths.
# abspath() guards against dirname(__file__) == "" (script started from its
# own directory), where os.chdir("") would raise FileNotFoundError.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
path_base = "../dat"
# os.makedirs(path_base, exist_ok=True)

# one deterministic seed per parameter combination, counting up from `seed`.
# call with -t 1-17000 (5 taus * 34 lengths * 100 repetitions)
seed = 80000
arg_list = [
    args + (seed + offset,)
    for offset, args in enumerate(product(targettau, targetlength, ranks))
]
# quiet the mrestimator logging: errors only on both handlers, and no
# exception tracebacks in the stream output
for _handler in (mre.ut._logstreamhandler, mre.ut._logfilehandler):
    _handler.setLevel("ERROR")
mre.ut._logstreamhandler.setFormatter(
    mre.ut.CustomExceptionFormatter(
        "%(asctime)s %(levelname)8s: %(message)s",
        "%Y-%m-%d %H-%M-%S",
        force_disable_trace=True,
    )
)
# pick this job's parameter combination from the SGE task id
# (task ids are 1-based, arg_list is 0-based)
my_rank = int(sys.argv[1])
t, numsteps, rdx, my_seed = arg_list[my_rank - 1]

print(f"parameter combinations: {len(arg_list)}")
print(f"this is: {my_rank}")
print(f"writing to {path_base}/{my_rank}.hdf5")

# 1_method 2_fitfunc 3_targettau 4_targetlenth_in_units_of_tau 5_repetition 6_tau_measured 7_seed 8_SGE_TASK_ID
kmax = int(20 * t)  # compute coefficients out to 20 tau
steps = (1, kmax)
dist = 1
print(
    f"\ttau: {t:.2e}, kmax: {kmax:.2e}, dist: {dist:.2e}, numsteps: {numsteps:.2e} [times tau]"
)
timestamp = strftime("%Y-%m-%d %H-%M-%S", gmtime())
print(f"Repetition: {rdx}/{numreps} {timestamp}")
print("branching process ...")

numsteps = int(numsteps)
# branching parameter m = exp(-1/tau) gives autocorrelation time tau (in steps)
bpar = np.exp(-1.0 / t)
bp = mre.simulate_branching(
    bpar, a=1000, seed=my_seed, numtrials=numtrials, length=int(numsteps * t)
)
# coefficients + fits for every method/fitfunc combination; everything goes
# into one hdf5 file per task, to be merged into one large file later
with h5py.File(path_base + f"/{my_rank}.hdf5", "w") as h5f:
    f_rk = h5f.create_group("rks")
    f_fits = h5f.create_group("fits")
    f_meta = h5f.create_group("meta")

    # metadata needed when merging the per-task files
    for key, value in [
        ("tau", t),
        ("length", int(numsteps * t)),
        ("numsteps", int(numsteps)),
        ("repetition", rdx),
    ]:
        f_meta.create_dataset(key, data=value)

    for mdx, method in enumerate(methods):
        print(f"coefficients for {method} ...")
        rk = mre.coefficients(
            bp, steps=steps, numboot=numboot, dt=1, dtunit="steps", method=method
        )

        print("writing coefficients ...")
        f_rk.create_dataset(method, data=rk.coefficients, compression="gzip")
        # the step values are the same for every method, store them once
        if mdx == 0:
            f_rk.create_dataset("steps", data=rk.steps, compression="gzip")

        print("fitting ...")
        for fdx, fitfunc in enumerate(fitfuncs):
            f_group = f_fits.create_group(f"{method}_{fitfunc}")
            try:
                fitres = mre.fit(rk, numboot=numboot, fitfunc=fitfunc)
            except Exception:
                # placeholder result so the hdf5 layout stays uniform
                fitres = mre.FitResult(np.nan, np.nan, fitfunc)
            f_group.create_dataset("mdx", data=mdx)
            f_group.create_dataset("fdx", data=fdx)
            f_group.create_dataset("target_tau", data=t)
            f_group.create_dataset("numsteps", data=numsteps)
            f_group.create_dataset("rdx", data=rdx)
            f_group.create_dataset("fitres_tau", data=fitres.tau)
            f_group.create_dataset("my_seed", data=my_seed)
            f_group.create_dataset("my_rank", data=my_rank)

print(f'{strftime("%Y-%m-%d %H-%M-%S", gmtime())} All done')
|