123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- # ------------------------------------------------------------------ #
- # checking the dependence of estimated tau vs given tau as a
- # function of trial length.
- #
- # now using higher activity a=1000 without subsampling
- #
- # also saving the rk to hdf5. want to merge into one large file later
- # ------------------------------------------------------------------ #
- import os
- import numpy as np
- import h5py
- from itertools import product
- import sys
- # temp = sys.path[0]
- # sys.path[0]='/scratch/nst/projects/paul_toolbox/mrestimator/'
- # sys.path.append(temp)
- # v0.1.6b2
- import mrestimator as mre
- import matplotlib.pyplot as plt
- from time import gmtime, strftime
- import fcntl
- # writing to the same file from different threads
# ----------------------------- parameters ----------------------------- #
numtrials = 50
numboot = 0
numreps = 100
# target autocorrelation times of the branching process
targettau = np.logspace(1, 3, 5)
# trial lengths in multiples of tau; prepend the short lengths 1..9
targetlength = np.logspace(1, 4, 25)
targetlength = np.insert(targetlength, 0, [1, 2, 3, 4, 5, 6, 7, 8, 9])
# let's skip the complex fit here
fitfuncs = ["e", "eo"]
methods = ["ts", "sm"]
ranks = np.arange(numreps)

# set directory to the location of this script file to use relative paths.
# abspath() guards against dirname(__file__) == "" (script started from its
# own directory), where os.chdir("") would raise FileNotFoundError.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
path_base = "../dat"
# os.makedirs(path_base, exist_ok=True)

# one deterministic seed per parameter combination, counting up from `seed`.
# call with -t 1-17000 (5 taus * 34 lengths * 100 repetitions)
seed = 80000
arg_list = [
    args + (seed + offset,)
    for offset, args in enumerate(product(targettau, targetlength, ranks))
]
# quiet the mrestimator logging: errors only on both handlers, and no
# exception tracebacks in the stream output
for _handler in (mre.ut._logstreamhandler, mre.ut._logfilehandler):
    _handler.setLevel("ERROR")
mre.ut._logstreamhandler.setFormatter(
    mre.ut.CustomExceptionFormatter(
        "%(asctime)s %(levelname)8s: %(message)s",
        "%Y-%m-%d %H-%M-%S",
        force_disable_trace=True,
    )
)
# pick this job's parameter combination from the SGE task id
# (task ids are 1-based, arg_list is 0-based)
my_rank = int(sys.argv[1])
t, numsteps, rdx, my_seed = arg_list[my_rank - 1]

print(f"parameter combinations: {len(arg_list)}")
print(f"this is: {my_rank}")
print(f"writing to {path_base}/{my_rank}.hdf5")

# 1_method 2_fitfunc 3_targettau 4_targetlenth_in_units_of_tau 5_repetition 6_tau_measured 7_seed 8_SGE_TASK_ID
kmax = int(20 * t)  # compute coefficients out to 20 tau
steps = (1, kmax)
dist = 1
print(
    f"\ttau: {t:.2e}, kmax: {kmax:.2e}, dist: {dist:.2e}, numsteps: {numsteps:.2e} [times tau]"
)
timestamp = strftime("%Y-%m-%d %H-%M-%S", gmtime())
print(f"Repetition: {rdx}/{numreps} {timestamp}")
print("branching process ...")

numsteps = int(numsteps)
# branching parameter m = exp(-1/tau) gives autocorrelation time tau (in steps)
bpar = np.exp(-1.0 / t)
bp = mre.simulate_branching(
    bpar, a=1000, seed=my_seed, numtrials=numtrials, length=int(numsteps * t)
)
# coefficients + fits for every method/fitfunc combination; everything goes
# into one hdf5 file per task, to be merged into one large file later
with h5py.File(path_base + f"/{my_rank}.hdf5", "w") as h5f:
    f_rk = h5f.create_group("rks")
    f_fits = h5f.create_group("fits")
    f_meta = h5f.create_group("meta")

    # metadata needed when merging the per-task files
    for key, value in [
        ("tau", t),
        ("length", int(numsteps * t)),
        ("numsteps", int(numsteps)),
        ("repetition", rdx),
    ]:
        f_meta.create_dataset(key, data=value)

    for mdx, method in enumerate(methods):
        print(f"coefficients for {method} ...")
        rk = mre.coefficients(
            bp, steps=steps, numboot=numboot, dt=1, dtunit="steps", method=method
        )

        print("writing coefficients ...")
        f_rk.create_dataset(method, data=rk.coefficients, compression="gzip")
        # the step values are the same for every method, store them once
        if mdx == 0:
            f_rk.create_dataset("steps", data=rk.steps, compression="gzip")

        print("fitting ...")
        for fdx, fitfunc in enumerate(fitfuncs):
            f_group = f_fits.create_group(f"{method}_{fitfunc}")
            try:
                fitres = mre.fit(rk, numboot=numboot, fitfunc=fitfunc)
            except Exception:
                # placeholder result so the hdf5 layout stays uniform
                fitres = mre.FitResult(np.nan, np.nan, fitfunc)
            f_group.create_dataset("mdx", data=mdx)
            f_group.create_dataset("fdx", data=fdx)
            f_group.create_dataset("target_tau", data=t)
            f_group.create_dataset("numsteps", data=numsteps)
            f_group.create_dataset("rdx", data=rdx)
            f_group.create_dataset("fitres_tau", data=fitres.tau)
            f_group.create_dataset("my_seed", data=my_seed)
            f_group.create_dataset("my_rank", data=my_rank)

print(f'{strftime("%Y-%m-%d %H-%M-%S", gmtime())} All done')
|