@@ -22,10 +22,11 @@ def create_single_table_meas(df_dict):
                           how='inner')
     return so_far
 
-def load_ts(files, archive):
+def load_ts(uid_files, archive):
+    uid, files = uid_files
     with ZipFile(archive, 'r') as z:
         tmp_list = []
-        for f in files:
+        for f in tqdm.tqdm(files, disable=(uid != 0)):
             # Retrieve index from filename
             ix = int(re.findall('[0-9]+', os.path.basename(f))[0])
             tmp = np.loadtxt(io.BytesIO(z.read(f)))
@@ -41,11 +42,12 @@ def single_dataframe(archive):
         ts = []
         # Get all the timeseries
         # We'll do this in parallel as loadtxt is rather slow
+        print('Linking timeseries')
         p = multiprocessing.Pool()
         ts_files = [f for f in z.namelist() if '.txt' in f]
         func = functools.partial(load_ts, archive=archive)
         n_cpu = multiprocessing.cpu_count()
-        splits = [ts_files[splt[0]:splt[-1]+1] for splt in np.array_split(np.arange(len(ts_files)), min(n_cpu, len(ts_files)))]
+        splits = [(uid, ts_files[splt[0]:splt[-1]+1]) for uid, splt in enumerate(np.array_split(np.arange(len(ts_files)), min(n_cpu, len(ts_files))))]
         ts = p.map(func, splits)
         p.close()
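
The pattern the diff introduces is: split the file list into one contiguous chunk per CPU, tag each chunk with a worker id, and silence tqdm on every worker except id 0 so only one progress bar is printed. Below is a minimal, self-contained sketch of that pattern, runnable on its own. The names `load_chunk` and `scale`, and the fake file names, are illustrative placeholders for `load_ts`, its `archive` argument, and the real archive contents; it assumes `numpy` and `tqdm` are installed.

```python
import functools
import multiprocessing

import numpy as np
import tqdm


def load_chunk(uid_files, scale):
    """Process one chunk of items; only the worker with uid 0 shows a bar."""
    uid, files = uid_files
    out = []
    # disable=(uid != 0) silences tqdm on every worker except the first,
    # so one progress bar is printed instead of n_cpu interleaved ones.
    for f in tqdm.tqdm(files, disable=(uid != 0)):
        out.append(len(f) * scale)  # stand-in for the real per-file work
    return out


if __name__ == '__main__':
    items = ['ts_%d.txt' % i for i in range(1000)]  # stand-in for z.namelist()
    n_cpu = multiprocessing.cpu_count()
    # Same splitting scheme as the diff: np.array_split yields contiguous
    # index ranges, and enumerate attaches a worker id to each chunk.
    splits = [(uid, items[splt[0]:splt[-1] + 1])
              for uid, splt in enumerate(
                  np.array_split(np.arange(len(items)), min(n_cpu, len(items))))]
    func = functools.partial(load_chunk, scale=1)
    with multiprocessing.Pool() as p:
        chunks = p.map(func, splits)
    print(sum(len(c) for c in chunks))  # 1000
```

The single bar only tracks worker 0's share of the files (roughly 1/n_cpu of the total), but since the chunks are near-equal in size that is enough to gauge overall progress without the garbled output that several concurrent tqdm bars would produce.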