@@ -22,10 +22,11 @@ def create_single_table_meas(df_dict):
                           how='inner')
     return so_far
 
-def load_ts(files, archive):
+def load_ts(uid_files, archive):
+    uid, files = uid_files
     with ZipFile(archive, 'r') as z:
         tmp_list = []
-        for f in files:
+        for f in tqdm.tqdm(files, disable=(uid != 0)):
             # Retrieve index from filename
             ix = int(re.findall('[0-9]+', os.path.basename(f))[0])
             tmp = np.loadtxt(io.BytesIO(z.read(f)))
@@ -41,11 +42,12 @@ def single_dataframe(archive):
         ts = []
         # Get all the timeseries
         # We'll do this in parallel as loadtxt is rather slow
+        print('Linking timeseries')
         p = multiprocessing.Pool()
         ts_files = [f for f in z.namelist() if '.txt' in f]
         func = functools.partial(load_ts, archive=archive)
         n_cpu = multiprocessing.cpu_count()
-        splits = [ts_files[splt[0]:splt[-1]+1] for splt in np.array_split(np.arange(len(ts_files)), min(n_cpu, len(ts_files)))]
+        splits = [(uid, ts_files[splt[0]:splt[-1]+1]) for uid, splt in enumerate(np.array_split(np.arange(len(ts_files)), min(n_cpu, len(ts_files))))]
         ts = p.map(func, splits)
         p.close()
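
The pattern the diff introduces is: split the file list into one contiguous chunk per CPU, tag each chunk with a worker id, and silence tqdm on every worker except id 0 so only one progress bar is printed. Below is a minimal, self-contained sketch of that pattern, runnable on its own. The names `load_chunk` and `scale`, and the fake file names, are illustrative placeholders for `load_ts`, its `archive` argument, and the real archive contents; it assumes `numpy` and `tqdm` are installed.

```python
import functools
import multiprocessing

import numpy as np
import tqdm


def load_chunk(uid_files, scale):
    """Process one chunk of items; only the worker with uid 0 shows a bar."""
    uid, files = uid_files
    out = []
    # disable=(uid != 0) silences tqdm on every worker except the first,
    # so one progress bar is printed instead of n_cpu interleaved ones.
    for f in tqdm.tqdm(files, disable=(uid != 0)):
        out.append(len(f) * scale)  # stand-in for the real per-file work
    return out


if __name__ == '__main__':
    items = ['ts_%d.txt' % i for i in range(1000)]  # stand-in for z.namelist()
    n_cpu = multiprocessing.cpu_count()
    # Same splitting scheme as the diff: np.array_split yields contiguous
    # index ranges, and enumerate attaches a worker id to each chunk.
    splits = [(uid, items[splt[0]:splt[-1] + 1])
              for uid, splt in enumerate(
                  np.array_split(np.arange(len(items)), min(n_cpu, len(items))))]
    func = functools.partial(load_chunk, scale=1)
    with multiprocessing.Pool() as p:
        chunks = p.map(func, splits)
    print(sum(len(c) for c in chunks))  # 1000
```

The single bar only tracks worker 0's share of the files (roughly 1/n_cpu of the total), but since the chunks are near-equal in size that is enough to gauge overall progress without the garbled output that several concurrent tqdm bars would produce.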