123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419 |
- '''
- Created on 20.08.2020
- Author:
- Michael Diedenhofen
- Max Planck Institute for Metabolism Research, Cologne
- Read Bruker ParaVision JCAMP parameter files (e.g. acqp, method, visu_pars).
- '''
- from __future__ import print_function
- VERSION = 'pv_parser.py v 1.0.2 20200820'
- import re
- import sys
- import collections
- import numpy as np
def strfind(string, sub):
    '''
    Return the start indices of all non-overlapping occurrences of sub.

    The search runs from left to right and resumes directly behind each hit,
    so overlapping occurrences are not reported. An empty sub, or a sub that
    is longer than string, yields an empty list.
    '''
    if not sub or len(sub) > len(string):
        return []
    # re.finditer scans for non-overlapping literal matches from the left,
    # which is the same walk as repeated str.find(sub, pos + len(sub)).
    return [match.start() for match in re.finditer(re.escape(sub), string)]
def strtok(string, delimiters=None):
    '''
    Split off the first token of string.

    Leading delimiter characters are skipped; the token then extends up to
    (not including) the next delimiter character.

    Parameters:
        string: Text to scan.
        delimiters: Collection of delimiter characters. None selects the
            whitespace characters chr(9)..chr(13) and the blank.

    Returns:
        Tuple (token, remainder). The remainder starts with the delimiter
        that ended the token. Both are empty strings if string is empty or
        consists of delimiters only.
    '''
    if delimiters is None:  # whitespace characters
        delimiters = ('\t', '\n', '\x0b', '\x0c', '\r', ' ')
    length = len(string)
    # Skip the leading delimiters
    begin = 0
    while begin < length and string[begin] in delimiters:
        begin += 1
    if begin == length:
        return ('', '')
    # Advance to the end of the token
    end = begin
    while end < length and string[end] not in delimiters:
        end += 1
    return (string[begin:end], string[end:])
def extract_jcamp_strings(string, get_all=True):
    '''
    Extract the text enclosed in angle brackets ('<...>').

    Parameters:
        string: Text to search, or None.
        get_all: If true, return the contents of every bracket pair as a
            list; otherwise return only the content of the first pair.

    Returns:
        None if string is None. With get_all a (possibly empty) list of
        strings; without get_all the first content, or None if there is no
        bracket pair.
    '''
    if string is None:
        return None
    pattern = r'<(.*?)>'
    if get_all:
        return re.findall(pattern, string)
    first = re.search(pattern, string)
    return None if first is None else first.group(1)
def extract_unit_string(string):
    '''
    Return the text inside the first pair of square brackets ('[...]').

    If string contains no bracket pair it is returned unchanged; None is
    passed through as None.
    '''
    if string is None:
        return None
    found = re.search(r'\[(.*?)\]', string)
    return string if found is None else found.group(1)
def replace_jcamp_strings(string):
    '''
    Replace every '<...>' string by a numbered placeholder '<#N>'.

    A string starts at a '<' and ends at the next '>'. A '<' without a
    following '>' is left untouched.

    Returns:
        Tuple (text, str_list): the text with placeholders and the list of
        the replaced strings (brackets included); the placeholder number N
        is the index of the original string in str_list.
    '''
    originals = []

    def _placeholder(match):
        # Remember the original and hand out its list index as ID
        originals.append(match.group(0))
        return '<#%d>' % (len(originals) - 1,)

    # '[^>]' also matches line breaks, like the plain character search
    replaced = re.sub(r'<[^>]*>', _placeholder, string)
    return (replaced, originals)
def check_struct_list(values, str_list):
    '''
    Convert the string parts of one struct item.

    If all values are integers a list of int is returned, if all values are
    numbers a list of float. Otherwise the values stay strings and
    placeholders of the form '<#N>' are replaced by str_list[N].

    Parameters:
        values: List of strings; modified in place when placeholders are
            restored.
        str_list: The original JCAMP strings, indexed by placeholder ID.

    Returns:
        Tuple (values, restored) where restored is the number of
        placeholders that were replaced (0 for numeric lists).

    Exits the program if a single value contains more than one placeholder.
    '''
    # All integers?
    try:
        return ([int(value) for value in values], 0)
    except ValueError:
        pass
    # All numbers?
    try:
        return ([float(value) for value in values], 0)
    except ValueError:
        pass
    # Restore JCAMP strings
    total = len(str_list)
    restored = 0
    if total > 0:
        for pos, text in enumerate(values):
            ids = re.findall(r'<#(.*?)>', text)
            if len(ids) > 1:
                sys.exit("Found more than one ID string in a value: %s" % (text,))
            if ids:
                values[pos] = str_list[int(ids[0])]
                restored += 1
                if restored == total:
                    break
    return (values, restored)
def create_struct_list(string, str_list, restored):
    '''
    Split the flat text of one struct level into its items.

    The text is split at commas (with surrounding blanks); each item is
    split at single blanks and converted by check_struct_list. An item with
    exactly one value is stored as that value, otherwise as a list.

    Parameters:
        string: Text between two parentheses, without nested structs.
        str_list: The original JCAMP strings for restoring placeholders.
        restored: Number of placeholders restored so far.

    Returns:
        Tuple (items, restored) with the updated restore counter. An empty
        string gives ([], restored).
    '''
    if not string:
        return ([], restored)
    # Split one struct in its parts
    parts = re.split(r'(?:^ +| *),(?: *| +$)', string)
    for pos, part in enumerate(parts):
        fields, count = check_struct_list(part.split(' '), str_list)
        parts[pos] = fields[0] if len(fields) == 1 else fields
        restored += count
    return (parts, restored)
def push_list(level, obj_list, obj):
    '''
    Append obj to the list found level steps down the last-element chain.

    Starting at obj_list, the last element is entered level times; obj is
    appended to the list reached that way. A level of 0 (or less) appends
    to obj_list itself.
    '''
    target = obj_list
    for _ in range(level):
        target = target[-1]
    target.append(obj)
def parse_struct(string, str_list):
    '''
    Parse a (possibly nested) struct text into nested lists.

    The text is cut at every parenthesis. Each piece between two
    parentheses is converted by create_struct_list and stored with
    push_list at the current nesting depth: a piece that follows '(' is
    added as a new list, a piece that follows ')' adds its items to the
    list of the enclosing struct.

    Parameters:
        string: Struct text in which JCAMP strings are already replaced by
            placeholders.
        str_list: The original JCAMP strings for restoring placeholders.

    Returns:
        Tuple (obj_list, restored): the nested lists and the number of
        restored placeholders. Without any '(' the result is ([], 0).
    '''
    depth = 0
    restored = 0
    result = []
    first_open = string.find('(')
    if first_open < 0:
        return (result, restored)
    length = len(string)
    # left: first character of the current piece
    # opened_left: the piece was started by '(' (otherwise by ')')
    left, opened_left = (first_open + 1, True)
    next_open = string.find('(', left)
    next_close = string.find(')', left)
    while True:
        # A cached position is only usable while it is not left of the piece
        has_open = next_open >= left
        has_close = next_close >= left
        if has_open and ((not has_close) or (next_open < next_close)):
            right, opened_right = (next_open, True)
        elif has_close:
            right, opened_right = (next_close, False)
        else:
            right, opened_right = (length, False)
        # Remove the blanks and one separating comma at both ends
        piece = string[left:right].strip(' ')
        if piece.startswith(','):
            piece = piece[1:].lstrip(' ')
        if piece.endswith(','):
            piece = piece[:-1].rstrip(' ')
        items, restored = create_struct_list(piece, str_list, restored)
        if opened_left:
            push_list(depth, result, items)
            if opened_right:
                depth += 1
        else:
            for item in items:
                push_list(depth, result, item)
            if not opened_right:
                depth -= 1
        if right >= length:
            break
        left, opened_left = (right + 1, opened_right)
        # Only the parenthesis type just consumed has to be searched again
        if opened_left:
            next_open = string.find('(', left)
        else:
            next_close = string.find(')', left)
    return (result, restored)
def check_array_list(values):
    '''
    Convert a list of strings to a NumPy array of the best matching type.

    Parameters:
        values: List of strings (the whitespace separated array elements).

    Returns:
        A one-dimensional numpy.ndarray with dtype
        - int32 if all values are integers that fit into 32 bits
          (also for an empty list),
        - int64 if all values are integers that fit into 64 bits,
        - float64 if all values are numbers,
        - object (the unchanged strings) otherwise.
    '''
    try:
        numbers = [int(value) for value in values]
    except ValueError:
        pass
    else:
        # Take the smallest integer type that holds every value; forcing
        # int32 cannot represent integers beyond the 32 bit range.
        for dtype in (np.int32, np.int64):
            limits = np.iinfo(dtype)
            if all(limits.min <= number <= limits.max for number in numbers):
                return np.array(numbers, dtype=dtype)
        # Integers beyond 64 bits are handled as floating point numbers
    try:
        return np.array([float(value) for value in values], dtype=np.float64)
    except ValueError:
        return np.array(values, dtype=object)
def get_array_values(label, sizes, data):
    '''
    Convert the data text of an array LDR.

    Parameters:
        label: Name of the LDR (used in messages only).
        sizes: List with the array sizes from the LDR value.
        data: The joined data lines of the LDR.

    Returns:
        - data starts with '<': the '<...>' strings. With more than one
          size an object array, reshaped to sizes[:-1] if the number of
          strings matches; with one size the single string itself, or the
          list of strings if there is not exactly one.
        - data starts with '(': the nested lists from parse_struct.
        - otherwise: the array from check_array_list, reshaped to sizes.

    Exits the program if not all JCAMP strings of a struct are restored.
    '''
    # A single string or an array of strings ...
    if data.startswith('<'):
        strings = re.findall(r'<.*?>', data)
        if len(sizes) > 1:
            strings = np.array(strings, dtype=object)
            # The last size is not a string count, so it is left out
            if np.prod(sizes[:-1]) == strings.size:
                strings = strings.reshape(sizes[:-1])
        elif len(strings) == 1:
            strings = strings[0]
        return strings

    # ... or a struct or an array of structs ...
    if data.startswith('('):
        if len(sizes) > 1:
            print("Warning: The sizes dimension is greater than 1 for the %s array of structs." % (label,), file=sys.stderr)
        text, str_list = replace_jcamp_strings(data)
        structs, restored = parse_struct(text, str_list)
        if restored != len(str_list):
            print("%s:" % (label,), structs)
            sys.exit("Not all replaced JCAMP strings are restored (%d of %d)." % (restored, len(str_list)))
        return structs

    # ... or a simple array (most frequently numeric)
    tokens = re.findall(r'[^\s]+', data)
    return np.reshape(check_array_list(tokens), sizes)
def read_param_file(filename):
    '''
    Read a JCAMP parameter file (e.g. acqp, method, visu_pars).

    The lines before the first '##$' record form the header: '##' records
    are stored under their label, '$$' comments are stored as 'Path',
    'Process', 'Date' or 'Header<line number>'. Every following record up
    to '##END=' is stored as a parameter.

    Parameters:
        filename: Path of the parameter file.

    Returns:
        Tuple (header, params) of two OrderedDict objects. Parameter values
        are int, float or str for single values and the result of
        get_array_values for arrays and structs.

    Exits the program (SystemExit) if the file does not exist, the header
    has no JCAMPDX/JCAMP-DX record, a '$$' comment is found inside a
    record or the END record is missing. Other I/O errors are re-raised.
    '''
    # Open parameter file
    try:
        fid = open(filename, 'r')
    except IOError as err:
        if err.errno == 2:  # ENOENT: no such file or directory
            sys.exit("Cannot open parameter file %s" % (filename,))
        raise

    header = collections.OrderedDict()
    params = collections.OrderedDict()
    weekdays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')

    # The file is closed on every way out of this block, including the
    # sys.exit() calls and unexpected exceptions.
    with fid:
        # Generate header information
        line = ''
        for index, line in enumerate(fid):
            line = line.lstrip(' \t').rstrip('\r\n')
            if line.startswith('##$'):
                # First parameter record: handled by the loop below
                break
            if line.startswith('##'):  # It's a variable with ##
                # Retrieve the Labeled Data Record
                label, value = strtok(line, delimiters='=')
                label = strtok(label, delimiters='#')[0].strip()
                value = strtok(value, delimiters='=')[0].strip()
                header[label] = value
            elif line.startswith('$$'):  # It's a comment
                comment = strtok(line, delimiters='$')[0].strip()
                if comment.startswith('/'):
                    header['Path'] = comment
                elif comment.startswith('process'):
                    # Skip the 8 characters of 'process '
                    header['Process'] = comment[8:]
                else:
                    # A date starts with a weekday name or looks like
                    # 'YYYY-MM-DD' (two dashes in the first 10 characters)
                    pos = strfind(comment[:10], '-')
                    if (comment[:3] in weekdays) or ((comment[:2] in ('19', '20')) and (len(pos) == 2)):
                        header['Date'] = comment
                    else:
                        header['Header' + str(index + 1)] = comment

        # Check if using a supported version of JCAMP file format
        if 'JCAMPDX' in header:
            version = float(header['JCAMPDX'])
        elif 'JCAMP-DX' in header:
            version = float(header['JCAMP-DX'])
        else:
            sys.exit("The file header is not correct.")
        if version not in (4.24, 5):
            print("Warning: JCAMP version %s is not supported (%s)." % (version, filename), file=sys.stderr)

        # Loop for reading parameters
        while line.lstrip(' \t').startswith('##'):
            # The label pattern is greedy: the label ends at the last '='
            match = re.search(r'##(.*)=(.*)', line)
            if match is None:
                # No label present, value is set to empty string
                label, value = (None, '')
            else:
                label, value = match.groups()
                # Removing proprietary tag
                if label.startswith('$'):
                    label = label[1:]

            flag_comment = '$$' in line

            # Collect the data lines up to the next record. An exhausted
            # file leaves line empty (or at a non-record line), which
            # ends the outer loop.
            line = ''
            data = []
            for line in fid:
                if line.lstrip(' \t').startswith('##'):
                    break
                if not line.lstrip(' \t').startswith('$$'):  # Skip comment line
                    if '$$' in line:
                        flag_comment = True
                    data.append(line.rstrip('\r\n'))

            # Create data string
            data = ''.join(data)

            if flag_comment:
                sys.exit("Found JCAMP comment ('$$') in LDR %s." % (label,))

            # Checking for END tag
            if (label is None) or (label == 'END'):
                break

            # Checking if value is a string or an array, a struct or a single value
            if value.startswith('( <'):
                print("Warning: The parsing of the LDR %s failed." % (label,), file=sys.stderr)
            elif value.startswith('( '):  # A single string, an array of strings or structs or a simple array
                sizes = [int(x) for x in value.strip('( )').split(',')]
                try:
                    params[label] = get_array_values(label, sizes, data)
                except ValueError as err:
                    # The parameter is still left out, but no longer silently
                    print("Warning: The values of the LDR %s could not be converted (%s)." % (label, err), file=sys.stderr)
            elif value.startswith('('):  # A struct
                data = ''.join([value, data])
                params[label] = get_array_values(label, [1], data)[0]
            else:  # A single value
                try:
                    params[label] = int(value)
                except ValueError:
                    try:
                        params[label] = float(value)
                    except ValueError:
                        params[label] = value

    if label != 'END':
        sys.exit("Unexpected end of file: Missing END Statement")

    return (header, params)
def main():
    '''
    Command line entry point.

    Reads the parameter file given as argument and prints all header and
    parameter entries; NumPy arrays are printed below their label.
    '''
    import argparse

    arg_parser = argparse.ArgumentParser(description='Read ParaVision parameter file')
    arg_parser.add_argument('filename', help='ParaVision parameter file (acqp, method, visu_pars)')
    options = arg_parser.parse_args()

    # read parameter file
    header, params = read_param_file(options.filename)

    for name, entry in header.items():
        print("%s: %s" % (name, entry))

    for name, entry in params.items():
        if not isinstance(entry, np.ndarray):
            print("%s: %s" % (name, entry))
            continue
        print("%s:" % (name,))
        print(entry)
# Run the command line interface only when the file is executed as a script
if __name__ == '__main__':
    main()
|