dict_parser.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. """
  2. The dict_parser module provides access to the DictWriter and DictReader classes.
  3. Both handle the conversion of odML documents from and to Python dictionary objects.
  4. """
  5. import sys
  6. from .. import format as odmlfmt
  7. from ..info import FORMAT_VERSION
  8. from .parser_utils import InvalidVersionException, ParserException, odml_tuple_export
# Message level labels; DictReader.warn prefixes every recorded message with one
# of them ("<label>: <message>").
LABEL_ERROR = "Error"
LABEL_WARNING = "Warning"
  11. def parse_cardinality(vals):
  12. """
  13. Parses an odml specific cardinality from an input value.
  14. If the input content is valid, returns an appropriate tuple.
  15. Returns None if the input is empty or the content cannot be
  16. properly parsed.
  17. :param vals: list or tuple
  18. :return: None or 2-tuple
  19. """
  20. if not vals:
  21. return None
  22. if isinstance(vals, (list, tuple)) and len(vals) == 2:
  23. min_val = vals[0]
  24. max_val = vals[1]
  25. if min_val is None or str(min_val).strip() == "None":
  26. min_val = None
  27. if max_val is None or str(max_val).strip() == "None":
  28. max_val = None
  29. min_int = isinstance(min_val, int) and min_val >= 0
  30. max_int = isinstance(max_val, int) and max_val >= 0
  31. if min_int and max_int and max_val > min_val:
  32. return min_val, max_val
  33. if min_int and not max_val:
  34. return min_val, None
  35. if max_int and not min_val:
  36. return None, max_val
  37. # We were not able to properly parse the current cardinality, so add
  38. # an appropriate Error/Warning once the reader 'ignore_errors' option has been implemented.
  39. return None
  40. class DictWriter:
  41. """
  42. A writer to parse an odml.Document to a Python dictionary object equivalent.
  43. """
  44. def __init__(self):
  45. self.doc = None # odML document
  46. def to_dict(self, odml_document):
  47. """
  48. Parses a full odml.Document to a Python dict object. Will also parse any
  49. contained odml.Sections, their subsections and odml.Properties.
  50. :param odml_document: an odml.Document.
  51. :return: parsed odml.Document as a Python dict object.
  52. """
  53. self.doc = odml_document
  54. parsed_doc = {}
  55. for i in odmlfmt.Document.arguments_keys:
  56. attr = i
  57. if i in odmlfmt.Document.map_keys:
  58. attr = odmlfmt.Document.map(i)
  59. if hasattr(odml_document, attr):
  60. if attr == 'sections':
  61. sections = self.get_sections(odml_document.sections)
  62. parsed_doc[attr] = sections
  63. else:
  64. tag = getattr(odml_document, attr)
  65. if tag:
  66. # Always use the arguments key attribute name when saving
  67. parsed_doc[i] = tag
  68. return parsed_doc
  69. def get_sections(self, section_list):
  70. """
  71. Parses a list of odml.Sections to a Python dict object. Will also parse any
  72. contained subsections and odml.Properties.
  73. :param section_list: list of odml.Sections.
  74. :return: list of parsed odml.Sections as a single Python dict object.
  75. """
  76. section_seq = []
  77. for section in section_list:
  78. section_dict = {}
  79. for i in odmlfmt.Section.arguments_keys:
  80. attr = i
  81. if i in odmlfmt.Section.map_keys:
  82. attr = odmlfmt.Section.map(i)
  83. if hasattr(section, attr):
  84. if attr == 'properties':
  85. properties = self.get_properties(section.properties)
  86. section_dict[attr] = properties
  87. elif attr == 'sections':
  88. sections = self.get_sections(section.sections)
  89. section_dict[attr] = sections
  90. else:
  91. tag = getattr(section, attr)
  92. # Tuples have to be serialized as lists to avoid
  93. # nasty python code annotations when writing to yaml.
  94. if tag and isinstance(tag, tuple):
  95. section_dict[i] = list(tag)
  96. elif tag:
  97. # Always use the arguments key attribute name when saving
  98. section_dict[i] = tag
  99. section_seq.append(section_dict)
  100. return section_seq
  101. @staticmethod
  102. def get_properties(props_list):
  103. """
  104. Parses a list of odml.Properties to a Python dict object.
  105. :param props_list: list of odml.Properties.
  106. :return: list of parsed odml.Properties as a single Python dict object.
  107. """
  108. props_seq = []
  109. for prop in props_list:
  110. prop_dict = {}
  111. for i in odmlfmt.Property.arguments_keys:
  112. attr = i
  113. if i in odmlfmt.Property.map_keys:
  114. attr = odmlfmt.Property.map(i)
  115. if hasattr(prop, attr):
  116. tag = getattr(prop, attr)
  117. # Tuples have to be serialized as lists to avoid
  118. # nasty python code annotations when writing to yaml.
  119. if isinstance(tag, tuple):
  120. prop_dict[attr] = list(tag)
  121. elif (tag == []) or tag: # Even if 'values' is empty, allow '[]'
  122. # Custom odML tuples require special handling.
  123. if attr == "values" and prop.dtype and \
  124. prop.dtype.endswith("-tuple") and prop.values:
  125. prop_dict["value"] = odml_tuple_export(prop.values)
  126. else:
  127. # Always use the arguments key attribute name when saving
  128. prop_dict[i] = tag
  129. props_seq.append(prop_dict)
  130. return props_seq
  131. class DictReader:
  132. """
  133. A reader to parse dictionaries with odML content into an odml.Document.
  134. """
  135. def __init__(self, show_warnings=True, ignore_errors=False):
  136. """
  137. :param show_warnings: Toggle whether to print warnings to the command line.
  138. Any warnings can be accessed via the Reader's class
  139. warnings attribute after parsing is done.
  140. :param ignore_errors: To allow loading and fixing of invalid odml files
  141. encountered errors can be converted to warnings
  142. instead. Such a document can only be saved when
  143. all errors have been addressed though.
  144. """
  145. self.parsed_doc = None # Python dictionary object equivalent
  146. self.warnings = []
  147. self.show_warnings = show_warnings
  148. self.ignore_errors = ignore_errors
  149. def is_valid_attribute(self, attr, fmt):
  150. """
  151. Checks whether a provided attribute is valid for a provided odml class
  152. (Document, Section, Property).
  153. :param attr: Python dictionary tag that will be checked if it is a valid
  154. attribute for the provided format class.
  155. :param fmt: required odml format class format.Document, format.Section or
  156. format.Property against which the attribute is checked.
  157. :returns: the attribute if the attribute is valid, None otherwise.
  158. """
  159. if attr in fmt.arguments_keys:
  160. return attr
  161. if fmt.revmap(attr):
  162. return attr
  163. msg = "Invalid element '%s' inside <%s> tag" % (attr, fmt.__class__.__name__)
  164. self.error(msg)
  165. return None
  166. def error(self, msg):
  167. """
  168. If the parsers ignore_errors property is set to False, a ParserException
  169. will be raised. Otherwise the message is passed to the parsers warning
  170. method.
  171. :param msg: Error message.
  172. """
  173. if self.ignore_errors:
  174. return self.warn(msg, LABEL_ERROR)
  175. raise ParserException(msg)
  176. def warn(self, msg, label=LABEL_WARNING):
  177. """
  178. Adds a message to the parsers warnings property. If the parsers show_warnings
  179. property is set to True, an additional error message will be written
  180. to sys.stderr.
  181. :param msg: Warning message.
  182. :param label: Defined message level, can be 'Error' or 'Warning'. Default is 'Warning'.
  183. """
  184. msg = "%s: %s" % (label, msg)
  185. self.warnings.append(msg)
  186. if self.show_warnings:
  187. sys.stderr.write("Parser%s\n" % msg)
  188. def to_odml(self, parsed_doc):
  189. """
  190. Parses a Python dictionary object containing an odML document to an odml.Document.
  191. Will raise a ParserException if the Python dictionary does not contain a valid
  192. odML document. Also raises an InvalidVersionException if the odML document
  193. is of a previous odML format version.
  194. :param parsed_doc: Python dictionary object containing an odML document.
  195. :returns: parsed odml.Document.
  196. """
  197. self.parsed_doc = parsed_doc
  198. # Parse only odML documents of supported format versions.
  199. if 'Document' not in self.parsed_doc:
  200. msg = "Missing root element 'Document'"
  201. raise ParserException(msg)
  202. if 'odml-version' not in self.parsed_doc:
  203. raise ParserException("Invalid odML document: Could not find odml-version.")
  204. if self.parsed_doc.get('odml-version') != FORMAT_VERSION:
  205. msg = ("Cannot parse odML document with format version '%s'. \n"
  206. "\tUse the 'VersionConverter' from 'odml.tools.converters' "
  207. "to import previous odML formats."
  208. % self.parsed_doc.get('odml-version'))
  209. raise InvalidVersionException(msg)
  210. self.parsed_doc = self.parsed_doc['Document']
  211. doc_attrs = {}
  212. doc_secs = []
  213. for i in self.parsed_doc:
  214. attr = self.is_valid_attribute(i, odmlfmt.Document)
  215. if attr == 'sections':
  216. doc_secs = self.parse_sections(self.parsed_doc['sections'])
  217. elif attr:
  218. # Make sure to always use the correct odml format attribute name
  219. doc_attrs[odmlfmt.Document.map(attr)] = self.parsed_doc[i]
  220. doc = odmlfmt.Document.create(**doc_attrs)
  221. for sec in doc_secs:
  222. doc.append(sec)
  223. return doc
  224. def parse_sections(self, section_list):
  225. """
  226. Parses a list of Python dictionary objects containing odML sections to the
  227. odml.Section equivalents including any subsections and properties.
  228. :param section_list: list of Python dictionary objects containing odML sections.
  229. :returns: list of parsed odml.Sections
  230. """
  231. odml_sections = []
  232. for section in section_list:
  233. sec_attrs = {}
  234. children_secs = []
  235. sec_props = []
  236. for i in section:
  237. attr = self.is_valid_attribute(i, odmlfmt.Section)
  238. if attr == 'properties':
  239. sec_props = self.parse_properties(section['properties'])
  240. elif attr == 'sections':
  241. children_secs = self.parse_sections(section['sections'])
  242. elif attr:
  243. # Tuples had to be serialized as lists to support the yaml format.
  244. # Now convert cardinality lists back to tuples.
  245. content = section[attr]
  246. if attr.endswith("_cardinality"):
  247. content = parse_cardinality(content)
  248. # Make sure to always use the correct odml format attribute name
  249. sec_attrs[odmlfmt.Section.map(attr)] = content
  250. try:
  251. sec = odmlfmt.Section.create(**sec_attrs)
  252. for prop in sec_props:
  253. sec.append(prop)
  254. for child_sec in children_secs:
  255. sec.append(child_sec)
  256. odml_sections.append(sec)
  257. except Exception as exc:
  258. msg = "Section not created (%s)\n %s" % (sec_attrs, str(exc))
  259. self.error(msg)
  260. return odml_sections
  261. def parse_properties(self, props_list):
  262. """
  263. Parses a list of Python dictionary objects containing odML properties to the
  264. odml.Property equivalents.
  265. :param props_list: list of Python dictionary objects containing odML properties.
  266. :returns: list of parsed odml.Properties
  267. """
  268. odml_props = []
  269. for _property in props_list:
  270. prop_attrs = {}
  271. for i in _property:
  272. attr = self.is_valid_attribute(i, odmlfmt.Property)
  273. if attr:
  274. content = _property[attr]
  275. # Tuples had to be serialized as lists to support the yaml format.
  276. # Now convert cardinality lists back to tuples.
  277. if attr.endswith("_cardinality"):
  278. content = parse_cardinality(content)
  279. # Make sure to always use the correct odml format attribute name
  280. prop_attrs[odmlfmt.Property.map(attr)] = content
  281. try:
  282. prop = odmlfmt.Property.create(**prop_attrs)
  283. odml_props.append(prop)
  284. except Exception as exc:
  285. msg = "Property not created (%s)\n%s" % (prop_attrs, str(exc))
  286. self.error(msg)
  287. return odml_props