Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

validation.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. # -*- coding: utf-8
  2. """
  3. Generic odML validation framework
  4. """
  5. LABEL_ERROR = 'error'
  6. LABEL_WARNING = 'warning'
  7. class ValidationError(object):
  8. """
  9. Represents an error found in the validation process
  10. The error is bound to an odML-object (*obj*) or a list of
  11. those and contains a message and a rank which may be one of:
  12. 'error', 'warning'.
  13. """
  14. def __init__(self, obj, msg, rank=LABEL_ERROR):
  15. self.obj = obj
  16. self.msg = msg
  17. self.rank = rank
  18. @property
  19. def is_warning(self):
  20. return self.rank == LABEL_WARNING
  21. @property
  22. def is_error(self):
  23. return self.rank == LABEL_ERROR
  24. @property
  25. def path(self):
  26. return self.obj.get_path()
  27. def __repr__(self):
  28. return "<ValidationError(%s):%s '%s'>" % (self.rank,
  29. self.obj,
  30. self.msg)
  31. class Validation(object):
  32. _handlers = {}
  33. @staticmethod
  34. def register_handler(klass, handler):
  35. """
  36. Add a validation handler for a odml class.
  37. *type* may be one of the following:
  38. * odML
  39. * section
  40. * property
  41. And is called in the validation process for each corresponding
  42. object. The *handler* is assumed to be a generator function
  43. yielding all ValidationErrors it finds:
  44. handler(obj)
  45. The section handlers are only called for sections and not for
  46. the document node. If both are required, you need to register
  47. the handler twice.
  48. """
  49. Validation._handlers.setdefault(klass, set()).add(handler)
  50. def __init__(self, doc):
  51. self.doc = doc # may also be a section
  52. self.errors = []
  53. self.validate(doc)
  54. for sec in doc.itersections(recursive=True):
  55. self.validate(sec)
  56. for prop in sec.properties:
  57. self.validate(prop)
  58. def validate(self, obj):
  59. handlers = self._handlers.get(obj.format().name, [])
  60. for handler in handlers:
  61. for err in handler(obj):
  62. self.error(err)
  63. def error(self, validation_error):
  64. """
  65. Register an error found during the validation process
  66. """
  67. self.errors.append(validation_error)
  68. def __getitem__(self, obj):
  69. """return a list of the errors for a certain object"""
  70. errors = []
  71. for err in self.errors:
  72. if err.obj is obj:
  73. errors.append(err)
  74. return errors
  75. # ------------------------------------------------
  76. # validation rules
  77. def section_type_must_be_defined(sec):
  78. """test that no section has an undefined type"""
  79. if sec.type is None or sec.type == '' or sec.type == 'undefined':
  80. yield ValidationError(sec, 'Section type undefined', LABEL_WARNING)
# Run the section type check for every object whose format name is 'section'.
Validation.register_handler('section', section_type_must_be_defined)
  82. def section_repository_present(sec):
  83. """
  84. 1. warn, if a section has no repository or
  85. 2. the section type is not present in the repository
  86. """
  87. repo = sec.get_repository()
  88. if repo is None:
  89. yield ValidationError(sec,
  90. 'A section should have an associated repository',
  91. LABEL_WARNING)
  92. return
  93. try:
  94. tsec = sec.get_terminology_equivalent()
  95. except Exception as exc:
  96. yield ValidationError(sec,
  97. 'Could not load terminology: %s' % exc,
  98. LABEL_WARNING)
  99. return
  100. if tsec is None:
  101. yield ValidationError(sec,
  102. "Section type '%s' not found in terminology" % sec.type,
  103. LABEL_WARNING)
# Run the repository/terminology check for every 'section' object.
Validation.register_handler('section', section_repository_present)
  105. def document_unique_ids(doc):
  106. """
  107. Traverse an odML Document and check whether all
  108. assigned ids are unique within the document.
  109. Yields all duplicate odML object id entries
  110. that are encountered.
  111. :param doc: odML document
  112. """
  113. id_map = {doc.id: "Document '%s'" % doc.get_path()}
  114. for i in section_unique_ids(doc, id_map):
  115. yield i
  116. def section_unique_ids(parent, id_map=None):
  117. """
  118. Traverse a parent (odML Document or Section)
  119. and check whether all assigned ids are unique.
  120. A "id":"odML object / path" dictionary of additional
  121. 'to-be-excluded' ids may be handed in via the
  122. *id_map* attribute.
  123. Yields all duplicate odML object id entries
  124. that are encountered.
  125. :param parent: odML Document or Section
  126. :param id_map: "id":"odML object / path" dictionary
  127. """
  128. if not id_map:
  129. id_map = {}
  130. for sec in parent.sections:
  131. for i in property_unique_ids(sec, id_map):
  132. yield i
  133. if sec.id in id_map:
  134. yield ValidationError(sec, "Duplicate id in Section '%s' and %s" %
  135. (sec.get_path(), id_map[sec.id]))
  136. else:
  137. id_map[sec.id] = "Section '%s'" % sec.get_path()
  138. for i in section_unique_ids(sec, id_map):
  139. yield i
  140. def property_unique_ids(section, id_map=None):
  141. """
  142. Check whether all ids assigned to the odML
  143. Properties of an odML Section are unique.
  144. A "id":"odML object / path" dictionary of additional
  145. 'to-be-excluded' ids may be handed in via the
  146. *id_map* attribute.
  147. Yields all duplicate odML object id entries
  148. that are encountered.
  149. :param section: odML Section
  150. :param id_map: "id":"odML object / path" dictionary
  151. """
  152. if not id_map:
  153. id_map = {}
  154. for prop in section.properties:
  155. if prop.id in id_map:
  156. yield ValidationError(prop, "Duplicate id in Property '%s' and %s" %
  157. (prop.get_path(), id_map[prop.id]))
  158. else:
  159. id_map[prop.id] = "Property '%s'" % prop.get_path()
# Run the unique id check once per document (format name 'odML').
Validation.register_handler('odML', document_unique_ids)
  161. def object_unique_names(obj, children, attr=lambda x: x.name,
  162. msg="Object names must be unique"):
  163. """
  164. Test that object names within one section are unique
  165. *attr* is a function, that returns the item that needs to be unique
  166. *children* is a function, that returns the children to be
  167. considered. This is to be able to use the same function
  168. for sections and properties
  169. """
  170. names = set(map(attr, children(obj)))
  171. if len(names) == len(children(obj)):
  172. return # quick exit
  173. names = set()
  174. for i in children(obj):
  175. if attr(i) in names:
  176. yield ValidationError(i, msg, LABEL_ERROR)
  177. names.add(attr(i))
  178. def section_unique_name_type(obj):
  179. for i in object_unique_names(
  180. obj,
  181. attr=lambda x: (x.name, x.type),
  182. children=lambda x: x.sections,
  183. msg="name/type combination must be unique"):
  184. yield i
  185. def property_unique_names(obj):
  186. for i in object_unique_names(obj, lambda x: x.properties):
  187. yield i
# Subsection name/type uniqueness is checked below the document node and
# below every section, hence the handler is registered for both format names.
Validation.register_handler('odML', section_unique_name_type)
Validation.register_handler('section', section_unique_name_type)
# Property name uniqueness is checked per section.
Validation.register_handler('section', property_unique_names)
  191. def property_terminology_check(prop):
  192. """
  193. Executes a couple of checks:
  194. 1. warn, if there are properties that do not occur in the terminology
  195. 2. warn, if there are multiple values with different units or the unit does
  196. not match the one in the terminology
  197. """
  198. tsec = prop.parent.get_terminology_equivalent()
  199. if tsec is None:
  200. return
  201. try:
  202. tsec.properties[prop.name]
  203. except KeyError:
  204. yield ValidationError(prop,
  205. "Property '%s' not found in terminology" % prop.name,
  206. LABEL_WARNING)
# Run the terminology check for every 'property' object.
Validation.register_handler('property', property_terminology_check)
  208. def property_dependency_check(prop):
  209. """
  210. Warn, if the dependency attribute refers to a non-existent attribute
  211. or the dependency_value does not match
  212. """
  213. dep = prop.dependency
  214. if dep is None:
  215. return
  216. try:
  217. dep_obj = prop.parent[dep]
  218. except KeyError:
  219. yield ValidationError(prop,
  220. "Property refers to a non-existent dependency object",
  221. LABEL_WARNING)
  222. return
  223. if prop.dependency_value not in dep_obj.values[0]:
  224. yield ValidationError(prop, "Dependency-value is not equal to value of"
  225. " the property's dependency", LABEL_WARNING)
# Run the dependency check for every 'property' object.
Validation.register_handler('property', property_dependency_check)