Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

validation.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. #-*- coding: utf-8
  2. """
  3. generic odml validation framework
  4. """
  5. import odml.format as format
  6. import odml.mapping as mapping
  7. import odml.tools.event as event
  8. import odml
  9. # event capabilities are needed for mappings
  10. odml.setMinimumImplementation('event')
  11. class ValidationError(object):
  12. """
  13. Represents an error found in the validation process
  14. The error is bound to an odml-object (*obj*) or a list of those
  15. and contains a message and a type which may be one of:
  16. 'error', 'warning', 'info'
  17. """
  18. def __init__(self, obj, msg, type='error'):
  19. self.obj = obj
  20. self.msg = msg
  21. self.type = type
  22. @property
  23. def is_warning(self):
  24. return self.type == 'warning'
  25. @property
  26. def is_error(self):
  27. return self.type == 'error'
  28. @property
  29. def path(self):
  30. if isinstance(self.obj, odml.value.Value):
  31. return self.obj.parent.get_path()
  32. return self.obj.get_path()
  33. def __repr__(self):
  34. return "<ValidationError(%s):%s \"%s\">" % (self.type, self.obj, self.msg)
  35. class Validation(object):
  36. _handlers = {}
  37. @staticmethod
  38. def register_handler(klass, handler):
  39. """
  40. Add a validation handler for a odml-class.
  41. *type* may be one of the following:
  42. * odML
  43. * section
  44. * property
  45. * value
  46. And is called in the validation process for each corresponding
  47. object. The *handler* is assumed to be a generator function
  48. yielding all ValidationErrors it finds:
  49. handler(obj)
  50. The section handlers are only called for sections and not for
  51. the document node. If both are required, you need to register
  52. the handler twice.
  53. """
  54. Validation._handlers.setdefault(klass, set()).add(handler)
  55. def __init__(self, doc):
  56. self.doc = doc # may also be a section
  57. self.errors = []
  58. self.validate(doc)
  59. # TODO isn't there a 'walk' method for these things?
  60. for sec in doc.itersections(recursive=True):
  61. self.validate(sec)
  62. for prop in sec.properties:
  63. self.validate(prop)
  64. for val in prop.values:
  65. self.validate(val)
  66. def validate(self, obj):
  67. handlers = self._handlers.get(obj._format._name, [])
  68. for handler in handlers:
  69. for err in handler(obj):
  70. self.error(err)
  71. def error(self, validation_error):
  72. """
  73. Register an error found during the validation process
  74. """
  75. self.errors.append(validation_error)
  76. def __getitem__(self, obj):
  77. """return a list of the errors for a certain object"""
  78. errors = []
  79. for err in self.errors:
  80. if err.obj is obj:
  81. errors.append(err)
  82. return errors
  83. # ------------------------------------------------
  84. # validation rules
  85. def section_type_must_be_defined(sec):
  86. """test that no section has an undefined type"""
  87. if sec.type is None or sec.type == '' or sec.type == 'undefined':
  88. yield ValidationError(sec, 'Section type undefined', 'warning')
  89. Validation.register_handler('section', section_type_must_be_defined)
  90. def section_repository_should_be_present(sec):
  91. """
  92. 1. warn, if a section has no repository or
  93. 2. the section type is not present in the repository
  94. """
  95. repo = sec.get_repository()
  96. if repo is None:
  97. yield ValidationError(sec, 'A section should have an associated repository', 'warning')
  98. return
  99. try:
  100. tsec = sec.get_terminology_equivalent()
  101. except Exception as e:
  102. yield ValidationError(sec, 'Could not load terminology: ' + e.message, 'warning')
  103. return
  104. if tsec is None:
  105. yield ValidationError(sec, "Section type '%s' not found in terminology" % sec.type, 'warning')
  106. Validation.register_handler('section', section_repository_should_be_present)
  107. def object_unique_names(obj, children, attr=lambda x: x.name, msg="Object names must be unique"):
  108. """
  109. test that object names within one section are unique
  110. *attr* is a function, that returns the item that needs to be unique
  111. *children* is a function, that returns the children to be
  112. considered. This is to be able to use the same function
  113. for sections and properties
  114. """
  115. names = set(map(attr, children(obj)))
  116. if len(names) == len(children(obj)):
  117. return # quick exit
  118. names = set()
  119. for s in children(obj):
  120. if attr(s) in names:
  121. yield ValidationError(s, msg, 'error')
  122. names.add(attr(s))
  123. def section_unique_name_type_combination(obj):
  124. for i in object_unique_names(obj,
  125. attr=lambda x: (x.name, x.type),
  126. children=lambda x: x.sections,
  127. msg="name/type combination must be unique"):
  128. yield i
  129. def property_unique_names(obj):
  130. for i in object_unique_names(obj, lambda x: x.properties):
  131. yield i
  132. Validation.register_handler('odML', section_unique_name_type_combination)
  133. Validation.register_handler('section', section_unique_name_type_combination)
  134. Validation.register_handler('section', property_unique_names)
  135. def odML_mapped_document_be_valid(doc):
  136. """
  137. try to create a mapping of the document and if that succeeds
  138. validate the mapped document according to the validation rules
  139. """
  140. if mapping.proxy is not None and isinstance(doc, mapping.proxy.Proxy):
  141. return # don't try to map already mapped documents
  142. # first check if any object has a mapping attribute
  143. for sec in doc.itersections(recursive=True):
  144. if sec.mapping is not None:
  145. break
  146. for prop in sec.properties:
  147. if prop.mapping is not None:
  148. break
  149. else: # no break in the loop, continue with next section
  150. continue
  151. break # found a mapping can stop searching
  152. else:
  153. return # no mapping found
  154. mdoc = doc._active_mapping
  155. if mdoc is not None:
  156. mapping.unmap_document(doc)
  157. mdoc = None
  158. # TODO: if mdoc is set there is already a mapping present. However, this
  159. # TODO may have been corrupted by user interaction, thus we should actually
  160. # TODO unmap the document, create a new one and then remap the original one
  161. try:
  162. if mdoc is None:
  163. mdoc = mapping.create_mapping(doc)
  164. except mapping.MappingError as e:
  165. yield ValidationError(doc, 'mapping: %s' % str(e), 'error')
  166. return
  167. v = Validation(mdoc)
  168. for err in v.errors:
  169. err.mobj = err.obj
  170. err.obj = mapping.get_object_from_mapped_equivalent(err.obj)
  171. err.msg = "mapping: " + err.msg
  172. yield err
  173. Validation.register_handler('odML', odML_mapped_document_be_valid)
  174. def property_values_same_unit(prop, tprop=None):
  175. units = set(map(lambda x: x.unit, prop.values))
  176. if len(units) > 1:
  177. yield ValidationError(prop, 'Values of a property should be of the same unit', 'warning')
  178. if tprop is not None and tprop.values[0].unit != prop.values[0].unit:
  179. yield ValidationError(prop, 'Values of a property should have the same unit as their terminology equivalent', 'warning')
  180. Validation.register_handler('property', property_values_same_unit)
  181. def property_terminology_check(prop):
  182. """
  183. executes a couple of checks:
  184. 1. warn, if there are properties that do not occur in the terminology
  185. 2. warn, if there are multiple values with different units or the unit does not
  186. match the one in the terminology
  187. """
  188. tsec = prop.parent.get_terminology_equivalent()
  189. if tsec is None: return
  190. try:
  191. tprop = tsec.properties[prop.name]
  192. except KeyError:
  193. tprop = None
  194. yield ValidationError(prop, "Property '%s' not found in terminology" % prop.name, 'warning')
  195. for err in property_values_same_unit(prop, tprop):
  196. yield err
  197. Validation.register_handler('property', property_terminology_check)
  198. def property_dependency_check(prop):
  199. """
  200. warn, if the dependency attribute refers to a non-existant attribute
  201. or the dependency_value does not match
  202. """
  203. dep = prop.dependency
  204. if dep is None: return
  205. try:
  206. dep_obj = prop.parent[dep]
  207. except KeyError:
  208. yield ValidationError(prop, "Property refers to a non-existant dependency object", 'warning')
  209. return
  210. if dep_obj.value.value != prop.dependency_value:
  211. yield ValidationError(prop, "Dependency-value is not equal to value of the property's dependency", 'warning')
  212. Validation.register_handler('property', property_dependency_check)
  213. def value_empty(val):
  214. if val.value == '':
  215. yield ValidationError(val, "Values may only be empty in terminologies", 'warning')
  216. Validation.register_handler('value', value_empty)