# validation.py
# -*- coding: utf-8 -*-
"""
Generic odML validation framework.
"""
  5. import re
  6. from enum import Enum
  7. from . import dtypes
  8. try:
  9. unicode = unicode
  10. except NameError:
  11. unicode = str
  12. LABEL_ERROR = 'error'
  13. LABEL_WARNING = 'warning'
  14. class IssueID(Enum):
  15. """
  16. IDs identifying registered validation handlers.
  17. """
  18. unspecified = 1
  19. # Required attributes validations
  20. object_required_attributes = 101
  21. section_type_must_be_defined = 102
  22. # Unique id, name and type validations
  23. section_unique_ids = 200
  24. property_unique_ids = 201
  25. section_unique_name_type = 202
  26. property_unique_name = 203
  27. # Good form validations
  28. object_name_readable = 300
  29. # Property specific validations
  30. property_terminology_check = 400
  31. property_dependency_check = 401
  32. property_values_check = 402
  33. property_values_string_check = 403
  34. # Cardinality validations
  35. section_properties_cardinality = 500
  36. section_sections_cardinality = 501
  37. property_values_cardinality = 502
  38. # Optional validations
  39. section_repository_present = 600
  40. # Custom validation
  41. custom_validation = 701
  42. class ValidationError(object):
  43. """
  44. Represents an error found in the validation process.
  45. The error is bound to an odML-object (*obj*) or a list of those and contains
  46. a message and a rank which may be one of: 'error', 'warning'.
  47. """
  48. def __init__(self, obj, msg, rank=LABEL_ERROR, validation_id=None):
  49. self.obj = obj
  50. self.msg = msg
  51. self.rank = rank
  52. self.validation_id = validation_id
  53. @property
  54. def is_warning(self):
  55. """
  56. :returns: Boolean whether the current ValidationError has rank 'Warning'.
  57. """
  58. return self.rank == LABEL_WARNING
  59. @property
  60. def is_error(self):
  61. """
  62. :returns: Boolean whether the current ValidationError has rank 'Error'.
  63. """
  64. return self.rank == LABEL_ERROR
  65. @property
  66. def path(self):
  67. """
  68. :returns: The absolute path to the odml object the ValidationError is bound to.
  69. """
  70. return self.obj.get_path()
  71. def __repr__(self):
  72. # Cleanup the odml object print strings
  73. print_str = unicode(self.obj).split()[0].split("[")[0].split(":")[0]
  74. # Document has no name attribute and should not print id or name info
  75. if hasattr(self.obj, "name"):
  76. if self.obj.name and self.obj.name != self.obj.id:
  77. print_str = "%s[%s]" % (print_str, self.obj.name)
  78. else:
  79. print_str = "%s[%s]" % (print_str, self.obj.id)
  80. return "Validation%s: %s '%s'" % (self.rank.capitalize(), print_str, self.msg)
  81. class Validation(object):
  82. """
  83. Validation provides a set of default validations that can used to validate
  84. odml objects. Custom validations can be added via the 'register_handler' method.
  85. :param obj: odml object the validation will be applied to.
  86. """
  87. _handlers = {}
  88. @staticmethod
  89. def register_handler(klass, handler):
  90. """
  91. Adds a validation handler for an odml class. The handler is called in the
  92. validation process for each corresponding object.
  93. The *handler* is assumed to be a generator function yielding
  94. all ValidationErrors it finds.
  95. Section handlers are only called for sections and not for the document node.
  96. If both are required, the handler needs to be registered twice.
  97. :param klass: string corresponding to an odml class. Valid strings are
  98. 'odML', 'section' and 'property'.
  99. :param handler: validation function applied to the odml class.
  100. """
  101. Validation._handlers.setdefault(klass, set()).add(handler)
  102. def __init__(self, obj, validate=True, reset=False):
  103. self.obj = obj # may also be a section
  104. self.errors = []
  105. # If initialized with reset=True, reset all handlers and
  106. # do not run any validation yet to allow custom Validation objects.
  107. if reset:
  108. self._handlers = {}
  109. return
  110. if validate:
  111. self.run_validation()
  112. def validate(self, obj):
  113. """
  114. Runs all registered handlers that are applicable to a provided odml class instance.
  115. Occurring validation errors will be collected in the Validation.error attribute.
  116. :param obj: odml class instance.
  117. """
  118. handlers = self._handlers.get(obj.format().name, [])
  119. for handler in handlers:
  120. for err in handler(obj):
  121. self.error(err)
  122. def error(self, validation_error):
  123. """
  124. Registers an error found during the validation process.
  125. """
  126. self.errors.append(validation_error)
  127. def run_validation(self):
  128. """
  129. Runs a clean new validation on the registered Validation object.
  130. """
  131. self.errors = []
  132. self.validate(self.obj)
  133. if self.obj.format().name == "property":
  134. return
  135. for sec in self.obj.itersections(recursive=True):
  136. self.validate(sec)
  137. for prop in sec.properties:
  138. self.validate(prop)
  139. def report(self):
  140. """
  141. Validates the registered object and returns a results report.
  142. """
  143. self.run_validation()
  144. err_count = 0
  145. reduce = set()
  146. sec_count = 0
  147. prop_count = 0
  148. for i in self.errors:
  149. if i.is_error:
  150. err_count += 1
  151. if i.obj not in reduce and 'section' in str(i.obj).lower():
  152. sec_count += 1
  153. elif i.obj not in reduce and 'property' in str(i.obj).lower():
  154. prop_count += 1
  155. reduce.add(i.obj)
  156. warn_count = len(self.errors) - err_count
  157. msg = ""
  158. if err_count or warn_count:
  159. msg = "Validation found %s errors and %s warnings" % (err_count, warn_count)
  160. msg += " in %s Sections and %s Properties." % (sec_count, prop_count)
  161. return msg
  162. def register_custom_handler(self, klass, handler):
  163. """
  164. Adds a validation handler for an odml class. The handler is called in the
  165. validation process for each corresponding object.
  166. The *handler* is assumed to be a generator function yielding
  167. all ValidationErrors it finds.
  168. Section handlers are only called for sections and not for the document node.
  169. If both are required, the handler needs to be registered twice.
  170. :param klass: string corresponding to an odml class. Valid strings are
  171. 'odML', 'section' and 'property'.
  172. :param handler: validation function applied to the odml class.
  173. """
  174. self._handlers.setdefault(klass, set()).add(handler)
  175. def __getitem__(self, obj):
  176. """
  177. Return a list of the errors for a certain object.
  178. """
  179. errors = []
  180. for err in self.errors:
  181. if err.obj is obj:
  182. errors.append(err)
  183. return errors
  184. # ------------------------------------------------
  185. # validation rules
  186. def object_required_attributes(obj):
  187. """
  188. Tests that no Object has undefined attributes, given in format.
  189. :param obj: document, section or property.
  190. """
  191. validation_id = IssueID.object_required_attributes
  192. args = obj.format().arguments
  193. for arg in args:
  194. if arg[1] == 1:
  195. msg = "Missing required attribute '%s'" % (arg[0])
  196. if not hasattr(obj, arg[0]):
  197. yield ValidationError(obj, msg, LABEL_ERROR, validation_id)
  198. continue
  199. obj_arg = getattr(obj, arg[0])
  200. if not obj_arg and not isinstance(obj_arg, bool):
  201. yield ValidationError(obj, msg, LABEL_ERROR, validation_id)
  202. Validation.register_handler('odML', object_required_attributes)
  203. Validation.register_handler('section', object_required_attributes)
  204. Validation.register_handler('property', object_required_attributes)
  205. def section_type_must_be_defined(sec):
  206. """
  207. Tests that no Section has an unspecified type and adds a warning otherwise.
  208. :param sec: odml.Section.
  209. """
  210. validation_id = IssueID.section_type_must_be_defined
  211. if sec.type and sec.type == "n.s.":
  212. yield ValidationError(sec, "Section type not specified", LABEL_WARNING, validation_id)
  213. Validation.register_handler('section', section_type_must_be_defined)
  214. # The Section repository present is no longer part of the default validation
  215. # and should be added on demand.
  216. def section_repository_present(sec):
  217. """
  218. 1. warn, if a section has no repository or
  219. 2. the section type is not present in the repository
  220. """
  221. validation_id = IssueID.section_repository_present
  222. repo = sec.get_repository()
  223. if repo is None:
  224. msg = "A section should have an associated repository"
  225. yield ValidationError(sec, msg, LABEL_WARNING, validation_id)
  226. return
  227. try:
  228. tsec = sec.get_terminology_equivalent()
  229. except Exception as exc:
  230. msg = "Could not load terminology: %s" % exc
  231. yield ValidationError(sec, msg, LABEL_WARNING, validation_id)
  232. return
  233. if tsec is None:
  234. msg = "Section type '%s' not found in terminology" % sec.type
  235. yield ValidationError(sec, msg, LABEL_WARNING, validation_id)
  236. def document_unique_ids(doc):
  237. """
  238. Traverse an odML Document and check whether all
  239. assigned ids are unique within the document.
  240. Yields all duplicate odML object id entries that are encountered.
  241. :param doc: odML document
  242. """
  243. id_map = {doc.id: "Document '%s'" % doc.get_path()}
  244. for i in section_unique_ids(doc, id_map):
  245. yield i
  246. def section_unique_ids(parent, id_map=None):
  247. """
  248. Traverse a parent (odML Document or Section)
  249. and check whether all assigned ids are unique.
  250. A "id":"odML object / path" dictionary of additional 'to-be-excluded' ids may be
  251. handed in via the *id_map* attribute.
  252. Yields all duplicate odML object id entries that are encountered.
  253. :param parent: odML Document or Section
  254. :param id_map: "id":"odML object / path" dictionary
  255. """
  256. validation_id = IssueID.section_unique_ids
  257. if not id_map:
  258. id_map = {}
  259. for sec in parent.sections:
  260. for i in property_unique_ids(sec, id_map):
  261. yield i
  262. if sec.id in id_map:
  263. msg = "Duplicate id in Section '%s' and %s" % (sec.get_path(), id_map[sec.id])
  264. yield ValidationError(sec, msg, validation_id=validation_id)
  265. else:
  266. id_map[sec.id] = "Section '%s'" % sec.get_path()
  267. for i in section_unique_ids(sec, id_map):
  268. yield i
  269. def property_unique_ids(section, id_map=None):
  270. """
  271. Checks whether all ids assigned to the odML Properties of an odML Section are unique.
  272. A "id":"odML object / path" dictionary of additional 'to-be-excluded' ids may be
  273. handed in via the *id_map* attribute.
  274. Yields all duplicate odML object id entries that are encountered.
  275. :param section: odML Section
  276. :param id_map: "id":"odML object / path" dictionary
  277. """
  278. validation_id = IssueID.property_unique_ids
  279. if not id_map:
  280. id_map = {}
  281. for prop in section.properties:
  282. if prop.id in id_map:
  283. msg = "Duplicate id in Property '%s' and %s" % (prop.get_path(),
  284. id_map[prop.id])
  285. yield ValidationError(prop, msg, validation_id=validation_id)
  286. else:
  287. id_map[prop.id] = "Property '%s'" % prop.get_path()
  288. Validation.register_handler('odML', document_unique_ids)
  289. def object_unique_names(obj, validation_id, children, attr=lambda x: x.name,
  290. msg="Object names must be unique"):
  291. """
  292. Tests that object names within a Section are unique.
  293. :param obj: odml class instance the validation is applied on.
  294. :param validation_id: id of the
  295. :param children: a function that returns the children to be considered.
  296. Required when handling Sections.
  297. :param attr: a function that returns the attribute that needs to be unique.
  298. :param msg: error message that will be registered with a ValidationError.
  299. """
  300. names = set(map(attr, children(obj)))
  301. if len(names) == len(children(obj)):
  302. return
  303. names = set()
  304. for i in children(obj):
  305. if attr(i) in names:
  306. yield ValidationError(i, msg, LABEL_ERROR, validation_id)
  307. names.add(attr(i))
  308. def section_unique_name_type(obj):
  309. """
  310. Tests that the values of names and types within the scope of a Section are unique.
  311. :param obj: odml class instance the validation is applied on.
  312. """
  313. for i in object_unique_names(
  314. obj,
  315. validation_id=IssueID.section_unique_name_type,
  316. attr=lambda x: (x.name, x.type),
  317. children=lambda x: x.sections,
  318. msg="name/type combination must be unique"):
  319. yield i
  320. def property_unique_names(obj):
  321. """
  322. Tests that the values of Property names within the scope of a Section are unique.
  323. :param obj: odml class instance the validation is applied on.
  324. """
  325. for i in object_unique_names(obj,
  326. validation_id=IssueID.property_unique_name,
  327. children=lambda x: x.properties):
  328. yield i
  329. Validation.register_handler('odML', section_unique_name_type)
  330. Validation.register_handler('section', section_unique_name_type)
  331. Validation.register_handler('section', property_unique_names)
  332. def object_name_readable(obj):
  333. """
  334. Tests if object name is easily readable, so not equal to id.
  335. :param obj: odml.Section or odml.Property.
  336. """
  337. validation_id = IssueID.object_name_readable
  338. if obj.name == obj.id:
  339. yield ValidationError(obj, "Name not assigned", LABEL_WARNING, validation_id)
  340. Validation.register_handler('section', object_name_readable)
  341. Validation.register_handler('property', object_name_readable)
  342. def property_terminology_check(prop):
  343. """
  344. Tests if there are properties that do not occur in the terminology.
  345. """
  346. validation_id = IssueID.property_terminology_check
  347. if not prop.parent:
  348. return
  349. tsec = prop.parent.get_terminology_equivalent()
  350. if tsec is None:
  351. return
  352. try:
  353. tsec.properties[prop.name]
  354. except KeyError:
  355. msg = "Property '%s' not found in terminology" % prop.name
  356. yield ValidationError(prop, msg, LABEL_WARNING, validation_id)
  357. def property_dependency_check(prop):
  358. """
  359. Produces a warning if the dependency attribute refers to a non-existent attribute
  360. or the dependency_value does not match.
  361. """
  362. validation_id = IssueID.property_dependency_check
  363. if not prop.parent:
  364. return
  365. dep = prop.dependency
  366. if dep is None:
  367. return
  368. try:
  369. dep_obj = prop.parent[dep]
  370. except KeyError:
  371. msg = "Property refers to a non-existent dependency object"
  372. yield ValidationError(prop, msg, LABEL_WARNING, validation_id)
  373. return
  374. if prop.dependency_value not in dep_obj.values[0]:
  375. msg = "Dependency-value is not equal to value of the property's dependency"
  376. yield ValidationError(prop, msg, LABEL_WARNING, validation_id)
  377. Validation.register_handler('property', property_dependency_check)
  378. def property_values_check(prop):
  379. """
  380. Tests that the values are of consistent dtype.
  381. If dtype is not given, infer from first item in list.
  382. :param prop: property the validation is applied on.
  383. """
  384. validation_id = IssueID.property_values_check
  385. if prop.dtype is not None and prop.dtype != "":
  386. dtype = prop.dtype
  387. elif prop.values:
  388. dtype = dtypes.infer_dtype(prop.values[0])
  389. else:
  390. return
  391. for val in prop.values:
  392. # Do not continue if a value is None
  393. if val is None:
  394. return
  395. if dtype.endswith("-tuple"):
  396. tuple_len = int(dtype[:-6])
  397. if len(val) != tuple_len:
  398. msg = "Tuple of length %s not consistent with dtype %s!" % (len(val), dtype)
  399. yield ValidationError(prop, msg, LABEL_WARNING, validation_id)
  400. else:
  401. try:
  402. dtypes.get(val, dtype)
  403. except ValueError:
  404. msg = "Property values not of consistent dtype!"
  405. yield ValidationError(prop, msg, LABEL_WARNING, validation_id)
  406. Validation.register_handler('property', property_values_check)
  407. def property_values_string_check(prop):
  408. """
  409. PROTOTYPE
  410. Tests whether values with dtype "string" are maybe of different dtype.
  411. :param prop: property the validation is applied on.
  412. """
  413. validation_id = IssueID.property_values_string_check
  414. if prop.dtype != "string" or not prop.values:
  415. return
  416. dtype_checks = {
  417. 'int': r'^(-+)?\d+$',
  418. 'date': r'^\d{2,4}-\d{1,2}-\d{1,2}$',
  419. 'datetime': r'^\d{2,4}-\d{1,2}-\d{1,2} \d{2}:\d{2}(:\d{2})?$',
  420. 'time': r'^\d{2}:\d{2}(:\d{2})?$',
  421. 'float': r'^(-+)?\d+\.\d+$',
  422. 'tuple': r'^\((.*?)\)',
  423. 'boolean': r'^TRUE|FALSE|True|False|t|f+$',
  424. 'text': r'[\r\n]'}
  425. val_dtypes = []
  426. for val in prop.values:
  427. # Do not continue if a value is None
  428. if val is None:
  429. return
  430. curr_dtype = "string"
  431. for check_dtype in dtype_checks.items():
  432. if bool(re.compile(check_dtype[1]).match(val.strip())):
  433. if check_dtype[0] == "tuple" and val.count(';') > 0:
  434. curr_dtype = str(val.count(';') + 1) + "-" + check_dtype[0]
  435. else:
  436. curr_dtype = check_dtype[0]
  437. break
  438. if check_dtype[0] == "text" and len(re.findall(check_dtype[1], val.strip())) > 0:
  439. curr_dtype = check_dtype[0]
  440. break
  441. val_dtypes += [curr_dtype]
  442. res_dtype = max(set(val_dtypes), key=val_dtypes.count)
  443. if len(set(val_dtypes)) > 1:
  444. res_dtype = "string"
  445. if res_dtype != "string":
  446. msg = 'Dtype of property "%s" currently is "string", but might fit dtype "%s"!' % \
  447. (prop.name, res_dtype)
  448. yield ValidationError(prop, msg, LABEL_WARNING, validation_id)
  449. Validation.register_handler('property', property_values_string_check)
  450. def _cardinality_validation(obj, cardinality, card_target_attr, validation_rank, validation_id):
  451. """
  452. Helper function that validates the cardinality of an odml object attribute.
  453. Valid object-attribute combinations are Section-sections, Section-properties and
  454. Property-values.
  455. :param obj: an odml.Section or an odml.Property
  456. :param cardinality: 2-int tuple containing the cardinality value
  457. :param card_target_attr: string containing the name of the attribute the cardinality is
  458. applied against. Supported values are:
  459. 'sections', 'properties' or 'values'
  460. :param validation_rank: Rank of the yielded ValidationError.
  461. :param validation_id: string containing the id of the parent validation.
  462. :return: Returns a ValidationError, if a set cardinality is not met or None.
  463. """
  464. err = None
  465. if cardinality and isinstance(cardinality, tuple):
  466. val_min = cardinality[0]
  467. val_max = cardinality[1]
  468. card_target = getattr(obj, card_target_attr)
  469. val_len = len(card_target) if card_target else 0
  470. invalid_cause = ""
  471. if val_min and val_len < val_min:
  472. invalid_cause = "minimum %s" % val_min
  473. elif val_max and val_len > val_max:
  474. invalid_cause = "maximum %s" % val_max
  475. if invalid_cause:
  476. obj_name = obj.format().name.capitalize()
  477. msg = "%s %s cardinality violated" % (obj_name, card_target_attr)
  478. msg += " (%s values, %s found)" % (invalid_cause, val_len)
  479. err = ValidationError(obj, msg, validation_rank, validation_id)
  480. return err
  481. def section_properties_cardinality(obj):
  482. """
  483. Checks Section properties against any set property cardinality.
  484. :param obj: odml.Section
  485. :return: Yields a ValidationError warning, if a set cardinality is not met.
  486. """
  487. validation_id = IssueID.section_properties_cardinality
  488. err = _cardinality_validation(obj, obj.prop_cardinality, 'properties',
  489. LABEL_WARNING, validation_id)
  490. if err:
  491. yield err
  492. Validation.register_handler("section", section_properties_cardinality)
  493. def section_sections_cardinality(obj):
  494. """
  495. Checks Section sub-sections against any set sub-section cardinality.
  496. :param obj: odml.Section
  497. :return: Yields a ValidationError warning, if a set cardinality is not met.
  498. """
  499. validation_id = IssueID.section_sections_cardinality
  500. err = _cardinality_validation(obj, obj.sec_cardinality, 'sections',
  501. LABEL_WARNING, validation_id)
  502. if err:
  503. yield err
  504. Validation.register_handler("section", section_sections_cardinality)
  505. def property_values_cardinality(obj):
  506. """
  507. Checks Property values against any set value cardinality.
  508. :param obj: odml.Property
  509. :return: Yields a ValidationError warning, if a set cardinality is not met.
  510. """
  511. validation_id = IssueID.property_values_cardinality
  512. err = _cardinality_validation(obj, obj.val_cardinality, 'values',
  513. LABEL_WARNING, validation_id)
  514. if err:
  515. yield err
  516. Validation.register_handler("property", property_values_cardinality)