test_rdf_writer.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. import datetime
  2. import os
  3. import unittest
  4. from sys import version_info
  5. import yaml
  6. if version_info > (3, 4):
  7. from owlrl import DeductiveClosure, RDFS_Semantics
  8. from rdflib import URIRef, Literal
  9. from rdflib.namespace import Namespace, RDF, RDFS, XSD
  10. from rdflib.plugins.sparql import prepareQuery
  11. import odml
  12. from odml.format import Format
  13. from odml.tools.rdf_converter import ODML_NS, RDFWriter
  14. from .test_samplefile import SampleFileCreator
  15. from .test_samplefile import parse
  16. ODMLNS = Format.namespace()
  17. class TestRDFWriter(unittest.TestCase):
  def setUp(self):
      """Create two sample documents, each from its own SampleFileCreator."""
      self.doc = SampleFileCreator().create_document()
      self.doc1 = SampleFileCreator().create_document()
  def test_convert_to_rdf(self):
      """Converting two documents yields two subjects typed as odML Document."""
      writer = RDFWriter([self.doc, self.doc1])
      writer.convert_to_rdf()

      document_class = URIRef(ODMLNS.Document)
      typed_subjects = list(writer.graph.subjects(predicate=RDF.type,
                                                  object=document_class))
      self.assertEqual(len(typed_subjects), 2)
  def test_adding_doc_to_the_hub(self):
      """A single converted document is attached once to the hub root."""
      writer = RDFWriter([self.doc])
      writer.convert_to_rdf()

      hub_documents = list(writer.graph.objects(subject=writer.hub_root,
                                                predicate=ODMLNS.hasDocument))
      self.assertEqual(len(hub_documents), 1)
  def test_adding_repository(self):
      """
      Check that hasTerminology triples exist for the hub root and the
      document only after a repository has been set on the document.
      """

      def terminology_count(writer, subject):
          # Number of hasTerminology objects attached to the given subject.
          found = writer.graph.objects(subject=subject,
                                       predicate=ODMLNS.hasTerminology)
          return len(list(found))

      # No repository set: neither the hub root nor the document link one.
      writer = RDFWriter([self.doc])
      writer.convert_to_rdf()
      self.assertEqual(terminology_count(writer, writer.hub_root), 0)
      doc_uri = URIRef(ODMLNS + writer.docs[0].id)
      self.assertEqual(terminology_count(writer, doc_uri), 0)

      # With a repository the URL is used as an RDF type and is linked
      # from both the hub root and the document.
      repo_url = "terminology_url"
      self.doc.repository = repo_url
      writer = RDFWriter([self.doc])
      writer.convert_to_rdf()

      typed = writer.graph.subjects(predicate=RDF.type, object=URIRef(repo_url))
      self.assertEqual(len(list(typed)), 1)
      self.assertEqual(terminology_count(writer, writer.hub_root), 1)
      doc_uri = URIRef(ODMLNS + writer.docs[0].id)
      self.assertEqual(terminology_count(writer, doc_uri), 1)
  def test_adding_sections(self):
      """
      Check the number of hasSection triples for an empty document, one
      sample document and two sample documents.
      """
      cases = [
          ([odml.Document()], 0),
          ([self.doc], 9),
          ([self.doc, self.doc1], 18),
      ]
      for documents, expected in cases:
          writer = RDFWriter(documents)
          writer.convert_to_rdf()
          pairs = writer.graph.subject_objects(predicate=ODMLNS.hasSection)
          self.assertEqual(len(list(pairs)), expected)
  def test_adding_properties(self):
      """
      Check the number of hasProperty triples for a parsed snippet, one
      sample document and two sample documents.
      """
      # NOTE(review): leading whitespace inside the original multi-line
      # literal was lost in this copy; the snippet is reproduced flush-left
      # -- confirm parse() does not rely on the indentation.
      parsed_doc = parse(
          "\n"
          "s1[t1]\n"
          "- s11[t1]\n"
          "s2[t2]\n"
      )
      cases = [
          ([parsed_doc], 0),
          ([self.doc], 12),
          ([self.doc, self.doc1], 24),
      ]
      for documents, expected in cases:
          writer = RDFWriter(documents)
          writer.convert_to_rdf()
          pairs = writer.graph.subject_objects(predicate=ODMLNS.hasProperty)
          self.assertEqual(len(list(pairs)), expected)
  def test_adding_values(self):
      """
      Check that values are found under the numbered predicates built from
      the RDF namespace plus "_1" / "_2" and never under rdf:li.
      """
      # NOTE(review): leading whitespace inside the original multi-line
      # literals was lost in this copy; the snippets are reproduced
      # flush-left -- confirm parse() does not rely on the indentation.
      first_member = URIRef("%s_1" % str(RDF))
      second_member = URIRef("%s_2" % str(RDF))

      def to_graph(document):
          # Convert a single document and hand back the populated graph.
          writer = RDFWriter([document])
          writer.convert_to_rdf()
          return writer.graph

      def n_pairs(graph, predicate):
          # Count all (subject, object) pairs for the predicate.
          return len(list(graph.subject_objects(predicate=predicate)))

      def n_subjects(graph, predicate, value):
          # Count subjects carrying the literal value under the predicate.
          found = graph.subjects(predicate=predicate, object=Literal(value))
          return len(list(found))

      # First snippet: no value triples are expected at all.
      graph = to_graph(parse(
          "\n"
          "s1[t1]\n"
      ))
      self.assertEqual(n_pairs(graph, RDF.li), 0)
      self.assertEqual(n_pairs(graph, first_member), 0)

      # Second snippet: "val" shows up once, under the first numbered predicate.
      doc = parse(
          "\n"
          "s1[t1]\n"
          "- p1\n"
      )
      graph = to_graph(doc)
      self.assertEqual(n_subjects(graph, RDF.li, "val"), 0)
      self.assertEqual(n_subjects(graph, first_member, "val"), 1)

      # Appending a second value adds a triple under the second predicate.
      doc.sections[0].properties[0].append("val2")
      graph = to_graph(doc)
      self.assertEqual(n_pairs(graph, RDF.li), 0)
      self.assertEqual(n_subjects(graph, RDF.li, "val"), 0)
      self.assertEqual(n_subjects(graph, RDF.li, "val2"), 0)
      self.assertEqual(n_subjects(graph, first_member, "val"), 1)
      self.assertEqual(n_subjects(graph, second_member, "val2"), 1)

      # Third snippet: three subjects carry "val" as their first value.
      graph = to_graph(parse(
          "\n"
          "s1[t1]\n"
          "- p1\n"
          "s2[t2]\n"
          "- p1\n"
          "- p2\n"
      ))
      self.assertEqual(n_subjects(graph, RDF.li, "val"), 0)
      self.assertEqual(n_subjects(graph, first_member, "val"), 3)
  def test_section_subclass(self):
      """
      Check that a section whose type is a key of section_subclasses.yaml
      is typed with the mapped subclass and not with odML Section.
      """
      yaml_path = os.path.join(odml.__path__[0], 'resources', 'section_subclasses.yaml')
      with open(yaml_path, "r") as handle:
          subclass_map = yaml.safe_load(handle)

      # The first key of the loaded mapping serves as the section type.
      first_key = next(iter(subclass_map))
      doc = odml.Document()
      doc.append(odml.Section("S", type=first_key))

      # The writer is handed the bare document here, not a list.
      writer = RDFWriter(doc)
      writer.convert_to_rdf()

      subclass_uri = URIRef(ODMLNS[subclass_map[first_key]])
      as_subclass = writer.graph.subjects(predicate=RDF.type, object=subclass_uri)
      self.assertEqual(len(list(as_subclass)), 1)

      section_uri = URIRef(ODMLNS.Section)
      as_section = writer.graph.subjects(predicate=RDF.type, object=section_uri)
      self.assertEqual(len(list(as_section)), 0)
  def test_adding_other_entities_properties(self):
      """
      Check that document, section and property attributes each appear
      exactly once as a literal under their odML predicate.
      """
      # NOTE(review): leading whitespace inside the original multi-line
      # literal was lost in this copy; the snippet is reproduced flush-left
      # -- confirm parse() does not rely on the indentation.
      doc = parse(
          "\n"
          "s1[t1]\n"
          "- p1\n"
      )

      version = "v1"
      date = datetime.date(1979, 10, 12)
      author = "nice person"
      s_def = "comment"
      s_ref = "reference"
      p_unit = "u1"
      p_name = "p1"
      p_def = "p comment"
      p_uncertainty = 13.0
      p_dtype = "string"
      p_value_origin = "value"
      p_ref = "p_ref"

      # Document level attributes
      doc.version = version
      doc.date = date
      doc.author = author

      # Section level attributes
      section = doc.sections[0]
      section.definition = s_def
      section.reference = s_ref

      # Property level attributes
      prop = section.properties[0]
      prop.name = p_name
      prop.unit = p_unit
      prop.definition = p_def
      prop.uncertainty = p_uncertainty
      prop.dtype = p_dtype
      prop.value_origin = p_value_origin
      prop.reference = p_ref

      writer = RDFWriter([doc])
      writer.convert_to_rdf()
      graph = writer.graph

      def hits(predicate, literal):
          # Number of subjects carrying the literal under the predicate.
          return len(list(graph.subjects(predicate=predicate, object=literal)))

      # Document
      self.assertEqual(hits(ODMLNS.hasDocVersion, Literal(version)), 1)
      self.assertEqual(hits(ODMLNS.hasDate, Literal(date, datatype=XSD.date)), 1)
      self.assertEqual(hits(ODMLNS.hasAuthor, Literal(author)), 1)

      # Section
      self.assertEqual(hits(ODMLNS.hasName, Literal("s1")), 1)
      self.assertEqual(hits(ODMLNS.hasType, Literal("t1")), 1)
      self.assertEqual(hits(ODMLNS.hasDefinition, Literal(s_def)), 1)
      self.assertEqual(hits(ODMLNS.hasReference, Literal(s_ref)), 1)

      # Property
      self.assertEqual(hits(ODMLNS.hasName, Literal(p_name)), 1)
      self.assertEqual(hits(ODMLNS.hasUnit, Literal(p_unit)), 1)
      self.assertEqual(hits(ODMLNS.hasDefinition, Literal(p_def)), 1)
      self.assertEqual(hits(ODMLNS.hasUncertainty, Literal(p_uncertainty)), 1)
      self.assertEqual(hits(ODMLNS.hasDtype, Literal(p_dtype)), 1)
      self.assertEqual(hits(ODMLNS.hasValueOrigin, Literal(p_value_origin)), 1)
      self.assertEqual(hits(ODMLNS.hasReference, Literal(p_ref)), 1)
  def test_get_rdf_string(self):
      """get_rdf_str() works without arguments and raises ValueError for "abc"."""
      writer = RDFWriter([self.doc1])
      writer.get_rdf_str()

      self.assertRaises(ValueError, writer.get_rdf_str, "abc")
  def test_rdf_subclassing_switch(self):
      """
      Test the RDF section subclassing switch.
      """
      # Minimal document with one section of type "cell"; this term is
      # defined in odml/resources/section_subclasses.yaml and is converted
      # to an RDF Section Subclass of Class "Cell".
      doc = odml.Document()
      _ = odml.Section(name="test_subclassing", type="cell", parent=doc)

      # Subclassing is active by default.
      default_output = RDFWriter([doc]).get_rdf_str()
      self.assertIn("odml:Cell", default_output)

      # Subclassing can be switched off.
      plain_output = RDFWriter([doc], rdf_subclassing=False).get_rdf_str()
      self.assertNotIn("odml:Cell", plain_output)
  def test_rdf_custom_subclasses(self):
      """
      Test the custom subclass option of the odml RDF subclassing feature.

      Covers unusable custom_subclasses values, values containing
      whitespace, additional custom subclasses and the overwrite of a
      default subclass.
      """
      # Create minimal document
      doc = odml.Document()
      _ = odml.Section(name="test_subclassing", type="cell", parent=doc)

      # None and a non-dict value both keep the default subclass in the output.
      for unusable in (None, ["invalid"]):
          writer = RDFWriter([doc], custom_subclasses=unusable)
          self.assertIn("odml:Cell", writer.get_rdf_str())

      # Values containing a blank, newline, tab or carriage return are rejected.
      invalid_dict = {
          "type_1": "This should",
          "type_2": "fail\nin",
          "type_3": "the\tmost",
          "type_4": "complete\rway",
      }
      self.assertRaises(ValueError, RDFWriter, [doc], custom_subclasses=invalid_dict)

      # A custom subclass is written in addition to the default one.
      doc = odml.Document()
      _ = odml.Section(name="test_subclassing", type="cell", parent=doc)
      _ = odml.Section(name="test_custom_subclassing", type="species", parent=doc)

      writer = RDFWriter([doc], custom_subclasses={"species": "Species"})
      self.assertIn("odml:Cell", writer.get_rdf_str())
      self.assertIn("odml:Species", writer.get_rdf_str())

      # A custom entry for an existing type replaces the default subclass.
      doc = odml.Document()
      _ = odml.Section(name="test_subclassing", type="cell", parent=doc)

      # NOTE(review): the nesting below is reconstructed -- block
      # indentation was lost in this copy of the file.
      if version_info > (3, 4):
          with self.assertWarns(UserWarning):
              writer = RDFWriter([doc], custom_subclasses={"cell": "Neuron"})
              self.assertNotIn("odml:Cell", writer.get_rdf_str())
              self.assertIn("odml:Neuron", writer.get_rdf_str())
  def test_rdf_subclassing_definitions(self):
      """
      Test that RDF Subclass definitions are written to the resulting graph.

      Covers default subclassing, the uniqueness of a definition when the
      same section type occurs several times, custom subclassing and
      inactive subclassing.
      """
      cell_def = "odml:Cell a rdfs:Class ; rdfs:subClassOf odml:Section"
      species_def = "odml:Species a rdfs:Class ; rdfs:subClassOf odml:Section"
      section_def = "odml:Section a rdfs:Class"

      def flat_rdf(writer):
          # Collapse all whitespace runs to single blanks so the expected
          # snippets match regardless of line breaks in the serialization.
          return " ".join(writer.get_rdf_str().split())

      # -- Test default subclassing
      doc = odml.Document()
      _ = odml.Section(name="test_subclassing", type="cell", parent=doc)

      curr_str = flat_rdf(RDFWriter([doc]))
      self.assertIn(cell_def, curr_str)
      self.assertIn(section_def, curr_str)

      # -- Test multiple entries; a definition should only occur once in an RDF document
      doc = odml.Document()
      sec = odml.Section(name="test_subclassing", type="cell", parent=doc)
      sub_sec = odml.Section(name="test_subclassing", type="cell", parent=sec)
      _ = odml.Section(name="test_subclassing", type="cell", parent=sub_sec)

      curr_str = flat_rdf(RDFWriter([doc]))
      # Counts are compared by value; an identity check on integers only
      # works by accident of interpreter-level integer caching.
      self.assertIn(cell_def, curr_str)
      self.assertEqual(curr_str.count(cell_def), 1)
      self.assertIn(section_def, curr_str)
      self.assertEqual(curr_str.count(section_def), 1)

      # -- Test custom subclassing
      type_custom_class = "species"
      custom_class_dict = {type_custom_class: "Species"}

      doc = odml.Document()
      _ = odml.Section(name="test_subclassing", type="cell", parent=doc)
      _ = odml.Section(name="test_custom_subclassing", type=type_custom_class, parent=doc)

      curr_str = flat_rdf(RDFWriter([doc], custom_subclasses=custom_class_dict))
      self.assertIn(cell_def, curr_str)
      self.assertIn(species_def, curr_str)
      self.assertIn(section_def, curr_str)

      # -- Test inactive subclassing
      doc = odml.Document()
      _ = odml.Section(name="test_subclassing", type="cell", parent=doc)

      curr_str = flat_rdf(RDFWriter([doc], rdf_subclassing=False))
      self.assertNotIn(section_def, curr_str)
      self.assertNotIn(cell_def, curr_str)
  def test_rdf_subclassing_queries(self):
      """
      Test the proper implementation of the RDF subclassing feature. Tests ensure, that queries
      relying on RDF Subclasses return appropriate results.

      The test is reported as skipped when the interpreter does not satisfy
      the version condition under which the owlrl reasoner is imported.
      """
      # DeductiveClosure / RDFS_Semantics are only imported for Python
      # versions above 3.4; report a skip instead of passing untested.
      if not version_info > (3, 4):
          self.skipTest("RDFS inference via owlrl requires Python > 3.4")

      namespace_map = {"odml": Namespace(ODML_NS), "rdf": RDF, "rdfs": RDFS}

      def inferred_graph(writer):
          # Populate the writer graph and expand it with RDFS inference.
          _ = writer.get_rdf_str()
          graph = writer.graph
          DeductiveClosure(RDFS_Semantics).expand(graph)
          return graph

      def subjects_of(graph, rdf_class):
          # Return the subjects of all rows matching 'rdf:type <rdf_class>'.
          q_string = "SELECT * WHERE {?s rdf:type %s .}" % rdf_class
          curr_query = prepareQuery(q_string, initNs=namespace_map)
          return [row.s for row in graph.query(curr_query)]

      # -- Test default subclassing queries
      doc = odml.Document()
      _ = odml.Section(name="test_subclass", type="cell", parent=doc)
      _ = odml.Section(name="test_regular_class", type="regular", parent=doc)
      use_graph = inferred_graph(RDFWriter([doc]))

      # Make sure the query finds two sections
      result_section = subjects_of(use_graph, "odml:Section")
      self.assertEqual(len(result_section), 2)

      # The subclass instance must also be found as a Section.
      result_cell = subjects_of(use_graph, "odml:Cell")
      self.assertEqual(len(result_cell), 1)
      for subject in result_cell:
          self.assertIn(subject, result_section)

      # -- Test custom subclassing queries
      type_custom_class = "species"
      type_overwrite_class = "cell"
      custom_class_dict = {type_custom_class: "Species", type_overwrite_class: "Neuron"}

      doc = odml.Document()
      sec = odml.Section(name="test_subclass", type="species", parent=doc)
      _ = odml.Section(name="test_subclass_overwrite", type="cell", parent=sec)
      _ = odml.Section(name="test_regular_class", type="regular", parent=sec)
      use_graph = inferred_graph(RDFWriter([doc], custom_subclasses=custom_class_dict))

      # Make sure the query finds three sections
      result_section = subjects_of(use_graph, "odml:Section")
      self.assertEqual(len(result_section), 3)

      # Custom class 'Species' should be found.
      result_species = subjects_of(use_graph, "odml:Species")
      self.assertEqual(len(result_species), 1)
      for subject in result_species:
          self.assertIn(subject, result_section)

      # Custom class 'Neuron' should be found.
      result_neuron = subjects_of(use_graph, "odml:Neuron")
      self.assertEqual(len(result_neuron), 1)
      for subject in result_neuron:
          self.assertIn(subject, result_section)

      # Default class 'Cell' was replaced and should not return any result.
      self.assertEqual(len(subjects_of(use_graph, "odml:Cell")), 0)

      # -- Test inactivated subclassing
      doc = odml.Document()
      _ = odml.Section(name="test_regular_class", type="regular", parent=doc)
      _ = odml.Section(name="test_subclass", type="cell", parent=doc)
      use_graph = inferred_graph(RDFWriter([doc], rdf_subclassing=False))

      self.assertEqual(len(subjects_of(use_graph, "odml:Section")), 2)
      self.assertEqual(len(subjects_of(use_graph, "odml:Cell")), 0)