Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

query_creator.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. import re
  2. from abc import ABCMeta, abstractmethod
  3. from rdflib import Namespace, RDF
  4. from rdflib.plugins.sparql import prepareQuery
  5. from ..format import Document
  6. from ..format import Property
  7. from ..format import Section
  8. class BaseQueryCreator:
  9. __metaclass__ = ABCMeta
  10. possible_query_variables = {'d': 'Document', 's': 'Section',
  11. 'p': 'Property', 'v': 'Bag URI', 'value': 'Value'}
  12. possible_q_dict_keys = ['Doc', 'Sec', 'Prop']
  13. def __init__(self, q_dict=None):
  14. """
  15. :param q_dict: dictionary with query parameters
  16. """
  17. self.q_dict = q_dict if q_dict else {}
  18. self.query = ''
  19. super(BaseQueryCreator, self).__init__()
  20. @abstractmethod
  21. def get_query(self, q_str, q_parser):
  22. pass
  23. @abstractmethod
  24. def _prepare_query(self):
  25. pass
  26. class BaseQueryParser:
  27. __metaclass__ = ABCMeta
  28. def __init__(self):
  29. self.q_dict = {}
  30. @abstractmethod
  31. def parse_query_string(self, q_str):
  32. pass
  33. class QueryParserFuzzy(BaseQueryParser):
  34. def __init__(self):
  35. super(QueryParserFuzzy, self).__init__()
  36. def parse_query_string(self, q_str):
  37. """
  38. Parse query string and returns dict object with parameters.
  39. :param q_str: query string.
  40. Example: FIND sec(name, type) prop(type) HAVING Stimulus, Contrast
  41. :return: dict object.
  42. Example: {'Sec': ['name', 'type'],
  43. 'Doc': ['author'],
  44. 'Search': ['Stimulus', 'Contrast']}
  45. """
  46. self.q_dict = {}
  47. find_pattern = re.compile("FIND(.*?)HAVING")
  48. find_group = re.search(find_pattern, q_str).group(1).strip()
  49. if find_group:
  50. self._parse_find(find_group)
  51. having_pattern = re.compile("HAVING(.*)")
  52. having_group = re.search(having_pattern, q_str).group(1).strip()
  53. if having_group:
  54. if 'Search' in self.q_dict.keys():
  55. raise ValueError('Search values are already parsed')
  56. self._parse_having(having_group)
  57. else:
  58. raise ValueError('Search values in having part were not specified')
  59. return self.q_dict
  60. def _parse_find(self, find_part):
  61. """
  62. Parses find string part into list of specific keys to whih search values would be apllied
  63. e.g. 'sec(name, type) prop(name)' into {'Sec': ['name', 'type'], 'Prop': ['name']} .
  64. :param find_part: string which represent list of searchable odML data model objects
  65. like document(doc), sections(sec) or properties(prop).
  66. e.g. 'sec(name, type) prop(name)'
  67. """
  68. doc_pattern = re.compile("(doc|document)\(.*?\)")
  69. doc = re.search(doc_pattern, find_part)
  70. if doc:
  71. self._parse_doc(doc)
  72. sec_pattern = re.compile("(sec|section)\(.*?\)")
  73. sec = re.search(sec_pattern, find_part)
  74. if sec:
  75. self._parse_sec(sec)
  76. prop_pattern = re.compile("(prop|property)\(.*?\)")
  77. prop = re.search(prop_pattern, find_part)
  78. if prop:
  79. self._parse_prop(prop)
  80. def _parse_doc(self, doc):
  81. p = re.compile("[\(|, ](id|author|date|version|repository|sections)[\)|,]")
  82. if doc:
  83. self.q_dict['Doc'] = re.findall(p, doc.group(0))
  84. def _parse_sec(self, sec):
  85. p = re.compile("[\(|, ](id|name|definition|type|repository|reference|sections|properties)[\)|,]")
  86. if sec:
  87. self.q_dict['Sec'] = re.findall(p, sec.group(0))
  88. def _parse_prop(self, prop):
  89. p = re.compile("[\(|, ](id|name|definition|dtype|unit|uncertainty|reference|value_origin)[\)|,]")
  90. if prop:
  91. self.q_dict['Prop'] = re.findall(p, prop.group(0))
  92. def _parse_having(self, having_part):
  93. """
  94. Parses search value string into list of specific values
  95. e.g. 'Stimulus, Contrast, Date' into list [Stimulus, Contrast, Date].
  96. :param having_part: string with search values, e.g. 'Stimulus, Contrast'
  97. Also spaces errors in the string like 'Stimulus, , Contrast' will be ignored.
  98. """
  99. search_values_list = []
  100. search_params = re.compile("(.*?)(?:,|$)")
  101. if having_part:
  102. search_values = re.findall(search_params, having_part)
  103. for v in search_values:
  104. if v.strip():
  105. search_values_list.append(v.strip())
  106. self.q_dict['Search'] = search_values_list
  107. class QueryParser(BaseQueryParser):
  108. def __init__(self):
  109. super(QueryParser, self).__init__()
  110. def parse_query_string(self, q_str):
  111. """
  112. :param q_str: query string
  113. Example: doc(author:D. N. Adams) section(name:Stimulus) prop(name:Contrast, value:20, unit:%)
  114. :return: dict object
  115. Example: {'Sec': [('name', 'Stimulus')],
  116. 'Doc': [('author', 'D. N. Adams')],
  117. 'Prop': [('name', 'Contrast'), ('value':[20]), ('unit':'%')]}
  118. """
  119. doc_pattern = re.compile("(doc|document)\(.*?\)")
  120. doc = re.search(doc_pattern, q_str)
  121. if doc:
  122. self._parse_doc(doc)
  123. sec_pattern = re.compile("(sec|section)\(.*?\)")
  124. sec = re.search(sec_pattern, q_str)
  125. if sec:
  126. self._parse_sec(sec)
  127. prop_pattern = re.compile("(prop|property)\(.*?\)")
  128. prop = re.search(prop_pattern, q_str)
  129. if prop:
  130. self._parse_prop(prop)
  131. return self.q_dict
  132. def _parse_doc(self, doc):
  133. p = re.compile("[, |\(](id|author|date|version|repository|sections):(.*?)[,|\)]")
  134. if doc:
  135. self.q_dict['Doc'] = re.findall(p, doc.group(0))
  136. def _parse_sec(self, sec):
  137. p = re.compile("[, |\(](id|name|definition|type|repository|reference|sections|properties):(.*?)[,|\)]")
  138. if sec:
  139. self.q_dict['Sec'] = re.findall(p, sec.group(0))
  140. def _parse_prop(self, prop):
  141. p = re.compile("[, |\(](id|name|definition|dtype|unit|uncertainty|reference|value_origin):(.*?)[,|\)]")
  142. if prop:
  143. self.q_dict['Prop'] = re.findall(p, prop.group(0))
  144. p_value = re.compile("value:\[(.*)]")
  145. value_group = re.findall(p_value, prop.group(0))
  146. if value_group:
  147. values = re.split(", ?", value_group[0])
  148. self.q_dict['Prop'].append(('value', values))
  149. class QueryCreator(BaseQueryCreator):
  150. """
  151. Class for simplifying the creation of prepared SPARQL queries
  152. Usage:
  153. q = "doc(author:D. N. Adams) section(name:Stimulus) prop(name:Contrast, value:20, unit:%)"
  154. prepared_query = QueryCreator().get_query(q, QueryParser())
  155. q = "FIND sec(name, type) prop(name) HAVING Recording, Recording-2012-04-04-ab, Date"
  156. prepared_query = QueryCreator().get_query(q, QueryParser2())
  157. """
  158. def __init__(self, q_dict=None):
  159. """
  160. :param q_dict: dictionary with query parameters
  161. """
  162. super(QueryCreator, self).__init__(q_dict)
  163. def get_query(self, q_str=None, q_parser=None):
  164. """
  165. :param q_parser: one of possible query parsers.
  166. :param q_str: doc(author:D. N. Adams) section(name:Stimulus) prop(name:Contrast, value:20, unit:%)
  167. :return rdflib prepare query.
  168. """
  169. # TODO find out if the validation for the q_str is important
  170. # We can possibly warn about not used parts and print the parsed dictionary
  171. if not self.q_dict:
  172. if not q_str:
  173. raise AttributeError("Please fulfill q_str param (query string)")
  174. elif not q_parser:
  175. raise AttributeError("Please fulfill q_parser param (query parser)")
  176. self.q_dict = q_parser.parse_query_string(q_str)
  177. self._prepare_query()
  178. return prepareQuery(self.query, initNs={"odml": Namespace("https://g-node.org/projects/odml-rdf#"),
  179. "rdf": RDF})
  180. def _prepare_query(self):
  181. """
  182. Creates rdflib query using parameters from self.q_dict.
  183. :return: string representing rdflib query.
  184. """
  185. odml_uri = "https://g-node.org/projects/odml-rdf#"
  186. self.query = 'SELECT * WHERE {\n'
  187. if 'Doc' in self.q_dict.keys():
  188. doc_attrs = self.q_dict['Doc']
  189. if len(doc_attrs) > 0:
  190. self.query += '?d rdf:type odml:Document .\n'
  191. for i in doc_attrs:
  192. if len(i) > 2:
  193. raise ValueError("Attributes in the query \"{}\" are not valid.".format(i))
  194. else:
  195. attr = Document.rdf_map(i[0])
  196. if attr:
  197. self.query += '?d {0} \"{1}\" .\n'.format(re.sub(odml_uri,
  198. "odml:", attr), i[1])
  199. if 'Sec' in self.q_dict.keys():
  200. sec_attrs = self.q_dict['Sec']
  201. if len(sec_attrs) > 0:
  202. self.query += '?d odml:hasSection ?s .\n' \
  203. '?s rdf:type odml:Section .\n'
  204. for i in sec_attrs:
  205. if len(i) > 2:
  206. raise ValueError("Attributes in the query \"{}\" are not valid.".format(i))
  207. else:
  208. attr = Section.rdf_map(i[0])
  209. if attr:
  210. self.query += '?s {0} \"{1}\" .\n'.format(re.sub(odml_uri,
  211. "odml:", attr), i[1])
  212. if 'Prop' in self.q_dict.keys():
  213. prop_attrs = self.q_dict['Prop']
  214. if len(prop_attrs) > 0:
  215. self.query += '?s odml:hasProperty ?p .\n' \
  216. '?p rdf:type odml:Property .\n'
  217. for i in prop_attrs:
  218. if len(i) > 2:
  219. raise ValueError("Attributes in the query \"{}\" are not valid.".format(i))
  220. elif i[0] == 'value':
  221. values = i[1]
  222. if values:
  223. self.query += "?p odml:hasValue ?v .\n?v rdf:type rdf:Bag .\n"
  224. for v in values:
  225. self.query += '?v rdf:li \"{}\" .\n'.format(v)
  226. else:
  227. attr = Property.rdf_map(i[0])
  228. if attr:
  229. self.query += '?p {0} \"{1}\" .\n'.format(re.sub(odml_uri,
  230. "odml:", attr), i[1])
  231. self.query += '}\n'
  232. return self.query